Update abx dependencies and plugin test harness

2026-04-06 07:47:53 +10:00 · 2026-03-15 04:37:32 -07:00
parent ecb1764590
commit 4fa701fafe
14 changed files with 763 additions and 650 deletions
--- a/.github/workflows/test-parallel.yml
+++ b/.github/workflows/test-parallel.yml
@@ -28,13 +28,7 @@ jobs:
        id: set-matrix
        run: |
          # Find all main test files
-          main_tests=$(find tests -maxdepth 1 -name "test_*.py" -type f | sort)
+          all_tests=$(find archivebox/tests -maxdepth 1 -name "test_*.py" -type f | sort)
          # Find all plugin test files
          plugin_tests=$(find archivebox/plugins -path "*/tests/test_*.py" -type f | sort)
          # Combine and format as JSON array
          all_tests=$(echo "$main_tests $plugin_tests" | tr ' ' '\n' | grep -v '^$')
          # Create JSON array with test file info
          json_array="["
@@ -47,13 +41,7 @@ jobs:
            fi
            # Extract a display name for the test
            if [[ $test_file == tests/* ]]; then
            name="main/$(basename $test_file .py | sed 's/^test_//')"
            else
              plugin=$(echo $test_file | sed 's|archivebox/plugins/\([^/]*\)/.*|\1|')
              test_name=$(basename $test_file .py | sed 's/^test_//')
              name="plugin/$plugin/$test_name"
            fi
            json_array+="{\"path\":\"$test_file\",\"name\":\"$name\"}"
          done
@@ -111,9 +99,56 @@ jobs:
      - name: Install dependencies with uv
        run: |
-          uv sync --dev --all-extras
+          uv sync --dev --all-extras --no-sources
      - name: Run test - ${{ matrix.test.name }}
        run: |
          mkdir -p tests/out
          uv run pytest -xvs "${{ matrix.test.path }}" --basetemp=tests/out --ignore=archivebox/pkgs
  # Dedicated job that runs the plugin test suite (bin/test_plugins.sh)
  # on a single Ubuntu 22.04 runner with Python 3.13 and Node 22.
  plugin-tests:
    name: Plugin tests
    runs-on: ubuntu-22.04
    steps:
      # Shallow checkout (latest commit only), including git submodules.
      - uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 1
      - name: Set up Python 3.13
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is passed as a string, not a YAML float.
          python-version: "3.13"
          architecture: x64
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"
      - name: Set up Node JS
        uses: actions/setup-node@v4
        with:
          node-version: 22
      # Reuse uv's download cache between runs. The key changes whenever
      # pyproject.toml or uv.lock changes; restore-keys falls back to the
      # most recent cache sharing the OS/Python prefix.
      - name: Cache uv
        uses: actions/cache@v3
        with:
          path: ~/.cache/uv
          key: ${{ runner.os }}-3.13-uv-${{ hashFiles('pyproject.toml', 'uv.lock') }}
          restore-keys: |
            ${{ runner.os }}-3.13-uv-
      # Install (and cache) the system packages listed below via apt.
      - uses: awalsh128/cache-apt-pkgs-action@latest
        with:
          packages: git ripgrep build-essential python3-dev python3-setuptools libssl-dev libldap2-dev libsasl2-dev zlib1g-dev libatomic1 python3-minimal gnupg2 curl wget python3-ldap python3-msgpack python3-mutagen python3-regex python3-pycryptodome procps
          # NOTE(review): unquoted 1.1 is a YAML float; presumably the action
          # receives it as the string "1.1" -- consider quoting to be explicit.
          version: 1.1
      - name: Install dependencies with uv
        run: |
          # --no-sources: ignore [tool.uv.sources] overrides when resolving.
          uv sync --dev --all-extras --no-sources
      - name: Run plugin tests
        run: |
          # Run the plugin test script inside the uv-managed environment,
          # passing its --no-coverage flag.
          uv run bash ./bin/test_plugins.sh --no-coverage
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -30,63 +30,33 @@ jobs:
          python-version: ${{ matrix.python }}
          architecture: x64
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"
      - name: Set up Node JS
        uses: actions/setup-node@v4
        with:
-          node-version: 20.10.0
+          node-version: 22
      - name: Setup PDM
        uses: pdm-project/setup-pdm@v3
        with:
          python-version: '3.13'
          cache: true
      ### Install Python & JS Dependencies
-      - name: Get pip cache dir
+      - name: Cache uv
        id: pip-cache
        run: |
          echo "::set-output name=dir::$(pip cache dir)"
      - name: Cache pip
        uses: actions/cache@v3
        id: cache-pip
        with:
-          path: ${{ steps.pip-cache.outputs.dir }}
+          path: ~/.cache/uv
-          key: ${{ runner.os }}-${{ matrix.python }}-venv-${{ hashFiles('setup.py') }}
+          key: ${{ runner.os }}-${{ matrix.python }}-uv-${{ hashFiles('pyproject.toml', 'uv.lock') }}
          restore-keys: |
-            ${{ runner.os }}-${{ matrix.python }}-venv-
+            ${{ runner.os }}-${{ matrix.python }}-uv-
      - uses: awalsh128/cache-apt-pkgs-action@latest
        with:
          packages: ripgrep build-essential python3-dev python3-setuptools libssl-dev libldap2-dev libsasl2-dev zlib1g-dev libatomic1 python3-minimal gnupg2 curl wget python3-ldap python3-msgpack python3-mutagen python3-regex python3-pycryptodome procps
          version: 1.0
-      - name: Install pip dependencies
+      - name: Install dependencies with uv
        run: |
-          python -m pip install --upgrade pip setuptools wheel pytest bottle build
+          uv sync --dev --all-extras --no-sources
          python -m pip install -r requirements.txt
          python -m pip install -e .[sonic,ldap]
      - name: Get npm cache dir
        id: npm-cache
        run: |
          echo "::set-output name=dir::$GITHUB_WORKSPACE/node_modules"
      - name: Cache npm
        uses: actions/cache@v3
        id: cache-npm
        with:
          path: ${{ steps.npm-cache.outputs.dir }}
          key: ${{ runner.os }}-node_modules-${{ hashFiles('package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-node_modules
      - name: Install npm requirements
        run: |
          npm install
          echo "SINGLEFILE_BINARY=$GITHUB_WORKSPACE/node_modules/.bin/single-file" >> $GITHUB_ENV
          echo "READABILITY_BINARY=$GITHUB_WORKSPACE/node_modules/.bin/readability-extractor" >> $GITHUB_ENV
          echo "MERCURY_BINARY=$GITHUB_WORKSPACE/node_modules/.bin/mercury-parser" >> $GITHUB_ENV
      ### Run the tests
      - name: Directory listing for debugging
@@ -96,13 +66,20 @@ jobs:
      - name: Archivebox version
        run: |
-          archivebox version
+          mkdir -p tests/out/data
          DATA_DIR="$PWD/tests/out/data" uv run archivebox version
      - name: Test built package with pytest
        # TODO: remove this exception for windows once we get tests passing on that platform
        if: ${{ !contains(matrix.os, 'windows') }}
        run: |
-          python -m pytest -s --basetemp=tests/out --ignore=archivebox/pkgs
+          mkdir -p tests/out
          uv run pytest -s archivebox/tests --basetemp=tests/out --ignore=archivebox/pkgs
      - name: Run plugin tests
        if: ${{ !contains(matrix.os, 'windows') }}
        run: |
          uv run bash ./bin/test_plugins.sh --no-coverage
  docker_tests:
    runs-on: ubuntu-latest
--- a/archivebox/core/forms.py
+++ b/archivebox/core/forms.py
@@ -147,8 +147,8 @@ class AddLinkForm(forms.Form):
            'screenshot', 'seo', 'singlefile', 'ssl', 'staticfile', 'title'
        }
        archiving = {
-            'archivedotorg', 'favicon', 'forumdl', 'gallerydl', 'git',
+            'archivedotorg', 'defuddle', 'favicon', 'forumdl', 'gallerydl', 'git',
-            'htmltotext', 'media', 'mercury', 'papersdl', 'readability', 'wget'
+            'htmltotext', 'mercury', 'papersdl', 'readability', 'trafilatura', 'wget', 'ytdlp'
        }
        parsing = {
            'parse_html_urls', 'parse_jsonl_urls',
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -2185,7 +2185,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
 # Snapshot State Machine
 # =============================================================================
-class SnapshotMachine(BaseStateMachine, strict_states=True):
+class SnapshotMachine(BaseStateMachine):
    """
    State machine for managing Snapshot lifecycle.
@@ -3074,7 +3074,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
 # ArchiveResult State Machine
 # =============================================================================
-class ArchiveResultMachine(BaseStateMachine, strict_states=True):
+class ArchiveResultMachine(BaseStateMachine):
    """
    State machine for managing ArchiveResult (single plugin execution) lifecycle.
--- a/archivebox/crawls/models.py
+++ b/archivebox/crawls/models.py
@@ -506,7 +506,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
 # State Machines
 # =============================================================================
-class CrawlMachine(BaseStateMachine, strict_states=True):
+class CrawlMachine(BaseStateMachine):
    """
    State machine for managing Crawl lifecycle.
--- a/archivebox/hooks.py
+++ b/archivebox/hooks.py
@@ -22,13 +22,13 @@ Execution order:
    - Failed extractors don't block subsequent extractors
 Hook Naming Convention:
-    on_{ModelName}__{run_order}_{description}[.bg].{ext}
+    on_{ModelName}__{run_order}_{description}[.finite.bg|.daemon.bg].{ext}
    Examples:
        on_Snapshot__00_setup.py         # runs first
-        on_Snapshot__10_chrome_tab.bg.js # background (doesn't block)
+        on_Snapshot__10_chrome_tab.daemon.bg.js # background (doesn't block)
        on_Snapshot__50_screenshot.js    # foreground (blocks)
-        on_Snapshot__63_media.bg.py      # background (long-running)
+        on_Snapshot__63_media.finite.bg.py      # background (long-running)
 Dependency handling:
    Extractor plugins that depend on other plugins' output should check at runtime:
@@ -108,19 +108,34 @@ def is_background_hook(hook_name: str) -> bool:
    Background hooks have '.bg.' in their filename before the extension.
    Args:
-        hook_name: Hook filename (e.g., 'on_Snapshot__10_chrome_tab.bg.js')
+        hook_name: Hook filename (e.g., 'on_Snapshot__10_chrome_tab.daemon.bg.js')
    Returns:
        True if background hook, False if foreground.
    Examples:
-        is_background_hook('on_Snapshot__10_chrome_tab.bg.js') -> True
+        is_background_hook('on_Snapshot__10_chrome_tab.daemon.bg.js') -> True
        is_background_hook('on_Snapshot__50_wget.py') -> False
-        is_background_hook('on_Snapshot__63_media.bg.py') -> True
+        is_background_hook('on_Snapshot__63_media.finite.bg.py') -> True
    """
    return '.bg.' in hook_name or '__background' in hook_name
 def iter_plugin_dirs() -> List[Path]:
    """
    Collect every plugin directory under the built-in and user plugin roots.

    Roots that do not exist are skipped. Only immediate subdirectories are
    returned, and any entry whose name starts with '_' is excluded.

    Returns:
        List of plugin directory paths: entries from BUILTIN_PLUGINS_DIR
        first, then USER_PLUGINS_DIR, each in the order Path.iterdir()
        yields them (not sorted here).
    """
    plugin_dirs: List[Path] = []
    for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
        # A missing root is simply skipped rather than raising.
        if not base_dir.exists():
            continue
        for plugin_dir in base_dir.iterdir():
            # Keep directories only; '_'-prefixed names are filtered out
            # (presumably private/shared or cache dirs -- TODO confirm intent).
            if plugin_dir.is_dir() and not plugin_dir.name.startswith('_'):
                plugin_dirs.append(plugin_dir)
    return plugin_dirs
 class HookResult(TypedDict, total=False):
    """Raw result from run_hook()."""
    returncode: int
@@ -420,7 +435,7 @@ def run_hook(
    output_dir.mkdir(parents=True, exist_ok=True)
    # Detect if this is a background hook (long-running daemon)
-    # New convention: .bg. suffix (e.g., on_Snapshot__21_consolelog.bg.js)
+    # Background hooks use the .daemon.bg. or .finite.bg. filename convention.
    # Old convention: __background in stem (for backwards compatibility)
    is_background = '.bg.' in script.name or '__background' in script.stem
@@ -581,28 +596,20 @@ def run_hooks(
@lru_cache(maxsize=1)
 def get_plugins() -> List[str]:
    """
-    Get list of available plugins by discovering Snapshot hooks.
+    Get list of available plugins by discovering plugin directories.
-    Returns plugin names (directory names) that contain on_Snapshot hooks.
+    Returns plugin directory names for any plugin that exposes hooks, config.json,
-    The plugin name is the plugin directory name, not the hook script name.
+    or a standardized templates/icon.html asset. This includes non-extractor
-
+    plugins such as binary providers and shared base plugins.
    Example:
    abx_plugins/plugins/chrome/on_Snapshot__10_chrome_tab.daemon.bg.js
    -> plugin = 'chrome'
    Sorted alphabetically (plugins control their hook order via numeric prefixes in hook names).
    """
    plugins = []
-    for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
+    for plugin_dir in iter_plugin_dirs():
-        if not base_dir.exists():
+        has_hooks = any(plugin_dir.glob('on_*__*.*'))
-            continue
+        has_config = (plugin_dir / 'config.json').exists()
-
+        has_icon = (plugin_dir / 'templates' / 'icon.html').exists()
-        for ext in ('sh', 'py', 'js'):
+        if has_hooks or has_config or has_icon:
-            for hook_path in base_dir.glob(f'*/on_Snapshot__*.{ext}'):
+            plugins.append(plugin_dir.name)
                # Use plugin directory name as plugin name
                plugin_name = hook_path.parent.name
                plugins.append(plugin_name)
    return sorted(set(plugins))
@@ -808,13 +815,7 @@ def discover_plugin_configs() -> Dict[str, Dict[str, Any]]:
    """
    configs = {}
-    for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
+    for plugin_dir in iter_plugin_dirs():
        if not base_dir.exists():
            continue
        for plugin_dir in base_dir.iterdir():
            if not plugin_dir.is_dir():
                continue
        config_path = plugin_dir / 'config.json'
        if not config_path.exists():
@@ -1002,14 +1003,7 @@ def get_plugin_template(plugin: str, template_name: str, fallback: bool = True)
    if base_name in ('yt-dlp', 'youtube-dl'):
        base_name = 'ytdlp'
-    for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
+    for plugin_dir in iter_plugin_dirs():
        if not base_dir.exists():
            continue
        # Look for plugin directory matching plugin name
        for plugin_dir in base_dir.iterdir():
            if not plugin_dir.is_dir():
                continue
        # Match by directory name (exact or partial)
        if plugin_dir.name == base_name or plugin_dir.name.endswith(f'_{base_name}'):
@@ -1068,13 +1062,7 @@ def discover_plugin_templates() -> Dict[str, Dict[str, str]]:
    """
    templates: Dict[str, Dict[str, str]] = {}
-    for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
+    for plugin_dir in iter_plugin_dirs():
        if not base_dir.exists():
            continue
        for plugin_dir in base_dir.iterdir():
            if not plugin_dir.is_dir():
                continue
        templates_dir = plugin_dir / 'templates'
        if not templates_dir.exists():
--- a/archivebox/machine/migrations/0001_initial.py
+++ b/archivebox/machine/migrations/0001_initial.py
@@ -169,7 +169,7 @@ class Migration(migrations.Migration):
                        ('modified_at', models.DateTimeField(auto_now=True)),
                        ('name', models.CharField(blank=True, db_index=True, default='', max_length=63)),
                        ('binproviders', models.CharField(blank=True, default='env', help_text='Comma-separated list of allowed providers: apt,brew,pip,npm,env', max_length=127)),
-                        ('overrides', models.JSONField(blank=True, default=dict, help_text="Provider-specific overrides: {'apt': {'packages': ['pkg']}, ...}")),
+                        ('overrides', models.JSONField(blank=True, default=dict, help_text="Provider-specific overrides: {'apt': {'install_args': ['pkg']}, ...}")),
                        ('binprovider', models.CharField(blank=True, default='', help_text='Provider that successfully installed this binary', max_length=31)),
                        ('abspath', models.CharField(blank=True, default='', max_length=255)),
                        ('version', models.CharField(blank=True, default='', max_length=32)),
--- a/archivebox/machine/models.py
+++ b/archivebox/machine/models.py
@@ -227,7 +227,7 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
    binproviders = models.CharField(max_length=127, default='env', null=False, blank=True,
        help_text="Comma-separated list of allowed providers: apt,brew,pip,npm,env")
    overrides = models.JSONField(default=dict, blank=True,
-        help_text="Provider-specific overrides: {'apt': {'packages': ['pkg']}, ...}")
+        help_text="Provider-specific overrides: {'apt': {'install_args': ['pkg']}, ...}")
    # Installation results (populated after installation)
    binprovider = models.CharField(max_length=31, default='', null=False, blank=True,
@@ -2042,7 +2042,7 @@ class Process(models.Model):
 # Binary State Machine
 # =============================================================================
-class BinaryMachine(BaseStateMachine, strict_states=True):
+class BinaryMachine(BaseStateMachine):
    """
    State machine for managing Binary installation lifecycle.
@@ -2133,7 +2133,7 @@ class BinaryMachine(BaseStateMachine, strict_states=True):
 # Process State Machine
 # =============================================================================
-class ProcessMachine(BaseStateMachine, strict_states=True):
+class ProcessMachine(BaseStateMachine):
    """
    State machine for managing Process (OS subprocess) lifecycle.
--- a/archivebox/tests/test_cli_run_binary_worker.py
+++ b/archivebox/tests/test_cli_run_binary_worker.py
@@ -129,7 +129,7 @@ class TestBinaryWorkerHooks:
    """Tests for specific Binary hook providers."""
    def test_env_provider_hook_detects_system_binary(self, initialized_archive):
-        """on_Binary__15_env_install.py hook detects system binaries."""
+        """on_Binary__15_env_discover.py hook detects system binaries."""
        binary_record = {
            'type': 'Binary',
            'name': 'python3',
--- a/archivebox/tests/test_hooks.py
+++ b/archivebox/tests/test_hooks.py
@@ -27,39 +27,33 @@ class TestBackgroundHookDetection(unittest.TestCase):
    def test_bg_js_suffix_detected(self):
        """Hooks with .bg.js suffix should be detected as background."""
-        script = Path('/path/to/on_Snapshot__21_consolelog.bg.js')
+        from archivebox.hooks import is_background_hook
-        is_background = '.bg.' in script.name or '__background' in script.stem
+        self.assertTrue(is_background_hook('on_Snapshot__21_consolelog.daemon.bg.js'))
        self.assertTrue(is_background)
    def test_bg_py_suffix_detected(self):
        """Hooks with .bg.py suffix should be detected as background."""
-        script = Path('/path/to/on_Snapshot__24_responses.bg.py')
+        from archivebox.hooks import is_background_hook
-        is_background = '.bg.' in script.name or '__background' in script.stem
+        self.assertTrue(is_background_hook('on_Snapshot__24_responses.finite.bg.py'))
        self.assertTrue(is_background)
    def test_bg_sh_suffix_detected(self):
        """Hooks with .bg.sh suffix should be detected as background."""
-        script = Path('/path/to/on_Snapshot__23_ssl.bg.sh')
+        from archivebox.hooks import is_background_hook
-        is_background = '.bg.' in script.name or '__background' in script.stem
+        self.assertTrue(is_background_hook('on_Snapshot__23_ssl.daemon.bg.sh'))
        self.assertTrue(is_background)
    def test_legacy_background_suffix_detected(self):
        """Hooks with __background in stem should be detected (backwards compat)."""
-        script = Path('/path/to/on_Snapshot__21_consolelog__background.js')
+        from archivebox.hooks import is_background_hook
-        is_background = '.bg.' in script.name or '__background' in script.stem
+        self.assertTrue(is_background_hook('on_Snapshot__21_consolelog__background.js'))
        self.assertTrue(is_background)
    def test_foreground_hook_not_detected(self):
        """Hooks without .bg. or __background should NOT be detected as background."""
-        script = Path('/path/to/on_Snapshot__11_favicon.js')
+        from archivebox.hooks import is_background_hook
-        is_background = '.bg.' in script.name or '__background' in script.stem
+        self.assertFalse(is_background_hook('on_Snapshot__11_favicon.js'))
        self.assertFalse(is_background)
    def test_foreground_py_hook_not_detected(self):
        """Python hooks without .bg. should NOT be detected as background."""
-        script = Path('/path/to/on_Snapshot__50_wget.py')
+        from archivebox.hooks import is_background_hook
-        is_background = '.bg.' in script.name or '__background' in script.stem
+        self.assertFalse(is_background_hook('on_Snapshot__50_wget.py'))
        self.assertFalse(is_background)
 class TestJSONLParsing(unittest.TestCase):
@@ -182,15 +176,15 @@ class TestHookDiscovery(unittest.TestCase):
        wget_dir = self.plugins_dir / 'wget'
        wget_dir.mkdir()
        (wget_dir / 'on_Snapshot__50_wget.py').write_text('# test hook')
-        (wget_dir / 'on_Crawl__00_install_wget.py').write_text('# install hook')
+        (wget_dir / 'on_Crawl__10_wget_install.finite.bg.py').write_text('# install hook')
        chrome_dir = self.plugins_dir / 'chrome'
        chrome_dir.mkdir()
-        (chrome_dir / 'on_Snapshot__20_chrome_tab.bg.js').write_text('// background hook')
+        (chrome_dir / 'on_Snapshot__20_chrome_tab.daemon.bg.js').write_text('// background hook')
        consolelog_dir = self.plugins_dir / 'consolelog'
        consolelog_dir.mkdir()
-        (consolelog_dir / 'on_Snapshot__21_consolelog.bg.js').write_text('// background hook')
+        (consolelog_dir / 'on_Snapshot__21_consolelog.daemon.bg.js').write_text('// background hook')
    def tearDown(self):
        """Clean up test directory."""
@@ -208,8 +202,8 @@ class TestHookDiscovery(unittest.TestCase):
        self.assertEqual(len(hooks), 3)
        hook_names = [h.name for h in hooks]
-        self.assertIn('on_Snapshot__20_chrome_tab.bg.js', hook_names)
+        self.assertIn('on_Snapshot__20_chrome_tab.daemon.bg.js', hook_names)
-        self.assertIn('on_Snapshot__21_consolelog.bg.js', hook_names)
+        self.assertIn('on_Snapshot__21_consolelog.daemon.bg.js', hook_names)
        self.assertIn('on_Snapshot__50_wget.py', hook_names)
    def test_discover_hooks_sorted_by_name(self):
@@ -222,10 +216,25 @@ class TestHookDiscovery(unittest.TestCase):
        hooks = sorted(set(hooks), key=lambda p: p.name)
        # Check numeric ordering
-        self.assertEqual(hooks[0].name, 'on_Snapshot__20_chrome_tab.bg.js')
+        self.assertEqual(hooks[0].name, 'on_Snapshot__20_chrome_tab.daemon.bg.js')
-        self.assertEqual(hooks[1].name, 'on_Snapshot__21_consolelog.bg.js')
+        self.assertEqual(hooks[1].name, 'on_Snapshot__21_consolelog.daemon.bg.js')
        self.assertEqual(hooks[2].name, 'on_Snapshot__50_wget.py')
    def test_get_plugins_includes_non_snapshot_plugin_dirs(self):
        """get_plugins() should include binary-only plugins with standardized metadata.

        Creates an 'env' plugin directory that has only an on_Binary hook and
        a config.json (no on_Snapshot hook), then checks that get_plugins()
        still reports it when the plugin roots are patched to the test dirs.
        """
        env_dir = self.plugins_dir / 'env'
        env_dir.mkdir()
        (env_dir / 'on_Binary__15_env_discover.py').write_text('# binary hook')
        (env_dir / 'config.json').write_text('{"type": "object", "properties": {}}')

        from archivebox import hooks as hooks_module

        # get_plugins() is memoized with lru_cache: drop any earlier result so
        # the patched directories are actually scanned ...
        hooks_module.get_plugins.cache_clear()
        # ... and drop the result computed from the temp dirs once this test
        # finishes, so later callers in the same process do not see it.
        self.addCleanup(hooks_module.get_plugins.cache_clear)

        # The user plugins path is not created in this test, so only the
        # patched built-in root can contribute plugin directories.
        with patch.object(hooks_module, 'BUILTIN_PLUGINS_DIR', self.plugins_dir), patch.object(hooks_module, 'USER_PLUGINS_DIR', self.test_dir / 'user_plugins'):
            plugins = hooks_module.get_plugins()

        self.assertIn('env', plugins)
 class TestGetExtractorName(unittest.TestCase):
    """Test get_extractor_name() function."""
--- a/archivebox/workers/models.py
+++ b/archivebox/workers/models.py
@@ -338,7 +338,7 @@ class BaseStateMachine(StateMachine):
    (e.g., 'snapshot', 'archiveresult', 'crawl', 'binary').
    Example usage:
-        class SnapshotMachine(BaseStateMachine, strict_states=True):
+        class SnapshotMachine(BaseStateMachine):
            model_attr_name = 'snapshot'
            # States and transitions...
--- a/bin/test_plugins.sh
+++ b/bin/test_plugins.sh
@@ -21,7 +21,7 @@
 #   coverage json
 #   ./bin/test_plugins.sh --coverage-report
-set -e
+set -euo pipefail
 # Color codes
 GREEN='\033[0;32m'
@@ -31,6 +31,7 @@ NC='\033[0m' # No Color
 # Save root directory first
 ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
 PLUGINS_DIR="${ABX_PLUGINS_DIR:-$(python3 -c 'from abx_plugins import get_plugins_dir; print(get_plugins_dir())')}"
 # Parse arguments
 PLUGIN_FILTER=""
@@ -49,7 +50,8 @@ done
 # Function to show JS coverage report (inlined from convert_v8_coverage.js)
 show_js_coverage() {
-    local coverage_dir="$1"
+    local plugin_root="$1"
    local coverage_dir="$2"
    if [ ! -d "$coverage_dir" ] || [ -z "$(ls -A "$coverage_dir" 2>/dev/null)" ]; then
        echo "No JavaScript coverage data collected"
@@ -57,10 +59,11 @@ show_js_coverage() {
        return
    fi
-    node - "$coverage_dir" << 'ENDJS'
+    node - "$plugin_root" "$coverage_dir" << 'ENDJS'
 const fs = require('fs');
 const path = require('path');
-const coverageDir = process.argv[2];
+const pluginRoot = path.resolve(process.argv[2]).replace(/\\/g, '/');
 const coverageDir = process.argv[3];
 const files = fs.readdirSync(coverageDir).filter(f => f.startsWith('coverage-') && f.endsWith('.json'));
 if (files.length === 0) {
@@ -90,8 +93,8 @@ files.forEach(file => {
 });
 const allFiles = Object.keys(coverageByFile).sort();
-const pluginFiles = allFiles.filter(url => url.includes('archivebox/plugins'));
+const pluginFiles = allFiles.filter(url => url.replace(/\\/g, '/').includes(pluginRoot));
-const otherFiles = allFiles.filter(url => !url.startsWith('node:') && !url.includes('archivebox/plugins'));
+const otherFiles = allFiles.filter(url => !url.startsWith('node:') && !url.replace(/\\/g, '/').includes(pluginRoot));
 console.log('Total files with coverage: ' + allFiles.length + '\n');
 console.log('Plugin files: ' + pluginFiles.length);
@@ -118,8 +121,8 @@ let totalRanges = 0, totalExecuted = 0;
 pluginFiles.forEach(url => {
    const cov = coverageByFile[url];
    const pct = cov.totalRanges > 0 ? (cov.executedRanges / cov.totalRanges * 100).toFixed(1) : '0.0';
-    const match = url.match(/archivebox\/plugins\/.+/);
+    const normalizedUrl = url.replace(/\\/g, '/');
-    const displayPath = match ? match[0] : url;
+    const displayPath = normalizedUrl.includes(pluginRoot) ? normalizedUrl.slice(normalizedUrl.indexOf(pluginRoot)) : url;
    console.log(displayPath + ': ' + pct + '% (' + cov.executedRanges + '/' + cov.totalRanges + ' ranges)');
    totalRanges += cov.totalRanges;
    totalExecuted += cov.executedRanges;
@@ -139,17 +142,17 @@ if [ "$COVERAGE_REPORT_ONLY" = true ]; then
    echo "Python Coverage Summary"
    echo "=========================================="
    coverage combine 2>/dev/null || true
-    coverage report --include="archivebox/plugins/*" --omit="*/tests/*"
+    coverage report --include="*/abx_plugins/plugins/*" --omit="*/tests/*"
    echo ""
    echo "=========================================="
    echo "JavaScript Coverage Summary"
    echo "=========================================="
-    show_js_coverage "$ROOT_DIR/coverage/js"
+    show_js_coverage "$PLUGINS_DIR" "$ROOT_DIR/coverage/js"
    echo ""
    echo "For detailed coverage reports:"
-    echo "  Python:     coverage report --show-missing --include='archivebox/plugins/*' --omit='*/tests/*'"
+    echo "  Python:     coverage report --show-missing --include='*/abx_plugins/plugins/*' --omit='*/tests/*'"
    echo "  Python:     coverage json  # LLM-friendly format"
    echo "  Python:     coverage html  # Interactive HTML report"
    exit 0
@@ -157,7 +160,7 @@ fi
 # Set DATA_DIR for tests (required by abx_pkg and plugins)
 # Use temp dir to isolate tests from project files
-if [ -z "$DATA_DIR" ]; then
+if [ -z "${DATA_DIR:-}" ]; then
    export DATA_DIR=$(mktemp -d -t archivebox_plugin_tests.XXXXXX)
    # Clean up on exit
    trap "rm -rf '$DATA_DIR'" EXIT
@@ -173,7 +176,7 @@ if [ "$ENABLE_COVERAGE" = true ]; then
    # Enable Python subprocess coverage
    export COVERAGE_PROCESS_START="$ROOT_DIR/pyproject.toml"
-    export PYTHONPATH="$ROOT_DIR:$PYTHONPATH"  # For sitecustomize.py
+    export PYTHONPATH="$ROOT_DIR${PYTHONPATH:+:$PYTHONPATH}"  # For sitecustomize.py
    # Enable Node.js V8 coverage (built-in, no packages needed)
    export NODE_V8_COVERAGE="$ROOT_DIR/coverage/js"
@@ -183,8 +186,7 @@ if [ "$ENABLE_COVERAGE" = true ]; then
    echo ""
 fi
-# Change to plugins directory
+cd "$ROOT_DIR" || exit 1
 cd "$ROOT_DIR/archivebox/plugins" || exit 1
 echo "=========================================="
 echo "ArchiveBox Plugin Tests"
@@ -212,10 +214,10 @@ FAILED_PLUGINS=0
 # Find and run plugin tests
 if [ -n "$PLUGIN_FILTER" ]; then
    # Run tests for specific plugin(s) matching pattern
-    TEST_DIRS=$(find . -maxdepth 2 -type d -path "./${PLUGIN_FILTER}*/tests" 2>/dev/null | sort)
+    TEST_DIRS=$(find "$PLUGINS_DIR" -maxdepth 2 -type d -path "$PLUGINS_DIR/${PLUGIN_FILTER}*/tests" 2>/dev/null | sort)
 else
    # Run all plugin tests
-    TEST_DIRS=$(find . -maxdepth 2 -type d -name "tests" -path "./*/tests" 2>/dev/null | sort)
+    TEST_DIRS=$(find "$PLUGINS_DIR" -maxdepth 2 -type d -name "tests" -path "$PLUGINS_DIR/*/tests" 2>/dev/null | sort)
 fi
 if [ -z "$TEST_DIRS" ]; then
@@ -230,26 +232,35 @@ for test_dir in $TEST_DIRS; do
        continue
    fi
-    plugin_name=$(basename $(dirname "$test_dir"))
+    plugin_name=$(basename "$(dirname "$test_dir")")
    TOTAL_PLUGINS=$((TOTAL_PLUGINS + 1))
    echo -e "${YELLOW}[RUNNING]${NC} $plugin_name"
    # Build pytest command with optional coverage
-    PYTEST_CMD="python -m pytest $test_dir -p no:django -v --tb=short"
+    PYTEST_CMD=(python -m pytest "$test_dir" -p no:django -v --tb=short)
    if [ "$ENABLE_COVERAGE" = true ]; then
-        PYTEST_CMD="$PYTEST_CMD --cov=$plugin_name --cov-append --cov-branch"
+        PYTEST_CMD+=(--cov="$(dirname "$test_dir")" --cov-append --cov-branch)
        echo "[DEBUG] NODE_V8_COVERAGE before pytest: $NODE_V8_COVERAGE"
        python -c "import os; print('[DEBUG BASH->PYTHON] NODE_V8_COVERAGE:', os.environ.get('NODE_V8_COVERAGE', 'NOT_SET'))"
    fi
-    if eval "$PYTEST_CMD" 2>&1 | grep -v "^platform\|^cachedir\|^rootdir\|^configfile\|^plugins:" | tail -100; then
+    LOG_FILE=$(mktemp -t "archivebox_plugin_${plugin_name}.XXXXXX.log")
    PLUGIN_TMPDIR=$(mktemp -d -t "archivebox_plugin_${plugin_name}.XXXXXX")
    if (
        cd "$PLUGIN_TMPDIR"
        TMPDIR="$PLUGIN_TMPDIR" "${PYTEST_CMD[@]}"
    ) >"$LOG_FILE" 2>&1; then
        grep -v "^platform\|^cachedir\|^rootdir\|^configfile\|^plugins:" "$LOG_FILE" | tail -100
        echo -e "${GREEN}[PASSED]${NC} $plugin_name"
        PASSED_PLUGINS=$((PASSED_PLUGINS + 1))
    else
        grep -v "^platform\|^cachedir\|^rootdir\|^configfile\|^plugins:" "$LOG_FILE" | tail -100
        echo -e "${RED}[FAILED]${NC} $plugin_name"
        FAILED_PLUGINS=$((FAILED_PLUGINS + 1))
    fi
    rm -f "$LOG_FILE"
    rm -rf "$PLUGIN_TMPDIR"
    echo ""
 done
@@ -277,21 +288,18 @@ elif [ $FAILED_PLUGINS -eq 0 ]; then
        # Coverage data is in ROOT_DIR, combine and report from there
        cd "$ROOT_DIR" || exit 1
        # Copy coverage data from plugins dir if it exists
        if [ -f "$ROOT_DIR/archivebox/plugins/.coverage" ]; then
            cp "$ROOT_DIR/archivebox/plugins/.coverage" "$ROOT_DIR/.coverage"
        fi
        coverage combine 2>/dev/null || true
-        coverage report --include="archivebox/plugins/*" --omit="*/tests/*" 2>&1 | head -50
+        coverage report --include="*/abx_plugins/plugins/*" --omit="*/tests/*" 2>&1 | head -50
        echo ""
        echo "=========================================="
        echo "JavaScript Coverage Summary"
        echo "=========================================="
-        show_js_coverage "$ROOT_DIR/coverage/js"
+        show_js_coverage "$PLUGINS_DIR" "$ROOT_DIR/coverage/js"
        echo ""
        echo "For detailed coverage reports (from project root):"
-        echo "  Python:     coverage report --show-missing --include='archivebox/plugins/*' --omit='*/tests/*'"
+        echo "  Python:     coverage report --show-missing --include='*/abx_plugins/plugins/*' --omit='*/tests/*'"
        echo "  Python:     coverage json  # LLM-friendly format"
        echo "  Python:     coverage html  # Interactive HTML report"
        echo "  JavaScript: ./bin/test_plugins.sh --coverage-report"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -83,8 +83,8 @@ dependencies = [
    ### Extractor dependencies (optional binaries detected at runtime via shutil.which)
    "yt-dlp>=2024.1.0",      # for: media extractor
    ### Binary/Package Management
-    "abx-pkg>=0.1.0",        # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
+    "abx-pkg>=0.7.0",        # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
-    "abx-plugins>=0.1.0",    # shared plugin package (sourced from uv workspace in local dev)
+    "abx-plugins>=0.9.5",    # shared ArchiveBox plugin package with install_args-only overrides
    "gallery-dl>=1.31.1",
    ### UUID7 backport for Python <3.14
    "uuid7>=0.1.0; python_version < '3.14'",  # for: uuid7 support on Python 3.13 (provides uuid_extensions module)
@@ -165,9 +165,6 @@ package = true
 python-version = "3.13"
 # compile-bytecode = true
 [tool.uv.sources]
 abx-plugins = { workspace = true }
 [build-system]
 requires = ["pdm-backend"]
 build-backend = "pdm.backend"
@@ -188,9 +185,9 @@ exclude = ["*.pyi", "typings/", "migrations/"]
 ignore = ["E731", "E303", "E266", "E241", "E222"]
 [tool.pytest.ini_options]
-testpaths = [ "tests" ]
+testpaths = [ "archivebox/tests" ]
 DJANGO_SETTINGS_MODULE = "archivebox.core.settings"
-# Note: Plugin tests under archivebox/plugins/ must NOT load Django
+# Note: Plugin tests under abx_plugins/plugins/ must NOT load Django
 # They use a conftest.py to disable Django automatically
 [tool.coverage.run]
--- a/uv.lock
+++ b/uv.lock