Add tests for merkletree and custom binary provider plugins

- merkletree: Tests merkle tree generation with real files, empty directory handling, and disabled mode
- custom: Tests custom bash command execution and binary discovery
2026-01-03 09:25:42 +10:00 · 2025-12-31 18:30:04 +00:00
parent 9703a8e88c
commit 263335dc6d
4 changed files with 308 additions and 0 deletions
--- a/archivebox/plugins/custom/tests/__init__.py
+++ b/archivebox/plugins/custom/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for the custom binary provider plugin."""
--- a/archivebox/plugins/custom/tests/test_custom_provider.py
+++ b/archivebox/plugins/custom/tests/test_custom_provider.py
@@ -0,0 +1,149 @@
+"""
+Tests for the custom binary provider plugin.
+
+Tests the custom bash binary installer with safe commands.
+"""
+
+import json
+import os
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+from django.test import TestCase
+
+
+# Hook script under test; the plugin directory is one level above the directory holding this file.
+PLUGIN_DIR = Path(__file__).parent.parent
+INSTALL_HOOK = PLUGIN_DIR / 'on_Binary__install_using_custom_bash.py'
+
+
+class TestCustomProviderHook(TestCase):
+    """Run the custom bash install hook as a subprocess and check its exit code and output."""
+
+    def setUp(self):
+        """Create a scratch directory that the subprocess tests pass to the hook as DATA_DIR."""
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        """Remove the scratch directory, ignoring any removal errors."""
+        import shutil
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_hook_script_exists(self):
+        """The hook script should exist at INSTALL_HOOK."""
+        self.assertTrue(INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")
+
+    def test_hook_skips_when_custom_not_allowed(self):
+        """Hook should exit 0 and report a skip when 'custom' is not in --binproviders."""
+        env = os.environ.copy()
+        env['DATA_DIR'] = self.temp_dir
+
+        result = subprocess.run(
+            [
+                sys.executable, str(INSTALL_HOOK),
+                '--name=echo',
+                '--binary-id=test-uuid',
+                '--machine-id=test-machine',
+                '--binproviders=pip,apt',  # 'custom' deliberately left out
+                '--custom-cmd=echo hello',
+            ],
+            capture_output=True,
+            text=True,
+            timeout=30,
+            env=env
+        )
+
+        # A disallowed provider is a clean skip: exit code 0, with the reason on stderr
+        self.assertEqual(result.returncode, 0)
+        self.assertIn('custom provider not allowed', result.stderr)
+
+    def test_hook_runs_custom_command_and_finds_binary(self):
+        """Hook should run the custom command and emit a Binary record for the named binary."""
+        env = os.environ.copy()
+        env['DATA_DIR'] = self.temp_dir
+
+        # The custom command only echoes text, so nothing is actually installed;
+        # the binary looked up afterwards is 'echo', which is expected to be in PATH already
+        result = subprocess.run(
+            [
+                sys.executable, str(INSTALL_HOOK),
+                '--name=echo',
+                '--binary-id=test-uuid',
+                '--machine-id=test-machine',
+                '--custom-cmd=echo "custom install simulation"',
+            ],
+            capture_output=True,
+            text=True,
+            timeout=30,
+            env=env
+        )
+
+        # Should succeed since echo is in PATH; stderr is included in the failure message
+        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
+
+        # Scan stdout for JSONL records; lines that fail to parse as JSON are skipped
+        for line in result.stdout.split('\n'):
+            line = line.strip()
+            if line.startswith('{'):
+                try:
+                    record = json.loads(line)
+                    if record.get('type') == 'Binary' and record.get('name') == 'echo':
+                        self.assertEqual(record['binprovider'], 'custom')
+                        self.assertTrue(record['abspath'])
+                        return
+                except json.JSONDecodeError:
+                    continue
+
+        self.fail("No Binary JSONL record found in output")
+
+    def test_hook_fails_for_missing_binary_after_command(self):
+        """Hook should exit 1 if the binary is still not found after the custom command runs."""
+        env = os.environ.copy()
+        env['DATA_DIR'] = self.temp_dir
+
+        result = subprocess.run(
+            [
+                sys.executable, str(INSTALL_HOOK),
+                '--name=nonexistent_binary_xyz123',
+                '--binary-id=test-uuid',
+                '--machine-id=test-machine',
+                '--custom-cmd=echo "failed install"',  # Only echoes; installs nothing
+            ],
+            capture_output=True,
+            text=True,
+            timeout=30,
+            env=env
+        )
+
+        # Exit code 1 and a "not found" message on stderr (matched case-insensitively)
+        self.assertEqual(result.returncode, 1)
+        self.assertIn('not found', result.stderr.lower())
+
+    def test_hook_fails_for_failing_command(self):
+        """Hook should exit 1 if the custom command returns a non-zero exit code."""
+        env = os.environ.copy()
+        env['DATA_DIR'] = self.temp_dir
+
+        result = subprocess.run(
+            [
+                sys.executable, str(INSTALL_HOOK),
+                '--name=echo',
+                '--binary-id=test-uuid',
+                '--machine-id=test-machine',
+                '--custom-cmd=exit 1',  # Command that fails
+            ],
+            capture_output=True,
+            text=True,
+            timeout=30,
+            env=env
+        )
+
+        # Only the exit code is checked here; stderr content is not asserted
+        self.assertEqual(result.returncode, 1)
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])  # run only this file, verbosely, when executed directly
--- a/archivebox/plugins/merkletree/tests/__init__.py
+++ b/archivebox/plugins/merkletree/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for the merkletree plugin."""
--- a/archivebox/plugins/merkletree/tests/test_merkletree.py
+++ b/archivebox/plugins/merkletree/tests/test_merkletree.py
@@ -0,0 +1,157 @@
+"""
+Tests for the merkletree plugin.
+
+Tests the real merkle tree generation with actual files.
+"""
+
+import json
+import os
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+from django.test import TestCase
+
+
+# Hook script under test; the plugin directory is one level above the directory holding this file.
+PLUGIN_DIR = Path(__file__).parent.parent
+MERKLETREE_HOOK = PLUGIN_DIR / 'on_Snapshot__93_merkletree.py'
+
+
+class TestMerkletreePlugin(TestCase):
+    """Run the merkletree hook as a subprocess and check its exit code and merkletree.json."""
+
+    def test_merkletree_hook_exists(self):
+        """Merkletree hook script should exist at MERKLETREE_HOOK."""
+        self.assertTrue(MERKLETREE_HOOK.exists(), f"Hook not found: {MERKLETREE_HOOK}")
+
+    def test_merkletree_generates_tree_for_files(self):
+        """Merkletree hook should write merkletree.json listing files from the snapshot directory."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Mock snapshot directory inside a temporary directory
+            snapshot_dir = Path(temp_dir) / 'snapshot'
+            snapshot_dir.mkdir()
+
+            # Output directory for the hook, nested inside the snapshot directory
+            output_dir = snapshot_dir / 'merkletree'
+            output_dir.mkdir()
+
+            # Two top-level test files: a small HTML page and a PNG signature plus padding
+            (snapshot_dir / 'index.html').write_text('<html><body>Test</body></html>')
+            (snapshot_dir / 'screenshot.png').write_bytes(b'\x89PNG\r\n\x1a\n' + b'\x00' * 100)
+
+            subdir = snapshot_dir / 'media'
+            subdir.mkdir()
+            (subdir / 'video.mp4').write_bytes(b'\x00\x00\x00\x18ftypmp42')
+
+            # Enable the plugin explicitly via the environment
+            env = os.environ.copy()
+            env['MERKLETREE_ENABLED'] = 'true'
+
+            result = subprocess.run(
+                [
+                    sys.executable, str(MERKLETREE_HOOK),
+                    '--url=https://example.com',
+                    '--snapshot-id=test-snapshot',
+                ],
+                capture_output=True,
+                text=True,
+                cwd=str(output_dir),  # Hook expects to run from output dir
+                env=env,
+                timeout=30
+            )
+
+            # Should succeed; stderr is included in the failure message
+            self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
+
+            # The hook must have written merkletree.json into the output directory
+            output_file = output_dir / 'merkletree.json'
+            self.assertTrue(output_file.exists(), "merkletree.json not created")
+
+            # Load the JSON and check its top-level keys
+            with open(output_file) as f:
+                data = json.load(f)
+
+            self.assertIn('root_hash', data)
+            self.assertIn('files', data)
+            self.assertIn('metadata', data)
+
+            # Both top-level files must be listed (media/video.mp4 is not asserted here)
+            file_paths = [f['path'] for f in data['files']]
+            self.assertIn('index.html', file_paths)
+            self.assertIn('screenshot.png', file_paths)
+
+            # Metadata must report a non-zero file count and total size
+            self.assertGreater(data['metadata']['file_count'], 0)
+            self.assertGreater(data['metadata']['total_size'], 0)
+
+    def test_merkletree_skips_when_disabled(self):
+        """Merkletree hook should exit 0 and print 'skipped' when MERKLETREE_ENABLED=false."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            snapshot_dir = Path(temp_dir) / 'snapshot'
+            snapshot_dir.mkdir()
+            output_dir = snapshot_dir / 'merkletree'
+            output_dir.mkdir()
+
+            env = os.environ.copy()
+            env['MERKLETREE_ENABLED'] = 'false'
+
+            result = subprocess.run(
+                [
+                    sys.executable, str(MERKLETREE_HOOK),
+                    '--url=https://example.com',
+                    '--snapshot-id=test-snapshot',
+                ],
+                capture_output=True,
+                text=True,
+                cwd=str(output_dir),
+                env=env,
+                timeout=30
+            )
+
+            # Skipping is not an error: exit code 0, with 'skipped' somewhere in stdout
+            self.assertEqual(result.returncode, 0)
+            self.assertIn('skipped', result.stdout)
+
+    def test_merkletree_handles_empty_directory(self):
+        """Merkletree hook should succeed and report zero files for an empty snapshot directory."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            snapshot_dir = Path(temp_dir) / 'snapshot'
+            snapshot_dir.mkdir()
+            output_dir = snapshot_dir / 'merkletree'
+            output_dir.mkdir()
+
+            env = os.environ.copy()
+            env['MERKLETREE_ENABLED'] = 'true'
+
+            result = subprocess.run(
+                [
+                    sys.executable, str(MERKLETREE_HOOK),
+                    '--url=https://example.com',
+                    '--snapshot-id=test-snapshot',
+                ],
+                capture_output=True,
+                text=True,
+                cwd=str(output_dir),
+                env=env,
+                timeout=30
+            )
+
+            # Should succeed even with empty directory
+            self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
+
+            # merkletree.json must still be written when there are no files
+            output_file = output_dir / 'merkletree.json'
+            self.assertTrue(output_file.exists())
+
+            with open(output_file) as f:
+                data = json.load(f)
+
+            # Metadata must report zero files (the 'files' list itself is not inspected)
+            self.assertEqual(data['metadata']['file_count'], 0)
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])  # run only this file, verbosely, when executed directly
				`@@ -0,0 +1 @@`
				`"""Tests for the custom binary provider plugin."""`