diff --git a/archivebox/plugins/custom/tests/__init__.py b/archivebox/plugins/custom/tests/__init__.py
new file mode 100644
index 00000000..63791d76
--- /dev/null
+++ b/archivebox/plugins/custom/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for the custom binary provider plugin."""
diff --git a/archivebox/plugins/custom/tests/test_custom_provider.py b/archivebox/plugins/custom/tests/test_custom_provider.py
new file mode 100644
index 00000000..301f8657
--- /dev/null
+++ b/archivebox/plugins/custom/tests/test_custom_provider.py
@@ -0,0 +1,126 @@
+"""
+Tests for the custom binary provider plugin.
+
+Tests the custom bash binary installer with safe commands.
+"""
+
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+from django.test import TestCase
+
+
+# Get the path to the custom provider hook
+PLUGIN_DIR = Path(__file__).parent.parent
+INSTALL_HOOK = PLUGIN_DIR / 'on_Binary__install_using_custom_bash.py'
+
+
+class TestCustomProviderHook(TestCase):
+    """Test the custom binary provider hook by running it as a subprocess."""
+
+    def setUp(self):
+        """Create a scratch DATA_DIR that is removed after each test."""
+        self.temp_dir = tempfile.mkdtemp()
+        # addCleanup (unlike tearDown) also runs when setUp itself fails later.
+        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
+
+    def _run_hook(self, *args):
+        """Run the install hook with ``args`` and return the CompletedProcess.
+
+        DATA_DIR points at the per-test temp dir; the rest of the environment
+        is inherited from the current process.
+        """
+        env = os.environ.copy()
+        env['DATA_DIR'] = self.temp_dir
+        return subprocess.run(
+            [sys.executable, str(INSTALL_HOOK), *args],
+            capture_output=True,
+            text=True,
+            timeout=30,
+            env=env,
+        )
+
+    def test_hook_script_exists(self):
+        """Hook script should exist."""
+        self.assertTrue(INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")
+
+    def test_hook_skips_when_custom_not_allowed(self):
+        """Hook should skip when custom not in allowed binproviders."""
+        result = self._run_hook(
+            '--name=echo',
+            '--binary-id=test-uuid',
+            '--machine-id=test-machine',
+            '--binproviders=pip,apt',  # custom not allowed
+            '--custom-cmd=echo hello',
+        )
+
+        # Should exit cleanly (code 0) when custom not allowed
+        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
+        self.assertIn('custom provider not allowed', result.stderr)
+
+    def test_hook_runs_custom_command_and_finds_binary(self):
+        """Hook should run custom command and find the binary in PATH."""
+        # Use a simple echo command that doesn't actually install anything
+        # Then check for 'echo' which is already in PATH
+        result = self._run_hook(
+            '--name=echo',
+            '--binary-id=test-uuid',
+            '--machine-id=test-machine',
+            '--custom-cmd=echo "custom install simulation"',
+        )
+
+        # Should succeed since echo is in PATH
+        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
+
+        # Parse JSONL output, skipping any non-JSON lines the hook prints
+        for line in result.stdout.split('\n'):
+            line = line.strip()
+            if not line.startswith('{'):
+                continue
+            # Keep the try body to the parse alone so only malformed JSON is
+            # skipped; errors from the checks below always surface.
+            try:
+                record = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            if record.get('type') == 'Binary' and record.get('name') == 'echo':
+                self.assertEqual(record['binprovider'], 'custom')
+                self.assertTrue(record['abspath'])
+                return
+
+        self.fail("No Binary JSONL record found in output")
+
+    def test_hook_fails_for_missing_binary_after_command(self):
+        """Hook should fail if binary not found after running custom command."""
+        result = self._run_hook(
+            '--name=nonexistent_binary_xyz123',
+            '--binary-id=test-uuid',
+            '--machine-id=test-machine',
+            '--custom-cmd=echo "failed install"',  # Doesn't actually install
+        )
+
+        # Should fail since binary not found after command
+        self.assertEqual(result.returncode, 1)
+        self.assertIn('not found', result.stderr.lower())
+
+    def test_hook_fails_for_failing_command(self):
+        """Hook should fail if custom command returns non-zero exit code."""
+        result = self._run_hook(
+            '--name=echo',
+            '--binary-id=test-uuid',
+            '--machine-id=test-machine',
+            '--custom-cmd=exit 1',  # Command that fails
+        )
+
+        # Should fail with exit code 1
+        self.assertEqual(result.returncode, 1)
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/merkletree/tests/__init__.py b/archivebox/plugins/merkletree/tests/__init__.py
new file mode 100644
index 00000000..1eb43866
--- /dev/null
+++ b/archivebox/plugins/merkletree/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for the merkletree plugin."""
diff --git a/archivebox/plugins/merkletree/tests/test_merkletree.py b/archivebox/plugins/merkletree/tests/test_merkletree.py
new file mode 100644
index 00000000..ebdd5808
--- /dev/null
+++ b/archivebox/plugins/merkletree/tests/test_merkletree.py
@@ -0,0 +1,114 @@
+"""
+Tests for the merkletree plugin.
+
+Tests the real merkle tree generation with actual files.
+"""
+
+import json
+import os
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+from django.test import TestCase
+
+
+# Get the path to the merkletree hook
+PLUGIN_DIR = Path(__file__).parent.parent
+MERKLETREE_HOOK = PLUGIN_DIR / 'on_Snapshot__93_merkletree.py'
+
+
+class TestMerkletreePlugin(TestCase):
+    """Test the merkletree plugin by running its hook as a subprocess."""
+
+    def setUp(self):
+        """Create a mock snapshot dir holding an empty merkletree output dir."""
+        temp_dir = tempfile.TemporaryDirectory()
+        self.addCleanup(temp_dir.cleanup)
+        self.snapshot_dir = Path(temp_dir.name) / 'snapshot'
+        self.output_dir = self.snapshot_dir / 'merkletree'
+        self.output_dir.mkdir(parents=True)
+
+    def _run_hook(self, enabled):
+        """Run the hook from the output dir with MERKLETREE_ENABLED=``enabled``."""
+        env = os.environ.copy()
+        env['MERKLETREE_ENABLED'] = enabled
+        return subprocess.run(
+            [
+                sys.executable, str(MERKLETREE_HOOK),
+                '--url=https://example.com',
+                '--snapshot-id=test-snapshot',
+            ],
+            capture_output=True,
+            text=True,
+            cwd=str(self.output_dir),  # Hook expects to run from output dir
+            env=env,
+            timeout=30,
+        )
+
+    def _load_output(self):
+        """Return the parsed merkletree.json, failing the test if it is missing."""
+        output_file = self.output_dir / 'merkletree.json'
+        self.assertTrue(output_file.exists(), "merkletree.json not created")
+        with open(output_file, encoding='utf-8') as f:
+            return json.load(f)
+
+    def test_merkletree_hook_exists(self):
+        """Merkletree hook script should exist."""
+        self.assertTrue(MERKLETREE_HOOK.exists(), f"Hook not found: {MERKLETREE_HOOK}")
+
+    def test_merkletree_generates_tree_for_files(self):
+        """Merkletree hook should generate merkle tree for files in snapshot directory."""
+        # Create some test files
+        (self.snapshot_dir / 'index.html').write_text('Test')
+        (self.snapshot_dir / 'screenshot.png').write_bytes(b'\x89PNG\r\n\x1a\n' + b'\x00' * 100)
+
+        subdir = self.snapshot_dir / 'media'
+        subdir.mkdir()
+        (subdir / 'video.mp4').write_bytes(b'\x00\x00\x00\x18ftypmp42')
+
+        result = self._run_hook('true')
+
+        # Should succeed
+        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
+
+        # Parse and verify output
+        data = self._load_output()
+
+        self.assertIn('root_hash', data)
+        self.assertIn('files', data)
+        self.assertIn('metadata', data)
+
+        # Should have indexed our test files
+        file_paths = [entry['path'] for entry in data['files']]
+        self.assertIn('index.html', file_paths)
+        self.assertIn('screenshot.png', file_paths)
+
+        # Verify metadata
+        self.assertGreater(data['metadata']['file_count'], 0)
+        self.assertGreater(data['metadata']['total_size'], 0)
+
+    def test_merkletree_skips_when_disabled(self):
+        """Merkletree hook should skip when MERKLETREE_ENABLED=false."""
+        result = self._run_hook('false')
+
+        # Should succeed (exit 0) but skip
+        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
+        self.assertIn('skipped', result.stdout)
+
+    def test_merkletree_handles_empty_directory(self):
+        """Merkletree hook should handle empty snapshot directory."""
+        result = self._run_hook('true')
+
+        # Should succeed even with empty directory
+        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
+
+        # Should have empty file list
+        data = self._load_output()
+        self.assertEqual(data['metadata']['file_count'], 0)
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])