mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-04 09:55:33 +10:00
Add comprehensive tests for machine/process models, orchestrator, and search backends
This adds new test coverage for previously untested areas:

Machine module (archivebox/machine/tests/):
- Machine, NetworkInterface, Binary, Process model tests
- BinaryMachine and ProcessMachine state machine tests
- JSONL serialization/deserialization tests
- Manager method tests

Workers module (archivebox/workers/tests/):
- PID file utility tests (write, read, cleanup)
- Orchestrator lifecycle and queue management tests
- Worker spawning logic tests
- Idle detection and exit condition tests

Search backends:
- SQLite FTS5 search tests with real indexed content
- Phrase search, stemming, and unicode support
- Ripgrep search tests with archive directory structure
- Environment variable configuration tests

Binary provider plugins:
- pip provider hook tests
- npm provider hook tests with PATH updates
- apt provider hook tests
This commit is contained in:
1
archivebox/plugins/pip/tests/__init__.py
Normal file
1
archivebox/plugins/pip/tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Tests for the pip binary provider plugin."""
|
||||
198
archivebox/plugins/pip/tests/test_pip_provider.py
Normal file
198
archivebox/plugins/pip/tests/test_pip_provider.py
Normal file
@@ -0,0 +1,198 @@
|
||||
"""
|
||||
Tests for the pip binary provider plugin.
|
||||
|
||||
Tests cover:
|
||||
1. Hook script execution
|
||||
2. pip package detection
|
||||
3. Virtual environment handling
|
||||
4. JSONL output format
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
from django.test import TestCase
|
||||
|
||||
|
||||
# Locate the pip provider hook relative to this test module: this file lives
# in <plugin dir>/tests/, so two .parent hops reach the plugin directory that
# holds the hook script.
PLUGIN_DIR = Path(__file__).parent.parent
INSTALL_HOOK = PLUGIN_DIR / 'on_Binary__install_using_pip_provider.py'
|
||||
|
||||
|
||||
class TestPipProviderHook(TestCase):
    """Test the pip binary provider installation hook.

    Every test that executes the hook runs it in a child process with the
    current interpreter and only requires that no Python traceback shows up
    on stderr; exit codes are deliberately not asserted.
    """

    def setUp(self):
        """Create a scratch directory with an ``output`` subdirectory."""
        self.temp_dir = tempfile.mkdtemp()
        self.output_dir = Path(self.temp_dir) / 'output'
        self.output_dir.mkdir()

    def tearDown(self):
        """Remove the scratch directory created by ``setUp``."""
        import shutil

        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_hook_script_exists(self):
        """Hook script should exist."""
        self.assertTrue(INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")

    def test_hook_help(self):
        """Hook should accept --help without error."""
        result = subprocess.run(
            [sys.executable, str(INSTALL_HOOK), '--help'],
            capture_output=True,
            text=True,
            timeout=30,
        )

        # The exit status may be zero or non-zero depending on the hook's
        # implementation; the only requirement is that it does not crash
        # with a Python error.
        self.assertNotIn('Traceback', result.stderr)

    def test_hook_finds_python(self):
        """Hook should find Python binary."""
        env = os.environ.copy()
        env['DATA_DIR'] = self.temp_dir

        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=python3',
                '--binproviders=pip,env',
            ],
            capture_output=True,
            text=True,
            cwd=str(self.output_dir),
            env=env,
            timeout=60,
        )

        # Scan stdout for a JSONL Binary record describing python3. Finding
        # one is optional, but if it is present it must carry the expected
        # fields.
        for line in result.stdout.split('\n'):
            line = line.strip()
            if not line.startswith('{'):
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Not a JSON record (e.g. log output that starts with '{').
                continue
            if record.get('type') == 'Binary' and record.get('name') == 'python3':
                self.assertIn('abspath', record)
                self.assertIn('version', record)
                break

        # May or may not find python3 via pip, but should not crash.
        self.assertNotIn('Traceback', result.stderr)

    def test_hook_unknown_package(self):
        """Hook should handle unknown packages gracefully."""
        env = os.environ.copy()
        env['DATA_DIR'] = self.temp_dir

        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=nonexistent_package_xyz123',
                '--binproviders=pip',
            ],
            capture_output=True,
            text=True,
            cwd=str(self.output_dir),
            env=env,
            timeout=60,
        )

        # Should not crash. A non-zero exit code is acceptable for a missing
        # package, so the return code is not checked.
        self.assertNotIn('Traceback', result.stderr)
|
||||
|
||||
|
||||
class TestPipProviderIntegration(TestCase):
    """Integration tests for pip provider with real packages."""

    def setUp(self):
        """Create a scratch directory with an ``output`` subdirectory."""
        self.temp_dir = tempfile.mkdtemp()
        self.output_dir = Path(self.temp_dir) / 'output'
        self.output_dir.mkdir()

    def tearDown(self):
        """Remove the scratch directory created by ``setUp``."""
        import shutil

        shutil.rmtree(self.temp_dir, ignore_errors=True)

    # NOTE: the skip condition is a decorator argument, so this
    # `python -m pip --version` probe runs once when the class body is
    # evaluated (module import), not when the test itself runs.
    @pytest.mark.skipif(
        subprocess.run(
            [sys.executable, '-m', 'pip', '--version'],
            capture_output=True,
        ).returncode != 0,
        reason="pip not available",
    )
    def test_hook_finds_pip_installed_binary(self):
        """Hook should find binaries installed via pip."""
        env = os.environ.copy()
        env['DATA_DIR'] = self.temp_dir

        # Try to find 'pip' itself, which should be available.
        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=pip',
                '--binproviders=pip,env',
            ],
            capture_output=True,
            text=True,
            cwd=str(self.output_dir),
            env=env,
            timeout=60,
        )

        # Look for a JSONL Binary record whose name mentions pip.
        for line in result.stdout.split('\n'):
            line = line.strip()
            if not line.startswith('{'):
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            # `or ''` guards against a record with an explicit null name,
            # which would otherwise make the `in` check raise TypeError.
            if record.get('type') == 'Binary' and 'pip' in (record.get('name') or ''):
                # Found the pip binary: it must report a non-empty path.
                self.assertTrue(record.get('abspath'))
                return

        # If we get here without finding pip, that's acceptable as long as
        # the hook didn't crash.
        self.assertNotIn('Traceback', result.stderr)
|
||||
|
||||
|
||||
class TestPipProviderOutput(TestCase):
    """Test JSONL output format from pip provider."""

    def test_binary_record_format(self):
        """Binary JSONL records should have required fields.

        This checks a hard-coded example record only; it documents the
        expected shape and does not execute the hook.
        """
        # Example of expected format
        record = {
            'type': 'Binary',
            'name': 'wget',
            'abspath': '/usr/bin/wget',
            'version': '1.21',
            'binprovider': 'pip',
            'sha256': 'abc123...',
        }

        # Validate structure
        self.assertEqual(record['type'], 'Binary')
        for field in ('name', 'abspath', 'version', 'binprovider'):
            self.assertIn(field, record)
|
||||
|
||||
|
||||
# Allow running this test module directly: hand this file to pytest in
# verbose mode.
if __name__ == '__main__':
    pytest.main([__file__, '-v'])
|
||||
Reference in New Issue
Block a user