Add comprehensive tests for machine/process models, orchestrator, and search backends

This adds new test coverage for previously untested areas:

Machine module (archivebox/machine/tests/):
- Machine, NetworkInterface, Binary, Process model tests
- BinaryMachine and ProcessMachine state machine tests
- JSONL serialization/deserialization tests
- Manager method tests

Workers module (archivebox/workers/tests/):
- PID file utility tests (write, read, cleanup)
- Orchestrator lifecycle and queue management tests
- Worker spawning logic tests
- Idle detection and exit condition tests

Search backends:
- SQLite FTS5 search tests with real indexed content
- Phrase search, stemming, and unicode support
- Ripgrep search tests with archive directory structure
- Environment variable configuration tests

Binary provider plugins:
- pip provider hook tests
- npm provider hook tests with PATH updates
- apt provider hook tests
This commit is contained in:
Claude
2025-12-31 11:33:27 +00:00
parent 7dd2d65770
commit 0cb5f0712d
13 changed files with 2101 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""Tests for the pip binary provider plugin."""

View File

@@ -0,0 +1,198 @@
"""
Tests for the pip binary provider plugin.
Tests cover:
1. Hook script execution
2. pip package detection
3. Virtual environment handling
4. JSONL output format
"""
import json
import os
import subprocess
import sys
import tempfile
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
from django.test import TestCase
# Locate the pip provider hook relative to this test module.
# PLUGIN_DIR is the parent of the directory that contains this file.
PLUGIN_DIR = Path(__file__).parent.parent
# INSTALL_HOOK is the hook script the tests below execute as a subprocess.
INSTALL_HOOK = PLUGIN_DIR / 'on_Binary__install_using_pip_provider.py'
class TestPipProviderHook(TestCase):
    """Smoke tests that execute the pip provider install hook as a subprocess.

    Every test launches ``INSTALL_HOOK`` with the current Python interpreter
    and checks that no Python traceback appears on stderr. Exit codes are
    deliberately not asserted, because the hook may legitimately fail to
    find or install the requested binary.
    """

    def setUp(self):
        """Create a scratch directory containing an ``output`` subdirectory."""
        self.temp_dir = tempfile.mkdtemp()
        self.output_dir = Path(self.temp_dir) / 'output'
        self.output_dir.mkdir()

    def tearDown(self):
        """Remove the scratch directory created in ``setUp``."""
        import shutil
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_hook_script_exists(self):
        """Hook script should exist."""
        self.assertTrue(INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")

    def test_hook_help(self):
        """Hook should accept --help without error."""
        result = subprocess.run(
            [sys.executable, str(INSTALL_HOOK), '--help'],
            capture_output=True,
            text=True,
            timeout=30
        )

        # The exit status may be zero or non-zero depending on the hook's
        # implementation; the only requirement is that it does not crash
        # with a Python traceback.
        self.assertNotIn('Traceback', result.stderr)

    def test_hook_finds_python(self):
        """Hook should find Python binary."""
        env = os.environ.copy()
        env['DATA_DIR'] = self.temp_dir

        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=python3',
                '--binproviders=pip,env',
            ],
            capture_output=True,
            text=True,
            cwd=str(self.output_dir),
            env=env,
            timeout=60
        )

        # Scan stdout for the first JSONL ``Binary`` record named python3 and,
        # if one is present, verify that it carries the expected keys.
        for line in result.stdout.split('\n'):
            line = line.strip()
            if not line.startswith('{'):
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Not valid JSON despite the leading brace; ignore the line.
                continue
            if record.get('type') == 'Binary' and record.get('name') == 'python3':
                self.assertIn('abspath', record)
                self.assertIn('version', record)
                break

        # May or may not find python3 via pip, but should not crash
        self.assertNotIn('Traceback', result.stderr)

    def test_hook_unknown_package(self):
        """Hook should handle unknown packages gracefully."""
        env = os.environ.copy()
        env['DATA_DIR'] = self.temp_dir

        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=nonexistent_package_xyz123',
                '--binproviders=pip',
            ],
            capture_output=True,
            text=True,
            cwd=str(self.output_dir),
            env=env,
            timeout=60
        )

        # A non-zero exit code is acceptable for a missing package; a Python
        # traceback on stderr is not.
        self.assertNotIn('Traceback', result.stderr)
class TestPipProviderIntegration(TestCase):
    """Run the pip provider hook against a package expected to be present."""

    def setUp(self):
        """Prepare a temporary directory with an ``output`` folder inside it."""
        self.temp_dir = tempfile.mkdtemp()
        self.output_dir = Path(self.temp_dir) / 'output'
        self.output_dir.mkdir()

    def tearDown(self):
        """Delete the temporary directory, ignoring removal errors."""
        import shutil
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    # The skip condition is evaluated once, when this class body is executed:
    # it runs ``python -m pip --version`` and skips the test on a non-zero exit.
    @pytest.mark.skipif(
        subprocess.run(
            [sys.executable, '-m', 'pip', '--version'],
            capture_output=True,
        ).returncode != 0,
        reason="pip not available",
    )
    def test_hook_finds_pip_installed_binary(self):
        """Hook should report a Binary record for ``pip`` without crashing."""
        hook_env = {**os.environ, 'DATA_DIR': self.temp_dir}

        # Ask the hook for 'pip' itself, which should be available.
        command = [
            sys.executable, str(INSTALL_HOOK),
            '--name=pip',
            '--binproviders=pip,env',
        ]
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            cwd=str(self.output_dir),
            env=hook_env,
            timeout=60,
        )

        # Walk the JSONL output looking for a Binary record whose name
        # contains 'pip'; the first such record must have a truthy abspath.
        for raw_line in completed.stdout.split('\n'):
            candidate = raw_line.strip()
            if not candidate.startswith('{'):
                continue
            try:
                entry = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            is_binary = entry.get('type') == 'Binary'
            if is_binary and 'pip' in entry.get('name', ''):
                self.assertTrue(entry.get('abspath'))
                return

        # No matching record is acceptable, provided the hook did not crash.
        self.assertNotIn('Traceback', completed.stderr)
class TestPipProviderOutput(TestCase):
    """Document the expected shape of a Binary JSONL record."""

    def test_binary_record_format(self):
        """A sample Binary record should expose every required key."""
        # Hand-written example record; this test does not invoke the hook.
        sample = {
            'type': 'Binary',
            'name': 'wget',
            'abspath': '/usr/bin/wget',
            'version': '1.21',
            'binprovider': 'pip',
            'sha256': 'abc123...',
        }

        self.assertEqual(sample['type'], 'Binary')
        for required_key in ('name', 'abspath', 'version', 'binprovider'):
            self.assertIn(required_key, sample)
# When executed directly, hand this file to pytest in verbose mode.
if __name__ == '__main__':
    pytest_args = [__file__, '-v']
    pytest.main(pytest_args)