Add tests for responses, staticfile, and env provider plugins

- responses: Tests network response capture during page load
- staticfile: Tests static file detection and that the download is skipped for HTML pages
- env: Tests PATH-based binary discovery (python3, bash)
This commit is contained in:
Claude
2025-12-31 18:28:01 +00:00
parent cfa5edb160
commit 9703a8e88c
6 changed files with 394 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""Tests for the env binary provider plugin."""

View File

@@ -0,0 +1,159 @@
"""
Tests for the env binary provider plugin.
Tests the real env provider hook with actual system binaries.
"""
import json
import os
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
from django.test import TestCase
# Locate the env provider install hook. PLUGIN_DIR is the parent of the
# directory that contains this test module; the hook script sits directly in it.
PLUGIN_DIR = Path(__file__).parent.parent
INSTALL_HOOK = PLUGIN_DIR.joinpath('on_Binary__install_using_env_provider.py')
class TestEnvProviderHook(TestCase):
    """Exercise the env binary provider hook as a real subprocess.

    Every test launches ``INSTALL_HOOK`` with the current interpreter and then
    inspects its exit code, its stderr, and the JSONL records it prints on
    stdout.
    """

    def setUp(self):
        """Create a scratch directory that is handed to the hook as DATA_DIR."""
        self.temp_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Remove the scratch directory created in setUp."""
        import shutil
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def _run_hook(self, name, *extra_args):
        """Run the install hook for binary *name* and return the CompletedProcess.

        *extra_args* are appended after the standard ``--name``,
        ``--binary-id`` and ``--machine-id`` arguments.
        """
        env = os.environ.copy()
        env['DATA_DIR'] = self.temp_dir
        return subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                f'--name={name}',
                '--binary-id=test-uuid',
                '--machine-id=test-machine',
                *extra_args,
            ],
            capture_output=True,
            text=True,
            timeout=30,
            env=env,
        )

    @staticmethod
    def _find_binary_record(stdout, name):
        """Return the first ``Binary`` JSONL record for *name* in *stdout*, or None.

        Lines that do not start with ``{`` or that are not valid JSON are
        ignored, so other output on stdout does not break the search.
        """
        for raw_line in stdout.split('\n'):
            line = raw_line.strip()
            if not line.startswith('{'):
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            if record.get('type') == 'Binary' and record.get('name') == name:
                return record
        return None

    def _assert_found_via_env(self, name):
        """Assert the hook reports *name* as an existing binary from the env provider.

        The test is skipped when *name* is not on this machine's PATH, because
        the hook cannot be expected to find a binary the host does not have.
        """
        import shutil
        if shutil.which(name) is None:
            self.skipTest(f"{name} is not available on PATH")

        result = self._run_hook(name)
        # Should succeed and output JSONL
        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")

        record = self._find_binary_record(result.stdout, name)
        if record is None:
            self.fail("No Binary JSONL record found in output")
        self.assertEqual(record['binprovider'], 'env')
        self.assertTrue(record['abspath'])
        self.assertTrue(Path(record['abspath']).exists())

    def test_hook_script_exists(self):
        """Hook script should exist."""
        self.assertTrue(INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")

    def test_hook_finds_python(self):
        """Hook should find python3 binary in PATH."""
        self._assert_found_via_env('python3')

    def test_hook_finds_bash(self):
        """Hook should find bash binary in PATH."""
        self._assert_found_via_env('bash')

    def test_hook_fails_for_missing_binary(self):
        """Hook should fail for binary not in PATH."""
        result = self._run_hook('nonexistent_binary_xyz123')
        # Should fail with exit code 1
        self.assertEqual(result.returncode, 1)
        self.assertIn('not found', result.stderr.lower())

    def test_hook_skips_when_env_not_allowed(self):
        """Hook should skip when env not in allowed binproviders."""
        result = self._run_hook('python3', '--binproviders=pip,apt')  # env not allowed
        # Should exit cleanly (code 0) when env not allowed
        self.assertEqual(result.returncode, 0)
        self.assertIn('env provider not allowed', result.stderr)
if __name__ == '__main__':
    # Direct execution: hand this file to pytest in verbose mode.
    pytest_args = [__file__, '-v']
    pytest.main(pytest_args)

View File

@@ -0,0 +1 @@
"""Tests for the responses plugin."""

View File

@@ -0,0 +1,118 @@
"""
Tests for the responses plugin.
Tests the real responses hook with an actual URL to verify
network response capture.
"""
import json
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
from django.test import TestCase
# Import chrome test helpers
sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
from chrome_test_helpers import (
chrome_session,
get_test_env,
get_plugin_dir,
get_hook_script,
)
def chrome_available() -> bool:
    """Return True when any known Chrome/Chromium executable name is on PATH."""
    candidates = ['chromium', 'chromium-browser', 'google-chrome', 'chrome']
    return any(shutil.which(candidate) for candidate in candidates)
# Resolve the responses hook script inside this plugin's directory.
# NOTE(review): get_hook_script presumably returns None when nothing matches
# the pattern -- the existence test guards against that; confirm in the helper.
_RESPONSES_HOOK_PATTERN = 'on_Snapshot__*_responses.*'
PLUGIN_DIR = get_plugin_dir(__file__)
RESPONSES_HOOK = get_hook_script(PLUGIN_DIR, _RESPONSES_HOOK_PATTERN)
class TestResponsesPlugin(TestCase):
    """Sanity checks for the responses plugin's files."""

    def test_responses_hook_exists(self):
        """The responses hook must have been located and must exist on disk."""
        hook = RESPONSES_HOOK
        self.assertIsNotNone(hook, "Responses hook not found in plugin directory")
        self.assertTrue(hook.exists(), f"Hook not found: {hook}")
@pytest.mark.skipif(not chrome_available(), reason="Chrome not installed")
class TestResponsesWithChrome(TestCase):
    """Integration tests that run the responses hook against a live Chrome session."""

    def setUp(self):
        """Skip when Node.js is missing, otherwise create a scratch directory."""
        # The hook is launched with `node`; without it subprocess.run would
        # raise FileNotFoundError and the test would error instead of skipping.
        # Checked before mkdtemp so a skip leaves nothing behind (tearDown is
        # not run when setUp does not complete).
        if shutil.which('node') is None:
            self.skipTest("node not installed")
        self.temp_dir = Path(tempfile.mkdtemp())

    def tearDown(self):
        """Remove the scratch directory created in setUp."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def _assert_index_records_valid(self, index_path):
        """Check that every parseable line of *index_path* has the expected keys.

        Blank lines and lines that are not valid JSON are tolerated, because
        the file may contain partially written lines.
        """
        with open(index_path, encoding='utf-8') as f:
            content = f.read().strip()
        for line in content.split('\n'):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue  # Some lines may be incomplete
            # Verify structure
            self.assertIn('url', record)
            self.assertIn('resourceType', record)

    def test_responses_captures_network_responses(self):
        """Responses hook should capture network responses from page load."""
        test_url = 'https://example.com'
        snapshot_id = 'test-responses-snapshot'

        try:
            with chrome_session(
                self.temp_dir,
                crawl_id='test-responses-crawl',
                snapshot_id=snapshot_id,
                test_url=test_url,
                navigate=True,
                timeout=30,
            ) as (_chrome_process, _chrome_pid, snapshot_chrome_dir):
                # Get environment and run the responses hook
                env = get_test_env()
                env['CHROME_HEADLESS'] = 'true'

                # Run responses hook with the active Chrome session
                result = subprocess.run(
                    ['node', str(RESPONSES_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
                    cwd=str(snapshot_chrome_dir),
                    capture_output=True,
                    text=True,
                    timeout=120,  # Longer timeout as it waits for navigation
                    env=env,
                )

                # Verify hook ran (may timeout waiting for page_loaded.txt in test mode)
                # NOTE(review): the hook runs under node, whose crashes do not
                # print 'Traceback', so this check is weak -- consider asserting
                # on the exit code once the test-mode timeout behaviour is settled.
                self.assertNotIn('Traceback', result.stderr)

                # If index file exists, verify it's valid JSONL
                index_output = snapshot_chrome_dir / 'index.jsonl'
                if index_output.exists():
                    self._assert_index_records_valid(index_output)

        except RuntimeError as e:
            # Treat a failure to bring up Chrome as an environment problem, not a test failure.
            if 'Chrome' in str(e) or 'CDP' in str(e):
                self.skipTest(f"Chrome session setup failed: {e}")
            raise
if __name__ == '__main__':
    # Direct execution: hand this file to pytest in verbose mode.
    pytest_args = [__file__, '-v']
    pytest.main(pytest_args)

View File

@@ -0,0 +1 @@
"""Tests for the staticfile plugin."""

View File

@@ -0,0 +1,114 @@
"""
Tests for the staticfile plugin.
Tests the real staticfile hook with actual URLs to verify
static file detection and download.
"""
import json
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
from django.test import TestCase
# Import chrome test helpers
sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
from chrome_test_helpers import (
chrome_session,
get_test_env,
get_plugin_dir,
get_hook_script,
)
def chrome_available() -> bool:
    """Report whether a Chrome/Chromium executable can be found on PATH."""
    browser_names = ('chromium', 'chromium-browser', 'google-chrome', 'chrome')
    # shutil.which yields a path string or None; any() collapses that to a bool.
    return any(map(shutil.which, browser_names))
# Resolve the staticfile hook script inside this plugin's directory.
# NOTE(review): get_hook_script presumably returns None when nothing matches
# the pattern -- the existence test guards against that; confirm in the helper.
_STATICFILE_HOOK_PATTERN = 'on_Snapshot__*_staticfile.*'
PLUGIN_DIR = get_plugin_dir(__file__)
STATICFILE_HOOK = get_hook_script(PLUGIN_DIR, _STATICFILE_HOOK_PATTERN)
class TestStaticfilePlugin(TestCase):
    """Sanity checks for the staticfile plugin's files."""

    def test_staticfile_hook_exists(self):
        """The staticfile hook must have been located and must exist on disk."""
        hook = STATICFILE_HOOK
        self.assertIsNotNone(hook, "Staticfile hook not found in plugin directory")
        self.assertTrue(hook.exists(), f"Hook not found: {hook}")
@pytest.mark.skipif(not chrome_available(), reason="Chrome not installed")
class TestStaticfileWithChrome(TestCase):
    """Integration tests that run the staticfile hook against a live Chrome session."""

    def setUp(self):
        """Skip when Node.js is missing, otherwise create a scratch directory."""
        # The hook is launched with `node`; without it subprocess.run would
        # raise FileNotFoundError and the test would error instead of skipping.
        # Checked before mkdtemp so a skip leaves nothing behind (tearDown is
        # not run when setUp does not complete).
        if shutil.which('node') is None:
            self.skipTest("node not installed")
        self.temp_dir = Path(tempfile.mkdtemp())

    def tearDown(self):
        """Remove the scratch directory created in setUp."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    @staticmethod
    def _first_archive_result(stdout):
        """Return the first ``ArchiveResult`` JSONL record in *stdout*, or None.

        Lines that do not start with ``{`` or that are not valid JSON are ignored.
        """
        for raw_line in stdout.split('\n'):
            line = raw_line.strip()
            if not line.startswith('{'):
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            if record.get('type') == 'ArchiveResult':
                return record
        return None

    def test_staticfile_skips_html_pages(self):
        """Staticfile hook should skip HTML pages (not static files)."""
        test_url = 'https://example.com'  # HTML page, not a static file
        snapshot_id = 'test-staticfile-snapshot'

        try:
            with chrome_session(
                self.temp_dir,
                crawl_id='test-staticfile-crawl',
                snapshot_id=snapshot_id,
                test_url=test_url,
                navigate=True,
                timeout=30,
            ) as (_chrome_process, _chrome_pid, snapshot_chrome_dir):
                # Get environment and run the staticfile hook
                env = get_test_env()
                env['CHROME_HEADLESS'] = 'true'

                # Run staticfile hook with the active Chrome session
                result = subprocess.run(
                    ['node', str(STATICFILE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
                    cwd=str(snapshot_chrome_dir),
                    capture_output=True,
                    text=True,
                    timeout=120,  # Longer timeout as it waits for navigation
                    env=env,
                )

                # Verify hook ran without crash
                # NOTE(review): the hook runs under node, whose crashes do not
                # print 'Traceback', so this check is weak -- consider a
                # stronger signal such as the exit code.
                self.assertNotIn('Traceback', result.stderr)

                # Only the first ArchiveResult record is inspected; when it
                # reports 'skipped', the reason must say the URL is not static.
                record = self._first_archive_result(result.stdout)
                if record is not None and record.get('status') == 'skipped':
                    self.assertIn('Not a static file', record.get('output_str', ''))

        except RuntimeError as e:
            # Treat a failure to bring up Chrome as an environment problem, not a test failure.
            if 'Chrome' in str(e) or 'CDP' in str(e):
                self.skipTest(f"Chrome session setup failed: {e}")
            raise
if __name__ == '__main__':
    # Direct execution: hand this file to pytest in verbose mode.
    pytest_args = [__file__, '-v']
    pytest.main(pytest_args)