""" Tests for the staticfile plugin. Tests the real staticfile hook with actual URLs to verify static file detection and download. """ import json import shutil import subprocess import sys import tempfile from pathlib import Path import pytest from django.test import TestCase # Import chrome test helpers sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests')) from chrome_test_helpers import ( chrome_session, get_test_env, get_plugin_dir, get_hook_script, ) def chrome_available() -> bool: """Check if Chrome/Chromium is available.""" for name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']: if shutil.which(name): return True return False # Get the path to the staticfile hook PLUGIN_DIR = get_plugin_dir(__file__) STATICFILE_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_staticfile.*') class TestStaticfilePlugin(TestCase): """Test the staticfile plugin.""" def test_staticfile_hook_exists(self): """Staticfile hook script should exist.""" self.assertIsNotNone(STATICFILE_HOOK, "Staticfile hook not found in plugin directory") self.assertTrue(STATICFILE_HOOK.exists(), f"Hook not found: {STATICFILE_HOOK}") @pytest.mark.skipif(not chrome_available(), reason="Chrome not installed") class TestStaticfileWithChrome(TestCase): """Integration tests for staticfile plugin with Chrome.""" def setUp(self): """Set up test environment.""" self.temp_dir = Path(tempfile.mkdtemp()) def tearDown(self): """Clean up.""" shutil.rmtree(self.temp_dir, ignore_errors=True) def test_staticfile_skips_html_pages(self): """Staticfile hook should skip HTML pages (not static files).""" test_url = 'https://example.com' # HTML page, not a static file snapshot_id = 'test-staticfile-snapshot' try: with chrome_session( self.temp_dir, crawl_id='test-staticfile-crawl', snapshot_id=snapshot_id, test_url=test_url, navigate=True, timeout=30, ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env): # Use the environment from chrome_session (already has CHROME_HEADLESS=true) # Run staticfile hook with the active Chrome session result = subprocess.run( ['node', str(STATICFILE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'], cwd=str(snapshot_chrome_dir), capture_output=True, text=True, timeout=120, # Longer timeout as it waits for navigation env=env ) # Verify hook ran without crash self.assertNotIn('Traceback', result.stderr) # Parse JSONL output to verify it recognized HTML as non-static for line in result.stdout.split('\n'): line = line.strip() if line.startswith('{'): try: record = json.loads(line) if record.get('type') == 'ArchiveResult': # HTML pages should be skipped if record.get('status') == 'skipped': self.assertIn('Not a static file', record.get('output_str', '')) break except json.JSONDecodeError: continue except RuntimeError as e: if 'Chrome' in str(e) or 'CDP' in str(e): self.skipTest(f"Chrome session setup failed: {e}") raise if __name__ == '__main__': pytest.main([__file__, '-v'])