""" Integration tests for singlefile plugin Tests verify: 1. on_Crawl hook validates and installs single-file 2. Verify deps with abx-pkg 3. Extraction works on https://example.com 4. JSONL output is correct 5. Filesystem output is valid HTML """ import json import subprocess import sys import tempfile from pathlib import Path import pytest PLUGIN_DIR = Path(__file__).parent.parent PLUGINS_ROOT = PLUGIN_DIR.parent SINGLEFILE_HOOK = PLUGIN_DIR / "on_Snapshot__04_singlefile.js" CHROME_VALIDATE_HOOK = PLUGINS_ROOT / 'chrome_session' / 'on_Crawl__00_validate_chrome.py' NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Dependency__install_using_npm_provider.py' TEST_URL = "https://example.com" def test_hook_script_exists(): """Verify on_Snapshot hook exists.""" assert SINGLEFILE_HOOK.exists(), f"Hook not found: {SINGLEFILE_HOOK}" def test_chrome_validation_and_install(): """Test chrome validation hook to install puppeteer-core if needed.""" # Run chrome validation hook (from chrome_session plugin) result = subprocess.run( [sys.executable, str(CHROME_VALIDATE_HOOK)], capture_output=True, text=True, timeout=30 ) # If exit 1, binary not found - need to install if result.returncode == 1: # Parse Dependency request from JSONL dependency_request = None for line in result.stdout.strip().split('\n'): if line.strip(): try: record = json.loads(line) if record.get('type') == 'Dependency': dependency_request = record break except json.JSONDecodeError: pass if dependency_request: bin_name = dependency_request['bin_name'] bin_providers = dependency_request['bin_providers'] # Install via npm provider hook install_result = subprocess.run( [ sys.executable, str(NPM_PROVIDER_HOOK), '--dependency-id', 'test-dep-001', '--bin-name', bin_name, '--bin-providers', bin_providers ], capture_output=True, text=True, timeout=600 ) assert install_result.returncode == 0, f"Install failed: {install_result.stderr}" # Verify installation via JSONL output for line in install_result.stdout.strip().split('\n'): if line.strip(): try: record = json.loads(line) if record.get('type') == 'InstalledBinary': assert record['name'] == bin_name assert record['abspath'] break except json.JSONDecodeError: pass else: # Binary already available, verify via JSONL output assert result.returncode == 0, f"Validation failed: {result.stderr}" def test_verify_deps_with_abx_pkg(): """Verify dependencies are available via abx-pkg after hook installation.""" from abx_pkg import Binary, EnvProvider, BinProviderOverrides EnvProvider.model_rebuild() # Verify node is available (singlefile uses Chrome extension, needs Node) node_binary = Binary(name='node', binproviders=[EnvProvider()]) node_loaded = node_binary.load() assert node_loaded and node_loaded.abspath, "Node.js required for singlefile plugin" def test_singlefile_hook_runs(): """Verify singlefile hook can be executed and completes.""" # Prerequisites checked by earlier test with tempfile.TemporaryDirectory() as tmpdir: tmpdir = Path(tmpdir) # Run singlefile extraction hook result = subprocess.run( ['node', str(SINGLEFILE_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'], cwd=tmpdir, capture_output=True, text=True, timeout=120 ) # Hook should complete successfully (even if it just installs extension) assert result.returncode == 0, f"Hook execution failed: {result.stderr}" # Verify extension installation happens assert 'SingleFile extension' in result.stdout or result.returncode == 0, "Should install extension or complete"