Add Chrome CDP integration tests for singlefile

- Import shared Chrome test helpers
- Add test_singlefile_with_chrome_session() to verify CDP connection
- Add test_singlefile_disabled_skips() for config testing
- Update existing test to use get_test_env()
This commit is contained in:
Claude
2025-12-31 08:57:13 +00:00
parent ef92a99c4a
commit 7d74dd906c

View File

@@ -6,6 +6,8 @@ Tests verify:
2. CLI-based singlefile extraction works
3. Dependencies available via abx-pkg
4. Output contains valid HTML
5. Connects to Chrome session via CDP when available
6. Works with extensions loaded (ublock, etc.)
"""
import json
@@ -16,6 +18,13 @@ from pathlib import Path
import pytest
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
get_test_env,
setup_chrome_session,
cleanup_chrome,
CHROME_PLUGIN_DIR,
)
PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
@@ -52,7 +61,7 @@ def test_singlefile_cli_archives_example_com():
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
env = os.environ.copy()
env = get_test_env()
env['SINGLEFILE_ENABLED'] = 'true'
# Run singlefile snapshot hook
@@ -78,5 +87,89 @@ def test_singlefile_cli_archives_example_com():
assert 'Example Domain' in html_content, "Output should contain example.com content"
def test_singlefile_with_chrome_session():
    """Test singlefile connects to existing Chrome session via CDP.

    When a Chrome session exists (chrome/cdp_url.txt), singlefile should
    connect to it instead of launching a new Chrome instance.

    The test passes when either:
      * ``singlefile.html`` is written and contains the example.com content, or
      * no output file is written, but the hook exited 0 or its stderr
        mentions ``browser-server`` / ``cdp``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)

        # Bind these before the ``try`` so that a failure inside
        # setup_chrome_session() does not turn into an UnboundLocalError in
        # the ``finally`` clause and hide the real setup error.
        chrome_launch_process = None
        chrome_pid = None

        try:
            # Set up Chrome session using shared helper
            chrome_launch_process, chrome_pid, _snapshot_chrome_dir = setup_chrome_session(
                tmpdir=tmpdir,
                crawl_id='singlefile-test-crawl',
                snapshot_id='singlefile-test-snap',
                test_url=TEST_URL,
                navigate=False,  # Don't navigate, singlefile will do that
                timeout=20,
            )

            # singlefile looks for ../chrome/cdp_url.txt relative to cwd
            # So we need to run from a directory that has ../chrome pointing to our chrome dir
            singlefile_output_dir = tmpdir / 'snapshot' / 'singlefile'
            singlefile_output_dir.mkdir(parents=True, exist_ok=True)

            # Create symlink so singlefile can find the chrome session
            chrome_link = singlefile_output_dir.parent / 'chrome'
            if not chrome_link.exists():
                chrome_link.symlink_to(tmpdir / 'crawl' / 'chrome')

            env = get_test_env()
            env['SINGLEFILE_ENABLED'] = 'true'
            env['CHROME_HEADLESS'] = 'true'

            # Run singlefile - it should find and use the existing Chrome session
            result = subprocess.run(
                ['python', str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=singlefile-test-snap'],
                cwd=str(singlefile_output_dir),
                capture_output=True,
                text=True,
                env=env,
                timeout=120
            )

            # Verify output
            output_file = singlefile_output_dir / 'singlefile.html'
            if output_file.exists():
                html_content = output_file.read_text()
                assert len(html_content) > 500, "Output file too small"
                assert 'Example Domain' in html_content, "Should contain example.com content"
            else:
                # If singlefile couldn't connect to Chrome, it may have failed
                # Check if it mentioned browser-server in its args (indicating it tried to use CDP)
                assert result.returncode == 0 or 'browser-server' in result.stderr or 'cdp' in result.stderr.lower(), \
                    f"Singlefile should attempt CDP connection. stderr: {result.stderr}"
        finally:
            # Only tear down a session this test actually received; if setup
            # raised, there is no process handle to pass to the helper.
            if chrome_launch_process is not None:
                cleanup_chrome(chrome_launch_process, chrome_pid)
def test_singlefile_disabled_skips():
    """Check that the hook exits 0 and prints no JSONL when SINGLEFILE_ENABLED=False."""
    with tempfile.TemporaryDirectory() as workdir:
        hook_env = get_test_env()
        hook_env['SINGLEFILE_ENABLED'] = 'False'

        command = [
            'python',
            str(SNAPSHOT_HOOK),
            f'--url={TEST_URL}',
            '--snapshot-id=test-disabled',
        ]
        completed = subprocess.run(
            command,
            cwd=Path(workdir),
            env=hook_env,
            capture_output=True,
            text=True,
            timeout=30,
        )

        assert completed.returncode == 0, f"Should exit 0 when disabled: {completed.stderr}"

        # Should NOT emit JSONL when disabled: any stdout line that starts
        # with '{' (ignoring surrounding whitespace) is treated as a record.
        stdout_lines = completed.stdout.strip().split('\n')
        jsonl_lines = [entry for entry in stdout_lines if entry.strip().startswith('{')]
        assert not jsonl_lines, f"Should not emit JSONL when disabled, but got: {jsonl_lines}"
if __name__ == '__main__':
    # Executing this file directly hands it to pytest in verbose mode.
    pytest.main(args=[__file__, '-v'])