This commit is contained in:
Nick Sweeting
2025-12-24 21:46:14 -08:00
parent 1915333b81
commit 6c769d831c
69 changed files with 3586 additions and 4216 deletions

View File

@@ -0,0 +1,149 @@
#!/usr/bin/env python3
"""
Install Chrome/Chromium if not already available.
Runs at crawl start to ensure Chrome is installed.
Uses playwright to install chromium if no system Chrome found.
Outputs JSONL for InstalledBinary.
"""
import json
import sys
import os
import shutil
from pathlib import Path
def find_chrome():
"""Try to find system Chrome/Chromium."""
# Comprehensive list of Chrome/Chromium binary names and paths
chromium_names_linux = [
'chromium',
'chromium-browser',
'chromium-browser-beta',
'chromium-browser-unstable',
'chromium-browser-canary',
'chromium-browser-dev',
]
chrome_names_linux = [
'google-chrome',
'google-chrome-stable',
'google-chrome-beta',
'google-chrome-canary',
'google-chrome-unstable',
'google-chrome-dev',
'chrome',
]
chrome_paths_macos = [
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
'/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
'/Applications/Chromium.app/Contents/MacOS/Chromium',
]
chrome_paths_linux = [
'/usr/bin/google-chrome',
'/usr/bin/google-chrome-stable',
'/usr/bin/chromium',
'/usr/bin/chromium-browser',
'/snap/bin/chromium',
'/opt/google/chrome/chrome',
]
all_chrome_names = chrome_names_linux + chromium_names_linux
all_chrome_paths = chrome_paths_macos + chrome_paths_linux
# Check env var first
env_path = os.environ.get('CHROME_BINARY', '')
if env_path and Path(env_path).is_file():
return env_path
# Try shutil.which for various names
for name in all_chrome_names:
abspath = shutil.which(name)
if abspath:
return abspath
# Check common paths
for path in all_chrome_paths:
if Path(path).is_file():
return path
return None
def main():
try:
# First try to find system Chrome
system_chrome = find_chrome()
if system_chrome:
print(json.dumps({
'type': 'InstalledBinary',
'name': 'chrome',
'abspath': str(system_chrome),
'version': None,
'sha256': None,
'binprovider': 'env',
}))
sys.exit(0)
# If not found in system, try to install chromium via apt/brew
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
AptProvider.model_rebuild()
BrewProvider.model_rebuild()
EnvProvider.model_rebuild()
# Try chromium-browser or chromium via system package managers
for binary_name in ['chromium', 'chromium-browser', 'google-chrome']:
try:
chrome_binary = Binary(
name=binary_name,
binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
)
# Try to load, install if not found
try:
loaded = chrome_binary.load()
if not loaded or not loaded.abspath:
raise Exception("Not loaded")
except Exception:
# Install via system package manager
loaded = chrome_binary.install()
if loaded and loaded.abspath:
# Output InstalledBinary JSONL
print(json.dumps({
'type': 'InstalledBinary',
'name': 'chrome',
'abspath': str(loaded.abspath),
'version': str(loaded.version) if loaded.version else None,
'sha256': loaded.sha256,
'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
}))
sys.exit(0)
except Exception:
continue
# If all attempts failed
print(json.dumps({
'type': 'Dependency',
'bin_name': 'chrome',
'bin_providers': 'apt,brew,env',
}))
print("Failed to install Chrome/Chromium", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(json.dumps({
'type': 'Dependency',
'bin_name': 'chrome',
'bin_providers': 'apt,brew,env',
}))
print(f"Error installing Chrome: {e}", file=sys.stderr)
sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,85 @@
"""
Integration tests for chrome_session plugin
Tests verify:
1. Install hook finds system Chrome or installs chromium
2. Verify deps with abx-pkg
3. Chrome session script exists
"""
import json
import subprocess
import sys
from pathlib import Path
import pytest
PLUGIN_DIR = Path(__file__).parent.parent
CHROME_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_chrome.py'
CHROME_SESSION_HOOK = PLUGIN_DIR / 'on_Snapshot__20_chrome_session.js'
def test_hook_script_exists():
"""Verify chrome session hook exists."""
assert CHROME_SESSION_HOOK.exists(), f"Hook not found: {CHROME_SESSION_HOOK}"
def test_chrome_install_hook():
"""Test chrome install hook to find or install Chrome/Chromium."""
result = subprocess.run(
[sys.executable, str(CHROME_INSTALL_HOOK)],
capture_output=True,
text=True,
timeout=600
)
assert result.returncode == 0, f"Install hook failed: {result.stderr}"
# Verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'chrome'
assert record['abspath']
assert Path(record['abspath']).exists(), f"Chrome binary should exist at {record['abspath']}"
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record"
def test_verify_deps_with_abx_pkg():
"""Verify chrome is available via abx-pkg after hook installation."""
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
AptProvider.model_rebuild()
BrewProvider.model_rebuild()
EnvProvider.model_rebuild()
# Try various chrome binary names
for binary_name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']:
try:
chrome_binary = Binary(
name=binary_name,
binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
)
chrome_loaded = chrome_binary.load()
if chrome_loaded and chrome_loaded.abspath:
# Found at least one chrome variant
assert Path(chrome_loaded.abspath).exists()
return
except Exception:
continue
# If we get here, chrome should still be available from system
import shutil
assert shutil.which('chromium') or shutil.which('chrome') or shutil.which('google-chrome'), \
"Chrome should be available after install hook"
if __name__ == '__main__':
pytest.main([__file__, '-v'])