remove huey

This commit is contained in:
Nick Sweeting
2025-12-24 23:40:18 -08:00
parent 6c769d831c
commit d95f0dc186
105 changed files with 3635 additions and 1402 deletions

View File

@@ -1,149 +0,0 @@
#!/usr/bin/env python3
"""
Install Chrome/Chromium if not already available.
Runs at crawl start to ensure Chrome is installed.
Uses playwright to install chromium if no system Chrome found.
Outputs JSONL for InstalledBinary.
"""
import json
import sys
import os
import shutil
from pathlib import Path
def find_chrome():
    """Locate an existing Chrome/Chromium binary on this machine.

    Lookup order:
      1. The ``CHROME_BINARY`` environment variable. It is returned as-is when
         it names an existing file; otherwise it is resolved as a command name
         (or ``~``-prefixed path) with ``shutil.which``.
      2. Well-known Chrome and Chromium command names on ``PATH``.
      3. Well-known absolute install locations on macOS and Linux.

    Returns:
        The path of the first match as a string, or ``None`` when nothing
        was found.
    """
    # Comprehensive list of Chrome/Chromium binary names and paths
    chromium_names_linux = [
        'chromium',
        'chromium-browser',
        'chromium-browser-beta',
        'chromium-browser-unstable',
        'chromium-browser-canary',
        'chromium-browser-dev',
    ]
    chrome_names_linux = [
        'google-chrome',
        'google-chrome-stable',
        'google-chrome-beta',
        'google-chrome-canary',
        'google-chrome-unstable',
        'google-chrome-dev',
        'chrome',
    ]
    chrome_paths_macos = [
        '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
        '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
        '/Applications/Chromium.app/Contents/MacOS/Chromium',
    ]
    chrome_paths_linux = [
        '/usr/bin/google-chrome',
        '/usr/bin/google-chrome-stable',
        '/usr/bin/chromium',
        '/usr/bin/chromium-browser',
        '/snap/bin/chromium',
        '/opt/google/chrome/chrome',
    ]

    # Google Chrome names are tried before Chromium names; macOS paths before
    # Linux paths.
    all_chrome_names = chrome_names_linux + chromium_names_linux
    all_chrome_paths = chrome_paths_macos + chrome_paths_linux

    # An explicit override from the environment always wins.
    env_path = os.environ.get('CHROME_BINARY', '')
    if env_path:
        if Path(env_path).is_file():
            return env_path
        # The override may be a bare command name (e.g. "chromium") or a
        # "~/..." path rather than a literal file path; resolve those too
        # instead of silently ignoring the user's explicit choice.
        resolved = shutil.which(os.path.expanduser(env_path))
        if resolved:
            return resolved

    # Try shutil.which for the known command names
    for name in all_chrome_names:
        abspath = shutil.which(name)
        if abspath:
            return abspath

    # Fall back to common absolute install paths
    for path in all_chrome_paths:
        if Path(path).is_file():
            return path

    return None
def _emit_record(record):
    """Write one JSONL record (a single-line JSON object) to stdout."""
    print(json.dumps(record))


def _emit_missing_dependency():
    """Emit the Dependency record reporting that chrome is still missing."""
    _emit_record({
        'type': 'Dependency',
        'bin_name': 'chrome',
        'bin_providers': 'apt,brew,env',
    })


def main():
    """Find or install Chrome/Chromium and report the result as JSONL.

    On success an ``InstalledBinary`` record is printed to stdout and the
    process exits with status 0. If no binary can be found or installed, a
    ``Dependency`` record is printed to stdout, a message is written to
    stderr, and the process exits with status 1.
    """
    try:
        # First try to find system Chrome
        system_chrome = find_chrome()
        if system_chrome:
            _emit_record({
                'type': 'InstalledBinary',
                'name': 'chrome',
                'abspath': str(system_chrome),
                'version': None,
                'sha256': None,
                'binprovider': 'env',
            })
            sys.exit(0)

        # If not found in system, try to install chromium via apt/brew.
        # NOTE(review): BinProviderOverrides is not referenced directly below;
        # presumably it must be in this scope so that model_rebuild() can
        # resolve forward references -- confirm before removing it.
        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides

        AptProvider.model_rebuild()
        BrewProvider.model_rebuild()
        EnvProvider.model_rebuild()

        # Try chromium-browser or chromium via system package managers
        for binary_name in ['chromium', 'chromium-browser', 'google-chrome']:
            try:
                chrome_binary = Binary(
                    name=binary_name,
                    binproviders=[AptProvider(), BrewProvider(), EnvProvider()],
                )

                # Try to load, install if not found
                try:
                    loaded = chrome_binary.load()
                    if not loaded or not loaded.abspath:
                        raise Exception("Not loaded")
                except Exception:
                    # Install via system package manager
                    loaded = chrome_binary.install()

                if loaded and loaded.abspath:
                    # Output InstalledBinary JSONL
                    _emit_record({
                        'type': 'InstalledBinary',
                        'name': 'chrome',
                        'abspath': str(loaded.abspath),
                        'version': str(loaded.version) if loaded.version else None,
                        'sha256': loaded.sha256,
                        'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
                    })
                    sys.exit(0)
            except Exception as err:
                # Keep trying the remaining candidates, but leave a trace on
                # stderr so a failed run can be diagnosed (stdout stays
                # reserved for JSONL records).
                print(f"Could not load or install {binary_name}: {err}", file=sys.stderr)
                continue

        # If all attempts failed
        _emit_missing_dependency()
        print("Failed to install Chrome/Chromium", file=sys.stderr)
        sys.exit(1)

    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exits above are not intercepted here.
        _emit_missing_dependency()
        print(f"Error installing Chrome: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()

View File

@@ -2,7 +2,7 @@
Integration tests for chrome_session plugin
Tests verify:
1. Install hook finds system Chrome or installs chromium
1. Validate hook checks for Chrome/Chromium binary
2. Verify deps with abx-pkg
3. Chrome session script exists
"""
@@ -14,7 +14,7 @@ from pathlib import Path
import pytest
PLUGIN_DIR = Path(__file__).parent.parent
CHROME_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_chrome.py'
CHROME_VALIDATE_HOOK = PLUGIN_DIR / 'on_Crawl__00_validate_chrome.py'
CHROME_SESSION_HOOK = PLUGIN_DIR / 'on_Snapshot__20_chrome_session.js'
@@ -23,37 +23,50 @@ def test_hook_script_exists():
assert CHROME_SESSION_HOOK.exists(), f"Hook not found: {CHROME_SESSION_HOOK}"
def test_chrome_install_hook():
"""Test chrome install hook to find or install Chrome/Chromium."""
def test_chrome_validate_hook():
"""Test chrome validate hook checks for Chrome/Chromium binary."""
result = subprocess.run(
[sys.executable, str(CHROME_INSTALL_HOOK)],
[sys.executable, str(CHROME_VALIDATE_HOOK)],
capture_output=True,
text=True,
timeout=600
timeout=30
)
assert result.returncode == 0, f"Install hook failed: {result.stderr}"
# Verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'chrome'
assert record['abspath']
assert Path(record['abspath']).exists(), f"Chrome binary should exist at {record['abspath']}"
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record"
# Hook exits 0 if binary found, 1 if not found (with Dependency record)
if result.returncode == 0:
# Binary found - verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'chrome'
assert record['abspath']
assert Path(record['abspath']).exists(), f"Chrome binary should exist at {record['abspath']}"
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record when binary found"
else:
# Binary not found - verify Dependency JSONL output
found_dependency = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'Dependency':
assert record['bin_name'] == 'chrome'
found_dependency = True
break
except json.JSONDecodeError:
pass
assert found_dependency, "Should output Dependency record when binary not found"
def test_verify_deps_with_abx_pkg():
"""Verify chrome is available via abx-pkg after hook installation."""
"""Verify chrome is available via abx-pkg."""
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
AptProvider.model_rebuild()
@@ -75,10 +88,10 @@ def test_verify_deps_with_abx_pkg():
except Exception:
continue
# If we get here, chrome should still be available from system
# If we get here, chrome not available
import shutil
assert shutil.which('chromium') or shutil.which('chrome') or shutil.which('google-chrome'), \
"Chrome should be available after install hook"
if not (shutil.which('chromium') or shutil.which('chrome') or shutil.which('google-chrome')):
pytest.skip("Chrome/Chromium not available - Dependency record should have been emitted")
if __name__ == '__main__':