remove huey

This commit is contained in:
Nick Sweeting
2025-12-24 23:40:18 -08:00
parent 6c769d831c
commit d95f0dc186
105 changed files with 3635 additions and 1402 deletions

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env python3
"""
Install wget if not already available.
Runs at crawl start to ensure wget is installed.
Outputs JSONL for InstalledBinary.
"""
import json
import sys
from pathlib import Path
# Record printed on stdout whenever wget could not be loaded or installed.
_WGET_DEPENDENCY_RECORD = {
    'type': 'Dependency',
    'bin_name': 'wget',
    'bin_providers': 'apt,brew,env',
}


def _load_or_install(binary):
    """Return ``binary.load()`` if it yields an abspath, else ``binary.install()``."""
    try:
        loaded = binary.load()
        if loaded and loaded.abspath:
            return loaded
    except Exception:
        # Deliberate best-effort probe: any failure while loading simply
        # falls through to the install attempt below.
        pass
    # Install via system package manager
    return binary.install()


def _report_missing_dependency(message):
    """Print the Dependency JSONL record to stdout and ``message`` to stderr."""
    print(json.dumps(_WGET_DEPENDENCY_RECORD))
    print(message, file=sys.stderr)


def main():
    """Ensure wget is available and report the outcome as JSONL on stdout.

    Tries to load wget through the apt, brew and env providers and installs
    it when loading fails or yields no absolute path.

    Exits with status 0 after printing an ``InstalledBinary`` record, or with
    status 1 after printing a ``Dependency`` record (plus an error message on
    stderr) when wget is still unavailable or any exception is raised.
    """
    try:
        # NOTE(review): BinProviderOverrides is not referenced by name below;
        # presumably it has to be in this namespace so that model_rebuild()
        # can resolve it - confirm before removing the import.
        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides

        AptProvider.model_rebuild()
        BrewProvider.model_rebuild()
        EnvProvider.model_rebuild()

        # wget binary and package have same name
        wget_binary = Binary(
            name='wget',
            binproviders=[AptProvider(), BrewProvider(), EnvProvider()],
        )

        loaded = _load_or_install(wget_binary)

        if not (loaded and loaded.abspath):
            _report_missing_dependency("Failed to install wget")
            sys.exit(1)

        # Output InstalledBinary JSONL
        print(json.dumps({
            'type': 'InstalledBinary',
            'name': 'wget',
            'abspath': str(loaded.abspath),
            'version': str(loaded.version) if loaded.version else None,
            'sha256': loaded.sha256,
            'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
        }))
        sys.exit(0)
    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exits above pass through this handler untouched.
        _report_missing_dependency(f"Error installing wget: {e}")
        sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,6 @@
<!-- Wget embed - full iframe of mirrored site.
     Loads the output_path template variable in a borderless iframe that fills its
     container, with a minimum height of 500px.
     NOTE(review): the sandbox grants both allow-same-origin and allow-scripts. If the
     mirrored page is served from the same origin as the embedding page, that combination
     lets the framed document lift its own sandbox - confirm which origin serves it. -->
<iframe src="{{ output_path }}"
class="extractor-embed wget-embed"
style="width: 100%; height: 100%; min-height: 500px; border: none;"
sandbox="allow-same-origin allow-scripts allow-forms">
</iframe>

View File

@@ -0,0 +1,6 @@
<!-- Wget fullscreen - full page iframe of mirrored site.
     Loads the output_path template variable in a borderless iframe sized to the full
     viewport height (100vh).
     The sandbox additionally allows top-level navigation, but only when triggered by a
     user gesture (allow-top-navigation-by-user-activation).
     NOTE(review): allow-same-origin together with allow-scripts lets a same-origin framed
     document lift its own sandbox - confirm which origin serves the mirrored page. -->
<iframe src="{{ output_path }}"
class="extractor-fullscreen wget-fullscreen"
style="width: 100%; height: 100vh; border: none;"
sandbox="allow-same-origin allow-scripts allow-forms allow-top-navigation-by-user-activation">
</iframe>

View File

@@ -0,0 +1 @@
📥

View File

@@ -0,0 +1,8 @@
<!-- Wget thumbnail - scaled down iframe preview of mirrored site.
     The iframe is laid out at 400% width and 400px height, then scaled by 0.25 from its
     top-left corner, so it ends up covering the 100%-wide, 100px-tall wrapper; the
     wrapper's overflow: hidden clips anything outside that box.
     pointer-events: none keeps the preview from receiving clicks, and the sandbox omits
     allow-scripts, so scripts in the framed page do not run. -->
<div class="extractor-thumbnail wget-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fff;">
<iframe src="{{ output_path }}"
style="width: 400%; height: 400px; transform: scale(0.25); transform-origin: top left; pointer-events: none; border: none;"
loading="lazy"
sandbox="allow-same-origin">
</iframe>
</div>

View File

@@ -2,8 +2,8 @@
Integration tests for wget plugin
Tests verify:
1. Plugin reports missing dependency correctly
2. wget can be installed via brew/apt provider hooks
1. Validate hook checks for wget binary
2. Verify deps with abx-pkg
3. Config options work (SAVE_WGET, SAVE_WARC, etc.)
4. Extraction works against real example.com
5. Output files contain actual page content
@@ -26,7 +26,7 @@ import pytest
PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
WGET_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_wget.py'))
WGET_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_wget.py'
WGET_VALIDATE_HOOK = PLUGIN_DIR / 'on_Crawl__00_validate_wget.py'
BREW_HOOK = PLUGINS_ROOT / 'brew' / 'on_Dependency__install_using_brew_provider.py'
APT_HOOK = PLUGINS_ROOT / 'apt' / 'on_Dependency__install_using_apt_provider.py'
TEST_URL = 'https://example.com'
@@ -37,45 +37,59 @@ def test_hook_script_exists():
assert WGET_HOOK.exists(), f"Hook script not found: {WGET_HOOK}"
def _find_jsonl_record(stdout, record_type):
    """Return the first JSON record in ``stdout`` whose 'type' is ``record_type``.

    Blank lines and lines that are not valid JSON are ignored. Returns None
    when no matching record is present.
    """
    for line in stdout.strip().split('\n'):
        if not line.strip():
            continue
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            # The hook may print non-JSON output alongside its records.
            continue
        if record.get('type') == record_type:
            return record
    return None


def test_wget_validate_hook():
    """Test wget validate hook checks for wget binary."""
    result = subprocess.run(
        [sys.executable, str(WGET_VALIDATE_HOOK)],
        capture_output=True,
        text=True,
        timeout=30
    )

    # Hook exits 0 if binary found, 1 if not found (with Dependency record)
    if result.returncode == 0:
        # Binary found - verify InstalledBinary JSONL output
        record = _find_jsonl_record(result.stdout, 'InstalledBinary')
        assert record is not None, "Should output InstalledBinary record when binary found"
        assert record['name'] == 'wget'
        assert record['abspath']
    else:
        # Binary not found - verify Dependency JSONL output
        record = _find_jsonl_record(result.stdout, 'Dependency')
        assert record is not None, "Should output Dependency record when binary not found"
        assert record['bin_name'] == 'wget'
        assert 'env' in record['bin_providers']
def test_verify_deps_with_abx_pkg():
    """Verify wget is available via abx-pkg."""
    # NOTE(review): BinProviderOverrides is not referenced by name in this
    # test; presumably it is imported so the provider models can resolve it -
    # confirm before removing.
    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides

    wget_binary = Binary(name='wget', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
    wget_loaded = wget_binary.load()

    # A missing wget is reported as a skip rather than a failure; when wget
    # is present the test passes simply by reaching the end.
    if not (wget_loaded and wget_loaded.abspath):
        pytest.skip("wget not available - Dependency record should have been emitted")
def test_reports_missing_dependency_when_not_installed():