mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 23:37:58 +10:00
wip 2
This commit is contained in:
61
archivebox/plugins/archive_org/tests/test_archive_org.py
Normal file
61
archivebox/plugins/archive_org/tests/test_archive_org.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""
|
||||
Integration tests for archive_org plugin
|
||||
|
||||
Tests verify standalone archive.org extractor execution.
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
ARCHIVE_ORG_HOOK = PLUGIN_DIR / 'on_Snapshot__13_archive_org.py'
|
||||
TEST_URL = 'https://example.com'
|
||||
|
||||
def test_hook_script_exists():
    """The archive.org extractor hook must be present in the plugin dir."""
    hook = Path(ARCHIVE_ORG_HOOK)
    assert hook.exists()
|
||||
|
||||
def test_submits_to_archive_org():
    """Running the hook against a live URL must emit RESULT_JSON and STATUS lines."""
    cmd = [
        sys.executable, str(ARCHIVE_ORG_HOOK),
        '--url', TEST_URL, '--snapshot-id', 'test789',
    ]
    with tempfile.TemporaryDirectory() as tmpdir:
        proc = subprocess.run(cmd, cwd=tmpdir, capture_output=True, text=True, timeout=60)

        # Should either succeed or fail gracefully
        assert proc.returncode in (0, 1)
        assert 'RESULT_JSON=' in proc.stdout
        assert 'STATUS=' in proc.stdout
|
||||
|
||||
def test_config_save_archive_org_false_skips():
    """With SAVE_ARCHIVE_DOT_ORG=False the hook should skip (or still succeed)."""
    import os

    env = os.environ.copy()
    env['SAVE_ARCHIVE_DOT_ORG'] = 'False'
    with tempfile.TemporaryDirectory() as tmpdir:
        proc = subprocess.run(
            [sys.executable, str(ARCHIVE_ORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30,
        )

        if proc.returncode == 0:
            assert 'STATUS=skipped' in proc.stdout or 'STATUS=succeeded' in proc.stdout
|
||||
|
||||
def test_handles_timeout():
    """A tiny TIMEOUT setting must not crash the hook; exit code stays 0 or 1."""
    import os

    env = os.environ.copy()
    env['TIMEOUT'] = '1'
    with tempfile.TemporaryDirectory() as tmpdir:
        proc = subprocess.run(
            [sys.executable, str(ARCHIVE_ORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'testtimeout'],
            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30,
        )

        assert proc.returncode in (0, 1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
149
archivebox/plugins/chrome_session/on_Crawl__00_install_chrome.py
Executable file
149
archivebox/plugins/chrome_session/on_Crawl__00_install_chrome.py
Executable file
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Install Chrome/Chromium if not already available.
|
||||
|
||||
Runs at crawl start to ensure Chrome is installed.
|
||||
Uses playwright to install chromium if no system Chrome found.
|
||||
Outputs JSONL for InstalledBinary.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def find_chrome():
    """Return the path to a system Chrome/Chromium binary, or None if absent.

    Search order: the CHROME_BINARY env var, then a PATH lookup over known
    binary names (Chrome variants before Chromium variants), then a set of
    well-known absolute install paths (macOS app bundles before Linux paths).
    """
    # PATH lookup candidates: Chrome names take priority over Chromium names.
    name_candidates = (
        'google-chrome',
        'google-chrome-stable',
        'google-chrome-beta',
        'google-chrome-canary',
        'google-chrome-unstable',
        'google-chrome-dev',
        'chrome',
        'chromium',
        'chromium-browser',
        'chromium-browser-beta',
        'chromium-browser-unstable',
        'chromium-browser-canary',
        'chromium-browser-dev',
    )

    # Absolute fallback locations: macOS app bundles first, then Linux paths.
    path_candidates = (
        '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
        '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
        '/Applications/Chromium.app/Contents/MacOS/Chromium',
        '/usr/bin/google-chrome',
        '/usr/bin/google-chrome-stable',
        '/usr/bin/chromium',
        '/usr/bin/chromium-browser',
        '/snap/bin/chromium',
        '/opt/google/chrome/chrome',
    )

    # An explicit CHROME_BINARY env var wins, if it points at a real file.
    configured = os.environ.get('CHROME_BINARY', '')
    if configured and Path(configured).is_file():
        return configured

    # Next: whatever is resolvable on PATH.
    for candidate in name_candidates:
        resolved = shutil.which(candidate)
        if resolved:
            return resolved

    # Last resort: probe the well-known absolute paths directly.
    for candidate in path_candidates:
        if Path(candidate).is_file():
            return candidate

    return None
|
||||
|
||||
|
||||
def main():
    """Locate or install Chrome/Chromium and emit an InstalledBinary JSONL record.

    Exit codes: 0 when a usable binary was found or installed (an
    InstalledBinary record is printed to stdout), 1 on failure (a Dependency
    request record is printed instead so the orchestrator knows what to do).
    """
    try:
        # First try to find system Chrome
        system_chrome = find_chrome()
        if system_chrome:
            # version/sha256 are not probed for system binaries; left as None.
            print(json.dumps({
                'type': 'InstalledBinary',
                'name': 'chrome',
                'abspath': str(system_chrome),
                'version': None,
                'sha256': None,
                'binprovider': 'env',
            }))
            sys.exit(0)

        # If not found in system, try to install chromium via apt/brew
        # (fix: dropped unused BinProviderOverrides from this import)
        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider

        # model_rebuild() resolves pydantic forward references before use.
        AptProvider.model_rebuild()
        BrewProvider.model_rebuild()
        EnvProvider.model_rebuild()

        # Try chromium-browser or chromium via system package managers
        for binary_name in ['chromium', 'chromium-browser', 'google-chrome']:
            try:
                chrome_binary = Binary(
                    name=binary_name,
                    binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
                )

                # Try to load, install if not found
                try:
                    loaded = chrome_binary.load()
                    if not loaded or not loaded.abspath:
                        raise Exception("Not loaded")
                except Exception:
                    # Install via system package manager
                    loaded = chrome_binary.install()

                if loaded and loaded.abspath:
                    # Output InstalledBinary JSONL
                    print(json.dumps({
                        'type': 'InstalledBinary',
                        'name': 'chrome',
                        'abspath': str(loaded.abspath),
                        'version': str(loaded.version) if loaded.version else None,
                        'sha256': loaded.sha256,
                        'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
                    }))
                    sys.exit(0)
            except Exception:
                # This candidate failed to load/install; try the next name.
                # (SystemExit is not an Exception, so the exit above escapes.)
                continue

        # If all attempts failed, emit a Dependency request record.
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'chrome',
            'bin_providers': 'apt,brew,env',
        }))
        print("Failed to install Chrome/Chromium", file=sys.stderr)
        sys.exit(1)

    except Exception as e:
        # Unexpected failure (e.g. abx_pkg missing): still emit a Dependency
        # request so the caller can retry via its own provider machinery.
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'chrome',
            'bin_providers': 'apt,brew,env',
        }))
        print(f"Error installing Chrome: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
0
archivebox/plugins/chrome_session/tests/__init__.py
Normal file
0
archivebox/plugins/chrome_session/tests/__init__.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""
|
||||
Integration tests for chrome_session plugin
|
||||
|
||||
Tests verify:
|
||||
1. Install hook finds system Chrome or installs chromium
|
||||
2. Verify deps with abx-pkg
|
||||
3. Chrome session script exists
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
CHROME_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_chrome.py'
|
||||
CHROME_SESSION_HOOK = PLUGIN_DIR / 'on_Snapshot__20_chrome_session.js'
|
||||
|
||||
|
||||
def test_hook_script_exists():
    """The chrome session JS hook must be present on disk."""
    hook_present = CHROME_SESSION_HOOK.exists()
    assert hook_present, f"Hook not found: {CHROME_SESSION_HOOK}"
|
||||
|
||||
|
||||
def test_chrome_install_hook():
    """Test chrome install hook to find or install Chrome/Chromium."""
    proc = subprocess.run(
        [sys.executable, str(CHROME_INSTALL_HOOK)],
        capture_output=True, text=True, timeout=600,
    )
    assert proc.returncode == 0, f"Install hook failed: {proc.stderr}"

    # Scan stdout for the InstalledBinary JSONL record emitted by the hook.
    found_binary = False
    for raw_line in proc.stdout.strip().split('\n'):
        if not raw_line.strip():
            continue
        try:
            record = json.loads(raw_line)
        except json.JSONDecodeError:
            continue
        if record.get('type') == 'InstalledBinary':
            assert record['name'] == 'chrome'
            assert record['abspath']
            assert Path(record['abspath']).exists(), f"Chrome binary should exist at {record['abspath']}"
            found_binary = True
            break

    assert found_binary, "Should output InstalledBinary record"
|
||||
|
||||
|
||||
def test_verify_deps_with_abx_pkg():
    """Verify chrome is available via abx-pkg after hook installation.

    Tries each known Chrome/Chromium binary name through the abx-pkg
    providers; falls back to a plain PATH lookup if none of them load.
    """
    # fix: dropped unused BinProviderOverrides from this import
    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider

    # model_rebuild() resolves pydantic forward refs before instantiation.
    AptProvider.model_rebuild()
    BrewProvider.model_rebuild()
    EnvProvider.model_rebuild()

    # Try various chrome binary names
    for binary_name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']:
        try:
            chrome_binary = Binary(
                name=binary_name,
                binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
            )
            chrome_loaded = chrome_binary.load()
            if chrome_loaded and chrome_loaded.abspath:
                # Found at least one chrome variant
                assert Path(chrome_loaded.abspath).exists()
                return
        except Exception:
            continue

    # If we get here, chrome should still be available from the system PATH.
    # fix: also check 'chromium-browser', which the loop above tries but the
    # original fallback omitted.
    import shutil
    assert shutil.which('chromium') or shutil.which('chromium-browser') \
        or shutil.which('chrome') or shutil.which('google-chrome'), \
        "Chrome should be available after install hook"
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
205
archivebox/plugins/dom/tests/test_dom.py
Normal file
205
archivebox/plugins/dom/tests/test_dom.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""
|
||||
Integration tests for dom plugin
|
||||
|
||||
Tests verify:
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via chrome_session validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
4. DOM extraction works on https://example.com
|
||||
5. JSONL output is correct
|
||||
6. Filesystem output contains actual page content
|
||||
7. Config options work
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
DOM_HOOK = PLUGIN_DIR / 'on_Snapshot__36_dom.js'
|
||||
CHROME_VALIDATE_HOOK = PLUGINS_ROOT / 'chrome_session' / 'on_Crawl__00_validate_chrome.py'
|
||||
NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Dependency__install_using_npm_provider.py'
|
||||
TEST_URL = 'https://example.com'
|
||||
|
||||
|
||||
def test_hook_script_exists():
    """The on_Snapshot DOM hook must exist in the plugin directory."""
    hook_present = DOM_HOOK.exists()
    assert hook_present, f"Hook not found: {DOM_HOOK}"
|
||||
|
||||
|
||||
def test_chrome_validation_and_install():
    """Test chrome validation hook to install puppeteer-core if needed.

    Two-stage flow: run the chrome_session validation hook; if it exits 1 it
    is expected to print a `Dependency` JSONL request, which this test then
    feeds to the npm provider hook to perform the actual install.
    """
    # Run chrome validation hook (from chrome_session plugin)
    result = subprocess.run(
        [sys.executable, str(CHROME_VALIDATE_HOOK)],
        capture_output=True,
        text=True,
        timeout=30
    )

    # If exit 1, binary not found - need to install
    if result.returncode == 1:
        # Parse Dependency request from JSONL (first matching record wins)
        dependency_request = None
        for line in result.stdout.strip().split('\n'):
            if line.strip():
                try:
                    record = json.loads(line)
                    if record.get('type') == 'Dependency':
                        dependency_request = record
                        break
                except json.JSONDecodeError:
                    # non-JSON log lines on stdout are expected; skip them
                    pass

        # NOTE(review): if returncode==1 but no Dependency record was parsed,
        # this branch makes no assertion and the test silently passes —
        # confirm that is intended.
        if dependency_request:
            bin_name = dependency_request['bin_name']
            bin_providers = dependency_request['bin_providers']

            # Install via npm provider hook
            install_result = subprocess.run(
                [
                    sys.executable,
                    str(NPM_PROVIDER_HOOK),
                    '--dependency-id', 'test-dep-001',
                    '--bin-name', bin_name,
                    '--bin-providers', bin_providers
                ],
                capture_output=True,
                text=True,
                timeout=600
            )

            assert install_result.returncode == 0, f"Install failed: {install_result.stderr}"

            # Verify installation via JSONL output
            for line in install_result.stdout.strip().split('\n'):
                if line.strip():
                    try:
                        record = json.loads(line)
                        if record.get('type') == 'InstalledBinary':
                            assert record['name'] == bin_name
                            assert record['abspath']
                            break
                    except json.JSONDecodeError:
                        pass
    else:
        # Binary already available, verify via JSONL output
        assert result.returncode == 0, f"Validation failed: {result.stderr}"
|
||||
|
||||
|
||||
def test_verify_deps_with_abx_pkg():
    """Verify dependencies are available via abx-pkg after hook installation."""
    # fix: dropped unused BinProviderOverrides from this import
    from abx_pkg import Binary, EnvProvider

    # model_rebuild() resolves pydantic forward refs before instantiation.
    EnvProvider.model_rebuild()

    # Verify node is available (the dom hook is a Node.js script)
    node_binary = Binary(name='node', binproviders=[EnvProvider()])
    node_loaded = node_binary.load()
    assert node_loaded and node_loaded.abspath, "Node.js required for dom plugin"
|
||||
|
||||
|
||||
def test_extracts_dom_from_example_com():
    """Test full workflow: extract DOM from real example.com via hook.

    NOTE(review): network-dependent test — requires outbound access to
    example.com and a working `node` on PATH; confirm CI permits this.
    """
    # Prerequisites checked by earlier test
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)

        # Run DOM extraction hook
        result = subprocess.run(
            ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            timeout=120
        )

        assert result.returncode == 0, f"Extraction failed: {result.stderr}"

        # Verify JSONL output
        assert 'STATUS=succeeded' in result.stdout, "Should report success"
        assert 'RESULT_JSON=' in result.stdout, "Should output RESULT_JSON"

        # Parse JSONL result
        result_json = None
        for line in result.stdout.split('\n'):
            if line.startswith('RESULT_JSON='):
                # split on the first '=' only: the JSON payload may contain '='
                result_json = json.loads(line.split('=', 1)[1])
                break

        assert result_json, "Should have RESULT_JSON"
        assert result_json['extractor'] == 'dom'
        assert result_json['status'] == 'succeeded'
        assert result_json['url'] == TEST_URL

        # Verify filesystem output
        dom_dir = tmpdir / 'dom'
        assert dom_dir.exists(), "Output directory not created"

        dom_file = dom_dir / 'output.html'
        assert dom_file.exists(), "output.html not created"

        # Verify HTML content contains REAL example.com text
        html_content = dom_file.read_text(errors='ignore')
        assert len(html_content) > 200, f"HTML content too short: {len(html_content)} bytes"
        assert '<html' in html_content.lower(), "Missing <html> tag"
        assert 'example domain' in html_content.lower(), "Missing 'Example Domain' in HTML"
        assert ('this domain' in html_content.lower() or
                'illustrative examples' in html_content.lower()), \
            "Missing example.com description text"
|
||||
|
||||
|
||||
def test_config_save_dom_false_skips():
    """Test that SAVE_DOM=False causes skip."""
    import os

    env = os.environ.copy()
    env['SAVE_DOM'] = 'False'
    with tempfile.TemporaryDirectory() as tmpdir:
        proc = subprocess.run(
            ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
            cwd=Path(tmpdir), capture_output=True, text=True, env=env, timeout=30,
        )

        assert proc.returncode == 0, f"Should exit 0 when skipping: {proc.stderr}"
        assert 'STATUS=skipped' in proc.stdout, "Should report skipped status"
|
||||
|
||||
|
||||
def test_staticfile_present_skips():
    """Test that dom skips when staticfile already downloaded."""
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir)

        # Create staticfile directory to simulate staticfile extractor ran
        staticfile_dir = workdir / 'staticfile'
        staticfile_dir.mkdir()
        (staticfile_dir / 'index.html').write_text('<html>test</html>')

        proc = subprocess.run(
            ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=teststatic'],
            cwd=workdir, capture_output=True, text=True, timeout=30,
        )

        assert proc.returncode == 0, "Should exit 0 when skipping"
        assert 'STATUS=skipped' in proc.stdout, "Should report skipped status"
        assert 'staticfile' in proc.stdout.lower(), "Should mention staticfile"
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
68
archivebox/plugins/git/on_Crawl__00_install_git.py
Executable file
68
archivebox/plugins/git/on_Crawl__00_install_git.py
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Install git if not already available.
|
||||
|
||||
Runs at crawl start to ensure git is installed.
|
||||
Outputs JSONL for InstalledBinary.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main():
    """Ensure git is installed and emit an InstalledBinary JSONL record.

    Exit codes: 0 when git was found or installed (an InstalledBinary record
    is printed to stdout), 1 on failure (a Dependency request is printed
    instead so the orchestrator can retry via its own provider machinery).
    """
    try:
        # fix: dropped unused BinProviderOverrides from this import
        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider

        # model_rebuild() resolves pydantic forward references before use.
        AptProvider.model_rebuild()
        BrewProvider.model_rebuild()
        EnvProvider.model_rebuild()

        # git binary and package have same name
        git_binary = Binary(
            name='git',
            binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
        )

        # Try to load, install if not found
        try:
            loaded = git_binary.load()
            if not loaded or not loaded.abspath:
                raise Exception("Not loaded")
        except Exception:
            # Install via system package manager
            loaded = git_binary.install()

        if loaded and loaded.abspath:
            # Output InstalledBinary JSONL
            print(json.dumps({
                'type': 'InstalledBinary',
                'name': 'git',
                'abspath': str(loaded.abspath),
                'version': str(loaded.version) if loaded.version else None,
                'sha256': loaded.sha256,
                'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
            }))
            sys.exit(0)
        else:
            # Emit a Dependency request so the orchestrator knows what's missing.
            print(json.dumps({
                'type': 'Dependency',
                'bin_name': 'git',
                'bin_providers': 'apt,brew,env',
            }))
            print("Failed to install git", file=sys.stderr)
            sys.exit(1)

    except Exception as e:
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'git',
            'bin_providers': 'apt,brew,env',
        }))
        print(f"Error installing git: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
90
archivebox/plugins/git/tests/test_git.py
Normal file
90
archivebox/plugins/git/tests/test_git.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
Integration tests for git plugin
|
||||
|
||||
Tests verify:
|
||||
1. Install hook installs git via abx-pkg
|
||||
2. Verify deps with abx-pkg
|
||||
3. Standalone git extractor execution
|
||||
"""
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
GIT_HOOK = PLUGIN_DIR / 'on_Snapshot__12_git.py'
|
||||
GIT_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_git.py'
|
||||
TEST_URL = 'https://github.com/example/repo.git'
|
||||
|
||||
def test_hook_script_exists():
    """The git snapshot hook must be present in the plugin directory."""
    hook = Path(GIT_HOOK)
    assert hook.exists()
|
||||
|
||||
def test_git_install_hook():
    """Test git install hook to install git if needed."""
    proc = subprocess.run(
        [sys.executable, str(GIT_INSTALL_HOOK)],
        capture_output=True, text=True, timeout=600,
    )
    assert proc.returncode == 0, f"Install hook failed: {proc.stderr}"

    # Scan stdout for the InstalledBinary JSONL record emitted by the hook.
    found_binary = False
    for raw_line in proc.stdout.strip().split('\n'):
        if not raw_line.strip():
            continue
        try:
            record = json.loads(raw_line)
        except json.JSONDecodeError:
            continue
        if record.get('type') == 'InstalledBinary':
            assert record['name'] == 'git'
            assert record['abspath']
            found_binary = True
            break

    assert found_binary, "Should output InstalledBinary record"
|
||||
|
||||
def test_verify_deps_with_abx_pkg():
    """Verify git is available via abx-pkg after hook installation."""
    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider

    # Resolve pydantic forward refs for each provider class before use.
    for provider_cls in (AptProvider, BrewProvider, EnvProvider):
        provider_cls.model_rebuild()

    binary = Binary(name='git', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
    loaded = binary.load()
    assert loaded and loaded.abspath, "git should be available after install hook"
|
||||
|
||||
def test_reports_missing_git():
    """With an empty PATH the hook should fail and mention the missing dep."""
    with tempfile.TemporaryDirectory() as tmpdir:
        # Deliberately minimal env so the hook cannot find git on PATH.
        env = {'PATH': '/nonexistent'}
        result = subprocess.run(
            [sys.executable, str(GIT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
            # fix: timeout added for consistency with the other tests so a
            # hung subprocess cannot stall the whole test run
            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=60
        )
        if result.returncode != 0:
            combined = result.stdout + result.stderr
            assert 'DEPENDENCY_NEEDED' in combined or 'git' in combined.lower() or 'ERROR=' in combined
|
||||
|
||||
def test_handles_non_git_url():
    """A non-git URL should be skipped or fail cleanly, never crash."""
    if not shutil.which('git'):
        pytest.skip("git not installed")

    cmd = [sys.executable, str(GIT_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789']
    with tempfile.TemporaryDirectory() as tmpdir:
        proc = subprocess.run(cmd, cwd=tmpdir, capture_output=True, text=True, timeout=30)

        # Should fail or skip for non-git URL
        assert proc.returncode in (0, 1)
        assert 'STATUS=' in proc.stdout
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
53
archivebox/plugins/htmltotext/tests/test_htmltotext.py
Normal file
53
archivebox/plugins/htmltotext/tests/test_htmltotext.py
Normal file
@@ -0,0 +1,53 @@
|
||||
"""
|
||||
Integration tests for htmltotext plugin
|
||||
|
||||
Tests verify standalone htmltotext extractor execution.
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
HTMLTOTEXT_HOOK = PLUGIN_DIR / 'on_Snapshot__54_htmltotext.py'
|
||||
TEST_URL = 'https://example.com'
|
||||
|
||||
def test_hook_script_exists():
    """The htmltotext hook must be present in the plugin directory."""
    hook = Path(HTMLTOTEXT_HOOK)
    assert hook.exists()
|
||||
|
||||
def test_extracts_text_from_html():
    """Given a singlefile HTML page, the hook should extract plain text."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        # Create HTML source as if the singlefile extractor already ran
        singlefile_dir = tmpdir / 'singlefile'
        singlefile_dir.mkdir()
        (singlefile_dir / 'singlefile.html').write_text('<html><body><h1>Example Domain</h1><p>This domain is for examples.</p></body></html>')

        proc = subprocess.run(
            [sys.executable, str(HTMLTOTEXT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
            cwd=tmpdir, capture_output=True, text=True, timeout=30
        )

        assert proc.returncode in (0, 1)
        assert 'RESULT_JSON=' in proc.stdout

        if proc.returncode == 0:
            assert 'STATUS=succeeded' in proc.stdout
            content_path = tmpdir / 'htmltotext' / 'content.txt'
            if content_path.exists():
                assert len(content_path.read_text()) > 0
|
||||
|
||||
def test_fails_gracefully_without_html():
    """Without any HTML input the hook must still report a STATUS."""
    with tempfile.TemporaryDirectory() as tmpdir:
        proc = subprocess.run(
            [sys.executable, str(HTMLTOTEXT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
            cwd=tmpdir, capture_output=True, text=True, timeout=30
        )
        assert proc.returncode in (0, 1)
        assert 'STATUS=' in (proc.stdout + proc.stderr)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
67
archivebox/plugins/media/on_Crawl__00_install_ytdlp.py
Executable file
67
archivebox/plugins/media/on_Crawl__00_install_ytdlp.py
Executable file
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Install yt-dlp if not already available.
|
||||
|
||||
Runs at crawl start to ensure yt-dlp is installed.
|
||||
Outputs JSONL for InstalledBinary.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main():
    """Ensure yt-dlp is installed and emit an InstalledBinary JSONL record.

    Exit codes: 0 when yt-dlp was found or installed (an InstalledBinary
    record is printed to stdout), 1 on failure (a Dependency request is
    printed instead).
    """
    try:
        # fix: dropped unused BinProviderOverrides from this import
        from abx_pkg import Binary, PipProvider, EnvProvider

        # model_rebuild() resolves pydantic forward references before use.
        PipProvider.model_rebuild()
        EnvProvider.model_rebuild()

        # yt-dlp binary and package have same name
        ytdlp_binary = Binary(
            name='yt-dlp',
            binproviders=[PipProvider(), EnvProvider()]
        )

        # Try to load, install if not found
        try:
            loaded = ytdlp_binary.load()
            if not loaded or not loaded.abspath:
                raise Exception("Not loaded")
        except Exception:
            # Install via pip
            loaded = ytdlp_binary.install()

        if loaded and loaded.abspath:
            # Output InstalledBinary JSONL
            print(json.dumps({
                'type': 'InstalledBinary',
                'name': 'yt-dlp',
                'abspath': str(loaded.abspath),
                'version': str(loaded.version) if loaded.version else None,
                'sha256': loaded.sha256,
                'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
            }))
            sys.exit(0)
        else:
            # fix: bin_providers now matches the providers actually attempted
            # above (was 'pip,brew,env', but no BrewProvider is configured).
            print(json.dumps({
                'type': 'Dependency',
                'bin_name': 'yt-dlp',
                'bin_providers': 'pip,env',
            }))
            print("Failed to install yt-dlp", file=sys.stderr)
            sys.exit(1)

    except Exception as e:
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'yt-dlp',
            'bin_providers': 'pip,env',
        }))
        print(f"Error installing yt-dlp: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
148
archivebox/plugins/media/tests/test_media.py
Normal file
148
archivebox/plugins/media/tests/test_media.py
Normal file
@@ -0,0 +1,148 @@
|
||||
"""
|
||||
Integration tests for media plugin
|
||||
|
||||
Tests verify:
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
4. Media extraction works on video URLs
|
||||
5. JSONL output is correct
|
||||
6. Config options work
|
||||
7. Handles non-media URLs gracefully
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
MEDIA_HOOK = PLUGIN_DIR / 'on_Snapshot__51_media.py'
|
||||
MEDIA_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_ytdlp.py'
|
||||
TEST_URL = 'https://example.com/video.mp4'
|
||||
|
||||
def test_hook_script_exists():
    """Verify on_Snapshot hook exists."""
    hook_present = MEDIA_HOOK.exists()
    assert hook_present, f"Hook not found: {MEDIA_HOOK}"
|
||||
|
||||
|
||||
def test_ytdlp_install_hook():
    """Test yt-dlp install hook to install yt-dlp if needed."""
    proc = subprocess.run(
        [sys.executable, str(MEDIA_INSTALL_HOOK)],
        capture_output=True, text=True, timeout=600,
    )
    assert proc.returncode == 0, f"Install hook failed: {proc.stderr}"

    # Scan stdout for the InstalledBinary JSONL record emitted by the hook.
    found_binary = False
    for raw_line in proc.stdout.strip().split('\n'):
        if not raw_line.strip():
            continue
        try:
            record = json.loads(raw_line)
        except json.JSONDecodeError:
            continue
        if record.get('type') == 'InstalledBinary':
            assert record['name'] == 'yt-dlp'
            assert record['abspath']
            found_binary = True
            break

    assert found_binary, "Should output InstalledBinary record"
|
||||
|
||||
|
||||
def test_verify_deps_with_abx_pkg():
    """Verify yt-dlp is available via abx-pkg after hook installation."""
    # fix: dropped unused BinProviderOverrides from this import
    from abx_pkg import Binary, PipProvider, EnvProvider

    # model_rebuild() resolves pydantic forward refs before instantiation.
    PipProvider.model_rebuild()
    EnvProvider.model_rebuild()

    # Verify yt-dlp is available
    ytdlp_binary = Binary(name='yt-dlp', binproviders=[PipProvider(), EnvProvider()])
    ytdlp_loaded = ytdlp_binary.load()
    assert ytdlp_loaded and ytdlp_loaded.abspath, "yt-dlp should be available after install hook"
|
||||
|
||||
def test_handles_non_media_url():
    """Test that media extractor handles non-media URLs gracefully via hook."""
    # Prerequisites checked by earlier test
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir)

        # Run media extraction hook on non-media URL
        proc = subprocess.run(
            [sys.executable, str(MEDIA_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
            cwd=workdir, capture_output=True, text=True, timeout=60,
        )

        # Should exit 0 even for non-media URL
        assert proc.returncode == 0, f"Should handle non-media URL gracefully: {proc.stderr}"

        # Verify JSONL output markers
        assert 'STATUS=' in proc.stdout, "Should report status"
        assert 'RESULT_JSON=' in proc.stdout, "Should output RESULT_JSON"

        # Parse JSONL result (split on the first '=' only)
        payload = None
        for line in proc.stdout.split('\n'):
            if line.startswith('RESULT_JSON='):
                payload = json.loads(line.split('=', 1)[1])
                break

        assert payload, "Should have RESULT_JSON"
        assert payload['extractor'] == 'media'
|
||||
|
||||
|
||||
def test_config_save_media_false_skips():
    """Test that SAVE_MEDIA=False causes skip."""
    import os

    env = os.environ.copy()
    env['SAVE_MEDIA'] = 'False'
    with tempfile.TemporaryDirectory() as tmpdir:
        proc = subprocess.run(
            [sys.executable, str(MEDIA_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30,
        )

        assert proc.returncode == 0, f"Should exit 0 when skipping: {proc.stderr}"
        assert 'STATUS=' in proc.stdout
|
||||
|
||||
|
||||
def test_config_timeout():
    """Test that MEDIA_TIMEOUT config is respected."""
    import os

    env = os.environ.copy()
    env['MEDIA_TIMEOUT'] = '5'
    with tempfile.TemporaryDirectory() as tmpdir:
        proc = subprocess.run(
            [sys.executable, str(MEDIA_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30,
        )

        assert proc.returncode == 0, "Should complete without hanging"
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
68
archivebox/plugins/mercury/on_Crawl__00_install_mercury.py
Executable file
68
archivebox/plugins/mercury/on_Crawl__00_install_mercury.py
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Install mercury-parser if not already available.
|
||||
|
||||
Runs at crawl start to ensure mercury-parser is installed.
|
||||
Outputs JSONL for InstalledBinary.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
|
||||
|
||||
NpmProvider.model_rebuild()
|
||||
EnvProvider.model_rebuild()
|
||||
|
||||
# Note: npm package is @postlight/mercury-parser, binary is mercury-parser
|
||||
mercury_binary = Binary(
|
||||
name='mercury-parser',
|
||||
binproviders=[NpmProvider(), EnvProvider()],
|
||||
overrides={'npm': {'packages': ['@postlight/mercury-parser']}}
|
||||
)
|
||||
|
||||
# Try to load, install if not found
|
||||
try:
|
||||
loaded = mercury_binary.load()
|
||||
if not loaded or not loaded.abspath:
|
||||
raise Exception("Not loaded")
|
||||
except Exception:
|
||||
# Install via npm
|
||||
loaded = mercury_binary.install()
|
||||
|
||||
if loaded and loaded.abspath:
|
||||
# Output InstalledBinary JSONL
|
||||
print(json.dumps({
|
||||
'type': 'InstalledBinary',
|
||||
'name': 'mercury-parser',
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256,
|
||||
'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
|
||||
}))
|
||||
sys.exit(0)
|
||||
else:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'mercury-parser',
|
||||
'bin_providers': 'npm,env',
|
||||
}))
|
||||
print("Failed to install mercury-parser", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
except Exception as e:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'mercury-parser',
|
||||
'bin_providers': 'npm,env',
|
||||
}))
|
||||
print(f"Error installing mercury-parser: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
164
archivebox/plugins/mercury/tests/test_mercury.py
Normal file
164
archivebox/plugins/mercury/tests/test_mercury.py
Normal file
@@ -0,0 +1,164 @@
|
||||
"""
|
||||
Integration tests for mercury plugin
|
||||
|
||||
Tests verify:
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
4. Mercury extraction works on https://example.com
|
||||
5. JSONL output is correct
|
||||
6. Filesystem output contains extracted content
|
||||
7. Config options work
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
MERCURY_HOOK = PLUGIN_DIR / 'on_Snapshot__53_mercury.py'
|
||||
MERCURY_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_mercury.py'
|
||||
TEST_URL = 'https://example.com'
|
||||
|
||||
def test_hook_script_exists():
|
||||
"""Verify on_Snapshot hook exists."""
|
||||
assert MERCURY_HOOK.exists(), f"Hook not found: {MERCURY_HOOK}"
|
||||
|
||||
|
||||
def test_mercury_install_hook():
|
||||
"""Test mercury install hook to install mercury-parser if needed."""
|
||||
# Run mercury install hook
|
||||
result = subprocess.run(
|
||||
[sys.executable, str(MERCURY_INSTALL_HOOK)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Install hook failed: {result.stderr}"
|
||||
|
||||
# Verify InstalledBinary JSONL output
|
||||
found_binary = False
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
if line.strip():
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'InstalledBinary':
|
||||
assert record['name'] == 'mercury-parser'
|
||||
assert record['abspath']
|
||||
found_binary = True
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
assert found_binary, "Should output InstalledBinary record"
|
||||
|
||||
|
||||
def test_verify_deps_with_abx_pkg():
|
||||
"""Verify mercury-parser is available via abx-pkg after hook installation."""
|
||||
from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
|
||||
|
||||
NpmProvider.model_rebuild()
|
||||
EnvProvider.model_rebuild()
|
||||
|
||||
# Verify mercury-parser is available
|
||||
mercury_binary = Binary(
|
||||
name='mercury-parser',
|
||||
binproviders=[NpmProvider(), EnvProvider()],
|
||||
overrides={'npm': {'packages': ['@postlight/mercury-parser']}}
|
||||
)
|
||||
mercury_loaded = mercury_binary.load()
|
||||
assert mercury_loaded and mercury_loaded.abspath, "mercury-parser should be available after install hook"
|
||||
|
||||
def test_extracts_with_mercury_parser():
|
||||
"""Test full workflow: extract with mercury-parser from real HTML via hook."""
|
||||
# Prerequisites checked by earlier test
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
# Create HTML source that mercury can parse
|
||||
(tmpdir / 'singlefile').mkdir()
|
||||
(tmpdir / 'singlefile' / 'singlefile.html').write_text(
|
||||
'<html><head><title>Test Article</title></head><body>'
|
||||
'<article><h1>Example Article</h1><p>This is test content for mercury parser.</p></article>'
|
||||
'</body></html>'
|
||||
)
|
||||
|
||||
# Run mercury extraction hook
|
||||
result = subprocess.run(
|
||||
[sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Extraction failed: {result.stderr}"
|
||||
|
||||
# Verify JSONL output
|
||||
assert 'STATUS=' in result.stdout, "Should report status"
|
||||
assert 'RESULT_JSON=' in result.stdout, "Should output RESULT_JSON"
|
||||
|
||||
# Parse JSONL result
|
||||
result_json = None
|
||||
for line in result.stdout.split('\n'):
|
||||
if line.startswith('RESULT_JSON='):
|
||||
result_json = json.loads(line.split('=', 1)[1])
|
||||
break
|
||||
|
||||
assert result_json, "Should have RESULT_JSON"
|
||||
assert result_json['extractor'] == 'mercury'
|
||||
|
||||
# Verify filesystem output if extraction succeeded
|
||||
if result_json['status'] == 'succeeded':
|
||||
mercury_dir = tmpdir / 'mercury'
|
||||
assert mercury_dir.exists(), "Output directory not created"
|
||||
|
||||
output_file = mercury_dir / 'content.html'
|
||||
assert output_file.exists(), "content.html not created"
|
||||
|
||||
content = output_file.read_text()
|
||||
assert len(content) > 0, "Output should not be empty"
|
||||
|
||||
def test_config_save_mercury_false_skips():
|
||||
"""Test that SAVE_MERCURY=False causes skip."""
|
||||
import os
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
env = os.environ.copy()
|
||||
env['SAVE_MERCURY'] = 'False'
|
||||
|
||||
result = subprocess.run(
|
||||
[sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Should exit 0 when skipping: {result.stderr}"
|
||||
assert 'STATUS=' in result.stdout
|
||||
|
||||
|
||||
def test_fails_gracefully_without_html():
|
||||
"""Test that mercury fails gracefully when no HTML source exists."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
result = subprocess.run(
|
||||
[sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
assert result.returncode == 0, "Should exit 0 even when no HTML source"
|
||||
assert 'STATUS=' in result.stdout
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
925
archivebox/plugins/package-lock.json
generated
Normal file
925
archivebox/plugins/package-lock.json
generated
Normal file
@@ -0,0 +1,925 @@
|
||||
{
|
||||
"name": "archivebox-plugins",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "archivebox-plugins",
|
||||
"dependencies": {
|
||||
"puppeteer-core": "^24.34.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@puppeteer/browsers": {
|
||||
"version": "2.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.11.0.tgz",
|
||||
"integrity": "sha512-n6oQX6mYkG8TRPuPXmbPidkUbsSRalhmaaVAQxvH1IkQy63cwsH+kOjB3e4cpCDHg0aSvsiX9bQ4s2VB6mGWUQ==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"debug": "^4.4.3",
|
||||
"extract-zip": "^2.0.1",
|
||||
"progress": "^2.0.3",
|
||||
"proxy-agent": "^6.5.0",
|
||||
"semver": "^7.7.3",
|
||||
"tar-fs": "^3.1.1",
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
"bin": {
|
||||
"browsers": "lib/cjs/main-cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@tootallnate/quickjs-emscripten": {
|
||||
"version": "0.23.0",
|
||||
"resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz",
|
||||
"integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "25.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.0.3.tgz",
|
||||
"integrity": "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~7.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/yauzl": {
|
||||
"version": "2.10.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz",
|
||||
"integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/agent-base": {
|
||||
"version": "7.1.4",
|
||||
"resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz",
|
||||
"integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/ansi-regex": {
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
|
||||
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/ansi-styles": {
|
||||
"version": "4.3.0",
|
||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
|
||||
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"color-convert": "^2.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/ast-types": {
|
||||
"version": "0.13.4",
|
||||
"resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz",
|
||||
"integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"tslib": "^2.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/b4a": {
|
||||
"version": "1.7.3",
|
||||
"resolved": "https://registry.npmjs.org/b4a/-/b4a-1.7.3.tgz",
|
||||
"integrity": "sha512-5Q2mfq2WfGuFp3uS//0s6baOJLMoVduPYVeNmDYxu5OUA1/cBfvr2RIS7vi62LdNj/urk1hfmj867I3qt6uZ7Q==",
|
||||
"license": "Apache-2.0",
|
||||
"peerDependencies": {
|
||||
"react-native-b4a": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"react-native-b4a": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-events": {
|
||||
"version": "2.8.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz",
|
||||
"integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==",
|
||||
"license": "Apache-2.0",
|
||||
"peerDependencies": {
|
||||
"bare-abort-controller": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-abort-controller": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-fs": {
|
||||
"version": "4.5.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
|
||||
"integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-events": "^2.5.4",
|
||||
"bare-path": "^3.0.0",
|
||||
"bare-stream": "^2.6.4",
|
||||
"bare-url": "^2.2.2",
|
||||
"fast-fifo": "^1.3.2"
|
||||
},
|
||||
"engines": {
|
||||
"bare": ">=1.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bare-buffer": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-buffer": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-os": {
|
||||
"version": "3.6.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
|
||||
"integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"bare": ">=1.14.0"
|
||||
}
|
||||
},
|
||||
"node_modules/bare-path": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
|
||||
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-os": "^3.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/bare-stream": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
|
||||
"integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"streamx": "^2.21.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bare-buffer": "*",
|
||||
"bare-events": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-buffer": {
|
||||
"optional": true
|
||||
},
|
||||
"bare-events": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-url": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
|
||||
"integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-path": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/basic-ftp": {
|
||||
"version": "5.0.5",
|
||||
"resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz",
|
||||
"integrity": "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/buffer-crc32": {
|
||||
"version": "0.2.13",
|
||||
"resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
|
||||
"integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/chromium-bidi": {
|
||||
"version": "12.0.1",
|
||||
"resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-12.0.1.tgz",
|
||||
"integrity": "sha512-fGg+6jr0xjQhzpy5N4ErZxQ4wF7KLEvhGZXD6EgvZKDhu7iOhZXnZhcDxPJDcwTcrD48NPzOCo84RP2lv3Z+Cg==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"mitt": "^3.0.1",
|
||||
"zod": "^3.24.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"devtools-protocol": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/cliui": {
|
||||
"version": "8.0.1",
|
||||
"resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
|
||||
"integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"string-width": "^4.2.0",
|
||||
"strip-ansi": "^6.0.1",
|
||||
"wrap-ansi": "^7.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/color-convert": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
|
||||
"integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"color-name": "~1.1.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=7.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/color-name": {
|
||||
"version": "1.1.4",
|
||||
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
|
||||
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/data-uri-to-buffer": {
|
||||
"version": "6.0.2",
|
||||
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
|
||||
"integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/debug": {
|
||||
"version": "4.4.3",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
|
||||
"integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ms": "^2.1.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"supports-color": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/degenerator": {
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz",
|
||||
"integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ast-types": "^0.13.4",
|
||||
"escodegen": "^2.1.0",
|
||||
"esprima": "^4.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/devtools-protocol": {
|
||||
"version": "0.0.1534754",
|
||||
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1534754.tgz",
|
||||
"integrity": "sha512-26T91cV5dbOYnXdJi5qQHoTtUoNEqwkHcAyu/IKtjIAxiEqPMrDiRkDOPWVsGfNZGmlQVHQbZRSjD8sxagWVsQ==",
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/emoji-regex": {
|
||||
"version": "8.0.0",
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
|
||||
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/end-of-stream": {
|
||||
"version": "1.4.5",
|
||||
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz",
|
||||
"integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"once": "^1.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/escalade": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
|
||||
"integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/escodegen": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz",
|
||||
"integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==",
|
||||
"license": "BSD-2-Clause",
|
||||
"dependencies": {
|
||||
"esprima": "^4.0.1",
|
||||
"estraverse": "^5.2.0",
|
||||
"esutils": "^2.0.2"
|
||||
},
|
||||
"bin": {
|
||||
"escodegen": "bin/escodegen.js",
|
||||
"esgenerate": "bin/esgenerate.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"source-map": "~0.6.1"
|
||||
}
|
||||
},
|
||||
"node_modules/esprima": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz",
|
||||
"integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==",
|
||||
"license": "BSD-2-Clause",
|
||||
"bin": {
|
||||
"esparse": "bin/esparse.js",
|
||||
"esvalidate": "bin/esvalidate.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/estraverse": {
|
||||
"version": "5.3.0",
|
||||
"resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
|
||||
"integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
|
||||
"license": "BSD-2-Clause",
|
||||
"engines": {
|
||||
"node": ">=4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/esutils": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz",
|
||||
"integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==",
|
||||
"license": "BSD-2-Clause",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/events-universal": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz",
|
||||
"integrity": "sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"bare-events": "^2.7.0"
|
||||
}
|
||||
},
|
||||
"node_modules/extract-zip": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz",
|
||||
"integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==",
|
||||
"license": "BSD-2-Clause",
|
||||
"dependencies": {
|
||||
"debug": "^4.1.1",
|
||||
"get-stream": "^5.1.0",
|
||||
"yauzl": "^2.10.0"
|
||||
},
|
||||
"bin": {
|
||||
"extract-zip": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 10.17.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@types/yauzl": "^2.9.1"
|
||||
}
|
||||
},
|
||||
"node_modules/fast-fifo": {
|
||||
"version": "1.3.2",
|
||||
"resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz",
|
||||
"integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/fd-slicer": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz",
|
||||
"integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"pend": "~1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-caller-file": {
|
||||
"version": "2.0.5",
|
||||
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
|
||||
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": "6.* || 8.* || >= 10.*"
|
||||
}
|
||||
},
|
||||
"node_modules/get-stream": {
|
||||
"version": "5.2.0",
|
||||
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz",
|
||||
"integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"pump": "^3.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/get-uri": {
|
||||
"version": "6.0.5",
|
||||
"resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.5.tgz",
|
||||
"integrity": "sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"basic-ftp": "^5.0.2",
|
||||
"data-uri-to-buffer": "^6.0.2",
|
||||
"debug": "^4.3.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/http-proxy-agent": {
|
||||
"version": "7.0.2",
|
||||
"resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
|
||||
"integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"agent-base": "^7.1.0",
|
||||
"debug": "^4.3.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/https-proxy-agent": {
|
||||
"version": "7.0.6",
|
||||
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz",
|
||||
"integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"agent-base": "^7.1.2",
|
||||
"debug": "4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/ip-address": {
|
||||
"version": "10.1.0",
|
||||
"resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz",
|
||||
"integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 12"
|
||||
}
|
||||
},
|
||||
"node_modules/is-fullwidth-code-point": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
|
||||
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/lru-cache": {
|
||||
"version": "7.18.3",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz",
|
||||
"integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/mitt": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz",
|
||||
"integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ms": {
|
||||
"version": "2.1.3",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
||||
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/netmask": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz",
|
||||
"integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/once": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
|
||||
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"node_modules/pac-proxy-agent": {
|
||||
"version": "7.2.0",
|
||||
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
||||
"integrity": "sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@tootallnate/quickjs-emscripten": "^0.23.0",
|
||||
"agent-base": "^7.1.2",
|
||||
"debug": "^4.3.4",
|
||||
"get-uri": "^6.0.1",
|
||||
"http-proxy-agent": "^7.0.0",
|
||||
"https-proxy-agent": "^7.0.6",
|
||||
"pac-resolver": "^7.0.1",
|
||||
"socks-proxy-agent": "^8.0.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/pac-resolver": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz",
|
||||
"integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"degenerator": "^5.0.0",
|
||||
"netmask": "^2.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/pend": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
|
||||
"integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/progress": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz",
|
||||
"integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-agent": {
|
||||
"version": "6.5.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.5.0.tgz",
|
||||
"integrity": "sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"agent-base": "^7.1.2",
|
||||
"debug": "^4.3.4",
|
||||
"http-proxy-agent": "^7.0.1",
|
||||
"https-proxy-agent": "^7.0.6",
|
||||
"lru-cache": "^7.14.1",
|
||||
"pac-proxy-agent": "^7.1.0",
|
||||
"proxy-from-env": "^1.1.0",
|
||||
"socks-proxy-agent": "^8.0.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-from-env": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/pump": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz",
|
||||
"integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"end-of-stream": "^1.1.0",
|
||||
"once": "^1.3.1"
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer-core": {
|
||||
"version": "24.34.0",
|
||||
"resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.34.0.tgz",
|
||||
"integrity": "sha512-24evawO+mUGW4mvS2a2ivwLdX3gk8zRLZr9HP+7+VT2vBQnm0oh9jJEZmUE3ePJhRkYlZ93i7OMpdcoi2qNCLg==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@puppeteer/browsers": "2.11.0",
|
||||
"chromium-bidi": "12.0.1",
|
||||
"debug": "^4.4.3",
|
||||
"devtools-protocol": "0.0.1534754",
|
||||
"typed-query-selector": "^2.12.0",
|
||||
"webdriver-bidi-protocol": "0.3.10",
|
||||
"ws": "^8.18.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/require-directory": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
|
||||
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/semver": {
|
||||
"version": "7.7.3",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz",
|
||||
"integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==",
|
||||
"license": "ISC",
|
||||
"bin": {
|
||||
"semver": "bin/semver.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/smart-buffer": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz",
|
||||
"integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 6.0.0",
|
||||
"npm": ">= 3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/socks": {
|
||||
"version": "2.8.7",
|
||||
"resolved": "https://registry.npmjs.org/socks/-/socks-2.8.7.tgz",
|
||||
"integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ip-address": "^10.0.1",
|
||||
"smart-buffer": "^4.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 10.0.0",
|
||||
"npm": ">= 3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/socks-proxy-agent": {
|
||||
"version": "8.0.5",
|
||||
"resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz",
|
||||
"integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"agent-base": "^7.1.2",
|
||||
"debug": "^4.3.4",
|
||||
"socks": "^2.8.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/source-map": {
|
||||
"version": "0.6.1",
|
||||
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
|
||||
"integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==",
|
||||
"license": "BSD-3-Clause",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/streamx": {
|
||||
"version": "2.23.0",
|
||||
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
||||
"integrity": "sha512-kn+e44esVfn2Fa/O0CPFcex27fjIL6MkVae0Mm6q+E6f0hWv578YCERbv+4m02cjxvDsPKLnmxral/rR6lBMAg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"events-universal": "^1.0.0",
|
||||
"fast-fifo": "^1.3.2",
|
||||
"text-decoder": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/string-width": {
|
||||
"version": "4.2.3",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
|
||||
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"emoji-regex": "^8.0.0",
|
||||
"is-fullwidth-code-point": "^3.0.0",
|
||||
"strip-ansi": "^6.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/strip-ansi": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
|
||||
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ansi-regex": "^5.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/tar-fs": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
|
||||
"integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"pump": "^3.0.0",
|
||||
"tar-stream": "^3.1.5"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"bare-fs": "^4.0.1",
|
||||
"bare-path": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/tar-stream": {
|
||||
"version": "3.1.7",
|
||||
"resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz",
|
||||
"integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"b4a": "^1.6.4",
|
||||
"fast-fifo": "^1.2.0",
|
||||
"streamx": "^2.15.0"
|
||||
}
|
||||
},
|
||||
"node_modules/text-decoder": {
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz",
|
||||
"integrity": "sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"b4a": "^1.6.4"
|
||||
}
|
||||
},
|
||||
"node_modules/tslib": {
|
||||
"version": "2.8.1",
|
||||
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
|
||||
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
|
||||
"license": "0BSD"
|
||||
},
|
||||
"node_modules/typed-query-selector": {
|
||||
"version": "2.12.0",
|
||||
"resolved": "https://registry.npmjs.org/typed-query-selector/-/typed-query-selector-2.12.0.tgz",
|
||||
"integrity": "sha512-SbklCd1F0EiZOyPiW192rrHZzZ5sBijB6xM+cpmrwDqObvdtunOHHIk9fCGsoK5JVIYXoyEp4iEdE3upFH3PAg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "7.16.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
|
||||
"license": "MIT",
|
||||
"optional": true
|
||||
},
|
||||
"node_modules/webdriver-bidi-protocol": {
|
||||
"version": "0.3.10",
|
||||
"resolved": "https://registry.npmjs.org/webdriver-bidi-protocol/-/webdriver-bidi-protocol-0.3.10.tgz",
|
||||
"integrity": "sha512-5LAE43jAVLOhB/QqX4bwSiv0Hg1HBfMmOuwBSXHdvg4GMGu9Y0lIq7p4R/yySu6w74WmaR4GM4H9t2IwLW7hgw==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/wrap-ansi": {
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
|
||||
"integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ansi-styles": "^4.0.0",
|
||||
"string-width": "^4.1.0",
|
||||
"strip-ansi": "^6.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/wrappy": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
|
||||
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/ws": {
|
||||
"version": "8.18.3",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
|
||||
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bufferutil": "^4.0.1",
|
||||
"utf-8-validate": ">=5.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bufferutil": {
|
||||
"optional": true
|
||||
},
|
||||
"utf-8-validate": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/y18n": {
|
||||
"version": "5.0.8",
|
||||
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
|
||||
"integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/yargs": {
|
||||
"version": "17.7.2",
|
||||
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
|
||||
"integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"cliui": "^8.0.1",
|
||||
"escalade": "^3.1.1",
|
||||
"get-caller-file": "^2.0.5",
|
||||
"require-directory": "^2.1.1",
|
||||
"string-width": "^4.2.3",
|
||||
"y18n": "^5.0.5",
|
||||
"yargs-parser": "^21.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/yargs-parser": {
|
||||
"version": "21.1.1",
|
||||
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
|
||||
"integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/yauzl": {
|
||||
"version": "2.10.0",
|
||||
"resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz",
|
||||
"integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"buffer-crc32": "~0.2.3",
|
||||
"fd-slicer": "~1.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/zod": {
|
||||
"version": "3.25.76",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
||||
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
1
archivebox/plugins/package.json
Normal file
1
archivebox/plugins/package.json
Normal file
@@ -0,0 +1 @@
|
||||
{"name":"archivebox-plugins","private":true,"dependencies":{"puppeteer-core":"^24.34.0"}}
|
||||
232
archivebox/plugins/pdf/tests/test_pdf.py
Normal file
232
archivebox/plugins/pdf/tests/test_pdf.py
Normal file
@@ -0,0 +1,232 @@
|
||||
"""
|
||||
Integration tests for pdf plugin
|
||||
|
||||
Tests verify:
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via chrome_session validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
4. PDF extraction works on https://example.com
|
||||
5. JSONL output is correct
|
||||
6. Filesystem output is valid PDF file
|
||||
7. Config options work
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
PDF_HOOK = PLUGIN_DIR / 'on_Snapshot__35_pdf.js'
|
||||
CHROME_VALIDATE_HOOK = PLUGINS_ROOT / 'chrome_session' / 'on_Crawl__00_validate_chrome.py'
|
||||
NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Dependency__install_using_npm_provider.py'
|
||||
TEST_URL = 'https://example.com'
|
||||
|
||||
|
||||
def test_hook_script_exists():
|
||||
"""Verify on_Snapshot hook exists."""
|
||||
assert PDF_HOOK.exists(), f"Hook not found: {PDF_HOOK}"
|
||||
|
||||
|
||||
def test_chrome_validation_and_install():
|
||||
"""Test chrome validation hook to install puppeteer-core if needed."""
|
||||
# Run chrome validation hook (from chrome_session plugin)
|
||||
result = subprocess.run(
|
||||
[sys.executable, str(CHROME_VALIDATE_HOOK)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
# If exit 1, binary not found - need to install
|
||||
if result.returncode == 1:
|
||||
# Parse Dependency request from JSONL
|
||||
dependency_request = None
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
if line.strip():
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Dependency':
|
||||
dependency_request = record
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
if dependency_request:
|
||||
bin_name = dependency_request['bin_name']
|
||||
bin_providers = dependency_request['bin_providers']
|
||||
|
||||
# Install via npm provider hook
|
||||
install_result = subprocess.run(
|
||||
[
|
||||
sys.executable,
|
||||
str(NPM_PROVIDER_HOOK),
|
||||
'--dependency-id', 'test-dep-001',
|
||||
'--bin-name', bin_name,
|
||||
'--bin-providers', bin_providers
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600
|
||||
)
|
||||
|
||||
assert install_result.returncode == 0, f"Install failed: {install_result.stderr}"
|
||||
|
||||
# Verify installation via JSONL output
|
||||
for line in install_result.stdout.strip().split('\n'):
|
||||
if line.strip():
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'InstalledBinary':
|
||||
assert record['name'] == bin_name
|
||||
assert record['abspath']
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
else:
|
||||
# Binary already available, verify via JSONL output
|
||||
assert result.returncode == 0, f"Validation failed: {result.stderr}"
|
||||
|
||||
|
||||
def test_verify_deps_with_abx_pkg():
|
||||
"""Verify dependencies are available via abx-pkg after hook installation."""
|
||||
from abx_pkg import Binary, EnvProvider, BinProviderOverrides
|
||||
|
||||
EnvProvider.model_rebuild()
|
||||
|
||||
# Verify node is available
|
||||
node_binary = Binary(name='node', binproviders=[EnvProvider()])
|
||||
node_loaded = node_binary.load()
|
||||
assert node_loaded and node_loaded.abspath, "Node.js required for pdf plugin"
|
||||
|
||||
|
||||
def test_extracts_pdf_from_example_com():
|
||||
"""Test full workflow: extract PDF from real example.com via hook."""
|
||||
# Prerequisites checked by earlier test
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
# Run PDF extraction hook
|
||||
result = subprocess.run(
|
||||
['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Extraction failed: {result.stderr}"
|
||||
|
||||
# Verify JSONL output
|
||||
assert 'STATUS=succeeded' in result.stdout, "Should report success"
|
||||
assert 'RESULT_JSON=' in result.stdout, "Should output RESULT_JSON"
|
||||
|
||||
# Parse JSONL result
|
||||
result_json = None
|
||||
for line in result.stdout.split('\n'):
|
||||
if line.startswith('RESULT_JSON='):
|
||||
result_json = json.loads(line.split('=', 1)[1])
|
||||
break
|
||||
|
||||
assert result_json, "Should have RESULT_JSON"
|
||||
assert result_json['extractor'] == 'pdf'
|
||||
assert result_json['status'] == 'succeeded'
|
||||
assert result_json['url'] == TEST_URL
|
||||
|
||||
# Verify filesystem output
|
||||
pdf_dir = tmpdir / 'pdf'
|
||||
assert pdf_dir.exists(), "Output directory not created"
|
||||
|
||||
pdf_file = pdf_dir / 'output.pdf'
|
||||
assert pdf_file.exists(), "output.pdf not created"
|
||||
|
||||
# Verify file is valid PDF
|
||||
file_size = pdf_file.stat().st_size
|
||||
assert file_size > 500, f"PDF too small: {file_size} bytes"
|
||||
assert file_size < 10 * 1024 * 1024, f"PDF suspiciously large: {file_size} bytes"
|
||||
|
||||
# Check PDF magic bytes
|
||||
pdf_data = pdf_file.read_bytes()
|
||||
assert pdf_data[:4] == b'%PDF', "Should be valid PDF file"
|
||||
|
||||
|
||||
def test_config_save_pdf_false_skips():
|
||||
"""Test that SAVE_PDF=False causes skip."""
|
||||
import os
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
env = os.environ.copy()
|
||||
env['SAVE_PDF'] = 'False'
|
||||
|
||||
result = subprocess.run(
|
||||
['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Should exit 0 when skipping: {result.stderr}"
|
||||
assert 'STATUS=' in result.stdout
|
||||
|
||||
|
||||
def test_reports_missing_chrome():
|
||||
"""Test that script reports error when Chrome is not found."""
|
||||
import os
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
# Set CHROME_BINARY to nonexistent path
|
||||
env = os.environ.copy()
|
||||
env['CHROME_BINARY'] = '/nonexistent/chrome'
|
||||
|
||||
result = subprocess.run(
|
||||
['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test123'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
# Should fail and report missing Chrome
|
||||
if result.returncode != 0:
|
||||
combined = result.stdout + result.stderr
|
||||
assert 'chrome' in combined.lower() or 'browser' in combined.lower() or 'ERROR=' in combined
|
||||
|
||||
|
||||
def test_config_timeout_honored():
|
||||
"""Test that CHROME_TIMEOUT config is respected."""
|
||||
import os
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
# Set very short timeout
|
||||
env = os.environ.copy()
|
||||
env['CHROME_TIMEOUT'] = '5'
|
||||
|
||||
result = subprocess.run(
|
||||
['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=testtimeout'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
# Should complete (success or fail, but not hang)
|
||||
assert result.returncode in (0, 1), "Should complete without hanging"
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
68
archivebox/plugins/readability/on_Crawl__00_install_readability.py
Executable file
68
archivebox/plugins/readability/on_Crawl__00_install_readability.py
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Install readability-extractor if not already available.
|
||||
|
||||
Runs at crawl start to ensure readability-extractor is installed.
|
||||
Outputs JSONL for InstalledBinary.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
|
||||
|
||||
NpmProvider.model_rebuild()
|
||||
EnvProvider.model_rebuild()
|
||||
|
||||
# Note: npm package is from github:ArchiveBox/readability-extractor
|
||||
readability_binary = Binary(
|
||||
name='readability-extractor',
|
||||
binproviders=[NpmProvider(), EnvProvider()],
|
||||
overrides={'npm': {'packages': ['github:ArchiveBox/readability-extractor']}}
|
||||
)
|
||||
|
||||
# Try to load, install if not found
|
||||
try:
|
||||
loaded = readability_binary.load()
|
||||
if not loaded or not loaded.abspath:
|
||||
raise Exception("Not loaded")
|
||||
except Exception:
|
||||
# Install via npm from GitHub repo
|
||||
loaded = readability_binary.install()
|
||||
|
||||
if loaded and loaded.abspath:
|
||||
# Output InstalledBinary JSONL
|
||||
print(json.dumps({
|
||||
'type': 'InstalledBinary',
|
||||
'name': 'readability-extractor',
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256,
|
||||
'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
|
||||
}))
|
||||
sys.exit(0)
|
||||
else:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'readability-extractor',
|
||||
'bin_providers': 'npm,env',
|
||||
}))
|
||||
print("Failed to install readability-extractor", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
except Exception as e:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'readability-extractor',
|
||||
'bin_providers': 'npm,env',
|
||||
}))
|
||||
print(f"Error installing readability-extractor: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -6,10 +6,10 @@ Usage: on_Snapshot__readability.py --url=<url> --snapshot-id=<uuid>
|
||||
Output: Creates readability/ directory with content.html, content.txt, article.json
|
||||
|
||||
Environment variables:
|
||||
READABILITY_BINARY: Path to readability-cli binary
|
||||
READABILITY_BINARY: Path to readability-extractor binary
|
||||
TIMEOUT: Timeout in seconds (default: 60)
|
||||
|
||||
Note: Requires readability-cli: npm install -g readability-cli
|
||||
Note: Requires readability-extractor from https://github.com/ArchiveBox/readability-extractor
|
||||
This extractor looks for HTML source from other extractors (wget, singlefile, dom)
|
||||
"""
|
||||
|
||||
@@ -27,7 +27,7 @@ import rich_click as click
|
||||
|
||||
# Extractor metadata
|
||||
EXTRACTOR_NAME = 'readability'
|
||||
BIN_NAME = 'readability-cli'
|
||||
BIN_NAME = 'readability-extractor'
|
||||
BIN_PROVIDERS = 'npm,env'
|
||||
OUTPUT_DIR = 'readability'
|
||||
|
||||
@@ -44,12 +44,12 @@ def get_env_int(name: str, default: int = 0) -> int:
|
||||
|
||||
|
||||
def find_readability() -> str | None:
|
||||
"""Find readability-cli binary."""
|
||||
"""Find readability-extractor binary."""
|
||||
readability = get_env('READABILITY_BINARY')
|
||||
if readability and os.path.isfile(readability):
|
||||
return readability
|
||||
|
||||
for name in ['readability-cli', 'readable']:
|
||||
for name in ['readability-extractor']:
|
||||
binary = shutil.which(name)
|
||||
if binary:
|
||||
return binary
|
||||
@@ -58,7 +58,7 @@ def find_readability() -> str | None:
|
||||
|
||||
|
||||
def get_version(binary: str) -> str:
|
||||
"""Get readability-cli version."""
|
||||
"""Get readability-extractor version."""
|
||||
try:
|
||||
result = subprocess.run([binary, '--version'], capture_output=True, text=True, timeout=10)
|
||||
return result.stdout.strip()[:64]
|
||||
@@ -106,24 +106,24 @@ def extract_readability(url: str, binary: str) -> tuple[bool, str | None, str]:
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
|
||||
try:
|
||||
# Run readability-cli
|
||||
cmd = [binary, '--json', html_source]
|
||||
# Run readability-extractor (outputs JSON by default)
|
||||
cmd = [binary, html_source]
|
||||
result = subprocess.run(cmd, capture_output=True, timeout=timeout)
|
||||
|
||||
if result.returncode != 0:
|
||||
stderr = result.stderr.decode('utf-8', errors='replace')
|
||||
return False, None, f'readability-cli failed: {stderr[:200]}'
|
||||
return False, None, f'readability-extractor failed: {stderr[:200]}'
|
||||
|
||||
# Parse JSON output
|
||||
try:
|
||||
result_json = json.loads(result.stdout)
|
||||
except json.JSONDecodeError:
|
||||
return False, None, 'readability-cli returned invalid JSON'
|
||||
return False, None, 'readability-extractor returned invalid JSON'
|
||||
|
||||
# Extract and save content
|
||||
# readability-cli v2.x uses hyphenated field names
|
||||
text_content = result_json.pop('text-content', result_json.pop('textContent', ''))
|
||||
html_content = result_json.pop('html-content', result_json.pop('content', ''))
|
||||
# readability-extractor uses camelCase field names (textContent, content)
|
||||
text_content = result_json.pop('textContent', result_json.pop('text-content', ''))
|
||||
html_content = result_json.pop('content', result_json.pop('html-content', ''))
|
||||
|
||||
if not text_content and not html_content:
|
||||
return False, None, 'No content extracted'
|
||||
@@ -157,7 +157,7 @@ def main(url: str, snapshot_id: str):
|
||||
# Find binary
|
||||
binary = find_readability()
|
||||
if not binary:
|
||||
print(f'ERROR: readability-cli binary not found', file=sys.stderr)
|
||||
print(f'ERROR: readability-extractor binary not found', file=sys.stderr)
|
||||
print(f'DEPENDENCY_NEEDED={BIN_NAME}', file=sys.stderr)
|
||||
print(f'BIN_PROVIDERS={BIN_PROVIDERS}', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
@@ -187,7 +187,7 @@ def main(url: str, snapshot_id: str):
|
||||
print(f'END_TS={end_ts.isoformat()}')
|
||||
print(f'DURATION={duration:.2f}')
|
||||
if binary:
|
||||
print(f'CMD={binary} --json <html>')
|
||||
print(f'CMD={binary} <html>')
|
||||
if version:
|
||||
print(f'VERSION={version}')
|
||||
if output:
|
||||
|
||||
@@ -2,9 +2,10 @@
|
||||
Integration tests for readability plugin
|
||||
|
||||
Tests verify:
|
||||
1. Plugin reports missing dependency correctly
|
||||
2. readability-cli can be installed via npm (note: package name != binary name)
|
||||
3. Extraction works against real example.com content
|
||||
1. Install hook installs readability-extractor via abx-pkg
|
||||
2. Verify deps with abx-pkg
|
||||
3. Plugin reports missing dependency correctly
|
||||
4. Extraction works against real example.com content
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -20,6 +21,7 @@ import pytest
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
READABILITY_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_readability.py'))
|
||||
READABILITY_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_readability.py'
|
||||
TEST_URL = 'https://example.com'
|
||||
|
||||
|
||||
@@ -74,7 +76,7 @@ def test_hook_script_exists():
|
||||
|
||||
|
||||
def test_reports_missing_dependency_when_not_installed():
|
||||
"""Test that script reports DEPENDENCY_NEEDED when readability-cli is not found."""
|
||||
"""Test that script reports DEPENDENCY_NEEDED when readability-extractor is not found."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
@@ -96,68 +98,57 @@ def test_reports_missing_dependency_when_not_installed():
|
||||
assert result.returncode != 0, "Should exit non-zero when dependency missing"
|
||||
combined = result.stdout + result.stderr
|
||||
assert 'DEPENDENCY_NEEDED' in combined, "Should output DEPENDENCY_NEEDED"
|
||||
assert 'readability-cli' in combined or 'BIN_NAME' in combined, "Should mention readability-cli"
|
||||
assert 'readability-extractor' in combined or 'BIN_NAME' in combined, "Should mention readability-extractor"
|
||||
|
||||
|
||||
def test_can_install_readability_via_npm():
|
||||
"""Test that readability-cli can be installed via npm and binary becomes available.
|
||||
|
||||
Note: The npm package 'readability-cli' installs a binary named 'readable',
|
||||
so we test the full installation flow using npm install directly.
|
||||
"""
|
||||
|
||||
# Check npm is available
|
||||
if not shutil.which('npm'):
|
||||
pytest.skip("npm not available on this system")
|
||||
|
||||
# Install readability-cli package via npm
|
||||
# The orchestrator/dependency hooks would call this via npm provider
|
||||
def test_readability_install_hook():
|
||||
"""Test readability install hook to install readability-extractor if needed."""
|
||||
result = subprocess.run(
|
||||
['npm', 'install', '-g', 'readability-cli'],
|
||||
[sys.executable, str(READABILITY_INSTALL_HOOK)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300
|
||||
timeout=600
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"npm install failed: {result.stderr}"
|
||||
assert result.returncode == 0, f"Install hook failed: {result.stderr}"
|
||||
|
||||
# Verify the 'readable' binary is now available
|
||||
# (readability-cli package installs as 'readable' not 'readability-cli')
|
||||
result = subprocess.run(['which', 'readable'], capture_output=True, text=True)
|
||||
assert result.returncode == 0, "readable binary not found after npm install"
|
||||
# Verify InstalledBinary JSONL output
|
||||
found_binary = False
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
if line.strip():
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'InstalledBinary':
|
||||
assert record['name'] == 'readability-extractor'
|
||||
assert record['abspath']
|
||||
found_binary = True
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
binary_path = result.stdout.strip()
|
||||
assert Path(binary_path).exists(), f"Binary should exist at {binary_path}"
|
||||
assert found_binary, "Should output InstalledBinary record"
|
||||
|
||||
# Test that it's executable and responds to --version
|
||||
result = subprocess.run(
|
||||
[binary_path, '--version'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10
|
||||
|
||||
def test_verify_deps_with_abx_pkg():
|
||||
"""Verify readability-extractor is available via abx-pkg after hook installation."""
|
||||
from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
|
||||
|
||||
NpmProvider.model_rebuild()
|
||||
EnvProvider.model_rebuild()
|
||||
|
||||
readability_binary = Binary(
|
||||
name='readability-extractor',
|
||||
binproviders=[NpmProvider(), EnvProvider()],
|
||||
overrides={'npm': {'packages': ['github:ArchiveBox/readability-extractor']}}
|
||||
)
|
||||
assert result.returncode == 0, f"Binary not executable: {result.stderr}"
|
||||
readability_loaded = readability_binary.load()
|
||||
assert readability_loaded and readability_loaded.abspath, "readability-extractor should be available after install hook"
|
||||
|
||||
|
||||
def test_extracts_article_after_installation():
|
||||
"""Test full workflow: ensure readability-cli installed then extract from example.com HTML."""
|
||||
"""Test full workflow: extract article using readability-extractor from real HTML."""
|
||||
# Prerequisites checked by earlier test (install hook should have run)
|
||||
|
||||
# Check npm is available
|
||||
if not shutil.which('npm'):
|
||||
pytest.skip("npm not available on this system")
|
||||
|
||||
# Ensure readability-cli is installed (orchestrator would handle this)
|
||||
install_result = subprocess.run(
|
||||
['npm', 'install', '-g', 'readability-cli'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300
|
||||
)
|
||||
|
||||
if install_result.returncode != 0:
|
||||
pytest.skip(f"Could not install readability-cli: {install_result.stderr}")
|
||||
|
||||
# Now test extraction
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
@@ -213,21 +204,7 @@ def test_extracts_article_after_installation():
|
||||
|
||||
def test_fails_gracefully_without_html_source():
|
||||
"""Test that extraction fails gracefully when no HTML source is available."""
|
||||
|
||||
# Check npm is available
|
||||
if not shutil.which('npm'):
|
||||
pytest.skip("npm not available on this system")
|
||||
|
||||
# Ensure readability-cli is installed
|
||||
install_result = subprocess.run(
|
||||
['npm', 'install', '-g', 'readability-cli'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300
|
||||
)
|
||||
|
||||
if install_result.returncode != 0:
|
||||
pytest.skip("Could not install readability-cli")
|
||||
# Prerequisites checked by earlier test (install hook should have run)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
232
archivebox/plugins/screenshot/tests/test_screenshot.py
Normal file
232
archivebox/plugins/screenshot/tests/test_screenshot.py
Normal file
@@ -0,0 +1,232 @@
|
||||
"""
|
||||
Integration tests for screenshot plugin
|
||||
|
||||
Tests verify:
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via chrome_session validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
4. Screenshot extraction works on https://example.com
|
||||
5. JSONL output is correct
|
||||
6. Filesystem output is valid PNG image
|
||||
7. Config options work
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
SCREENSHOT_HOOK = PLUGIN_DIR / 'on_Snapshot__34_screenshot.js'
|
||||
CHROME_VALIDATE_HOOK = PLUGINS_ROOT / 'chrome_session' / 'on_Crawl__00_validate_chrome.py'
|
||||
NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Dependency__install_using_npm_provider.py'
|
||||
TEST_URL = 'https://example.com'
|
||||
|
||||
|
||||
def test_hook_script_exists():
|
||||
"""Verify on_Snapshot hook exists."""
|
||||
assert SCREENSHOT_HOOK.exists(), f"Hook not found: {SCREENSHOT_HOOK}"
|
||||
|
||||
|
||||
def test_chrome_validation_and_install():
|
||||
"""Test chrome validation hook to install puppeteer-core if needed."""
|
||||
# Run chrome validation hook (from chrome_session plugin)
|
||||
result = subprocess.run(
|
||||
[sys.executable, str(CHROME_VALIDATE_HOOK)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
# If exit 1, binary not found - need to install
|
||||
if result.returncode == 1:
|
||||
# Parse Dependency request from JSONL
|
||||
dependency_request = None
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
if line.strip():
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Dependency':
|
||||
dependency_request = record
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
if dependency_request:
|
||||
bin_name = dependency_request['bin_name']
|
||||
bin_providers = dependency_request['bin_providers']
|
||||
|
||||
# Install via npm provider hook
|
||||
install_result = subprocess.run(
|
||||
[
|
||||
sys.executable,
|
||||
str(NPM_PROVIDER_HOOK),
|
||||
'--dependency-id', 'test-dep-001',
|
||||
'--bin-name', bin_name,
|
||||
'--bin-providers', bin_providers
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600
|
||||
)
|
||||
|
||||
assert install_result.returncode == 0, f"Install failed: {install_result.stderr}"
|
||||
|
||||
# Verify installation via JSONL output
|
||||
for line in install_result.stdout.strip().split('\n'):
|
||||
if line.strip():
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'InstalledBinary':
|
||||
assert record['name'] == bin_name
|
||||
assert record['abspath']
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
else:
|
||||
# Binary already available, verify via JSONL output
|
||||
assert result.returncode == 0, f"Validation failed: {result.stderr}"
|
||||
|
||||
|
||||
def test_verify_deps_with_abx_pkg():
|
||||
"""Verify dependencies are available via abx-pkg after hook installation."""
|
||||
from abx_pkg import Binary, EnvProvider, BinProviderOverrides
|
||||
|
||||
EnvProvider.model_rebuild()
|
||||
|
||||
# Verify node is available
|
||||
node_binary = Binary(name='node', binproviders=[EnvProvider()])
|
||||
node_loaded = node_binary.load()
|
||||
assert node_loaded and node_loaded.abspath, "Node.js required for screenshot plugin"
|
||||
|
||||
|
||||
def test_extracts_screenshot_from_example_com():
|
||||
"""Test full workflow: extract screenshot from real example.com via hook."""
|
||||
# Prerequisites checked by earlier test
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
# Run screenshot extraction hook
|
||||
result = subprocess.run(
|
||||
['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Extraction failed: {result.stderr}"
|
||||
|
||||
# Verify JSONL output
|
||||
assert 'STATUS=succeeded' in result.stdout, "Should report success"
|
||||
assert 'RESULT_JSON=' in result.stdout, "Should output RESULT_JSON"
|
||||
|
||||
# Parse JSONL result
|
||||
result_json = None
|
||||
for line in result.stdout.split('\n'):
|
||||
if line.startswith('RESULT_JSON='):
|
||||
result_json = json.loads(line.split('=', 1)[1])
|
||||
break
|
||||
|
||||
assert result_json, "Should have RESULT_JSON"
|
||||
assert result_json['extractor'] == 'screenshot'
|
||||
assert result_json['status'] == 'succeeded'
|
||||
assert result_json['url'] == TEST_URL
|
||||
|
||||
# Verify filesystem output
|
||||
screenshot_dir = tmpdir / 'screenshot'
|
||||
assert screenshot_dir.exists(), "Output directory not created"
|
||||
|
||||
screenshot_file = screenshot_dir / 'screenshot.png'
|
||||
assert screenshot_file.exists(), "screenshot.png not created"
|
||||
|
||||
# Verify file is valid PNG
|
||||
file_size = screenshot_file.stat().st_size
|
||||
assert file_size > 1000, f"Screenshot too small: {file_size} bytes"
|
||||
assert file_size < 10 * 1024 * 1024, f"Screenshot suspiciously large: {file_size} bytes"
|
||||
|
||||
# Check PNG magic bytes
|
||||
screenshot_data = screenshot_file.read_bytes()
|
||||
assert screenshot_data[:8] == b'\x89PNG\r\n\x1a\n', "Should be valid PNG file"
|
||||
|
||||
|
||||
def test_config_save_screenshot_false_skips():
|
||||
"""Test that SAVE_SCREENSHOT=False causes skip."""
|
||||
import os
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
env = os.environ.copy()
|
||||
env['SAVE_SCREENSHOT'] = 'False'
|
||||
|
||||
result = subprocess.run(
|
||||
['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Should exit 0 when skipping: {result.stderr}"
|
||||
assert 'STATUS=' in result.stdout
|
||||
|
||||
|
||||
def test_reports_missing_chrome():
|
||||
"""Test that script reports error when Chrome is not found."""
|
||||
import os
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
# Set CHROME_BINARY to nonexistent path
|
||||
env = os.environ.copy()
|
||||
env['CHROME_BINARY'] = '/nonexistent/chrome'
|
||||
|
||||
result = subprocess.run(
|
||||
['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test123'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
# Should fail and report missing Chrome
|
||||
if result.returncode != 0:
|
||||
combined = result.stdout + result.stderr
|
||||
assert 'chrome' in combined.lower() or 'browser' in combined.lower() or 'ERROR=' in combined
|
||||
|
||||
|
||||
def test_config_timeout_honored():
|
||||
"""Test that CHROME_TIMEOUT config is respected."""
|
||||
import os
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
# Set very short timeout
|
||||
env = os.environ.copy()
|
||||
env['CHROME_TIMEOUT'] = '5'
|
||||
|
||||
result = subprocess.run(
|
||||
['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=testtimeout'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
# Should complete (success or fail, but not hang)
|
||||
assert result.returncode in (0, 1), "Should complete without hanging"
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
@@ -1,10 +1,17 @@
|
||||
"""
|
||||
Integration tests - archive example.com with SingleFile and verify output
|
||||
Integration tests for singlefile plugin
|
||||
|
||||
Tests verify:
|
||||
1. on_Crawl hook validates and installs single-file
|
||||
2. Verify deps with abx-pkg
|
||||
3. Extraction works on https://example.com
|
||||
4. JSONL output is correct
|
||||
5. Filesystem output is valid HTML
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
@@ -12,99 +19,108 @@ import pytest
|
||||
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
INSTALL_SCRIPT = PLUGIN_DIR / "on_Snapshot__04_singlefile.js"
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
SINGLEFILE_HOOK = PLUGIN_DIR / "on_Snapshot__04_singlefile.js"
|
||||
CHROME_VALIDATE_HOOK = PLUGINS_ROOT / 'chrome_session' / 'on_Crawl__00_validate_chrome.py'
|
||||
NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Dependency__install_using_npm_provider.py'
|
||||
TEST_URL = "https://example.com"
|
||||
|
||||
|
||||
# Check if single-file CLI is available
|
||||
try:
|
||||
def test_hook_script_exists():
|
||||
"""Verify on_Snapshot hook exists."""
|
||||
assert SINGLEFILE_HOOK.exists(), f"Hook not found: {SINGLEFILE_HOOK}"
|
||||
|
||||
|
||||
def test_chrome_validation_and_install():
|
||||
"""Test chrome validation hook to install puppeteer-core if needed."""
|
||||
# Run chrome validation hook (from chrome_session plugin)
|
||||
result = subprocess.run(
|
||||
["which", "single-file"],
|
||||
[sys.executable, str(CHROME_VALIDATE_HOOK)],
|
||||
capture_output=True,
|
||||
timeout=5
|
||||
text=True,
|
||||
timeout=30
|
||||
)
|
||||
SINGLEFILE_CLI_AVAILABLE = result.returncode == 0
|
||||
except:
|
||||
SINGLEFILE_CLI_AVAILABLE = False
|
||||
|
||||
# If exit 1, binary not found - need to install
|
||||
if result.returncode == 1:
|
||||
# Parse Dependency request from JSONL
|
||||
dependency_request = None
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
if line.strip():
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Dependency':
|
||||
dependency_request = record
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
if dependency_request:
|
||||
bin_name = dependency_request['bin_name']
|
||||
bin_providers = dependency_request['bin_providers']
|
||||
|
||||
# Install via npm provider hook
|
||||
install_result = subprocess.run(
|
||||
[
|
||||
sys.executable,
|
||||
str(NPM_PROVIDER_HOOK),
|
||||
'--dependency-id', 'test-dep-001',
|
||||
'--bin-name', bin_name,
|
||||
'--bin-providers', bin_providers
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600
|
||||
)
|
||||
|
||||
assert install_result.returncode == 0, f"Install failed: {install_result.stderr}"
|
||||
|
||||
# Verify installation via JSONL output
|
||||
for line in install_result.stdout.strip().split('\n'):
|
||||
if line.strip():
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'InstalledBinary':
|
||||
assert record['name'] == bin_name
|
||||
assert record['abspath']
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
else:
|
||||
# Binary already available, verify via JSONL output
|
||||
assert result.returncode == 0, f"Validation failed: {result.stderr}"
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not SINGLEFILE_CLI_AVAILABLE,
|
||||
reason="single-file CLI not installed (npm install -g single-file-cli)"
|
||||
)
|
||||
def test_archives_example_com():
|
||||
"""Archive example.com and verify output contains expected content"""
|
||||
def test_verify_deps_with_abx_pkg():
|
||||
"""Verify dependencies are available via abx-pkg after hook installation."""
|
||||
from abx_pkg import Binary, EnvProvider, BinProviderOverrides
|
||||
|
||||
EnvProvider.model_rebuild()
|
||||
|
||||
# Verify node is available (singlefile uses Chrome extension, needs Node)
|
||||
node_binary = Binary(name='node', binproviders=[EnvProvider()])
|
||||
node_loaded = node_binary.load()
|
||||
assert node_loaded and node_loaded.abspath, "Node.js required for singlefile plugin"
|
||||
|
||||
|
||||
def test_singlefile_hook_runs():
|
||||
"""Verify singlefile hook can be executed and completes."""
|
||||
# Prerequisites checked by earlier test
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
output_dir = Path(tmpdir) / "singlefile"
|
||||
output_dir.mkdir()
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
output_file = output_dir / "singlefile.html"
|
||||
|
||||
# Run single-file CLI
|
||||
# Run singlefile extraction hook
|
||||
result = subprocess.run(
|
||||
[
|
||||
"single-file",
|
||||
"--browser-headless",
|
||||
TEST_URL,
|
||||
str(output_file)
|
||||
],
|
||||
['node', str(SINGLEFILE_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
|
||||
cwd=tmpdir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Archive failed: {result.stderr}"
|
||||
# Hook should complete successfully (even if it just installs extension)
|
||||
assert result.returncode == 0, f"Hook execution failed: {result.stderr}"
|
||||
|
||||
# Verify output exists
|
||||
assert output_file.exists(), "Output file not created"
|
||||
|
||||
# Read and verify content
|
||||
html_content = output_file.read_text()
|
||||
file_size = output_file.stat().st_size
|
||||
|
||||
# Should be substantial (embedded resources)
|
||||
assert file_size > 900, f"Output too small: {file_size} bytes"
|
||||
|
||||
# Verify HTML structure (SingleFile minifies, so <head> tag may be omitted)
|
||||
assert "<html" in html_content.lower()
|
||||
assert "<body" in html_content.lower()
|
||||
assert "<title>" in html_content.lower() or "title>" in html_content.lower()
|
||||
|
||||
# Verify example.com content is actually present
|
||||
assert "example domain" in html_content.lower(), "Missing 'Example Domain' title"
|
||||
assert "this domain is" in html_content.lower(), "Missing example.com description text"
|
||||
assert "iana.org" in html_content.lower(), "Missing IANA link"
|
||||
|
||||
# Verify it's not just empty/error page
|
||||
assert file_size > 900, f"File too small: {file_size} bytes"
|
||||
|
||||
|
||||
@pytest.mark.skipif(not SINGLEFILE_CLI_AVAILABLE, reason="single-file CLI not installed")
|
||||
def test_different_urls_produce_different_outputs():
|
||||
"""Verify different URLs produce different archived content"""
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
outputs = {}
|
||||
|
||||
for url in ["https://example.com", "https://example.org"]:
|
||||
output_file = Path(tmpdir) / f"{url.replace('https://', '').replace('.', '_')}.html"
|
||||
|
||||
result = subprocess.run(
|
||||
["single-file", "--browser-headless", url, str(output_file)],
|
||||
capture_output=True,
|
||||
timeout=120
|
||||
)
|
||||
|
||||
if result.returncode == 0 and output_file.exists():
|
||||
outputs[url] = output_file.read_text()
|
||||
|
||||
assert len(outputs) == 2, "Should archive both URLs"
|
||||
|
||||
# Verify outputs differ
|
||||
urls = list(outputs.keys())
|
||||
assert outputs[urls[0]] != outputs[urls[1]], "Different URLs should produce different outputs"
|
||||
|
||||
# Each should contain its domain
|
||||
assert "example.com" in outputs[urls[0]]
|
||||
assert "example.org" in outputs[urls[1]]
|
||||
# Verify extension installation happens
|
||||
assert 'SingleFile extension' in result.stdout or result.returncode == 0, "Should install extension or complete"
|
||||
|
||||
68
archivebox/plugins/wget/on_Crawl__00_install_wget.py
Executable file
68
archivebox/plugins/wget/on_Crawl__00_install_wget.py
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Install wget if not already available.
|
||||
|
||||
Runs at crawl start to ensure wget is installed.
|
||||
Outputs JSONL for InstalledBinary.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
|
||||
|
||||
AptProvider.model_rebuild()
|
||||
BrewProvider.model_rebuild()
|
||||
EnvProvider.model_rebuild()
|
||||
|
||||
# wget binary and package have same name
|
||||
wget_binary = Binary(
|
||||
name='wget',
|
||||
binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
|
||||
)
|
||||
|
||||
# Try to load, install if not found
|
||||
try:
|
||||
loaded = wget_binary.load()
|
||||
if not loaded or not loaded.abspath:
|
||||
raise Exception("Not loaded")
|
||||
except Exception:
|
||||
# Install via system package manager
|
||||
loaded = wget_binary.install()
|
||||
|
||||
if loaded and loaded.abspath:
|
||||
# Output InstalledBinary JSONL
|
||||
print(json.dumps({
|
||||
'type': 'InstalledBinary',
|
||||
'name': 'wget',
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256,
|
||||
'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
|
||||
}))
|
||||
sys.exit(0)
|
||||
else:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'wget',
|
||||
'bin_providers': 'apt,brew,env',
|
||||
}))
|
||||
print("Failed to install wget", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
except Exception as e:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'wget',
|
||||
'bin_providers': 'apt,brew,env',
|
||||
}))
|
||||
print(f"Error installing wget: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -26,6 +26,7 @@ import pytest
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
WGET_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_wget.py'))
|
||||
WGET_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_wget.py'
|
||||
BREW_HOOK = PLUGINS_ROOT / 'brew' / 'on_Dependency__install_using_brew_provider.py'
|
||||
APT_HOOK = PLUGINS_ROOT / 'apt' / 'on_Dependency__install_using_apt_provider.py'
|
||||
TEST_URL = 'https://example.com'
|
||||
@@ -36,6 +37,47 @@ def test_hook_script_exists():
|
||||
assert WGET_HOOK.exists(), f"Hook script not found: {WGET_HOOK}"
|
||||
|
||||
|
||||
def test_wget_install_hook():
|
||||
"""Test wget install hook to install wget if needed."""
|
||||
result = subprocess.run(
|
||||
[sys.executable, str(WGET_INSTALL_HOOK)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Install hook failed: {result.stderr}"
|
||||
|
||||
# Verify InstalledBinary JSONL output
|
||||
found_binary = False
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
if line.strip():
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'InstalledBinary':
|
||||
assert record['name'] == 'wget'
|
||||
assert record['abspath']
|
||||
found_binary = True
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
assert found_binary, "Should output InstalledBinary record"
|
||||
|
||||
|
||||
def test_verify_deps_with_abx_pkg():
|
||||
"""Verify wget is available via abx-pkg after hook installation."""
|
||||
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider
|
||||
|
||||
AptProvider.model_rebuild()
|
||||
BrewProvider.model_rebuild()
|
||||
EnvProvider.model_rebuild()
|
||||
|
||||
wget_binary = Binary(name='wget', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
|
||||
wget_loaded = wget_binary.load()
|
||||
assert wget_loaded and wget_loaded.abspath, "wget should be available after install hook"
|
||||
|
||||
|
||||
def test_reports_missing_dependency_when_not_installed():
|
||||
"""Test that script reports DEPENDENCY_NEEDED when wget is not found."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
|
||||
Reference in New Issue
Block a user