mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-03 17:35:45 +10:00
add papersdl plugin
This commit is contained in:
129
archivebox/plugins/papersdl/on_Crawl__00_validate_papersdl.py
Executable file
129
archivebox/plugins/papersdl/on_Crawl__00_validate_papersdl.py
Executable file
@@ -0,0 +1,129 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for papers-dl.
|
||||
|
||||
Runs at crawl start to verify papers-dl binary is available.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import shutil
|
||||
import hashlib
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def get_binary_version(abspath: str, version_flag: str = '--version') -> str | None:
|
||||
"""Get version string from binary."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[abspath, version_flag],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if result.returncode == 0 and result.stdout:
|
||||
first_line = result.stdout.strip().split('\n')[0]
|
||||
return first_line[:64]
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def get_binary_hash(abspath: str) -> str | None:
|
||||
"""Get SHA256 hash of binary."""
|
||||
try:
|
||||
with open(abspath, 'rb') as f:
|
||||
return hashlib.sha256(f.read()).hexdigest()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def find_papersdl() -> dict | None:
    """Locate the papers-dl binary and describe it.

    Resolution order:
      1. ``abx_pkg`` (pip and env providers), when that package is
         importable and loading the binary succeeds.
      2. The ``PAPERSDL_BINARY`` environment variable, so an explicit
         override wins over whatever happens to be on PATH (this matches
         the lookup order used by the Snapshot hook).
      3. ``papers-dl`` found on PATH via ``shutil.which``.

    Returns:
        A dict with the keys ``name``, ``abspath``, ``version``,
        ``sha256`` and ``binprovider``, or None when no binary is found.
    """
    try:
        from abx_pkg import Binary, PipProvider, EnvProvider

        class PapersdlBinary(Binary):
            name: str = 'papers-dl'
            binproviders_supported = [PipProvider(), EnvProvider()]

        binary = PapersdlBinary()
        loaded = binary.load()
        if loaded and loaded.abspath:
            return {
                'name': 'papers-dl',
                'abspath': str(loaded.abspath),
                'version': str(loaded.version) if loaded.version else None,
                'sha256': getattr(loaded, 'sha256', None),
                'binprovider': loaded.binprovider.name if loaded.binprovider else 'env',
            }
    except Exception:
        # abx_pkg missing (ImportError) or failing to load the binary:
        # fall through to the plain environment/PATH lookup below.
        pass

    candidates = []
    override = os.environ.get('PAPERSDL_BINARY', '').strip()
    if override:
        # Accept either a path or a bare command name for the override.
        candidates.append(shutil.which(override) or override)
    candidates.append(shutil.which('papers-dl'))

    for abspath in candidates:
        if abspath and Path(abspath).is_file():
            return {
                'name': 'papers-dl',
                'abspath': abspath,
                'version': get_binary_version(abspath),
                'sha256': get_binary_hash(abspath),
                'binprovider': 'env',
            }

    return None
|
||||
|
||||
|
||||
def main():
    """Emit JSONL records describing papers-dl availability, then exit.

    When the binary is found, prints an ``InstalledBinary`` record and
    ``Machine`` config updates to stdout and exits 0. Otherwise prints a
    ``Dependency`` record, reports the missing dependency on stderr and
    exits 1.
    """
    # papers-dl is the only (required) dependency checked by this hook.
    found = find_papersdl()
    missing_deps = []
    records = []

    if found and found.get('abspath'):
        records.append({
            'type': 'InstalledBinary',
            'name': found['name'],
            'abspath': found['abspath'],
            'version': found['version'],
            'sha256': found['sha256'],
            'binprovider': found['binprovider'],
        })
        records.append({
            'type': 'Machine',
            '_method': 'update',
            'key': 'config/PAPERSDL_BINARY',
            'value': found['abspath'],
        })
        # The version config entry is only written when a version is known.
        if found['version']:
            records.append({
                'type': 'Machine',
                '_method': 'update',
                'key': 'config/PAPERSDL_VERSION',
                'value': found['version'],
            })
    else:
        records.append({
            'type': 'Dependency',
            'bin_name': 'papers-dl',
            'bin_providers': 'pip,env',
        })
        missing_deps.append('papers-dl')

    # One JSON object per line on stdout.
    for record in records:
        print(json.dumps(record))

    if not missing_deps:
        sys.exit(0)

    print(f"Missing dependencies: {', '.join(missing_deps)}", file=sys.stderr)
    sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
232
archivebox/plugins/papersdl/on_Snapshot__54_papersdl.py
Executable file
232
archivebox/plugins/papersdl/on_Snapshot__54_papersdl.py
Executable file
@@ -0,0 +1,232 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Download scientific papers from a URL using papers-dl.
|
||||
|
||||
Usage: on_Snapshot__54_papersdl.py --url=<url> --snapshot-id=<uuid>
|
||||
Output: Downloads paper PDFs to $PWD/
|
||||
|
||||
Environment variables:
|
||||
PAPERSDL_BINARY: Path to papers-dl binary
|
||||
PAPERSDL_TIMEOUT: Timeout in seconds (default: 300 for paper downloads)
|
||||
PAPERSDL_EXTRA_ARGS: Extra arguments for papers-dl (space-separated)
|
||||
|
||||
# papers-dl feature toggles
|
||||
SAVE_PAPERSDL: Enable papers-dl paper extraction (default: True)
|
||||
|
||||
# Fallback to ARCHIVING_CONFIG values if PAPERSDL_* not set:
|
||||
TIMEOUT: Fallback timeout
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import rich_click as click
|
||||
|
||||
|
||||
# Extractor metadata
EXTRACTOR_NAME = 'papersdl'   # value of the "extractor" field in RESULT_JSON
BIN_NAME = 'papers-dl'        # binary name reported when the dependency is missing
BIN_PROVIDERS = 'pip,env'     # providers reported alongside a missing dependency
OUTPUT_DIR = '.'              # downloads are written relative to the working directory
|
||||
|
||||
|
||||
def get_env(name: str, default: str = '') -> str:
    """Return environment variable ``name`` (or ``default``) with surrounding whitespace removed."""
    raw_value = os.environ.get(name, default)
    return raw_value.strip()
|
||||
|
||||
|
||||
def get_env_bool(name: str, default: bool = False) -> bool:
    """Interpret environment variable ``name`` as a boolean.

    Matching is case-insensitive. ``true/1/yes/on`` give True,
    ``false/0/no/off`` give False, and anything else (including an unset
    variable) gives ``default``.
    """
    truthy = ('true', '1', 'yes', 'on')
    falsy = ('false', '0', 'no', 'off')

    text = get_env(name, '').lower()
    if text in truthy:
        return True
    return False if text in falsy else default
|
||||
|
||||
|
||||
def get_env_int(name: str, default: int = 0) -> int:
    """Read environment variable ``name`` as an int, returning ``default`` when it is unset or not a valid integer."""
    text = get_env(name, str(default))
    try:
        parsed = int(text)
    except ValueError:
        parsed = default
    return parsed
|
||||
|
||||
|
||||
def find_papersdl() -> str | None:
    """Return the path of the papers-dl binary, or None if it cannot be found.

    ``PAPERSDL_BINARY`` is used when it points at an existing file;
    otherwise ``papers-dl`` is looked up on PATH.
    """
    configured = get_env('PAPERSDL_BINARY')
    if configured and os.path.isfile(configured):
        return configured

    # shutil.which() yields None when nothing is found on PATH.
    return shutil.which('papers-dl') or None
|
||||
|
||||
|
||||
def get_version(binary: str) -> str:
    """Return the first line of ``binary --version``, or '' on failure.

    Only the first line of stdout is used (stripped, at most 64
    characters): the value is later printed on a single ``VERSION=...``
    line, so multi-line output must not leak into it. This also matches
    how the validate hook extracts the version.

    Args:
        binary: Path of the papers-dl executable.

    Returns:
        The version line, or an empty string when the command cannot be
        run, times out (10 seconds), or prints nothing on stdout.
    """
    try:
        result = subprocess.run([binary, '--version'], capture_output=True, text=True, timeout=10)
    except (OSError, ValueError, subprocess.SubprocessError):
        return ''

    lines = result.stdout.strip().splitlines()
    return lines[0].strip()[:64] if lines else ''
|
||||
|
||||
|
||||
def extract_doi_from_url(url: str) -> str | None:
|
||||
"""Extract DOI from common paper URLs."""
|
||||
# Match DOI pattern in URL
|
||||
doi_pattern = r'10\.\d{4,}/[^\s]+'
|
||||
match = re.search(doi_pattern, url)
|
||||
if match:
|
||||
return match.group(0)
|
||||
return None
|
||||
|
||||
|
||||
def save_paper(url: str, binary: str) -> tuple[bool, str | None, str]:
    """Download a paper with papers-dl into the current output directory.

    Runs ``<binary> fetch <identifier> -o <output_dir>`` where the
    identifier is the DOI extracted from ``url`` when one is present, and
    the URL itself otherwise.

    Configuration is read from the environment:
      * ``PAPERSDL_TIMEOUT`` (falling back to ``TIMEOUT``, default 300):
        subprocess timeout in seconds.
      * ``PAPERSDL_EXTRA_ARGS``: extra command-line arguments, parsed
        with shell-style quoting so values containing spaces can be
        quoted.

    Args:
        url: URL of the page/paper being archived.
        binary: Path of the papers-dl executable.

    Returns:
        ``(success, output_path, error_message)``. ``success`` is True
        with ``output_path`` None when papers-dl ran but there was simply
        no paper to download.
    """
    import shlex

    # Get config from env
    timeout = get_env_int('PAPERSDL_TIMEOUT') or get_env_int('TIMEOUT', 300)
    extra_args = get_env('PAPERSDL_EXTRA_ARGS', '')

    # Output directory is current directory (hook already runs in output dir)
    output_dir = Path(OUTPUT_DIR)

    # Prefer the DOI; if none is found, papers-dl might handle the URL directly
    identifier = extract_doi_from_url(url) or url

    # Build command - papers-dl fetch <identifier> -o <output_dir>
    cmd = [binary, 'fetch', identifier, '-o', str(output_dir)]

    try:
        if extra_args:
            # shlex keeps quoted arguments together; malformed quoting
            # raises ValueError, reported through the handler below.
            cmd.extend(shlex.split(extra_args))

        result = subprocess.run(cmd, capture_output=True, timeout=timeout, text=True)

        # Check if any PDF files were downloaded; sort so the reported
        # file is deterministic when several exist.
        pdf_files = sorted(output_dir.glob('*.pdf'))
        if pdf_files:
            return True, str(pdf_files[0]), ''

        stderr = result.stderr
        stdout = result.stdout
        combined_lower = f'{stderr}\n{stdout}'.lower()

        # These are NOT errors - page simply has no downloadable paper
        # NOTE(review): a "404 Not Found" message also contains
        # "not found" and is therefore treated as success here - confirm
        # this is intended.
        if 'not found' in combined_lower or 'no results' in combined_lower:
            return True, None, ''
        if result.returncode == 0:
            return True, None, ''  # papers-dl exited cleanly, just no paper

        # These ARE errors - something went wrong
        if '404' in stderr or '404' in stdout:
            return False, None, '404 Not Found'
        if '403' in stderr or '403' in stdout:
            return False, None, '403 Forbidden'

        return False, None, f'papers-dl error: {stderr[:200] or stdout[:200]}'

    except subprocess.TimeoutExpired:
        return False, None, f'Timed out after {timeout} seconds'
    except Exception as e:
        return False, None, f'{type(e).__name__}: {e}'
|
||||
|
||||
|
||||
@click.command()
@click.option('--url', required=True, help='URL to download paper from')
@click.option('--snapshot-id', required=True, help='Snapshot UUID')
def main(url: str, snapshot_id: str):
    """Download scientific paper from a URL using papers-dl."""
    # Flow: honour the SAVE_PAPERSDL toggle, locate the binary, run the
    # download, then report KEY=VALUE lines plus a RESULT_JSON line.
    # Exit code is 0 for succeeded/skipped and 1 otherwise.

    version = ''
    output = None
    status = 'failed'
    error = ''
    binary = None
    cmd_str = ''

    try:
        # Feature toggle: report "skipped" and stop early.
        if not get_env_bool('SAVE_PAPERSDL', True):
            print('Skipping papers-dl (SAVE_PAPERSDL=False)')
            status = 'skipped'
            print(f'STATUS={status}')
            skipped_payload = {"extractor": EXTRACTOR_NAME, "status": status, "url": url, "snapshot_id": snapshot_id}
            print(f'RESULT_JSON={json.dumps(skipped_payload)}')
            sys.exit(0)

        # Locate the binary; without it, emit dependency hints on stderr.
        binary = find_papersdl()
        if not binary:
            missing_lines = (
                f'ERROR: {BIN_NAME} binary not found',
                f'DEPENDENCY_NEEDED={BIN_NAME}',
                f'BIN_PROVIDERS={BIN_PROVIDERS}',
                'INSTALL_HINT=pip install papers-dl',
            )
            for text in missing_lines:
                print(text, file=sys.stderr)
            sys.exit(1)

        version = get_version(binary)
        cmd_str = f'{binary} fetch {url}'

        # Run extraction
        success, output, error = save_paper(url, binary)
        status = 'succeeded' if success else 'failed'

        if success and output:
            saved = Path(output)
            file_size = saved.stat().st_size
            print(f'papers-dl completed: {saved.name} ({file_size} bytes)')
        elif success:
            print('papers-dl completed: no paper found for this URL (this is normal)')

    except Exception as e:
        # sys.exit() above raises SystemExit, which is not caught here.
        error = f'{type(e).__name__}: {e}'
        status = 'failed'

    # Print results: optional fields are only emitted when non-empty.
    for label, value in (('CMD', cmd_str), ('VERSION', version), ('OUTPUT', output)):
        if value:
            print(f'{label}={value}')
    print(f'STATUS={status}')

    if error:
        print(f'ERROR={error}', file=sys.stderr)

    # Print JSON result
    result_json = {
        'extractor': EXTRACTOR_NAME,
        'url': url,
        'snapshot_id': snapshot_id,
        'status': status,
        'cmd_version': version,
        'output': output,
        'error': error or None,
    }
    print(f'RESULT_JSON={json.dumps(result_json)}')

    exit_code = 0 if status == 'succeeded' else 1
    sys.exit(exit_code)


if __name__ == '__main__':
    main()
|
||||
15
archivebox/plugins/papersdl/templates/embed.html
Normal file
15
archivebox/plugins/papersdl/templates/embed.html
Normal file
@@ -0,0 +1,15 @@
|
||||
<!-- Embedded paper view - shows PDF viewer -->
|
||||
<div class="extractor-embed papersdl-embed" style="width: 100%; max-width: 900px; margin: 0 auto; background: #1a1a1a; padding: 20px; border-radius: 8px;">
|
||||
<div style="text-align: center; padding: 15px 0; border-bottom: 1px solid #333; margin-bottom: 20px;">
|
||||
<span style="font-size: 32px;">📄</span>
|
||||
<h3 style="margin: 10px 0; color: #fff; font-size: 18px;">Scientific Paper</h3>
|
||||
</div>
|
||||
<div style="width: 100%; height: 500px; background: #2a2a2a; border-radius: 5px; overflow: hidden;">
|
||||
<embed src="{{ output_path }}" type="application/pdf" width="100%" height="100%" />
|
||||
</div>
|
||||
<div style="margin-top: 15px; text-align: center;">
|
||||
<a href="{{ output_path }}" download style="color: #4a9eff; text-decoration: none; padding: 10px 20px; background: #2a2a2a; border-radius: 5px; display: inline-block;">
|
||||
Download PDF
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
71
archivebox/plugins/papersdl/templates/fullscreen.html
Normal file
71
archivebox/plugins/papersdl/templates/fullscreen.html
Normal file
@@ -0,0 +1,71 @@
|
||||
<!-- Fullscreen paper view - shows PDF in full screen -->
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Scientific Paper</title>
|
||||
<style>
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
background: #1a1a1a;
|
||||
color: #ddd;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
height: 100vh;
|
||||
}
|
||||
.header {
|
||||
text-align: center;
|
||||
padding: 15px;
|
||||
background: #0d1117;
|
||||
border-bottom: 1px solid #30363d;
|
||||
}
|
||||
.icon {
|
||||
font-size: 32px;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
h1 {
|
||||
margin: 0;
|
||||
font-size: 20px;
|
||||
color: #f0f6fc;
|
||||
}
|
||||
.pdf-container {
|
||||
flex: 1;
|
||||
width: 100%;
|
||||
overflow: hidden;
|
||||
}
|
||||
embed {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
.download-link {
|
||||
position: fixed;
|
||||
bottom: 20px;
|
||||
right: 20px;
|
||||
background: #58a6ff;
|
||||
color: #fff;
|
||||
padding: 12px 24px;
|
||||
border-radius: 6px;
|
||||
text-decoration: none;
|
||||
font-weight: 600;
|
||||
box-shadow: 0 4px 6px rgba(0,0,0,0.3);
|
||||
transition: background 0.2s;
|
||||
}
|
||||
.download-link:hover {
|
||||
background: #1f6feb;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="header">
|
||||
<div class="icon">📄</div>
|
||||
<h1>Scientific Paper</h1>
|
||||
</div>
|
||||
<div class="pdf-container">
|
||||
<embed src="{{ output_path }}" type="application/pdf" />
|
||||
</div>
|
||||
<a href="{{ output_path }}" download class="download-link">Download PDF</a>
|
||||
</body>
|
||||
</html>
|
||||
1
archivebox/plugins/papersdl/templates/icon.html
Normal file
1
archivebox/plugins/papersdl/templates/icon.html
Normal file
@@ -0,0 +1 @@
|
||||
📄
|
||||
7
archivebox/plugins/papersdl/templates/thumbnail.html
Normal file
7
archivebox/plugins/papersdl/templates/thumbnail.html
Normal file
@@ -0,0 +1,7 @@
|
||||
<!-- Paper thumbnail - shows PDF icon placeholder -->
|
||||
<div class="extractor-thumbnail papersdl-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #1a1a1a; display: flex; align-items: center; justify-content: center;">
|
||||
<div style="display: flex; flex-direction: column; align-items: center; color: #888; font-size: 12px;">
|
||||
<span style="font-size: 32px;">📄</span>
|
||||
<span>Paper</span>
|
||||
</div>
|
||||
</div>
|
||||
157
archivebox/plugins/papersdl/tests/test_papersdl.py
Normal file
157
archivebox/plugins/papersdl/tests/test_papersdl.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""
|
||||
Integration tests for papersdl plugin
|
||||
|
||||
Tests verify:
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
4. Paper extraction works on paper URLs
|
||||
5. JSONL output is correct
|
||||
6. Config options work
|
||||
7. Handles non-paper URLs gracefully
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
# Locations of the plugin under test, resolved relative to this test file.
_TESTS_DIR = Path(__file__).parent
PLUGIN_DIR = _TESTS_DIR.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
PAPERSDL_HOOK = PLUGIN_DIR.joinpath('on_Snapshot__54_papersdl.py')
PAPERSDL_VALIDATE_HOOK = PLUGIN_DIR.joinpath('on_Crawl__00_validate_papersdl.py')
# A URL that does not point at a paper.
TEST_URL = 'https://example.com'
|
||||
|
||||
def test_hook_script_exists():
    """The on_Snapshot hook script must be present in the plugin directory."""
    hook_present = PAPERSDL_HOOK.exists()
    assert hook_present, f"Hook not found: {PAPERSDL_HOOK}"
|
||||
|
||||
|
||||
def test_papersdl_validate_hook():
    """The validate hook reports papers-dl as either installed or missing."""
    # Run the validate hook as a subprocess with the current interpreter.
    # Its exit code is not asserted: it is 0 when the binary is found and
    # 1 when it is not, and both outcomes are acceptable here.
    result = subprocess.run(
        [sys.executable, str(PAPERSDL_VALIDATE_HOOK)],
        capture_output=True,
        text=True,
        timeout=30
    )

    found_binary = False
    found_dependency = False

    # Scan stdout for InstalledBinary / Dependency JSONL records.
    for raw_line in result.stdout.strip().split('\n'):
        if not raw_line.strip():
            continue
        try:
            record = json.loads(raw_line)
        except json.JSONDecodeError:
            # Non-JSON output lines are ignored.
            continue

        record_type = record.get('type')
        if record_type == 'InstalledBinary':
            if record['name'] == 'papers-dl':
                assert record['abspath'], "papers-dl should have abspath"
                found_binary = True
        elif record_type == 'Dependency':
            if record['bin_name'] == 'papers-dl':
                found_dependency = True

    # papers-dl should either be found (InstalledBinary) or missing (Dependency)
    assert found_binary or found_dependency, \
        "papers-dl should have either InstalledBinary or Dependency record"
|
||||
|
||||
|
||||
def test_verify_deps_with_abx_pkg():
    """Check papers-dl through abx-pkg; skip when it is not available."""
    from abx_pkg import Binary, PipProvider, EnvProvider, BinProviderOverrides

    # Try to load papers-dl via the pip and env providers.
    providers = [PipProvider(), EnvProvider()]
    loaded = Binary(name='papers-dl', binproviders=providers).load()

    missing_binaries = []
    available = loaded and loaded.abspath
    if not available:
        missing_binaries.append('papers-dl')

    if missing_binaries:
        pytest.skip(f"Binaries not available: {', '.join(missing_binaries)} - Dependency records should have been emitted")
|
||||
|
||||
|
||||
def test_handles_non_paper_url():
    """The extraction hook exits cleanly and reports a result for a non-paper URL."""
    # Prerequisites checked by earlier test

    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir)

        # Run the extraction hook against a URL that is not a paper.
        hook_cmd = [
            sys.executable, str(PAPERSDL_HOOK),
            '--url', 'https://example.com',
            '--snapshot-id', 'test789',
        ]
        result = subprocess.run(
            hook_cmd,
            cwd=workdir,
            capture_output=True,
            text=True,
            timeout=60
        )

        # Should exit 0 even for non-paper URL
        assert result.returncode == 0, f"Should handle non-paper URL gracefully: {result.stderr}"

        # The hook must report both a status line and a JSON result line.
        assert 'STATUS=' in result.stdout, "Should report status"
        assert 'RESULT_JSON=' in result.stdout, "Should output RESULT_JSON"

        # Decode the first RESULT_JSON line.
        payloads = [
            line.split('=', 1)[1]
            for line in result.stdout.split('\n')
            if line.startswith('RESULT_JSON=')
        ]
        result_json = json.loads(payloads[0]) if payloads else None

        assert result_json, "Should have RESULT_JSON"
        assert result_json['extractor'] == 'papersdl'
|
||||
|
||||
|
||||
def test_config_save_papersdl_false_skips():
    """With SAVE_PAPERSDL=False the hook exits 0 and still prints a status."""
    import os

    with tempfile.TemporaryDirectory() as tmpdir:
        # Inherit the current environment, overriding only the toggle.
        env = {**os.environ, 'SAVE_PAPERSDL': 'False'}

        hook_cmd = [sys.executable, str(PAPERSDL_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999']
        result = subprocess.run(
            hook_cmd,
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=env,
            timeout=30
        )

        assert result.returncode == 0, f"Should exit 0 when skipping: {result.stderr}"
        assert 'STATUS=' in result.stdout
|
||||
|
||||
|
||||
def test_config_timeout():
    """With PAPERSDL_TIMEOUT=5 the hook finishes and exits 0 within the test's 30s limit."""
    import os

    with tempfile.TemporaryDirectory() as tmpdir:
        # Inherit the current environment, overriding only the timeout.
        env = {**os.environ, 'PAPERSDL_TIMEOUT': '5'}

        hook_cmd = [
            sys.executable, str(PAPERSDL_HOOK),
            '--url', 'https://example.com',
            '--snapshot-id', 'testtimeout',
        ]
        result = subprocess.run(
            hook_cmd,
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=env,
            timeout=30
        )

        assert result.returncode == 0, "Should complete without hanging"


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
|
||||
Reference in New Issue
Block a user