mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
wip
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
"""archivebox/tests/conftest.py - Pytest fixtures for CLI tests."""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import subprocess
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
|
||||
@@ -104,6 +106,234 @@ def initialized_archive(isolated_data_dir):
|
||||
return isolated_data_dir
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CWD-based CLI Helpers (no DATA_DIR env)
|
||||
# =============================================================================
|
||||
|
||||
def run_archivebox_cmd_cwd(
|
||||
args: List[str],
|
||||
cwd: Path,
|
||||
stdin: Optional[str] = None,
|
||||
timeout: int = 60,
|
||||
env: Optional[Dict[str, str]] = None,
|
||||
) -> Tuple[str, str, int]:
|
||||
"""
|
||||
Run archivebox command via subprocess using cwd as DATA_DIR (no DATA_DIR env).
|
||||
Returns (stdout, stderr, returncode).
|
||||
"""
|
||||
cmd = [sys.executable, '-m', 'archivebox'] + args
|
||||
|
||||
base_env = os.environ.copy()
|
||||
base_env.pop('DATA_DIR', None)
|
||||
base_env['USE_COLOR'] = 'False'
|
||||
base_env['SHOW_PROGRESS'] = 'False'
|
||||
|
||||
if env:
|
||||
base_env.update(env)
|
||||
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
input=stdin,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd=cwd,
|
||||
env=base_env,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
return result.stdout, result.stderr, result.returncode
|
||||
|
||||
|
||||
def run_python_cwd(
|
||||
script: str,
|
||||
cwd: Path,
|
||||
timeout: int = 60,
|
||||
) -> Tuple[str, str, int]:
|
||||
base_env = os.environ.copy()
|
||||
base_env.pop('DATA_DIR', None)
|
||||
result = subprocess.run(
|
||||
[sys.executable, '-'],
|
||||
input=script,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd=cwd,
|
||||
env=base_env,
|
||||
timeout=timeout,
|
||||
)
|
||||
return result.stdout, result.stderr, result.returncode
|
||||
|
||||
def _get_machine_type() -> str:
|
||||
import platform
|
||||
|
||||
os_name = platform.system().lower()
|
||||
arch = platform.machine().lower()
|
||||
in_docker = os.environ.get('IN_DOCKER', '').lower() in ('1', 'true', 'yes')
|
||||
suffix = '-docker' if in_docker else ''
|
||||
return f'{arch}-{os_name}{suffix}'
|
||||
|
||||
def _find_cached_chromium(lib_dir: Path) -> Optional[Path]:
|
||||
candidates = [
|
||||
lib_dir / 'puppeteer',
|
||||
lib_dir / 'npm' / 'node_modules' / 'puppeteer' / '.local-chromium',
|
||||
]
|
||||
for base in candidates:
|
||||
if not base.exists():
|
||||
continue
|
||||
for path in base.rglob('Chromium.app/Contents/MacOS/Chromium'):
|
||||
return path
|
||||
for path in base.rglob('chrome-linux/chrome'):
|
||||
return path
|
||||
for path in base.rglob('chrome-linux64/chrome'):
|
||||
return path
|
||||
return None
|
||||
|
||||
def _find_system_browser() -> Optional[Path]:
|
||||
candidates = [
|
||||
Path('/Applications/Chromium.app/Contents/MacOS/Chromium'),
|
||||
Path('/usr/bin/chromium'),
|
||||
Path('/usr/bin/chromium-browser'),
|
||||
]
|
||||
for candidate in candidates:
|
||||
if candidate.exists():
|
||||
return candidate
|
||||
return None
|
||||
|
||||
def _ensure_puppeteer(shared_lib: Path) -> None:
|
||||
npm_prefix = shared_lib / 'npm'
|
||||
node_modules = npm_prefix / 'node_modules'
|
||||
puppeteer_dir = node_modules / 'puppeteer'
|
||||
if puppeteer_dir.exists():
|
||||
return
|
||||
npm_prefix.mkdir(parents=True, exist_ok=True)
|
||||
env = os.environ.copy()
|
||||
env['PUPPETEER_SKIP_DOWNLOAD'] = '1'
|
||||
subprocess.run(
|
||||
['npm', 'install', 'puppeteer'],
|
||||
cwd=str(npm_prefix),
|
||||
env=env,
|
||||
check=True,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="class")
|
||||
def real_archive_with_example(tmp_path_factory, request):
|
||||
"""
|
||||
Initialize archive and add https://example.com using chrome+responses only.
|
||||
Uses cwd for DATA_DIR and symlinks lib dir to a shared cache.
|
||||
"""
|
||||
tmp_path = tmp_path_factory.mktemp("archivebox_data")
|
||||
if getattr(request, "cls", None) is not None:
|
||||
request.cls.data_dir = tmp_path
|
||||
|
||||
stdout, stderr, returncode = run_archivebox_cmd_cwd(
|
||||
['init', '--quick'],
|
||||
cwd=tmp_path,
|
||||
timeout=120,
|
||||
)
|
||||
assert returncode == 0, f"archivebox init failed: {stderr}"
|
||||
|
||||
stdout, stderr, returncode = run_archivebox_cmd_cwd(
|
||||
[
|
||||
'config',
|
||||
'--set',
|
||||
'LISTEN_HOST=archivebox.localhost:8000',
|
||||
'PUBLIC_INDEX=True',
|
||||
'PUBLIC_SNAPSHOTS=True',
|
||||
'PUBLIC_ADD_VIEW=True',
|
||||
],
|
||||
cwd=tmp_path,
|
||||
)
|
||||
assert returncode == 0, f"archivebox config failed: {stderr}"
|
||||
|
||||
machine_type = _get_machine_type()
|
||||
shared_root = Path(__file__).resolve().parents[3] / 'tmp' / 'test_lib_cache'
|
||||
shared_lib = shared_root / machine_type
|
||||
shared_lib.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
lib_target = tmp_path / 'lib' / machine_type
|
||||
if lib_target.exists() and not lib_target.is_symlink():
|
||||
shutil.rmtree(lib_target)
|
||||
if not lib_target.exists():
|
||||
lib_target.parent.mkdir(parents=True, exist_ok=True)
|
||||
lib_target.symlink_to(shared_lib, target_is_directory=True)
|
||||
|
||||
_ensure_puppeteer(shared_lib)
|
||||
cached_chromium = _find_cached_chromium(shared_lib)
|
||||
if cached_chromium:
|
||||
browser_binary = cached_chromium
|
||||
else:
|
||||
browser_binary = _find_system_browser()
|
||||
if browser_binary:
|
||||
chromium_link = shared_lib / 'chromium-bin'
|
||||
if not chromium_link.exists():
|
||||
chromium_link.symlink_to(browser_binary)
|
||||
browser_binary = chromium_link
|
||||
|
||||
if browser_binary:
|
||||
stdout, stderr, returncode = run_archivebox_cmd_cwd(
|
||||
[f'config', '--set', f'CHROME_BINARY={browser_binary}'],
|
||||
cwd=tmp_path,
|
||||
)
|
||||
assert returncode == 0, f"archivebox config CHROME_BINARY failed: {stderr}"
|
||||
script = textwrap.dedent(f"""\
|
||||
import os
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.core.settings')
|
||||
import django
|
||||
django.setup()
|
||||
from django.utils import timezone
|
||||
from archivebox.machine.models import Binary, Machine
|
||||
machine = Machine.current()
|
||||
Binary.objects.filter(machine=machine, name='chromium').update(
|
||||
status='installed',
|
||||
abspath='{browser_binary}',
|
||||
binprovider='env',
|
||||
retry_at=timezone.now(),
|
||||
)
|
||||
Binary.objects.update_or_create(
|
||||
machine=machine,
|
||||
name='chromium',
|
||||
defaults={{
|
||||
'status': 'installed',
|
||||
'abspath': '{browser_binary}',
|
||||
'binprovider': 'env',
|
||||
'retry_at': timezone.now(),
|
||||
}},
|
||||
)
|
||||
print('OK')
|
||||
"""
|
||||
)
|
||||
stdout, stderr, returncode = run_python_cwd(script, cwd=tmp_path, timeout=60)
|
||||
assert returncode == 0, f"Register chromium binary failed: {stderr}"
|
||||
|
||||
add_env = {
|
||||
'CHROME_ENABLED': 'True',
|
||||
'RESPONSES_ENABLED': 'True',
|
||||
'DOM_ENABLED': 'False',
|
||||
'SHOW_PROGRESS': 'False',
|
||||
'USE_COLOR': 'False',
|
||||
'CHROME_HEADLESS': 'True',
|
||||
'CHROME_PAGELOAD_TIMEOUT': '45',
|
||||
'CHROME_TIMEOUT': '60',
|
||||
'RESPONSES_TIMEOUT': '30',
|
||||
}
|
||||
if browser_binary:
|
||||
add_env['CHROME_BINARY'] = str(browser_binary)
|
||||
if cached_chromium:
|
||||
add_env['PUPPETEER_CACHE_DIR'] = str(shared_lib / 'puppeteer')
|
||||
stdout, stderr, returncode = run_archivebox_cmd_cwd(
|
||||
['add', '--depth=0', '--plugins=chrome,responses', 'https://example.com'],
|
||||
cwd=tmp_path,
|
||||
timeout=600,
|
||||
env=add_env,
|
||||
)
|
||||
assert returncode == 0, f"archivebox add failed: {stderr}"
|
||||
|
||||
return tmp_path
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Output Assertions
|
||||
# =============================================================================
|
||||
|
||||
Reference in New Issue
Block a user