mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-03 22:37:53 +10:00
make Claude.md stricter and improve migration tests
This commit is contained in:
@@ -12,6 +12,7 @@ Migration tests from 0.8.x to 0.9.x.
|
||||
|
||||
import shutil
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
@@ -440,28 +441,34 @@ class TestFilesystemMigration08to09(unittest.TestCase):
|
||||
result = run_archivebox(self.work_dir, ['init'], timeout=45)
|
||||
self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
|
||||
|
||||
# Step 2: Archive example.com with some extractors enabled
|
||||
# Enable a subset of fast extractors for testing
|
||||
result = run_archivebox(
|
||||
self.work_dir,
|
||||
['add', '--depth=0', 'https://example.com'],
|
||||
timeout=120,
|
||||
env={
|
||||
'SAVE_TITLE': 'True',
|
||||
'SAVE_FAVICON': 'True',
|
||||
'SAVE_WGET': 'True',
|
||||
'SAVE_SCREENSHOT': 'False', # Disable slow extractors
|
||||
'SAVE_DOM': 'False',
|
||||
'SAVE_SINGLEFILE': 'False',
|
||||
'SAVE_READABILITY': 'False',
|
||||
'SAVE_MERCURY': 'False',
|
||||
'SAVE_PDF': 'False',
|
||||
'SAVE_MEDIA': 'False',
|
||||
'SAVE_ARCHIVE_DOT_ORG': 'False',
|
||||
}
|
||||
)
|
||||
# Note: Add may fail if network is down or extractors fail, but we still want to test
|
||||
# the filesystem migration logic even with partial failures
|
||||
# Step 2: Archive example.com with ALL extractors enabled
|
||||
# This ensures we test migration with all file types
|
||||
try:
|
||||
result = run_archivebox(
|
||||
self.work_dir,
|
||||
['add', '--depth=0', 'https://example.com'],
|
||||
timeout=300, # 5 minutes for all extractors
|
||||
env={
|
||||
'SAVE_TITLE': 'True',
|
||||
'SAVE_FAVICON': 'True',
|
||||
'SAVE_WGET': 'True',
|
||||
'SAVE_SCREENSHOT': 'True',
|
||||
'SAVE_DOM': 'True',
|
||||
'SAVE_SINGLEFILE': 'True',
|
||||
'SAVE_READABILITY': 'True',
|
||||
'SAVE_MERCURY': 'True',
|
||||
'SAVE_PDF': 'True',
|
||||
'SAVE_MEDIA': 'True',
|
||||
'SAVE_ARCHIVE_DOT_ORG': 'True',
|
||||
'SAVE_HEADERS': 'True',
|
||||
'SAVE_HTMLTOTEXT': 'True',
|
||||
'SAVE_GIT': 'True',
|
||||
}
|
||||
)
|
||||
except subprocess.TimeoutExpired as e:
|
||||
# If timeout, still continue - we want to test with whatever files were created
|
||||
print(f"\n[!] Add command timed out after {e.timeout}s, continuing with partial results...")
|
||||
# Note: Snapshot may still have been created even if command timed out
|
||||
|
||||
# Step 3: Get the snapshot and verify files were created
|
||||
conn = sqlite3.connect(str(self.db_path))
|
||||
|
||||
@@ -986,27 +986,31 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
|
||||
# Helper Functions
|
||||
# =============================================================================
|
||||
|
||||
def run_archivebox(data_dir: Path, args: list, timeout: int = 60) -> subprocess.CompletedProcess:
|
||||
def run_archivebox(data_dir: Path, args: list, timeout: int = 60, env: dict = None) -> subprocess.CompletedProcess:
|
||||
"""Run archivebox command in subprocess with given data directory."""
|
||||
env = os.environ.copy()
|
||||
env['DATA_DIR'] = str(data_dir)
|
||||
env['USE_COLOR'] = 'False'
|
||||
env['SHOW_PROGRESS'] = 'False'
|
||||
# Disable ALL extractors for faster tests
|
||||
env['SAVE_ARCHIVE_DOT_ORG'] = 'False'
|
||||
env['SAVE_TITLE'] = 'False'
|
||||
env['SAVE_FAVICON'] = 'False'
|
||||
env['SAVE_WGET'] = 'False'
|
||||
env['SAVE_SINGLEFILE'] = 'False'
|
||||
env['SAVE_SCREENSHOT'] = 'False'
|
||||
env['SAVE_PDF'] = 'False'
|
||||
env['SAVE_DOM'] = 'False'
|
||||
env['SAVE_READABILITY'] = 'False'
|
||||
env['SAVE_MERCURY'] = 'False'
|
||||
env['SAVE_GIT'] = 'False'
|
||||
env['SAVE_MEDIA'] = 'False'
|
||||
env['SAVE_HEADERS'] = 'False'
|
||||
env['SAVE_HTMLTOTEXT'] = 'False'
|
||||
base_env = os.environ.copy()
|
||||
base_env['DATA_DIR'] = str(data_dir)
|
||||
base_env['USE_COLOR'] = 'False'
|
||||
base_env['SHOW_PROGRESS'] = 'False'
|
||||
# Disable ALL extractors for faster tests (can be overridden by env parameter)
|
||||
base_env['SAVE_ARCHIVE_DOT_ORG'] = 'False'
|
||||
base_env['SAVE_TITLE'] = 'False'
|
||||
base_env['SAVE_FAVICON'] = 'False'
|
||||
base_env['SAVE_WGET'] = 'False'
|
||||
base_env['SAVE_SINGLEFILE'] = 'False'
|
||||
base_env['SAVE_SCREENSHOT'] = 'False'
|
||||
base_env['SAVE_PDF'] = 'False'
|
||||
base_env['SAVE_DOM'] = 'False'
|
||||
base_env['SAVE_READABILITY'] = 'False'
|
||||
base_env['SAVE_MERCURY'] = 'False'
|
||||
base_env['SAVE_GIT'] = 'False'
|
||||
base_env['SAVE_MEDIA'] = 'False'
|
||||
base_env['SAVE_HEADERS'] = 'False'
|
||||
base_env['SAVE_HTMLTOTEXT'] = 'False'
|
||||
|
||||
# Override with any custom env vars
|
||||
if env:
|
||||
base_env.update(env)
|
||||
|
||||
cmd = [sys.executable, '-m', 'archivebox'] + args
|
||||
|
||||
@@ -1014,7 +1018,7 @@ def run_archivebox(data_dir: Path, args: list, timeout: int = 60) -> subprocess.
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
env=base_env,
|
||||
cwd=str(data_dir),
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user