mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
type and test fixes
This commit is contained in:
@@ -400,13 +400,13 @@ def assert_record_has_fields(record: Dict[str, Any], required_fields: List[str])
|
||||
# Test Data Factories
|
||||
# =============================================================================
|
||||
|
||||
def create_test_url(domain: str = 'example.com', path: str = None) -> str:
|
||||
def create_test_url(domain: str = 'example.com', path: str | None = None) -> str:
|
||||
"""Generate unique test URL."""
|
||||
path = path or uuid7().hex[:8]
|
||||
return f'https://{domain}/{path}'
|
||||
|
||||
|
||||
def create_test_crawl_json(urls: List[str] = None, **kwargs) -> Dict[str, Any]:
|
||||
def create_test_crawl_json(urls: List[str] | None = None, **kwargs) -> Dict[str, Any]:
|
||||
"""Create Crawl JSONL record for testing."""
|
||||
urls = urls or [create_test_url()]
|
||||
return {
|
||||
@@ -419,7 +419,7 @@ def create_test_crawl_json(urls: List[str] = None, **kwargs) -> Dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
def create_test_snapshot_json(url: str = None, **kwargs) -> Dict[str, Any]:
|
||||
def create_test_snapshot_json(url: str | None = None, **kwargs) -> Dict[str, Any]:
|
||||
"""Create Snapshot JSONL record for testing."""
|
||||
return {
|
||||
'type': 'Snapshot',
|
||||
|
||||
@@ -967,7 +967,7 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
|
||||
# Helper Functions
|
||||
# =============================================================================
|
||||
|
||||
def run_archivebox(data_dir: Path, args: list, timeout: int = 60, env: dict = None) -> subprocess.CompletedProcess:
|
||||
def run_archivebox(data_dir: Path, args: list, timeout: int = 60, env: dict | None = None) -> subprocess.CompletedProcess:
|
||||
"""Run archivebox command in subprocess with given data directory."""
|
||||
base_env = os.environ.copy()
|
||||
base_env['DATA_DIR'] = str(data_dir)
|
||||
|
||||
@@ -1,166 +0,0 @@
|
||||
import os
|
||||
import sqlite3
|
||||
import subprocess
|
||||
|
||||
def test_depth_flag_is_accepted(process, disable_extractors_dict):
|
||||
arg_process = subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"],
|
||||
capture_output=True, env=disable_extractors_dict)
|
||||
assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode("utf-8")
|
||||
|
||||
|
||||
def test_depth_flag_fails_if_it_is_not_0_or_1(process, disable_extractors_dict):
|
||||
arg_process = subprocess.run(
|
||||
["archivebox", "add", "--index-only", "--depth=5", "https://example.com"],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
# Error message may say "invalid choice" or "is not one of"
|
||||
stderr = arg_process.stderr.decode("utf-8")
|
||||
assert 'invalid' in stderr.lower() or 'not one of' in stderr.lower()
|
||||
arg_process = subprocess.run(
|
||||
["archivebox", "add", "--index-only", "--depth=-1", "https://example.com"],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
stderr = arg_process.stderr.decode("utf-8")
|
||||
assert 'invalid' in stderr.lower() or 'not one of' in stderr.lower()
|
||||
|
||||
|
||||
def test_depth_flag_0_creates_source_file(tmp_path, process, disable_extractors_dict):
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(
|
||||
["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
# Check that source file was created with the URL
|
||||
sources_dir = tmp_path / "sources"
|
||||
assert sources_dir.exists()
|
||||
source_files = list(sources_dir.glob("*cli_add.txt"))
|
||||
assert len(source_files) >= 1
|
||||
source_content = source_files[0].read_text()
|
||||
assert "example.com" in source_content
|
||||
|
||||
|
||||
def test_overwrite_flag_is_accepted(process, disable_extractors_dict):
|
||||
subprocess.run(
|
||||
["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
arg_process = subprocess.run(
|
||||
["archivebox", "add", "--index-only", "--overwrite", "https://example.com"],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
assert 'unrecognized arguments: --overwrite' not in arg_process.stderr.decode("utf-8")
|
||||
|
||||
def test_add_creates_crawl_in_database(tmp_path, process, disable_extractors_dict):
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(
|
||||
["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
# Check that a Crawl was created in database
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
count = c.execute("SELECT COUNT(*) FROM crawls_crawl").fetchone()[0]
|
||||
conn.close()
|
||||
|
||||
assert count >= 1
|
||||
|
||||
|
||||
def test_add_with_tags(tmp_path, process, disable_extractors_dict):
|
||||
"""Test adding URL with tags."""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(
|
||||
["archivebox", "add", "--index-only", "--depth=0", "--tag=test,example", "https://example.com"],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
# Check that tags were created in database
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
tags = c.execute("SELECT name FROM core_tag").fetchall()
|
||||
conn.close()
|
||||
|
||||
tag_names = [t[0] for t in tags]
|
||||
assert 'test' in tag_names or 'example' in tag_names
|
||||
|
||||
|
||||
def test_add_multiple_urls_single_call(tmp_path, process, disable_extractors_dict):
|
||||
"""Test adding multiple URLs in a single call creates multiple snapshots."""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(
|
||||
["archivebox", "add", "--index-only", "--depth=0",
|
||||
"https://example.com", "https://example.org"],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
# Check both URLs are in the source file
|
||||
sources_dir = tmp_path / "sources"
|
||||
source_files = list(sources_dir.glob("*cli_add.txt"))
|
||||
assert len(source_files) >= 1
|
||||
source_content = source_files[0].read_text()
|
||||
assert "example.com" in source_content
|
||||
assert "example.org" in source_content
|
||||
|
||||
|
||||
def test_add_from_file(tmp_path, process, disable_extractors_dict):
|
||||
"""Test adding URLs from a file."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
# Create a file with URLs
|
||||
urls_file = tmp_path / "urls.txt"
|
||||
urls_file.write_text("https://example.com\nhttps://example.org\n")
|
||||
|
||||
subprocess.run(
|
||||
["archivebox", "add", "--index-only", "--depth=0", str(urls_file)],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
# Check that a Crawl was created
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
count = c.execute("SELECT COUNT(*) FROM crawls_crawl").fetchone()[0]
|
||||
conn.close()
|
||||
|
||||
assert count >= 1
|
||||
|
||||
|
||||
class TestAddCLI:
|
||||
"""Test the CLI interface for add command."""
|
||||
|
||||
def test_add_help(self, tmp_path, process):
|
||||
"""Test that --help works for add command."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
["archivebox", "add", "--help"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert '--depth' in result.stdout or 'depth' in result.stdout
|
||||
assert '--tag' in result.stdout or 'tag' in result.stdout
|
||||
|
||||
def test_add_no_args_shows_help(self, tmp_path, process):
|
||||
"""Test that add with no args shows help or usage."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
["archivebox", "add"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should either show help or error about missing URL
|
||||
combined = result.stdout + result.stderr
|
||||
assert 'usage' in combined.lower() or 'url' in combined.lower() or 'add' in combined.lower()
|
||||
@@ -9,9 +9,11 @@ Tests cover:
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from typing import cast
|
||||
from django.test import override_settings
|
||||
from django.urls import reverse
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.contrib.auth.models import UserManager
|
||||
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
||||
@@ -24,7 +26,7 @@ PUBLIC_HOST = 'public.archivebox.localhost:8000'
|
||||
@pytest.fixture
|
||||
def admin_user(db):
|
||||
"""Create admin user for tests."""
|
||||
return User.objects.create_superuser(
|
||||
return cast(UserManager, User.objects).create_superuser(
|
||||
username='testadmin',
|
||||
email='admin@test.com',
|
||||
password='testpassword'
|
||||
|
||||
@@ -7,6 +7,21 @@ Verify add creates snapshots in DB, crawls, source files, and archive directorie
|
||||
import os
|
||||
import sqlite3
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _find_snapshot_dir(data_dir: Path, snapshot_id: str) -> Path | None:
|
||||
candidates = {snapshot_id}
|
||||
if len(snapshot_id) == 32:
|
||||
candidates.add(f"{snapshot_id[:8]}-{snapshot_id[8:12]}-{snapshot_id[12:16]}-{snapshot_id[16:20]}-{snapshot_id[20:]}")
|
||||
elif len(snapshot_id) == 36 and '-' in snapshot_id:
|
||||
candidates.add(snapshot_id.replace('-', ''))
|
||||
|
||||
for needle in candidates:
|
||||
for path in data_dir.rglob(needle):
|
||||
if path.is_dir():
|
||||
return path
|
||||
return None
|
||||
|
||||
|
||||
def test_add_single_url_creates_snapshot_in_db(tmp_path, process, disable_extractors_dict):
|
||||
@@ -144,6 +159,21 @@ def test_add_with_depth_1_flag(tmp_path, process, disable_extractors_dict):
|
||||
assert 'unrecognized arguments: --depth' not in result.stderr.decode('utf-8')
|
||||
|
||||
|
||||
def test_add_rejects_invalid_depth_values(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that add rejects depth values outside the supported range."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
for depth in ('5', '-1'):
|
||||
result = subprocess.run(
|
||||
['archivebox', 'add', '--index-only', f'--depth={depth}', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
stderr = result.stderr.decode('utf-8').lower()
|
||||
assert result.returncode != 0
|
||||
assert 'invalid' in stderr or 'not one of' in stderr
|
||||
|
||||
|
||||
def test_add_with_tags(tmp_path, process, disable_extractors_dict):
|
||||
"""Test adding URL with tags stores tags_str in crawl.
|
||||
|
||||
@@ -245,11 +275,8 @@ def test_add_with_overwrite_flag(tmp_path, process, disable_extractors_dict):
|
||||
assert 'unrecognized arguments: --overwrite' not in result.stderr.decode('utf-8')
|
||||
|
||||
|
||||
def test_add_creates_archive_subdirectory(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that add creates archive subdirectory for the snapshot.
|
||||
|
||||
Archive subdirectories are named by timestamp, not by snapshot ID.
|
||||
"""
|
||||
def test_add_creates_snapshot_output_directory(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that add creates the current snapshot output directory on disk."""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
|
||||
@@ -257,16 +284,44 @@ def test_add_creates_archive_subdirectory(tmp_path, process, disable_extractors_
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
# Get the snapshot timestamp from the database
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
timestamp = c.execute("SELECT timestamp FROM core_snapshot").fetchone()[0]
|
||||
snapshot_id = str(c.execute("SELECT id FROM core_snapshot").fetchone()[0])
|
||||
conn.close()
|
||||
|
||||
# Check that archive subdirectory was created using timestamp
|
||||
archive_dir = tmp_path / "archive" / str(timestamp)
|
||||
assert archive_dir.exists()
|
||||
assert archive_dir.is_dir()
|
||||
snapshot_dir = _find_snapshot_dir(tmp_path, snapshot_id)
|
||||
assert snapshot_dir is not None, f"Snapshot output directory not found for {snapshot_id}"
|
||||
assert snapshot_dir.is_dir()
|
||||
|
||||
|
||||
def test_add_help_shows_depth_and_tag_options(tmp_path, process):
|
||||
"""Test that add --help documents the main filter and crawl options."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'add', '--help'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert '--depth' in result.stdout
|
||||
assert '--tag' in result.stdout
|
||||
|
||||
|
||||
def test_add_without_args_shows_usage(tmp_path, process):
|
||||
"""Test that add without URLs fails with a usage hint instead of crashing."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'add'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
combined = result.stdout + result.stderr
|
||||
assert result.returncode != 0
|
||||
assert 'usage' in combined.lower() or 'url' in combined.lower()
|
||||
|
||||
|
||||
def test_add_index_only_skips_extraction(tmp_path, process, disable_extractors_dict):
|
||||
|
||||
@@ -241,3 +241,24 @@ def test_init_output_shows_collection_info(tmp_path):
|
||||
output = result.stdout
|
||||
# Should show some helpful info about the collection
|
||||
assert 'ArchiveBox' in output or 'collection' in output.lower() or 'Initializing' in output
|
||||
|
||||
|
||||
def test_init_ignores_unrecognized_archive_directories(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that init upgrades existing dirs without choking on extra folders."""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
(tmp_path / "archive" / "some_random_folder").mkdir(parents=True, exist_ok=True)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'init'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
assert result.returncode == 0, result.stdout + result.stderr
|
||||
|
||||
@@ -93,6 +93,59 @@ def test_install_shows_binary_status(tmp_path, process):
|
||||
assert len(output) > 50
|
||||
|
||||
|
||||
def test_install_dry_run_prints_dry_run_message(tmp_path, process):
|
||||
"""Test that install --dry-run clearly reports that no changes will be made."""
|
||||
os.chdir(tmp_path)
|
||||
result = subprocess.run(
|
||||
['archivebox', 'install', '--dry-run'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert 'dry run' in result.stdout.lower()
|
||||
|
||||
|
||||
def test_install_help_lists_dry_run_flag(tmp_path):
|
||||
"""Test that install --help documents the dry-run option."""
|
||||
os.chdir(tmp_path)
|
||||
result = subprocess.run(
|
||||
['archivebox', 'install', '--help'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert '--dry-run' in result.stdout or '-d' in result.stdout
|
||||
|
||||
|
||||
def test_install_invalid_option_fails(tmp_path):
|
||||
"""Test that invalid install options fail cleanly."""
|
||||
os.chdir(tmp_path)
|
||||
result = subprocess.run(
|
||||
['archivebox', 'install', '--invalid-option'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode != 0
|
||||
|
||||
|
||||
def test_install_from_empty_dir_initializes_collection(tmp_path):
|
||||
"""Test that install bootstraps an empty dir before performing work."""
|
||||
os.chdir(tmp_path)
|
||||
result = subprocess.run(
|
||||
['archivebox', 'install', '--dry-run'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
output = result.stdout + result.stderr
|
||||
assert result.returncode == 0
|
||||
assert 'Initializing' in output or 'Dry run' in output or 'init' in output.lower()
|
||||
|
||||
|
||||
def test_install_updates_binary_table(tmp_path, process):
|
||||
"""Test that install completes and only mutates dependency state."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
146
archivebox/tests/test_cli_list.py
Normal file
146
archivebox/tests/test_cli_list.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tests for archivebox list command.
|
||||
Verify list emits snapshot JSONL and applies the documented filters.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
import subprocess
|
||||
|
||||
|
||||
def _parse_jsonl(stdout: str) -> list[dict]:
|
||||
return [
|
||||
json.loads(line)
|
||||
for line in stdout.splitlines()
|
||||
if line.strip().startswith('{')
|
||||
]
|
||||
|
||||
|
||||
def test_list_outputs_existing_snapshots_as_jsonl(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that list prints one JSON object per stored snapshot."""
|
||||
os.chdir(tmp_path)
|
||||
for url in ['https://example.com', 'https://iana.org']:
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', url],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'list'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
rows = _parse_jsonl(result.stdout)
|
||||
urls = {row['url'] for row in rows}
|
||||
|
||||
assert result.returncode == 0, result.stderr
|
||||
assert 'https://example.com' in urls
|
||||
assert 'https://iana.org' in urls
|
||||
|
||||
|
||||
def test_list_filters_by_url_icontains(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that list --url__icontains returns only matching snapshots."""
|
||||
os.chdir(tmp_path)
|
||||
for url in ['https://example.com', 'https://iana.org']:
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', url],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'list', '--url__icontains', 'example.com'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
rows = _parse_jsonl(result.stdout)
|
||||
assert result.returncode == 0, result.stderr
|
||||
assert len(rows) == 1
|
||||
assert rows[0]['url'] == 'https://example.com'
|
||||
|
||||
|
||||
def test_list_filters_by_crawl_id_and_limit(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that crawl-id and limit filters constrain the result set."""
|
||||
os.chdir(tmp_path)
|
||||
for url in ['https://example.com', 'https://iana.org']:
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', url],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
crawl_id = str(c.execute(
|
||||
"SELECT crawl_id FROM core_snapshot WHERE url = ?",
|
||||
('https://example.com',),
|
||||
).fetchone()[0])
|
||||
conn.close()
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'list', '--crawl-id', crawl_id, '--limit', '1'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
rows = _parse_jsonl(result.stdout)
|
||||
assert result.returncode == 0, result.stderr
|
||||
assert len(rows) == 1
|
||||
assert rows[0]['crawl_id'].replace('-', '') == crawl_id.replace('-', '')
|
||||
assert rows[0]['url'] == 'https://example.com'
|
||||
|
||||
|
||||
def test_list_filters_by_status(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that list can filter using the current snapshot status."""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
status = c.execute("SELECT status FROM core_snapshot LIMIT 1").fetchone()[0]
|
||||
conn.close()
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'list', '--status', status],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
rows = _parse_jsonl(result.stdout)
|
||||
assert result.returncode == 0, result.stderr
|
||||
assert len(rows) == 1
|
||||
assert rows[0]['status'] == status
|
||||
|
||||
|
||||
def test_list_help_lists_filter_options(tmp_path, process):
|
||||
"""Test that list --help documents the supported filter flags."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'list', '--help'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert '--url__icontains' in result.stdout
|
||||
assert '--crawl-id' in result.stdout
|
||||
assert '--limit' in result.stdout
|
||||
@@ -7,6 +7,21 @@ Verify remove deletes snapshots from DB and filesystem.
|
||||
import os
|
||||
import sqlite3
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _find_snapshot_dir(data_dir: Path, snapshot_id: str) -> Path | None:
|
||||
candidates = {snapshot_id}
|
||||
if len(snapshot_id) == 32:
|
||||
candidates.add(f"{snapshot_id[:8]}-{snapshot_id[8:12]}-{snapshot_id[12:16]}-{snapshot_id[16:20]}-{snapshot_id[20:]}")
|
||||
elif len(snapshot_id) == 36 and '-' in snapshot_id:
|
||||
candidates.add(snapshot_id.replace('-', ''))
|
||||
|
||||
for needle in candidates:
|
||||
for path in data_dir.rglob(needle):
|
||||
if path.is_dir():
|
||||
return path
|
||||
return None
|
||||
|
||||
|
||||
def test_remove_deletes_snapshot_from_db(tmp_path, process, disable_extractors_dict):
|
||||
@@ -44,10 +59,7 @@ def test_remove_deletes_snapshot_from_db(tmp_path, process, disable_extractors_d
|
||||
|
||||
|
||||
def test_remove_deletes_archive_directory(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that remove deletes the archive directory when using --delete flag.
|
||||
|
||||
Archive directories are named by timestamp, not by snapshot ID.
|
||||
"""
|
||||
"""Test that remove --delete removes the current snapshot output directory."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
# Add a snapshot
|
||||
@@ -57,24 +69,21 @@ def test_remove_deletes_archive_directory(tmp_path, process, disable_extractors_
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
# Get snapshot timestamp
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
timestamp = c.execute("SELECT timestamp FROM core_snapshot").fetchone()[0]
|
||||
snapshot_id = str(c.execute("SELECT id FROM core_snapshot").fetchone()[0])
|
||||
conn.close()
|
||||
|
||||
archive_dir = tmp_path / "archive" / str(timestamp)
|
||||
assert archive_dir.exists()
|
||||
snapshot_dir = _find_snapshot_dir(tmp_path, snapshot_id)
|
||||
assert snapshot_dir is not None, f"Snapshot output directory not found for {snapshot_id}"
|
||||
|
||||
# Remove snapshot with --delete to remove both DB record and directory
|
||||
subprocess.run(
|
||||
['archivebox', 'remove', 'https://example.com', '--yes', '--delete'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
# Archive directory should be deleted
|
||||
assert not archive_dir.exists()
|
||||
assert not snapshot_dir.exists()
|
||||
|
||||
|
||||
def test_remove_yes_flag_skips_confirmation(tmp_path, process, disable_extractors_dict):
|
||||
@@ -158,6 +167,35 @@ def test_remove_with_filter(tmp_path, process, disable_extractors_dict):
|
||||
assert result.returncode in [0, 1, 2]
|
||||
|
||||
|
||||
def test_remove_with_regex_filter_deletes_all_matches(tmp_path, process, disable_extractors_dict):
|
||||
"""Test regex filters remove every matching snapshot."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
for url in ['https://example.com', 'https://iana.org']:
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', url],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'remove', '--filter-type=regex', '.*', '--yes'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
count_after = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
|
||||
conn.close()
|
||||
|
||||
output = result.stdout.decode("utf-8") + result.stderr.decode("utf-8")
|
||||
assert count_after == 0
|
||||
assert 'Removed' in output or 'Found' in output
|
||||
|
||||
|
||||
def test_remove_nonexistent_url_fails_gracefully(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that removing non-existent URL fails gracefully."""
|
||||
os.chdir(tmp_path)
|
||||
@@ -169,7 +207,8 @@ def test_remove_nonexistent_url_fails_gracefully(tmp_path, process, disable_extr
|
||||
)
|
||||
|
||||
# Should fail or show error
|
||||
assert result.returncode != 0 or 'not found' in result.stdout.lower() or 'no matches' in result.stdout.lower()
|
||||
stdout_text = result.stdout.decode('utf-8', errors='replace').lower()
|
||||
assert result.returncode != 0 or 'not found' in stdout_text or 'no matches' in stdout_text
|
||||
|
||||
|
||||
def test_remove_reports_remaining_link_count_correctly(tmp_path, process, disable_extractors_dict):
|
||||
|
||||
@@ -4,6 +4,7 @@ Tests for archivebox search command.
|
||||
Verify search queries snapshots from DB.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
@@ -65,3 +66,145 @@ def test_search_on_empty_archive(tmp_path, process):
|
||||
|
||||
# Should complete without error
|
||||
assert result.returncode in [0, 1]
|
||||
|
||||
|
||||
def test_search_json_outputs_matching_snapshots(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that search --json returns parseable matching snapshot rows."""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--json'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
assert result.returncode == 0, result.stderr
|
||||
payload = json.loads(result.stdout)
|
||||
assert any('example.com' in row.get('url', '') for row in payload)
|
||||
|
||||
|
||||
def test_search_json_with_headers_wraps_links_payload(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that search --json --with-headers returns a headers envelope."""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--json', '--with-headers'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
assert result.returncode == 0, result.stderr
|
||||
payload = json.loads(result.stdout)
|
||||
links = payload.get('links', payload)
|
||||
assert any('example.com' in row.get('url', '') for row in links)
|
||||
|
||||
|
||||
def test_search_html_outputs_markup(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that search --html renders an HTML response."""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--html'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
assert result.returncode == 0, result.stderr
|
||||
assert '<' in result.stdout
|
||||
|
||||
|
||||
def test_search_csv_outputs_requested_column(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that search --csv emits the requested fields."""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--csv', 'url', '--with-headers'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
assert result.returncode == 0, result.stderr
|
||||
assert 'url' in result.stdout
|
||||
assert 'example.com' in result.stdout
|
||||
|
||||
|
||||
def test_search_with_headers_requires_structured_output_format(tmp_path, process):
|
||||
"""Test that --with-headers is rejected without --json, --html, or --csv."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--with-headers'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
assert result.returncode != 0
|
||||
assert 'requires' in result.stderr.lower() or 'json' in result.stderr.lower()
|
||||
|
||||
|
||||
def test_search_sort_option_runs_successfully(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that search --sort accepts sortable fields."""
|
||||
os.chdir(tmp_path)
|
||||
for url in ['https://iana.org', 'https://example.com']:
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', '--depth=0', url],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
check=True,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--csv', 'url', '--sort=url'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
assert result.returncode == 0, result.stderr
|
||||
assert 'example.com' in result.stdout or 'iana.org' in result.stdout
|
||||
|
||||
|
||||
def test_search_help_lists_supported_filters(tmp_path, process):
|
||||
"""Test that search --help documents the available filters and output modes."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--help'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert '--filter-type' in result.stdout or '-f' in result.stdout
|
||||
assert '--status' in result.stdout
|
||||
assert '--sort' in result.stdout
|
||||
|
||||
@@ -202,3 +202,24 @@ def test_status_shows_index_file_info(tmp_path, process):
|
||||
|
||||
# Should mention index
|
||||
assert 'index' in result.stdout.lower() or 'Index' in result.stdout
|
||||
|
||||
|
||||
def test_status_help_lists_available_options(tmp_path, process):
|
||||
"""Test that status --help works and documents the command."""
|
||||
os.chdir(tmp_path)
|
||||
result = subprocess.run(
|
||||
['archivebox', 'status', '--help'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert 'status' in result.stdout.lower() or 'statistic' in result.stdout.lower()
|
||||
|
||||
|
||||
def test_status_shows_data_directory_path(tmp_path, process):
|
||||
"""Test that status reports which collection directory it is inspecting."""
|
||||
os.chdir(tmp_path)
|
||||
result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
|
||||
|
||||
assert 'archive' in result.stdout.lower() or str(tmp_path) in result.stdout
|
||||
|
||||
@@ -77,6 +77,17 @@ def test_version_quiet_outputs_version_number(tmp_path):
|
||||
assert len(parts) >= 2
|
||||
|
||||
|
||||
def test_version_flag_outputs_version_number(tmp_path):
|
||||
"""Test that top-level --version reports the package version."""
|
||||
os.chdir(tmp_path)
|
||||
result = subprocess.run(['archivebox', '--version'], capture_output=True, text=True)
|
||||
|
||||
assert result.returncode == 0
|
||||
version = result.stdout.strip()
|
||||
assert version
|
||||
assert len(version.split('.')) >= 2
|
||||
|
||||
|
||||
def test_version_shows_system_info_in_initialized_dir(tmp_path, process):
|
||||
"""Test that version shows system metadata in initialized directory."""
|
||||
os.chdir(tmp_path)
|
||||
@@ -148,3 +159,20 @@ def test_version_auto_selects_short_tmp_dir_for_deep_collection_path(tmp_path):
|
||||
assert reported_tmp_dir.exists()
|
||||
assert not reported_tmp_dir.is_relative_to(default_tmp_dir)
|
||||
assert len(f"file://{reported_tmp_dir / 'supervisord.sock'}") <= 96
|
||||
|
||||
|
||||
def test_version_help_lists_quiet_flag(tmp_path):
|
||||
"""Test that version --help documents the quiet output mode."""
|
||||
os.chdir(tmp_path)
|
||||
result = subprocess.run(['archivebox', 'version', '--help'], capture_output=True, text=True)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert '--quiet' in result.stdout or '-q' in result.stdout
|
||||
|
||||
|
||||
def test_version_invalid_option_fails(tmp_path):
|
||||
"""Test that invalid version options fail cleanly."""
|
||||
os.chdir(tmp_path)
|
||||
result = subprocess.run(['archivebox', 'version', '--invalid-option'], capture_output=True, text=True)
|
||||
|
||||
assert result.returncode != 0
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
# archivebox init
|
||||
# archivebox add
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sqlite3
|
||||
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
|
||||
from .fixtures import disable_extractors_dict, process
|
||||
|
||||
FIXTURES = (disable_extractors_dict, process)
|
||||
|
||||
DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
|
||||
|
||||
def test_init(tmp_path, process):
|
||||
assert "Initializing a new ArchiveBox" in process.stdout.decode("utf-8")
|
||||
|
||||
def test_update(tmp_path, process):
|
||||
os.chdir(tmp_path)
|
||||
update_process = subprocess.run(['archivebox', 'init'], capture_output=True)
|
||||
assert "updating existing ArchiveBox" in update_process.stdout.decode("utf-8")
|
||||
|
||||
def test_add_link(tmp_path, process, disable_extractors_dict):
|
||||
os.chdir(tmp_path)
|
||||
add_process = subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'],
|
||||
capture_output=True, env=disable_extractors_dict)
|
||||
assert add_process.returncode == 0, add_process.stderr.decode("utf-8")
|
||||
|
||||
# In the new architecture, URLs are saved to source files
|
||||
# Check that a source file was created with the URL
|
||||
sources_dir = tmp_path / "sources"
|
||||
assert sources_dir.exists(), "Sources directory should be created"
|
||||
source_files = list(sources_dir.glob("*cli_add.txt"))
|
||||
assert len(source_files) >= 1, "Source file should be created"
|
||||
source_content = source_files[0].read_text()
|
||||
assert "https://example.com" in source_content
|
||||
|
||||
|
||||
def test_add_multiple_urls(tmp_path, process, disable_extractors_dict):
|
||||
"""Test adding multiple URLs via command line arguments"""
|
||||
os.chdir(tmp_path)
|
||||
add_process = subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com', 'https://iana.org'],
|
||||
capture_output=True, env=disable_extractors_dict)
|
||||
assert add_process.returncode == 0, add_process.stderr.decode("utf-8")
|
||||
|
||||
# Check that a source file was created with both URLs
|
||||
sources_dir = tmp_path / "sources"
|
||||
assert sources_dir.exists(), "Sources directory should be created"
|
||||
source_files = list(sources_dir.glob("*cli_add.txt"))
|
||||
assert len(source_files) >= 1, "Source file should be created"
|
||||
source_content = source_files[-1].read_text()
|
||||
assert "https://example.com" in source_content
|
||||
assert "https://iana.org" in source_content
|
||||
|
||||
def test_correct_permissions_output_folder(tmp_path, process):
|
||||
index_files = ['index.sqlite3', 'archive']
|
||||
for file in index_files:
|
||||
file_path = tmp_path / file
|
||||
assert oct(file_path.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
|
||||
|
||||
def test_correct_permissions_add_command_results(tmp_path, process, disable_extractors_dict):
|
||||
os.chdir(tmp_path)
|
||||
add_process = subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True,
|
||||
env=disable_extractors_dict)
|
||||
assert add_process.returncode == 0, add_process.stderr.decode("utf-8")
|
||||
|
||||
# Check database permissions
|
||||
assert oct((tmp_path / "index.sqlite3").stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
|
||||
|
||||
def test_collision_urls_different_timestamps(tmp_path, process, disable_extractors_dict):
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True,
|
||||
env=disable_extractors_dict)
|
||||
subprocess.run(['archivebox', 'add', '--index-only', 'https://iana.org'], capture_output=True,
|
||||
env=disable_extractors_dict)
|
||||
|
||||
# Check both URLs are in database
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
count = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
|
||||
conn.close()
|
||||
|
||||
assert count == 2
|
||||
|
||||
def test_unrecognized_folders(tmp_path, process, disable_extractors_dict):
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True,
|
||||
env=disable_extractors_dict)
|
||||
(tmp_path / "archive" / "some_random_folder").mkdir(parents=True, exist_ok=True)
|
||||
|
||||
init_process = subprocess.run(['archivebox', 'init'], capture_output=True, env=disable_extractors_dict)
|
||||
# Just check that init completes successfully
|
||||
assert init_process.returncode == 0
|
||||
@@ -1,128 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Integration tests for archivebox install command."""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sqlite3
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
|
||||
class TestInstallDryRun:
|
||||
"""Test the dry-run mode of install command."""
|
||||
|
||||
def test_dry_run_prints_message(self, tmp_path, process):
|
||||
"""Test that dry-run mode prints appropriate message."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'install', '--dry-run'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert 'Dry run' in result.stdout
|
||||
|
||||
def test_dry_run_does_not_create_crawl(self, tmp_path, process):
|
||||
"""Test that dry-run mode doesn't create a crawl."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
# Get initial crawl count
|
||||
conn = sqlite3.connect('index.sqlite3')
|
||||
c = conn.cursor()
|
||||
c.execute("SELECT COUNT(*) FROM crawls_crawl")
|
||||
initial_count = c.fetchone()[0]
|
||||
conn.close()
|
||||
|
||||
# Run install with dry-run
|
||||
result = subprocess.run(
|
||||
['archivebox', 'install', '--dry-run'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
|
||||
# Check crawl count unchanged
|
||||
conn = sqlite3.connect('index.sqlite3')
|
||||
c = conn.cursor()
|
||||
c.execute("SELECT COUNT(*) FROM crawls_crawl")
|
||||
final_count = c.fetchone()[0]
|
||||
conn.close()
|
||||
|
||||
assert final_count == initial_count
|
||||
|
||||
|
||||
class TestInstallOutput:
|
||||
"""Test the output/messages from install command."""
|
||||
|
||||
def test_install_prints_detecting_message(self, tmp_path, process, disable_extractors_dict):
|
||||
"""Test that install prints detecting dependencies message."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'install', '--dry-run'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
# Should mention detecting or dependencies
|
||||
output = result.stdout.lower()
|
||||
assert 'detect' in output or 'dependenc' in output or 'dry run' in output
|
||||
|
||||
|
||||
class TestInstallCLI:
|
||||
"""Test the CLI interface for install command."""
|
||||
|
||||
def test_cli_help(self, tmp_path):
|
||||
"""Test that --help works for install command."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'install', '--help'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert '--dry-run' in result.stdout or '-d' in result.stdout
|
||||
|
||||
def test_cli_invalid_option(self, tmp_path):
|
||||
"""Test that invalid options are handled."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'install', '--invalid-option'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should fail with non-zero exit code
|
||||
assert result.returncode != 0
|
||||
|
||||
|
||||
class TestInstallInitialization:
|
||||
"""Test that install initializes the data directory if needed."""
|
||||
|
||||
def test_install_from_empty_dir(self, tmp_path):
|
||||
"""Test that install from empty dir initializes first."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
# Don't use process fixture - start from empty dir
|
||||
result = subprocess.run(
|
||||
['archivebox', 'install', '--dry-run'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should either initialize or show dry run message
|
||||
output = result.stdout
|
||||
assert 'Initializing' in output or 'Dry run' in output or 'init' in output.lower()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
@@ -1,98 +0,0 @@
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
from .fixtures import disable_extractors_dict, process
|
||||
|
||||
FIXTURES = (disable_extractors_dict, process)
|
||||
|
||||
def test_search_json(process, disable_extractors_dict):
|
||||
subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"],
|
||||
capture_output=True, env=disable_extractors_dict)
|
||||
search_process = subprocess.run(["archivebox", "search", "--json"], capture_output=True)
|
||||
output_str = search_process.stdout.decode("utf-8").strip()
|
||||
# Handle potential control characters in output
|
||||
try:
|
||||
output_json = json.loads(output_str)
|
||||
except json.JSONDecodeError:
|
||||
# Try with strict=False if there are control characters
|
||||
import re
|
||||
# Remove ANSI escape sequences and control characters
|
||||
clean_str = re.sub(r'\x1b\[[0-9;]*m', '', output_str)
|
||||
clean_str = re.sub(r'[\x00-\x1f\x7f]', lambda m: ' ' if m.group(0) in '\t\n\r' else '', clean_str)
|
||||
output_json = json.loads(clean_str)
|
||||
# Verify we get at least one snapshot back
|
||||
assert len(output_json) >= 1
|
||||
# Should include the requested URL
|
||||
assert any("example.com" in entry.get("url", "") for entry in output_json)
|
||||
|
||||
|
||||
def test_search_json_headers(process, disable_extractors_dict):
|
||||
subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"],
|
||||
capture_output=True, env=disable_extractors_dict)
|
||||
search_process = subprocess.run(["archivebox", "search", "--json", "--with-headers"], capture_output=True)
|
||||
output_str = search_process.stdout.decode("utf-8").strip()
|
||||
# Handle potential control characters in output
|
||||
try:
|
||||
output_json = json.loads(output_str)
|
||||
except json.JSONDecodeError:
|
||||
# Try with strict=False if there are control characters
|
||||
import re
|
||||
# Remove ANSI escape sequences and control characters
|
||||
clean_str = re.sub(r'\x1b\[[0-9;]*m', '', output_str)
|
||||
clean_str = re.sub(r'[\x00-\x1f\x7f]', lambda m: ' ' if m.group(0) in '\t\n\r' else '', clean_str)
|
||||
output_json = json.loads(clean_str)
|
||||
# The response should have a links key with headers mode
|
||||
links = output_json.get("links", output_json)
|
||||
assert len(links) >= 1
|
||||
|
||||
def test_search_html(process, disable_extractors_dict):
|
||||
subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"],
|
||||
capture_output=True, env=disable_extractors_dict)
|
||||
search_process = subprocess.run(["archivebox", "search", "--html"], capture_output=True)
|
||||
output_html = search_process.stdout.decode("utf-8")
|
||||
# Should contain some HTML and reference to the source file
|
||||
assert "sources" in output_html or "cli_add" in output_html or "<" in output_html
|
||||
|
||||
def test_search_html_headers(process, disable_extractors_dict):
|
||||
subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"],
|
||||
capture_output=True, env=disable_extractors_dict)
|
||||
search_process = subprocess.run(["archivebox", "search", "--html", "--with-headers"], capture_output=True)
|
||||
output_html = search_process.stdout.decode("utf-8")
|
||||
# Should contain HTML
|
||||
assert "<" in output_html
|
||||
|
||||
def test_search_csv(process, disable_extractors_dict):
|
||||
subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"],
|
||||
capture_output=True, env=disable_extractors_dict)
|
||||
search_process = subprocess.run(["archivebox", "search", "--csv", "url"], capture_output=True)
|
||||
output_csv = search_process.stdout.decode("utf-8")
|
||||
# Should contain the requested URL
|
||||
assert "example.com" in output_csv
|
||||
|
||||
def test_search_csv_headers(process, disable_extractors_dict):
|
||||
subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"],
|
||||
capture_output=True, env=disable_extractors_dict)
|
||||
search_process = subprocess.run(["archivebox", "search", "--csv", "url", "--with-headers"], capture_output=True)
|
||||
output_csv = search_process.stdout.decode("utf-8")
|
||||
# Should have url header and requested URL
|
||||
assert "url" in output_csv
|
||||
assert "example.com" in output_csv
|
||||
|
||||
def test_search_with_headers_requires_format(process):
|
||||
search_process = subprocess.run(["archivebox", "search", "--with-headers"], capture_output=True)
|
||||
stderr = search_process.stderr.decode("utf-8")
|
||||
assert "--with-headers" in stderr and ("requires" in stderr or "can only be used" in stderr)
|
||||
|
||||
def test_sort_by_url(process, disable_extractors_dict):
|
||||
# Add two URLs - they will create separate source files
|
||||
subprocess.run(["archivebox", "add", "--index-only", "https://iana.org", "--depth=0"],
|
||||
capture_output=True, env=disable_extractors_dict)
|
||||
subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"],
|
||||
capture_output=True, env=disable_extractors_dict)
|
||||
|
||||
# Search with sort should return results (even if they're file:// URLs)
|
||||
search_process = subprocess.run(["archivebox", "search", "--csv", "url", "--sort=url"], capture_output=True)
|
||||
output = search_process.stdout.decode("utf-8")
|
||||
lines = [line for line in output.strip().split("\n") if line]
|
||||
# Should have at least 2 snapshots (the source file snapshots)
|
||||
assert len(lines) >= 2
|
||||
@@ -12,6 +12,7 @@ import sqlite3
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
|
||||
from .migrations_helpers import (
|
||||
SCHEMA_0_4,
|
||||
@@ -74,7 +75,7 @@ class TestMigrationFrom04x(unittest.TestCase):
|
||||
|
||||
# Collect unique tags from original data
|
||||
original_tags = set()
|
||||
for tags_str in self.original_data['tags_str']:
|
||||
for tags_str in cast(list[str], self.original_data['tags_str']):
|
||||
if tags_str:
|
||||
for tag in tags_str.split(','):
|
||||
original_tags.add(tag.strip())
|
||||
|
||||
@@ -1,89 +0,0 @@
|
||||
import os
|
||||
import sqlite3
|
||||
import subprocess
|
||||
|
||||
from .fixtures import disable_extractors_dict, process
|
||||
|
||||
FIXTURES = (disable_extractors_dict, process)
|
||||
|
||||
def test_remove_single_snapshot(tmp_path, process, disable_extractors_dict):
|
||||
"""Test removing a snapshot by URL pattern"""
|
||||
os.chdir(tmp_path)
|
||||
# Add a URL - creates source file snapshot
|
||||
subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
|
||||
|
||||
# Verify snapshot exists
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
count_before = c.execute("SELECT COUNT() FROM core_snapshot").fetchone()[0]
|
||||
conn.close()
|
||||
assert count_before >= 1
|
||||
|
||||
# Remove all snapshots (including source file snapshots)
|
||||
remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes'], capture_output=True)
|
||||
# Check that it ran successfully (either output indicates success or return code 0)
|
||||
output = remove_process.stdout.decode("utf-8") + remove_process.stderr.decode("utf-8")
|
||||
assert remove_process.returncode == 0 or "removed" in output.lower() or "Found" in output
|
||||
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
count = c.execute("SELECT COUNT() FROM core_snapshot").fetchone()[0]
|
||||
conn.close()
|
||||
|
||||
assert count == 0
|
||||
|
||||
|
||||
def test_remove_with_delete_flag(tmp_path, process, disable_extractors_dict):
|
||||
"""Test removing snapshot with --delete also removes archive folder"""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
|
||||
|
||||
# Get archives before delete
|
||||
archive_dir = tmp_path / "archive"
|
||||
archives_before = list(archive_dir.iterdir()) if archive_dir.exists() else []
|
||||
|
||||
# Only run the rest of the test if archives were created
|
||||
if archives_before:
|
||||
subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
|
||||
archives_after = list(archive_dir.iterdir()) if archive_dir.exists() else []
|
||||
assert len(archives_after) < len(archives_before)
|
||||
else:
|
||||
# With --index-only, archive folders may not be created immediately
|
||||
# Just verify that remove command doesn't error
|
||||
remove_result = subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
|
||||
assert remove_result.returncode in (0, 1) # 0 = success, 1 = no matches
|
||||
|
||||
|
||||
def test_remove_regex(tmp_path, process, disable_extractors_dict):
|
||||
"""Test removing snapshots by regex pattern"""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
|
||||
subprocess.run(['archivebox', 'add', '--index-only', 'https://iana.org'], capture_output=True, env=disable_extractors_dict)
|
||||
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
count_before = c.execute("SELECT COUNT() FROM core_snapshot").fetchone()[0]
|
||||
conn.close()
|
||||
assert count_before >= 2
|
||||
|
||||
subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
|
||||
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
count_after = c.execute("SELECT COUNT() FROM core_snapshot").fetchone()[0]
|
||||
conn.close()
|
||||
assert count_after == 0
|
||||
|
||||
|
||||
def test_add_creates_crawls(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that adding URLs creates crawls in database"""
|
||||
os.chdir(tmp_path)
|
||||
subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
|
||||
subprocess.run(['archivebox', 'add', '--index-only', 'https://iana.org'], capture_output=True, env=disable_extractors_dict)
|
||||
|
||||
conn = sqlite3.connect("index.sqlite3")
|
||||
c = conn.cursor()
|
||||
crawl_count = c.execute("SELECT COUNT() FROM crawls_crawl").fetchone()[0]
|
||||
conn.close()
|
||||
|
||||
assert crawl_count == 2
|
||||
@@ -1,142 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Integration tests for archivebox search command."""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
|
||||
def test_search_returns_snapshots(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that search returns snapshots."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
# Add some snapshots
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should return some output (path or URL info)
|
||||
assert result.stdout.strip() != '' or result.returncode == 0
|
||||
|
||||
|
||||
def test_search_filter_by_substring(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that substring filter works."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
# Search with filter - may not find if URL isn't stored as expected
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--filter-type=substring', 'example'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should run without error
|
||||
assert result.returncode == 0 or 'No Snapshots' in result.stderr
|
||||
|
||||
|
||||
def test_search_sort_option(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that --sort option works."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--sort=url'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should run without error
|
||||
assert result.returncode == 0
|
||||
|
||||
|
||||
def test_search_with_headers_requires_format(tmp_path, process):
|
||||
"""Test that --with-headers requires --json, --html, or --csv."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--with-headers'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should fail with error message
|
||||
assert result.returncode != 0
|
||||
assert 'requires' in result.stderr.lower() or 'json' in result.stderr.lower()
|
||||
|
||||
|
||||
def test_search_status_option(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that --status option filters by status."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--status=indexed'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should run without error
|
||||
assert result.returncode == 0
|
||||
|
||||
|
||||
def test_search_no_snapshots_message(tmp_path, process):
|
||||
"""Test that searching empty archive shows appropriate output."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should complete (empty results are OK)
|
||||
assert result.returncode == 0
|
||||
|
||||
|
||||
class TestSearchCLI:
|
||||
"""Test the CLI interface for search command."""
|
||||
|
||||
def test_cli_help(self, tmp_path, process):
|
||||
"""Test that --help works for search command."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'search', '--help'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert '--filter-type' in result.stdout or '-f' in result.stdout
|
||||
assert '--status' in result.stdout
|
||||
assert '--sort' in result.stdout
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
@@ -1,195 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Integration tests for archivebox status command."""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
|
||||
def test_status_shows_index_info(tmp_path, process):
|
||||
"""Test that status shows index information."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'status'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should show index scanning info
|
||||
assert 'index' in result.stdout.lower() or 'Index' in result.stdout
|
||||
|
||||
|
||||
def test_status_shows_snapshot_count(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that status shows snapshot count."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
# Add some snapshots
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', 'https://iana.org'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'status'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should show link/snapshot count
|
||||
assert '2' in result.stdout or 'links' in result.stdout.lower()
|
||||
|
||||
|
||||
def test_status_shows_archive_size(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that status shows archive size information."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'status'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should show size info (bytes, KB, MB, etc)
|
||||
assert 'Size' in result.stdout or 'size' in result.stdout or 'B' in result.stdout
|
||||
|
||||
|
||||
def test_status_shows_indexed_count(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that status shows indexed folder count."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'status'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should show indexed count
|
||||
assert 'indexed' in result.stdout.lower()
|
||||
|
||||
|
||||
def test_status_shows_archived_vs_unarchived(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that status shows archived vs unarchived counts."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
# Add index-only snapshot (unarchived)
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'status'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should show archived/unarchived categories
|
||||
assert 'archived' in result.stdout.lower() or 'unarchived' in result.stdout.lower()
|
||||
|
||||
|
||||
def test_status_shows_data_directory_info(tmp_path, process):
|
||||
"""Test that status shows data directory path."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'status'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should show data directory or archive path
|
||||
assert 'archive' in result.stdout.lower() or str(tmp_path) in result.stdout
|
||||
|
||||
|
||||
def test_status_shows_user_info(tmp_path, process):
|
||||
"""Test that status shows user information."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'status'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should show user info section
|
||||
assert 'user' in result.stdout.lower() or 'login' in result.stdout.lower()
|
||||
|
||||
|
||||
def test_status_empty_archive(tmp_path, process):
|
||||
"""Test status on empty archive shows zero counts."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'status'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should still run successfully
|
||||
assert result.returncode == 0 or 'index' in result.stdout.lower()
|
||||
# Should show 0 links
|
||||
assert '0' in result.stdout or 'links' in result.stdout.lower()
|
||||
|
||||
|
||||
def test_status_shows_valid_vs_invalid(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that status shows valid vs invalid folder counts."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
subprocess.run(
|
||||
['archivebox', 'add', '--index-only', 'https://example.com'],
|
||||
capture_output=True,
|
||||
env=disable_extractors_dict,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'status'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Should show valid/invalid categories
|
||||
assert 'valid' in result.stdout.lower() or 'present' in result.stdout.lower()
|
||||
|
||||
|
||||
class TestStatusCLI:
|
||||
"""Test the CLI interface for status command."""
|
||||
|
||||
def test_cli_help(self, tmp_path, process):
|
||||
"""Test that --help works for status command."""
|
||||
os.chdir(tmp_path)
|
||||
|
||||
result = subprocess.run(
|
||||
['archivebox', 'status', '--help'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
# Help should show some info about the command
|
||||
assert 'status' in result.stdout.lower() or 'statistic' in result.stdout.lower()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
@@ -1,158 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Integration tests for archivebox version command."""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
|
||||
class TestVersionQuiet:
    """Test the quiet/minimal version output.

    Both `archivebox version --quiet` and `archivebox --version` must print
    just the version number; the shared assertions live in one helper so the
    two tests cannot drift apart.
    """

    @staticmethod
    def _assert_version_stdout(result: subprocess.CompletedProcess) -> None:
        """Assert that *result* succeeded and printed a semver-ish version string."""
        assert result.returncode == 0
        # Should contain a version string like "0.8.0" or similar
        version = result.stdout.strip()
        assert version
        # Version should be a valid semver-ish format: at least major.minor
        parts = version.split('.')
        assert len(parts) >= 2

    def test_version_prints_version_number(self, tmp_path):
        """Test that `version --quiet` prints the version number."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'version', '--quiet'],
            capture_output=True,
            text=True,
        )

        self._assert_version_stdout(result)

    def test_version_flag_prints_version_number(self, tmp_path):
        """Test that the global --version flag prints the version number."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', '--version'],
            capture_output=True,
            text=True,
        )

        self._assert_version_stdout(result)
|
||||
|
||||
|
||||
class TestVersionFull:
    """Test the full version output.

    All three tests invoke the same `archivebox version` command and only
    differ in which section of the output they check, so the subprocess
    boilerplate is factored into one helper.
    """

    @staticmethod
    def _version_stdout(tmp_path) -> str:
        """Run `archivebox version` inside *tmp_path* and return its stdout.

        The exit code is deliberately not checked here: `version` may exit 1
        when optional binaries are missing, but its report is still printed.
        """
        os.chdir(tmp_path)
        result = subprocess.run(
            ['archivebox', 'version'],
            capture_output=True,
            text=True,
        )
        return result.stdout

    def test_version_shows_system_info(self, tmp_path, process):
        """Test that version shows system information."""
        output = self._version_stdout(tmp_path)

        # Should show basic system info (exit code may be 1 if binaries missing)
        assert 'ArchiveBox' in output

    def test_version_shows_binary_section(self, tmp_path, process):
        """Test that version shows binary dependencies section."""
        output = self._version_stdout(tmp_path)

        # Should show binary dependencies section
        assert 'Binary' in output or 'Dependenc' in output

    def test_version_shows_data_locations(self, tmp_path, process):
        """Test that version shows data locations."""
        output = self._version_stdout(tmp_path)

        # Should show data/code locations
        assert 'Data' in output or 'location' in output.lower() or 'DIR' in output or 'Code' in output
|
||||
|
||||
|
||||
class TestVersionWithBinaries:
    """Test version output after running install."""

    def test_version_shows_binary_status(self, tmp_path, process, disable_extractors_dict):
        """Test that version shows binary status (installed or not)."""
        os.chdir(tmp_path)

        # Prime the collection with a dry-run install first (fast: no real downloads).
        subprocess.run(
            ['archivebox', 'install', '--dry-run'],
            capture_output=True,
            text=True,
            env=disable_extractors_dict,
        )

        # Then inspect what `version` reports about the binaries.
        version_proc = subprocess.run(
            ['archivebox', 'version'],
            capture_output=True,
            text=True,
            env=disable_extractors_dict,
        )

        stdout = version_proc.stdout

        # Either a per-binary installed/not-installed status or the section
        # header itself must appear in the report.
        assert ('installed' in stdout.lower()) or ('Binary' in stdout)
|
||||
|
||||
|
||||
class TestVersionCLI:
    """Exercise the command-line interface of the `version` command."""

    def test_cli_help(self, tmp_path):
        """`archivebox version --help` should exit 0 and document the quiet flag."""
        os.chdir(tmp_path)

        proc = subprocess.run(
            ['archivebox', 'version', '--help'],
            capture_output=True,
            text=True,
        )

        assert proc.returncode == 0
        # Help output should mention the quiet flag in either long or short form.
        assert ('--quiet' in proc.stdout) or ('-q' in proc.stdout)

    def test_cli_invalid_option(self, tmp_path):
        """An unrecognized option must be rejected rather than ignored."""
        os.chdir(tmp_path)

        proc = subprocess.run(
            ['archivebox', 'version', '--invalid-option'],
            capture_output=True,
            text=True,
        )

        # Should fail with non-zero exit code
        assert proc.returncode != 0
|
||||
|
||||
|
||||
# Allow running this test module directly (outside a pytest invocation).
if __name__ == '__main__':
    pytest.main(['-v', __file__])
|
||||
Reference in New Issue
Block a user