much better tests and add page ui

This commit is contained in:
Nick Sweeting
2025-12-29 04:02:11 -08:00
parent 9487f8a0de
commit 30c60eef76
93 changed files with 2998 additions and 2712 deletions

203
tests/test_cli_config.py Normal file
View File

@@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""
Comprehensive tests for archivebox config command.
Verify config reads/writes ArchiveBox.conf file correctly.
"""
import os
import subprocess
from pathlib import Path
from .fixtures import *
def test_config_displays_all_config(tmp_path, process):
    """Running ``archivebox config`` with no arguments dumps the configuration."""
    os.chdir(tmp_path)
    completed = subprocess.run(['archivebox', 'config'], capture_output=True, text=True)
    assert completed.returncode == 0

    stdout = completed.stdout
    # A config dump is expected to be long; near-empty output means nothing was listed.
    assert len(stdout) > 100
    # At least one of the well-known keys has to be present.
    assert any(key in stdout for key in ('TIMEOUT', 'OUTPUT_PERMISSIONS'))
def test_config_get_specific_key(tmp_path, process):
    """``config --get KEY`` exits cleanly and mentions the requested key."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'config', '--get', 'TIMEOUT']
    completed = subprocess.run(cmd, capture_output=True, text=True)

    assert completed.returncode == 0
    assert 'TIMEOUT' in completed.stdout
def test_config_set_writes_to_file(tmp_path, process):
    """``config --set KEY=VALUE`` persists both the key and the value to ArchiveBox.conf."""
    os.chdir(tmp_path)
    result = subprocess.run(
        ['archivebox', 'config', '--set', 'TIMEOUT=120'],
        capture_output=True,
        text=True,
    )
    assert result.returncode == 0

    # Verify the config file was updated.
    config_file = tmp_path / 'ArchiveBox.conf'
    assert config_file.exists()
    content = config_file.read_text()

    # Both halves of the assignment must be present: checking only one of them
    # would let the test pass when the key or the value was silently dropped.
    assert 'TIMEOUT' in content
    assert '120' in content
def test_config_set_and_get_roundtrip(tmp_path, process):
    """A value stored with ``--set`` is returned by a following ``--get``."""
    os.chdir(tmp_path)

    # Store a distinctive value so it cannot be confused with a default.
    set_result = subprocess.run(
        ['archivebox', 'config', '--set', 'TIMEOUT=987'],
        capture_output=True,
        text=True,
    )
    # Fail here if the write itself failed, rather than reporting a
    # misleading mismatch from the read below.
    assert set_result.returncode == 0

    # Read the value back.
    result = subprocess.run(
        ['archivebox', 'config', '--get', 'TIMEOUT'],
        capture_output=True,
        text=True,
    )
    assert '987' in result.stdout
def test_config_set_multiple_values(tmp_path, process):
    """Several KEY=VALUE pairs passed to one ``--set`` call are all written."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'config', '--set', 'TIMEOUT=111', 'MEDIA_TIMEOUT=222']
    completed = subprocess.run(cmd, capture_output=True, text=True)
    assert completed.returncode == 0

    # Both values have to show up in the persisted config file.
    written = (tmp_path / 'ArchiveBox.conf').read_text()
    assert '111' in written
    assert '222' in written
def test_config_set_invalid_key_fails(tmp_path, process):
    """Setting a key that is not a known option yields a non-zero exit code."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'config', '--set', 'TOTALLY_INVALID_KEY_XYZ=value']
    completed = subprocess.run(cmd, capture_output=True, text=True)
    assert completed.returncode != 0
def test_config_set_requires_equals_sign(tmp_path, process):
    """``--set`` with a bare key (no ``=VALUE``) yields a non-zero exit code."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'config', '--set', 'TIMEOUT']
    completed = subprocess.run(cmd, capture_output=True, text=True)
    assert completed.returncode != 0
def test_config_search_finds_keys(tmp_path, process):
    """``config --search TERM`` prints keys matching the term."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'config', '--search', 'TIMEOUT']
    completed = subprocess.run(cmd, capture_output=True, text=True)
    # The timeout-related options should be listed in the output.
    assert 'TIMEOUT' in completed.stdout
def test_config_preserves_existing_values(tmp_path, process):
    """Setting a second key must not drop a key that was set earlier."""
    os.chdir(tmp_path)

    # Set first value.
    subprocess.run(
        ['archivebox', 'config', '--set', 'TIMEOUT=100'],
        capture_output=True,
    )
    # Set second value.
    subprocess.run(
        ['archivebox', 'config', '--set', 'MEDIA_TIMEOUT=200'],
        capture_output=True,
    )

    config_file = tmp_path / 'ArchiveBox.conf'
    content = config_file.read_text()

    # Compare whole key names instead of substrings: 'TIMEOUT' is contained in
    # 'MEDIA_TIMEOUT', so a plain substring check would still pass after the
    # first key had been lost.
    # NOTE(review): assumes one ``KEY = VALUE`` assignment per line - confirm
    # against the config writer.
    settings = {}
    for line in content.splitlines():
        key, sep, value = line.partition('=')
        if sep:
            settings[key.strip()] = value.strip()

    assert '100' in settings.get('TIMEOUT', '')
    assert '200' in settings.get('MEDIA_TIMEOUT', '')
def test_config_file_is_valid_toml(tmp_path, process):
    """After a ``--set`` the config file still looks like a key/value file.

    This is only a shape heuristic (a section bracket or an equals sign must
    be present); the file is not actually parsed.
    """
    os.chdir(tmp_path)
    cmd = ['archivebox', 'config', '--set', 'TIMEOUT=150']
    subprocess.run(cmd, capture_output=True)

    written = (tmp_path / 'ArchiveBox.conf').read_text()
    assert any(marker in written for marker in ('[', '='))
def test_config_updates_existing_value(tmp_path, process):
    """Setting the same key a second time replaces the earlier value."""
    os.chdir(tmp_path)

    # Write the initial value, then overwrite it.
    for assignment in ('TIMEOUT=100', 'TIMEOUT=200'):
        subprocess.run(
            ['archivebox', 'config', '--set', assignment],
            capture_output=True,
        )

    # Read back the value currently in effect.
    cmd = ['archivebox', 'config', '--get', 'TIMEOUT']
    completed = subprocess.run(cmd, capture_output=True, text=True)
    assert '200' in completed.stdout

72
tests/test_cli_crawl.py Normal file
View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
"""
Tests for archivebox crawl command.
Verify crawl creates snapshots with depth.
"""
import os
import subprocess
import sqlite3
from .fixtures import *
def test_crawl_creates_snapshots(tmp_path, process, disable_extractors_dict):
    """A depth-0 crawl of one URL leaves exactly one row in ``core_snapshot``."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'crawl', '--index-only', '--depth=0', 'https://example.com']
    completed = subprocess.run(
        cmd,
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    assert completed.returncode == 0

    # Count snapshot rows directly in the collection database.
    db = sqlite3.connect("index.sqlite3")
    (snapshot_total,) = db.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()
    db.close()
    assert snapshot_total == 1
def test_crawl_with_depth_0(tmp_path, process, disable_extractors_dict):
    """A depth-0 crawl produces at least one snapshot row."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'crawl', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(
        cmd,
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )

    db = sqlite3.connect("index.sqlite3")
    (snapshot_total,) = db.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()
    db.close()
    # Only a lower bound is enforced here.
    assert snapshot_total >= 1
def test_crawl_creates_crawl_record(tmp_path, process, disable_extractors_dict):
    """Running a crawl inserts at least one row into ``crawls_crawl``."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'crawl', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(
        cmd,
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )

    db = sqlite3.connect("index.sqlite3")
    (crawl_total,) = db.execute("SELECT COUNT(*) FROM crawls_crawl").fetchone()
    db.close()
    assert crawl_total >= 1

66
tests/test_cli_extract.py Normal file
View File

@@ -0,0 +1,66 @@
#!/usr/bin/env python3
"""
Tests for archivebox extract command.
Verify extract re-runs extractors on existing snapshots.
"""
import os
import subprocess
import sqlite3
from .fixtures import *
def test_extract_runs_on_existing_snapshots(tmp_path, process, disable_extractors_dict):
    """``extract --overwrite`` terminates with exit code 0 or 1 on an existing snapshot."""
    os.chdir(tmp_path)

    # Seed the collection with one snapshot.
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # Re-run the extractors over it.
    completed = subprocess.run(
        ['archivebox', 'extract', '--overwrite'],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    assert completed.returncode in (0, 1)
def test_extract_preserves_snapshot_count(tmp_path, process, disable_extractors_dict):
    """``extract`` must neither add nor delete rows in ``core_snapshot``."""
    os.chdir(tmp_path)

    # Add snapshot.
    subprocess.run(
        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    count_before = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    conn.close()
    # Guard against a vacuous pass: if the add step failed the comparison
    # below would be 0 == 0 and the test would prove nothing.
    assert count_before == 1

    # Run extract.
    subprocess.run(
        ['archivebox', 'extract', '--overwrite'],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    count_after = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    conn.close()

    assert count_after == count_before

115
tests/test_cli_install.py Normal file
View File

@@ -0,0 +1,115 @@
#!/usr/bin/env python3
"""
Comprehensive tests for archivebox install command.
Verify install detects and records binary dependencies in DB.
"""
import os
import subprocess
import sqlite3
from .fixtures import *
def test_install_runs_successfully(tmp_path, process):
    """``install --dry-run`` finishes within the timeout with exit code 0 or 1."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'install', '--dry-run']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    # Exit code 1 is tolerated (e.g. when binaries are missing).
    assert completed.returncode in (0, 1)
def test_install_creates_binary_records_in_db(tmp_path, process):
    """After ``install --dry-run`` the ``machine_binary`` table exists in the DB."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'install', '--dry-run']
    subprocess.run(cmd, capture_output=True, timeout=60)

    # Look the table up in SQLite's schema catalogue.
    db = sqlite3.connect("index.sqlite3")
    matching_tables = db.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='machine_binary'"
    ).fetchall()
    db.close()

    assert len(matching_tables) == 1
def test_install_dry_run_does_not_install(tmp_path, process):
    """``install --dry-run`` mentions "dry" in its output or exits with 0 or 1."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'install', '--dry-run']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=60)

    mentions_dry_run = 'dry' in completed.stdout.lower()
    assert mentions_dry_run or completed.returncode in (0, 1)
def test_install_detects_system_binaries(tmp_path, process):
    """Smoke test: ``install --dry-run`` exits with 0 or 1.

    Only the exit code is checked; the output is not inspected for
    specific binaries.
    """
    os.chdir(tmp_path)
    cmd = ['archivebox', 'install', '--dry-run']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    assert completed.returncode in (0, 1)
def test_install_shows_binary_status(tmp_path, process):
    """``install --dry-run`` prints a non-trivial amount of output."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'install', '--dry-run']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=60)

    # Status information may go to either stream, so look at both together.
    combined = completed.stdout + completed.stderr
    assert len(combined) > 50
def test_install_updates_binary_table(tmp_path, process):
    """After ``install --dry-run`` the ``machine_binary`` table is non-empty."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'install', '--dry-run']
    subprocess.run(cmd, capture_output=True, timeout=60)

    db = sqlite3.connect("index.sqlite3")
    (binary_total,) = db.execute("SELECT COUNT(*) FROM machine_binary").fetchone()
    db.close()

    # At least one binary record is expected.
    assert binary_total > 0

73
tests/test_cli_manage.py Normal file
View File

@@ -0,0 +1,73 @@
#!/usr/bin/env python3
"""
Tests for archivebox manage command.
Verify manage command runs Django management commands.
"""
import os
import subprocess
import sqlite3
from .fixtures import *
def test_manage_help_works(tmp_path, process):
    """``manage help`` succeeds and prints a sizeable help text."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'manage', 'help']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=30)

    assert completed.returncode == 0
    assert len(completed.stdout) > 100
def test_manage_showmigrations_works(tmp_path, process):
    """``manage showmigrations`` succeeds and prints migration status."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'manage', 'showmigrations']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=30)

    assert completed.returncode == 0
    # Either the ``core`` app name or a status bracket has to appear.
    assert any(marker in completed.stdout for marker in ('core', '['))
def test_manage_dbshell_command_exists(tmp_path, process):
    """``manage help dbshell`` succeeds and describes the dbshell command."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'manage', 'help', 'dbshell']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=30)

    assert completed.returncode == 0
    help_text = completed.stdout
    assert 'dbshell' in help_text or 'database' in help_text.lower()
def test_manage_check_works(tmp_path, process):
    """``manage check`` terminates with exit code 0 or 1."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'manage', 'check']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    assert completed.returncode in (0, 1)

62
tests/test_cli_oneshot.py Normal file
View File

@@ -0,0 +1,62 @@
#!/usr/bin/env python3
"""
Tests for archivebox oneshot command.
Verify oneshot archives URL and exits.
"""
import os
import subprocess
import sqlite3
from pathlib import Path
from .fixtures import *
def test_oneshot_creates_temporary_collection(tmp_path, disable_extractors_dict):
    """``oneshot`` run from ``tmp_path`` terminates with exit code 0 or 1."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'oneshot', '--index-only', '--depth=0', 'https://example.com']
    completed = subprocess.run(
        cmd,
        capture_output=True,
        env=disable_extractors_dict,
        timeout=60,
    )
    assert completed.returncode in (0, 1)
def test_oneshot_without_existing_collection(tmp_path, disable_extractors_dict):
    """``oneshot`` run from a freshly created, empty directory exits with 0 or 1."""
    workdir = tmp_path / "oneshot_test"
    workdir.mkdir()
    os.chdir(workdir)

    cmd = ['archivebox', 'oneshot', '--index-only', '--depth=0', 'https://example.com']
    completed = subprocess.run(
        cmd,
        capture_output=True,
        env=disable_extractors_dict,
        timeout=60,
    )
    # No ``process`` fixture is requested here, so no collection was initialised beforehand.
    assert completed.returncode in (0, 1)
def test_oneshot_creates_archive_output(tmp_path, disable_extractors_dict):
    """``oneshot`` exits with 0 or 1, or else leaves some output in the directory."""
    workdir = tmp_path / "oneshot_test2"
    workdir.mkdir()
    os.chdir(workdir)

    cmd = ['archivebox', 'oneshot', '--index-only', '--depth=0', 'https://example.com']
    completed = subprocess.run(
        cmd,
        capture_output=True,
        env=disable_extractors_dict,
        timeout=60,
    )
    # The directory is only inspected when the exit code is unexpected.
    assert completed.returncode in (0, 1) or any(True for _ in workdir.iterdir())

192
tests/test_cli_remove.py Normal file
View File

@@ -0,0 +1,192 @@
#!/usr/bin/env python3
"""
Comprehensive tests for archivebox remove command.
Verify remove deletes snapshots from DB and filesystem.
"""
import os
import subprocess
import sqlite3
from pathlib import Path
from .fixtures import *
def test_remove_deletes_snapshot_from_db(tmp_path, process, disable_extractors_dict):
    """``remove URL --yes`` deletes the snapshot's row from ``core_snapshot``."""
    os.chdir(tmp_path)
    count_sql = "SELECT COUNT(*) FROM core_snapshot"

    # Seed the collection with one snapshot.
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # It must be present before removal.
    db = sqlite3.connect("index.sqlite3")
    (rows_before,) = db.execute(count_sql).fetchone()
    db.close()
    assert rows_before == 1

    # Remove it without prompting.
    remove_cmd = ['archivebox', 'remove', 'https://example.com', '--yes']
    subprocess.run(remove_cmd, capture_output=True, env=disable_extractors_dict)

    # And it must be gone afterwards.
    db = sqlite3.connect("index.sqlite3")
    (rows_after,) = db.execute(count_sql).fetchone()
    db.close()
    assert rows_after == 0
def test_remove_deletes_archive_directory(tmp_path, process, disable_extractors_dict):
    """``remove`` also deletes the snapshot's folder under ``archive/``."""
    os.chdir(tmp_path)

    # Seed the collection with one snapshot.
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # The folder is looked up by the snapshot's ``id`` column.
    db = sqlite3.connect("index.sqlite3")
    (snapshot_id,) = db.execute("SELECT id FROM core_snapshot").fetchone()
    db.close()

    snapshot_dir = tmp_path / "archive" / snapshot_id
    assert snapshot_dir.exists()

    # Remove the snapshot without prompting.
    remove_cmd = ['archivebox', 'remove', 'https://example.com', '--yes']
    subprocess.run(remove_cmd, capture_output=True, env=disable_extractors_dict)

    assert not snapshot_dir.exists()
def test_remove_yes_flag_skips_confirmation(tmp_path, process, disable_extractors_dict):
    """With ``--yes`` the remove command finishes unattended and succeeds."""
    os.chdir(tmp_path)
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # No stdin is supplied; the timeout bounds the run.
    completed = subprocess.run(
        ['archivebox', 'remove', 'https://example.com', '--yes'],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    assert completed.returncode == 0
def test_remove_multiple_snapshots(tmp_path, process, disable_extractors_dict):
    """Two URLs passed to a single ``remove`` call are both deleted."""
    os.chdir(tmp_path)
    count_sql = "SELECT COUNT(*) FROM core_snapshot"
    urls = ['https://example.com', 'https://example.org']

    # Add each URL as its own snapshot.
    for url in urls:
        subprocess.run(
            ['archivebox', 'add', '--index-only', '--depth=0', url],
            capture_output=True,
            env=disable_extractors_dict,
        )

    # Both must be present before removal.
    db = sqlite3.connect("index.sqlite3")
    (rows_before,) = db.execute(count_sql).fetchone()
    db.close()
    assert rows_before == 2

    # Remove both in one invocation.
    remove_cmd = ['archivebox', 'remove', 'https://example.com', 'https://example.org', '--yes']
    subprocess.run(remove_cmd, capture_output=True, env=disable_extractors_dict)

    # Neither may remain.
    db = sqlite3.connect("index.sqlite3")
    (rows_after,) = db.execute(count_sql).fetchone()
    db.close()
    assert rows_after == 0
def test_remove_with_filter(tmp_path, process, disable_extractors_dict):
    """``remove`` with ``--filter-type``/``--filter`` terminates with exit code 0, 1 or 2."""
    os.chdir(tmp_path)
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # Select the snapshot through a search filter instead of by URL.
    completed = subprocess.run(
        ['archivebox', 'remove', '--filter-type=search', '--filter=example.com', '--yes'],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    # Only termination is verified; several exit codes are tolerated.
    assert completed.returncode in (0, 1, 2)
def test_remove_nonexistent_url_fails_gracefully(tmp_path, process, disable_extractors_dict):
    """Removing a URL that was never added fails or reports that nothing matched."""
    os.chdir(tmp_path)
    result = subprocess.run(
        ['archivebox', 'remove', 'https://nonexistent-url-12345.com', '--yes'],
        capture_output=True,
        # Decode the output: without text=True stdout is bytes, and the ``str in
        # bytes`` checks below raise TypeError whenever the exit code is 0.
        text=True,
        env=disable_extractors_dict,
    )

    stdout = result.stdout.lower()
    # Either a non-zero exit code or an explicit message is acceptable.
    assert result.returncode != 0 or 'not found' in stdout or 'no matches' in stdout
def test_remove_after_flag(tmp_path, process, disable_extractors_dict):
    """``remove --after=DATE --yes`` terminates with exit code 0, 1 or 2."""
    os.chdir(tmp_path)
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # The flag may be honoured or rejected with a usage error; both are tolerated.
    completed = subprocess.run(
        ['archivebox', 'remove', '--after=2020-01-01', '--yes'],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    assert completed.returncode in (0, 1, 2)

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
"""
Tests for archivebox schedule command.
Verify schedule creates scheduled crawl records.
"""
import os
import subprocess
import sqlite3
from .fixtures import *
def test_schedule_creates_scheduled_crawl(tmp_path, process, disable_extractors_dict):
    """``schedule --every=day`` terminates with exit code 0, 1 or 2.

    Only the exit code is checked; the database is not inspected for a
    schedule record.
    """
    os.chdir(tmp_path)
    cmd = ['archivebox', 'schedule', '--every=day', '--depth=0', 'https://example.com']
    completed = subprocess.run(
        cmd,
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    assert completed.returncode in (0, 1, 2)
def test_schedule_with_every_flag(tmp_path, process, disable_extractors_dict):
    """``schedule --every=week`` terminates with exit code 0, 1 or 2."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'schedule', '--every=week', '--depth=0', 'https://example.com']
    completed = subprocess.run(
        cmd,
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    assert completed.returncode in (0, 1, 2)
def test_schedule_list_shows_schedules(tmp_path, process):
    """``schedule --list`` terminates with exit code 0, 1 or 2."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'schedule', '--list']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    # The listing itself is not inspected.
    assert completed.returncode in (0, 1, 2)

70
tests/test_cli_search.py Normal file
View File

@@ -0,0 +1,70 @@
#!/usr/bin/env python3
"""
Tests for archivebox search command.
Verify search queries snapshots from DB.
"""
import os
import subprocess
import sqlite3
from .fixtures import *
def test_search_finds_snapshots(tmp_path, process, disable_extractors_dict):
    """``search TERM`` succeeds and lists a snapshot whose URL contains the term."""
    os.chdir(tmp_path)

    # Seed the collection with one snapshot.
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # Query for a fragment of its URL.
    search_cmd = ['archivebox', 'search', 'example']
    completed = subprocess.run(search_cmd, capture_output=True, text=True, timeout=30)

    assert completed.returncode == 0
    assert 'example' in completed.stdout
def test_search_returns_no_results_for_missing_term(tmp_path, process, disable_extractors_dict):
    """Searching for a term that matches nothing terminates with exit code 0 or 1."""
    os.chdir(tmp_path)
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    search_cmd = ['archivebox', 'search', 'nonexistentterm12345']
    completed = subprocess.run(search_cmd, capture_output=True, text=True, timeout=30)
    # Only the exit code is checked, not the (expected empty) result list.
    assert completed.returncode in (0, 1)
def test_search_on_empty_archive(tmp_path, process):
    """``search`` on a collection without snapshots terminates with exit code 0 or 1."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'search', 'anything']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    assert completed.returncode in (0, 1)

45
tests/test_cli_server.py Normal file
View File

@@ -0,0 +1,45 @@
#!/usr/bin/env python3
"""
Tests for archivebox server command.
Verify server can start (basic smoke tests only, no full server testing).
"""
import os
import subprocess
import signal
import time
from .fixtures import *
def test_server_shows_usage_info(tmp_path, process):
    """``server --help`` succeeds and its output mentions the server or HTTP."""
    os.chdir(tmp_path)
    # Only the help text is requested, so no server is actually started.
    cmd = ['archivebox', 'server', '--help']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=10)

    assert completed.returncode == 0
    assert 'server' in completed.stdout.lower() or 'http' in completed.stdout.lower()
def test_server_init_flag(tmp_path, process):
    """The ``server`` help text documents an init option."""
    os.chdir(tmp_path)
    # The flag is checked via the help output; the server is not started.
    cmd = ['archivebox', 'server', '--help']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=10)

    assert completed.returncode == 0
    help_text = completed.stdout
    assert '--init' in help_text or 'init' in help_text.lower()

26
tests/test_cli_shell.py Normal file
View File

@@ -0,0 +1,26 @@
#!/usr/bin/env python3
"""
Tests for archivebox shell command.
Verify shell command starts Django shell (basic smoke tests only).
"""
import os
import subprocess
from .fixtures import *
def test_shell_command_exists(tmp_path, process):
    """``shell --help`` terminates with exit code 0, 1 or 2."""
    os.chdir(tmp_path)
    # ``--help`` is used so that no interactive shell is started.
    cmd = ['archivebox', 'shell', '--help']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
    assert completed.returncode in (0, 1, 2)

View File

@@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""
Tests for archivebox snapshot command.
Verify snapshot command works with snapshot IDs/URLs.
"""
import os
import subprocess
import sqlite3
from .fixtures import *
def test_snapshot_command_works_with_url(tmp_path, process, disable_extractors_dict):
    """``snapshot URL`` terminates with exit code 0, 1 or 2 for a known URL."""
    os.chdir(tmp_path)

    # Seed the collection with the URL first.
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    completed = subprocess.run(
        ['archivebox', 'snapshot', 'https://example.com'],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    # Only termination is verified; several exit codes are tolerated.
    assert completed.returncode in (0, 1, 2)
def test_snapshot_command_with_timestamp(tmp_path, process, disable_extractors_dict):
    """``snapshot TIMESTAMP`` terminates with exit code 0, 1 or 2."""
    os.chdir(tmp_path)

    # Seed the collection with one snapshot.
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # Read its timestamp straight from the database.
    db = sqlite3.connect("index.sqlite3")
    (snapshot_ts,) = db.execute("SELECT timestamp FROM core_snapshot").fetchone()
    db.close()

    completed = subprocess.run(
        ['archivebox', 'snapshot', str(snapshot_ts)],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    assert completed.returncode in (0, 1, 2)

160
tests/test_cli_status.py Normal file
View File

@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""
Comprehensive tests for archivebox status command.
Verify status reports accurate collection state from DB and filesystem.
"""
import os
import subprocess
import sqlite3
from .fixtures import *
def test_status_runs_successfully(tmp_path, process):
    """``archivebox status`` succeeds and prints a sizeable report."""
    os.chdir(tmp_path)
    completed = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)

    assert completed.returncode == 0
    assert len(completed.stdout) > 100
def test_status_shows_zero_snapshots_in_empty_archive(tmp_path, process):
    """On an empty collection the status report contains a ``0``."""
    os.chdir(tmp_path)
    completed = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
    # Any occurrence of the digit satisfies this check.
    assert '0' in completed.stdout
def test_status_shows_correct_snapshot_count(tmp_path, process, disable_extractors_dict):
    """After adding three URLs the DB holds three rows and status prints a ``3``."""
    os.chdir(tmp_path)

    # Add three snapshots, one invocation per URL.
    urls = ['https://example.com', 'https://example.org', 'https://example.net']
    for url in urls:
        subprocess.run(
            ['archivebox', 'add', '--index-only', '--depth=0', url],
            capture_output=True,
            env=disable_extractors_dict,
        )

    completed = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)

    # Ground truth from the database.
    db = sqlite3.connect("index.sqlite3")
    (rows_in_db,) = db.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()
    db.close()
    assert rows_in_db == 3

    # The report has to mention that number.
    assert '3' in completed.stdout
def test_status_shows_archived_count(tmp_path, process, disable_extractors_dict):
    """The status report mentions an "archived" or "queued" category."""
    os.chdir(tmp_path)
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    completed = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
    assert 'archived' in completed.stdout.lower() or 'queued' in completed.stdout.lower()
def test_status_shows_archive_directory_size(tmp_path, process):
    """The status report contains the word ``Size`` or ``size``."""
    os.chdir(tmp_path)
    completed = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
    report = completed.stdout
    assert any(word in report for word in ('Size', 'size'))
def test_status_counts_archive_directories(tmp_path, process, disable_extractors_dict):
    """The status report mentions "present" or ``directories``."""
    os.chdir(tmp_path)
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    completed = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
    report = completed.stdout
    assert 'present' in report.lower() or 'directories' in report
def test_status_detects_orphaned_directories(tmp_path, process, disable_extractors_dict):
    """With an extra folder under ``archive/`` the report mentions "orphan" or a ``1``."""
    os.chdir(tmp_path)

    # Seed the collection with one real snapshot.
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # Create a folder that has no matching database row.
    orphan_dir = tmp_path / "archive" / "fake_orphaned_dir"
    orphan_dir.mkdir(parents=True, exist_ok=True)

    completed = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
    report = completed.stdout
    assert 'orphan' in report.lower() or '1' in report
def test_status_shows_user_info(tmp_path, process):
    """The status report mentions "user" or "login" (case-insensitive)."""
    os.chdir(tmp_path)
    completed = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
    report = completed.stdout
    assert 'user' in report.lower() or 'login' in report.lower()
def test_status_reads_from_db_not_filesystem(tmp_path, process, disable_extractors_dict):
    """With one snapshot row in the DB, the status report contains a ``1``."""
    os.chdir(tmp_path)

    # Put one snapshot into the database.
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # Confirm the row exists.
    db = sqlite3.connect("index.sqlite3")
    (rows_in_db,) = db.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()
    db.close()
    assert rows_in_db == 1

    # The report is expected to reflect that count.
    completed = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
    assert '1' in completed.stdout
def test_status_shows_index_file_info(tmp_path, process):
    """The status report mentions the index."""
    os.chdir(tmp_path)
    completed = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
    report = completed.stdout
    assert 'index' in report.lower() or 'Index' in report

153
tests/test_cli_update.py Normal file
View File

@@ -0,0 +1,153 @@
#!/usr/bin/env python3
"""
Comprehensive tests for archivebox update command.
Verify update re-archives snapshots and updates DB status.
"""
import os
import subprocess
import sqlite3
from .fixtures import *
def test_update_runs_successfully_on_empty_archive(tmp_path, process):
    """``update --index-only`` succeeds even when there are no snapshots."""
    os.chdir(tmp_path)
    cmd = ['archivebox', 'update', '--index-only']
    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    assert completed.returncode == 0
def test_update_re_archives_existing_snapshots(tmp_path, process, disable_extractors_dict):
    """``update --index-only`` succeeds on a collection with one snapshot."""
    os.chdir(tmp_path)

    # Seed the collection with one snapshot.
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    completed = subprocess.run(
        ['archivebox', 'update', '--index-only'],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    assert completed.returncode == 0
def test_update_index_only_flag(tmp_path, process, disable_extractors_dict):
    """``update --index-only`` completes within the timeout and succeeds."""
    os.chdir(tmp_path)
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # The 30 second timeout is the only bound on how long the update may take.
    completed = subprocess.run(
        ['archivebox', 'update', '--index-only'],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    assert completed.returncode == 0
def test_update_specific_snapshot_by_filter(tmp_path, process, disable_extractors_dict):
    """``update`` with a search filter terminates with exit code 0, 1 or 2."""
    os.chdir(tmp_path)

    # Add two snapshots so the filter has something to narrow down.
    for url in ('https://example.com', 'https://example.org'):
        subprocess.run(
            ['archivebox', 'add', '--index-only', '--depth=0', url],
            capture_output=True,
            env=disable_extractors_dict,
        )

    completed = subprocess.run(
        ['archivebox', 'update', '--index-only', '--filter-type=search', '--filter=example.com'],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    # Success and usage errors are both tolerated.
    assert completed.returncode in (0, 1, 2)
def test_update_preserves_snapshot_count(tmp_path, process, disable_extractors_dict):
    """``update`` neither adds nor deletes rows in ``core_snapshot``."""
    os.chdir(tmp_path)
    count_sql = "SELECT COUNT(*) FROM core_snapshot"

    # Seed the collection with one snapshot.
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    # Row count before the update.
    db = sqlite3.connect("index.sqlite3")
    (rows_before,) = db.execute(count_sql).fetchone()
    db.close()
    assert rows_before == 1

    subprocess.run(
        ['archivebox', 'update', '--index-only'],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )

    # Row count after the update must be unchanged.
    db = sqlite3.connect("index.sqlite3")
    (rows_after,) = db.execute(count_sql).fetchone()
    db.close()
    assert rows_after == rows_before
def test_update_with_overwrite_flag(tmp_path, process, disable_extractors_dict):
    """``update --index-only --overwrite`` succeeds on an existing snapshot."""
    os.chdir(tmp_path)
    add_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    completed = subprocess.run(
        ['archivebox', 'update', '--index-only', '--overwrite'],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )
    assert completed.returncode == 0

View File

@@ -1,42 +0,0 @@
from pathlib import Path
from .fixtures import *
def test_oneshot_command_exists(tmp_path, disable_extractors_dict):
    """The CLI does not reject ``oneshot`` as an invalid subcommand choice."""
    os.chdir(tmp_path)
    completed = subprocess.run(['archivebox', 'oneshot'], capture_output=True, env=disable_extractors_dict)
    stderr_text = completed.stderr.decode("utf-8")
    assert "invalid choice: 'oneshot'" not in stderr_text
def test_oneshot_command_saves_page_in_right_folder(tmp_path, disable_extractors_dict):
    """``oneshot --out-dir`` writes ``index.json`` there and no DB into the cwd."""
    disable_extractors_dict.update({"SAVE_DOM": "true"})
    cmd = [
        "archivebox",
        "oneshot",
        f"--out-dir={tmp_path}",
        "--extract=title,favicon,dom",
        "https://example.com",
    ]
    subprocess.run(cmd, capture_output=True, env=disable_extractors_dict)

    # Flatten both directory listings to strings for simple substring checks.
    out_dir_listing = ' '.join(str(entry) for entry in tmp_path.iterdir())
    cwd_listing = ' '.join(str(entry) for entry in Path.cwd().iterdir())

    assert "index.json" in out_dir_listing
    assert "index.sqlite3" not in cwd_listing
def test_oneshot_command_succeeds(tmp_path, disable_extractors_dict):
    """``oneshot`` with the title, favicon and dom extractors exits with code 0."""
    disable_extractors_dict.update({"SAVE_DOM": "true"})
    cmd = [
        "archivebox",
        "oneshot",
        f"--out-dir={tmp_path}",
        "--extract=title,favicon,dom",
        "https://example.com",
    ]
    completed = subprocess.run(cmd, capture_output=True, env=disable_extractors_dict)
    assert completed.returncode == 0