Files
ArchiveBox/archivebox/tests/test_cli_init.py
Nick Sweeting 934e02695b fix lint
2026-03-15 18:45:29 -07:00

244 lines
7.5 KiB
Python

#!/usr/bin/env python3
"""
Comprehensive tests for archivebox init command.
Verify init creates correct database schema, filesystem structure, and config.
"""
import os
import sqlite3
import subprocess
from archivebox.config.common import STORAGE_CONFIG
# Variant of the configured output permission string in which every '6' (rw-)
# is replaced by '7' (rwx) and every '4' (r--) by '5' (r-x), i.e. the execute
# bit is added to those digits. The permission test below accepts either this
# value or the unmodified OUTPUT_PERMISSIONS.
# NOTE(review): assumes OUTPUT_PERMISSIONS is a 3-digit octal string such as '644' — confirm.
DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
def test_init_creates_database_file(tmp_path):
    """Running ``archivebox init`` must leave an ``index.sqlite3`` file in the data dir."""
    os.chdir(tmp_path)

    completed = subprocess.run(['archivebox', 'init'], capture_output=True)
    assert completed.returncode == 0

    # The database must be present and must be a regular file.
    database = tmp_path / "index.sqlite3"
    assert database.exists()
    assert database.is_file()
def test_init_creates_archive_directory(tmp_path):
    """Test that init creates the archive directory.

    Runs ``archivebox init`` with the working directory set to ``tmp_path``
    and asserts that ``tmp_path / "archive"`` exists and is a directory.
    """
    os.chdir(tmp_path)
    result = subprocess.run(['archivebox', 'init'], capture_output=True)
    # Fail early, showing the captured stderr, if init itself did not succeed;
    # otherwise a partially completed init could still satisfy the checks below.
    assert result.returncode == 0, result.stderr

    archive_dir = tmp_path / "archive"
    assert archive_dir.exists()
    assert archive_dir.is_dir()
def test_init_creates_sources_directory(tmp_path):
    """Test that init creates the sources directory.

    Runs ``archivebox init`` with the working directory set to ``tmp_path``
    and asserts that ``tmp_path / "sources"`` exists and is a directory.
    """
    os.chdir(tmp_path)
    result = subprocess.run(['archivebox', 'init'], capture_output=True)
    # Surface a failed init directly (with its stderr) instead of relying on
    # the filesystem checks below to notice it.
    assert result.returncode == 0, result.stderr

    sources_dir = tmp_path / "sources"
    assert sources_dir.exists()
    assert sources_dir.is_dir()
def test_init_creates_logs_directory(tmp_path):
    """Test that init creates the logs directory.

    Runs ``archivebox init`` with the working directory set to ``tmp_path``
    and asserts that ``tmp_path / "logs"`` exists and is a directory.
    """
    os.chdir(tmp_path)
    result = subprocess.run(['archivebox', 'init'], capture_output=True)
    # Surface a failed init directly (with its stderr) instead of relying on
    # the filesystem checks below to notice it.
    assert result.returncode == 0, result.stderr

    logs_dir = tmp_path / "logs"
    assert logs_dir.exists()
    assert logs_dir.is_dir()
def test_init_creates_config_file(tmp_path):
    """Test that init creates the ArchiveBox.conf config file.

    Runs ``archivebox init`` with the working directory set to ``tmp_path``
    and asserts that ``tmp_path / "ArchiveBox.conf"`` exists and is a regular
    file.
    """
    os.chdir(tmp_path)
    result = subprocess.run(['archivebox', 'init'], capture_output=True)
    # Surface a failed init directly (with its stderr) instead of relying on
    # the filesystem checks below to notice it.
    assert result.returncode == 0, result.stderr

    config_file = tmp_path / "ArchiveBox.conf"
    assert config_file.exists()
    assert config_file.is_file()
def test_init_runs_migrations(tmp_path):
    """Test that init runs Django migrations and creates core tables.

    After ``archivebox init`` succeeds in ``tmp_path``, the SQLite index must
    contain a ``django_migrations`` table holding at least one row.
    """
    os.chdir(tmp_path)
    result = subprocess.run(['archivebox', 'init'], capture_output=True)
    # If init failed, sqlite3.connect() below would silently create an empty
    # database; fail here instead, with the captured stderr as the message.
    assert result.returncode == 0, result.stderr

    conn = sqlite3.connect("index.sqlite3")
    try:
        c = conn.cursor()

        # Check django_migrations table exists
        migrations = c.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='django_migrations'"
        ).fetchall()
        assert len(migrations) == 1

        # Check that some migrations were applied
        migration_count = c.execute("SELECT COUNT(*) FROM django_migrations").fetchone()[0]
        assert migration_count > 0
    finally:
        # Close the connection even when an assertion above fails.
        conn.close()
def test_init_creates_core_snapshot_table(tmp_path):
    """Test that init creates the core_snapshot table.

    After ``archivebox init`` succeeds in ``tmp_path``, ``sqlite_master`` must
    list exactly one table named ``core_snapshot``.
    """
    os.chdir(tmp_path)
    result = subprocess.run(['archivebox', 'init'], capture_output=True)
    # If init failed, sqlite3.connect() below would silently create an empty
    # database; fail here instead, with the captured stderr as the message.
    assert result.returncode == 0, result.stderr

    conn = sqlite3.connect("index.sqlite3")
    try:
        # Check core_snapshot table exists
        tables = conn.cursor().execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot'"
        ).fetchall()
    finally:
        # Release the connection before asserting so a failure cannot leak it.
        conn.close()

    assert len(tables) == 1
def test_init_creates_crawls_crawl_table(tmp_path):
    """Test that init creates the crawls_crawl table.

    After ``archivebox init`` succeeds in ``tmp_path``, ``sqlite_master`` must
    list exactly one table named ``crawls_crawl``.
    """
    os.chdir(tmp_path)
    result = subprocess.run(['archivebox', 'init'], capture_output=True)
    # If init failed, sqlite3.connect() below would silently create an empty
    # database; fail here instead, with the captured stderr as the message.
    assert result.returncode == 0, result.stderr

    conn = sqlite3.connect("index.sqlite3")
    try:
        # Check crawls_crawl table exists
        tables = conn.cursor().execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='crawls_crawl'"
        ).fetchall()
    finally:
        # Release the connection before asserting so a failure cannot leak it.
        conn.close()

    assert len(tables) == 1
def test_init_creates_core_archiveresult_table(tmp_path):
    """Test that init creates the core_archiveresult table.

    After ``archivebox init`` succeeds in ``tmp_path``, ``sqlite_master`` must
    list exactly one table named ``core_archiveresult``.
    """
    os.chdir(tmp_path)
    result = subprocess.run(['archivebox', 'init'], capture_output=True)
    # If init failed, sqlite3.connect() below would silently create an empty
    # database; fail here instead, with the captured stderr as the message.
    assert result.returncode == 0, result.stderr

    conn = sqlite3.connect("index.sqlite3")
    try:
        # Check core_archiveresult table exists
        tables = conn.cursor().execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'"
        ).fetchall()
    finally:
        # Release the connection before asserting so a failure cannot leak it.
        conn.close()

    assert len(tables) == 1
def test_init_sets_correct_file_permissions(tmp_path):
    """Test that init sets correct permissions on created files.

    The last three octal digits of the mode of ``index.sqlite3`` and of the
    ``archive`` directory must each equal either
    ``STORAGE_CONFIG.OUTPUT_PERMISSIONS`` or ``DIR_PERMISSIONS``.
    """
    os.chdir(tmp_path)
    result = subprocess.run(['archivebox', 'init'], capture_output=True)
    # Make a failed init visible (with its stderr) rather than letting it show
    # up as a FileNotFoundError from stat() below.
    assert result.returncode == 0, result.stderr

    # Both checks accept the same pair of modes, so build it once.
    accepted_modes = (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)

    # Check database permissions
    db_path = tmp_path / "index.sqlite3"
    assert oct(db_path.stat().st_mode)[-3:] in accepted_modes

    # Check directory permissions
    archive_dir = tmp_path / "archive"
    assert oct(archive_dir.stat().st_mode)[-3:] in accepted_modes
def test_init_is_idempotent(tmp_path):
    """Test that running init multiple times is safe (idempotent).

    The first run must exit 0 and announce a new collection; the second run
    must also exit 0 and report either an update of the existing collection or
    that it is up to date. Afterwards ``django_migrations`` must still contain
    at least one row.
    """
    os.chdir(tmp_path)

    # First init
    result1 = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
    assert result1.returncode == 0
    assert "Initializing a new ArchiveBox" in result1.stdout

    # Second init should update, not fail
    result2 = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
    assert result2.returncode == 0
    assert "updating existing ArchiveBox" in result2.stdout or "up-to-date" in result2.stdout.lower()

    # Database should still be valid
    conn = sqlite3.connect("index.sqlite3")
    try:
        count = conn.cursor().execute("SELECT COUNT(*) FROM django_migrations").fetchone()[0]
    finally:
        # Close the connection even if the query raises.
        conn.close()
    assert count > 0
def test_init_with_existing_data_preserves_snapshots(tmp_path, process, disable_extractors_dict):
    """Test that re-running init preserves existing snapshot data.

    Adds one snapshot with ``archivebox add --index-only``, checks that
    ``core_snapshot`` holds exactly one row, re-runs ``archivebox init`` and
    checks that the row count is unchanged.

    NOTE(review): presumably the ``process`` fixture has already initialised
    the collection in ``tmp_path`` — confirm against conftest.
    """

    def _count_snapshots():
        """Return the row count of core_snapshot, always closing the connection."""
        conn = sqlite3.connect("index.sqlite3")
        try:
            return conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
        finally:
            conn.close()

    os.chdir(tmp_path)

    # Add a snapshot
    subprocess.run(
        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Check snapshot was created
    count_before = _count_snapshots()
    assert count_before == 1

    # Run init again
    result = subprocess.run(['archivebox', 'init'], capture_output=True)
    assert result.returncode == 0

    # Snapshot should still exist
    count_after = _count_snapshots()
    assert count_after == count_before
def test_init_quick_flag_skips_checks(tmp_path):
    """``archivebox init --quick`` must exit successfully and still produce the database."""
    os.chdir(tmp_path)

    quick_init = subprocess.run(
        ['archivebox', 'init', '--quick'],
        capture_output=True,
        text=True,
    )
    assert quick_init.returncode == 0

    # Even in quick mode the SQLite index has to exist afterwards.
    assert (tmp_path / "index.sqlite3").exists()
def test_init_creates_machine_table(tmp_path):
    """Test that init creates the machine_machine table.

    After ``archivebox init`` succeeds in ``tmp_path``, ``sqlite_master`` must
    list exactly one table named ``machine_machine``.
    """
    os.chdir(tmp_path)
    result = subprocess.run(['archivebox', 'init'], capture_output=True)
    # If init failed, sqlite3.connect() below would silently create an empty
    # database; fail here instead, with the captured stderr as the message.
    assert result.returncode == 0, result.stderr

    conn = sqlite3.connect("index.sqlite3")
    try:
        # Check machine_machine table exists
        tables = conn.cursor().execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='machine_machine'"
        ).fetchall()
    finally:
        # Close the connection even if the query raises.
        conn.close()

    assert len(tables) == 1
def test_init_output_shows_collection_info(tmp_path):
    """Test that init output shows helpful collection information.

    Runs ``archivebox init`` in ``tmp_path`` and checks that it exits 0 and
    that stdout mentions ``ArchiveBox``, ``collection`` (case-insensitive) or
    ``Initializing``.
    """
    os.chdir(tmp_path)
    result = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
    # The output check below is deliberately loose, so also require a clean
    # exit; otherwise a failing run that prints a matching word would pass.
    assert result.returncode == 0, result.stderr

    output = result.stdout
    # Should show some helpful info about the collection
    assert 'ArchiveBox' in output or 'collection' in output.lower() or 'Initializing' in output