#!/usr/bin/env python3 """ Comprehensive tests for archivebox init command. Verify init creates correct database schema, filesystem structure, and config. """ import os import sqlite3 import subprocess from archivebox.config.common import STORAGE_CONFIG DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace("6", "7").replace("4", "5") def test_init_creates_database_file(tmp_path): """Test that init creates index.sqlite3 database file.""" os.chdir(tmp_path) result = subprocess.run(["archivebox", "init"], capture_output=True) assert result.returncode == 0 db_path = tmp_path / "index.sqlite3" assert db_path.exists() assert db_path.is_file() def test_init_creates_archive_directory(tmp_path): """Test that init creates archive directory.""" os.chdir(tmp_path) subprocess.run(["archivebox", "init"], capture_output=True) archive_dir = tmp_path / "archive" assert archive_dir.exists() assert archive_dir.is_dir() def test_init_creates_sources_directory(tmp_path): """Test that init creates sources directory.""" os.chdir(tmp_path) subprocess.run(["archivebox", "init"], capture_output=True) sources_dir = tmp_path / "sources" assert sources_dir.exists() assert sources_dir.is_dir() def test_init_creates_logs_directory(tmp_path): """Test that init creates logs directory.""" os.chdir(tmp_path) subprocess.run(["archivebox", "init"], capture_output=True) logs_dir = tmp_path / "logs" assert logs_dir.exists() assert logs_dir.is_dir() def test_init_creates_config_file(tmp_path): """Test that init creates ArchiveBox.conf config file.""" os.chdir(tmp_path) subprocess.run(["archivebox", "init"], capture_output=True) config_file = tmp_path / "ArchiveBox.conf" assert config_file.exists() assert config_file.is_file() def test_init_runs_migrations(tmp_path): """Test that init runs Django migrations and creates core tables.""" os.chdir(tmp_path) subprocess.run(["archivebox", "init"], capture_output=True) # Check that migrations were applied conn = sqlite3.connect("index.sqlite3") c = conn.cursor() # Check django_migrations table exists migrations = c.execute( "SELECT name FROM sqlite_master WHERE type='table' AND name='django_migrations'", ).fetchall() assert len(migrations) == 1 # Check that some migrations were applied migration_count = c.execute("SELECT COUNT(*) FROM django_migrations").fetchone()[0] assert migration_count > 0 conn.close() def test_init_creates_core_snapshot_table(tmp_path): """Test that init creates core_snapshot table.""" os.chdir(tmp_path) subprocess.run(["archivebox", "init"], capture_output=True) conn = sqlite3.connect("index.sqlite3") c = conn.cursor() # Check core_snapshot table exists tables = c.execute( "SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot'", ).fetchall() assert len(tables) == 1 conn.close() def test_init_creates_crawls_crawl_table(tmp_path): """Test that init creates crawls_crawl table.""" os.chdir(tmp_path) subprocess.run(["archivebox", "init"], capture_output=True) conn = sqlite3.connect("index.sqlite3") c = conn.cursor() # Check crawls_crawl table exists tables = c.execute( "SELECT name FROM sqlite_master WHERE type='table' AND name='crawls_crawl'", ).fetchall() assert len(tables) == 1 conn.close() def test_init_creates_core_archiveresult_table(tmp_path): """Test that init creates core_archiveresult table.""" os.chdir(tmp_path) subprocess.run(["archivebox", "init"], capture_output=True) conn = sqlite3.connect("index.sqlite3") c = conn.cursor() # Check core_archiveresult table exists tables = c.execute( "SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'", ).fetchall() assert len(tables) == 1 conn.close() def test_init_sets_correct_file_permissions(tmp_path): """Test that init sets correct permissions on created files.""" os.chdir(tmp_path) subprocess.run(["archivebox", "init"], capture_output=True) # Check database permissions db_path = tmp_path / "index.sqlite3" assert oct(db_path.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS) # Check directory permissions archive_dir = tmp_path / "archive" assert oct(archive_dir.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS) def test_init_is_idempotent(tmp_path): """Test that running init multiple times is safe (idempotent).""" os.chdir(tmp_path) # First init result1 = subprocess.run(["archivebox", "init"], capture_output=True, text=True) assert result1.returncode == 0 assert "Initializing a new ArchiveBox" in result1.stdout # Second init should update, not fail result2 = subprocess.run(["archivebox", "init"], capture_output=True, text=True) assert result2.returncode == 0 assert "updating existing ArchiveBox" in result2.stdout or "up-to-date" in result2.stdout.lower() # Database should still be valid conn = sqlite3.connect("index.sqlite3") c = conn.cursor() count = c.execute("SELECT COUNT(*) FROM django_migrations").fetchone()[0] assert count > 0 conn.close() def test_init_with_existing_data_preserves_snapshots(tmp_path, process, disable_extractors_dict): """Test that re-running init preserves existing snapshot data.""" os.chdir(tmp_path) # Add a snapshot subprocess.run( ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"], capture_output=True, env=disable_extractors_dict, ) # Check snapshot was created conn = sqlite3.connect("index.sqlite3") c = conn.cursor() count_before = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0] assert count_before == 1 conn.close() # Run init again result = subprocess.run(["archivebox", "init"], capture_output=True) assert result.returncode == 0 # Snapshot should still exist conn = sqlite3.connect("index.sqlite3") c = conn.cursor() count_after = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0] assert count_after == count_before conn.close() def test_init_quick_flag_skips_checks(tmp_path): """Test that init --quick runs faster by skipping some checks.""" os.chdir(tmp_path) result = subprocess.run(["archivebox", "init", "--quick"], capture_output=True, text=True) assert result.returncode == 0 # Database should still be created db_path = tmp_path / "index.sqlite3" assert db_path.exists() def test_init_creates_machine_table(tmp_path): """Test that init creates the machine_machine table.""" os.chdir(tmp_path) subprocess.run(["archivebox", "init"], capture_output=True) conn = sqlite3.connect("index.sqlite3") c = conn.cursor() # Check machine_machine table exists tables = c.execute( "SELECT name FROM sqlite_master WHERE type='table' AND name='machine_machine'", ).fetchall() conn.close() assert len(tables) == 1 def test_init_output_shows_collection_info(tmp_path): """Test that init output shows helpful collection information.""" os.chdir(tmp_path) result = subprocess.run(["archivebox", "init"], capture_output=True, text=True) output = result.stdout # Should show some helpful info about the collection assert "ArchiveBox" in output or "collection" in output.lower() or "Initializing" in output def test_init_ignores_unrecognized_archive_directories(tmp_path, process, disable_extractors_dict): """Test that init upgrades existing dirs without choking on extra folders.""" os.chdir(tmp_path) subprocess.run( ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"], capture_output=True, env=disable_extractors_dict, check=True, ) (tmp_path / "archive" / "some_random_folder").mkdir(parents=True, exist_ok=True) result = subprocess.run( ["archivebox", "init"], capture_output=True, text=True, env=disable_extractors_dict, ) assert result.returncode == 0, result.stdout + result.stderr