This commit is contained in:
Nick Sweeting
2026-03-23 03:58:32 -07:00
parent 268856bcfb
commit b749b26c5d
286 changed files with 21704 additions and 13480 deletions

View File

@@ -11,13 +11,13 @@ import subprocess
from archivebox.config.common import STORAGE_CONFIG
DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace("6", "7").replace("4", "5")
def test_init_creates_database_file(tmp_path):
"""Test that init creates index.sqlite3 database file."""
os.chdir(tmp_path)
result = subprocess.run(['archivebox', 'init'], capture_output=True)
result = subprocess.run(["archivebox", "init"], capture_output=True)
assert result.returncode == 0
db_path = tmp_path / "index.sqlite3"
@@ -28,7 +28,7 @@ def test_init_creates_database_file(tmp_path):
def test_init_creates_archive_directory(tmp_path):
"""Test that init creates archive directory."""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'init'], capture_output=True)
subprocess.run(["archivebox", "init"], capture_output=True)
archive_dir = tmp_path / "archive"
assert archive_dir.exists()
@@ -38,7 +38,7 @@ def test_init_creates_archive_directory(tmp_path):
def test_init_creates_sources_directory(tmp_path):
"""Test that init creates sources directory."""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'init'], capture_output=True)
subprocess.run(["archivebox", "init"], capture_output=True)
sources_dir = tmp_path / "sources"
assert sources_dir.exists()
@@ -48,7 +48,7 @@ def test_init_creates_sources_directory(tmp_path):
def test_init_creates_logs_directory(tmp_path):
"""Test that init creates logs directory."""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'init'], capture_output=True)
subprocess.run(["archivebox", "init"], capture_output=True)
logs_dir = tmp_path / "logs"
assert logs_dir.exists()
@@ -58,7 +58,7 @@ def test_init_creates_logs_directory(tmp_path):
def test_init_creates_config_file(tmp_path):
"""Test that init creates ArchiveBox.conf config file."""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'init'], capture_output=True)
subprocess.run(["archivebox", "init"], capture_output=True)
config_file = tmp_path / "ArchiveBox.conf"
assert config_file.exists()
@@ -68,7 +68,7 @@ def test_init_creates_config_file(tmp_path):
def test_init_runs_migrations(tmp_path):
"""Test that init runs Django migrations and creates core tables."""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'init'], capture_output=True)
subprocess.run(["archivebox", "init"], capture_output=True)
# Check that migrations were applied
conn = sqlite3.connect("index.sqlite3")
@@ -76,7 +76,7 @@ def test_init_runs_migrations(tmp_path):
# Check django_migrations table exists
migrations = c.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='django_migrations'"
"SELECT name FROM sqlite_master WHERE type='table' AND name='django_migrations'",
).fetchall()
assert len(migrations) == 1
@@ -90,14 +90,14 @@ def test_init_runs_migrations(tmp_path):
def test_init_creates_core_snapshot_table(tmp_path):
"""Test that init creates core_snapshot table."""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'init'], capture_output=True)
subprocess.run(["archivebox", "init"], capture_output=True)
conn = sqlite3.connect("index.sqlite3")
c = conn.cursor()
# Check core_snapshot table exists
tables = c.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot'"
"SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot'",
).fetchall()
assert len(tables) == 1
@@ -107,14 +107,14 @@ def test_init_creates_core_snapshot_table(tmp_path):
def test_init_creates_crawls_crawl_table(tmp_path):
"""Test that init creates crawls_crawl table."""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'init'], capture_output=True)
subprocess.run(["archivebox", "init"], capture_output=True)
conn = sqlite3.connect("index.sqlite3")
c = conn.cursor()
# Check crawls_crawl table exists
tables = c.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='crawls_crawl'"
"SELECT name FROM sqlite_master WHERE type='table' AND name='crawls_crawl'",
).fetchall()
assert len(tables) == 1
@@ -124,14 +124,14 @@ def test_init_creates_crawls_crawl_table(tmp_path):
def test_init_creates_core_archiveresult_table(tmp_path):
"""Test that init creates core_archiveresult table."""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'init'], capture_output=True)
subprocess.run(["archivebox", "init"], capture_output=True)
conn = sqlite3.connect("index.sqlite3")
c = conn.cursor()
# Check core_archiveresult table exists
tables = c.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'"
"SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'",
).fetchall()
assert len(tables) == 1
@@ -141,7 +141,7 @@ def test_init_creates_core_archiveresult_table(tmp_path):
def test_init_sets_correct_file_permissions(tmp_path):
"""Test that init sets correct permissions on created files."""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'init'], capture_output=True)
subprocess.run(["archivebox", "init"], capture_output=True)
# Check database permissions
db_path = tmp_path / "index.sqlite3"
@@ -157,12 +157,12 @@ def test_init_is_idempotent(tmp_path):
os.chdir(tmp_path)
# First init
result1 = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
result1 = subprocess.run(["archivebox", "init"], capture_output=True, text=True)
assert result1.returncode == 0
assert "Initializing a new ArchiveBox" in result1.stdout
# Second init should update, not fail
result2 = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
result2 = subprocess.run(["archivebox", "init"], capture_output=True, text=True)
assert result2.returncode == 0
assert "updating existing ArchiveBox" in result2.stdout or "up-to-date" in result2.stdout.lower()
@@ -180,7 +180,7 @@ def test_init_with_existing_data_preserves_snapshots(tmp_path, process, disable_
# Add a snapshot
subprocess.run(
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
capture_output=True,
env=disable_extractors_dict,
)
@@ -193,7 +193,7 @@ def test_init_with_existing_data_preserves_snapshots(tmp_path, process, disable_
conn.close()
# Run init again
result = subprocess.run(['archivebox', 'init'], capture_output=True)
result = subprocess.run(["archivebox", "init"], capture_output=True)
assert result.returncode == 0
# Snapshot should still exist
@@ -208,7 +208,7 @@ def test_init_quick_flag_skips_checks(tmp_path):
"""Test that init --quick runs faster by skipping some checks."""
os.chdir(tmp_path)
result = subprocess.run(['archivebox', 'init', '--quick'], capture_output=True, text=True)
result = subprocess.run(["archivebox", "init", "--quick"], capture_output=True, text=True)
assert result.returncode == 0
# Database should still be created
@@ -219,14 +219,14 @@ def test_init_quick_flag_skips_checks(tmp_path):
def test_init_creates_machine_table(tmp_path):
"""Test that init creates the machine_machine table."""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'init'], capture_output=True)
subprocess.run(["archivebox", "init"], capture_output=True)
conn = sqlite3.connect("index.sqlite3")
c = conn.cursor()
# Check machine_machine table exists
tables = c.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='machine_machine'"
"SELECT name FROM sqlite_master WHERE type='table' AND name='machine_machine'",
).fetchall()
conn.close()
@@ -236,18 +236,18 @@ def test_init_creates_machine_table(tmp_path):
def test_init_output_shows_collection_info(tmp_path):
"""Test that init output shows helpful collection information."""
os.chdir(tmp_path)
result = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
result = subprocess.run(["archivebox", "init"], capture_output=True, text=True)
output = result.stdout
# Should show some helpful info about the collection
assert 'ArchiveBox' in output or 'collection' in output.lower() or 'Initializing' in output
assert "ArchiveBox" in output or "collection" in output.lower() or "Initializing" in output
def test_init_ignores_unrecognized_archive_directories(tmp_path, process, disable_extractors_dict):
"""Test that init upgrades existing dirs without choking on extra folders."""
os.chdir(tmp_path)
subprocess.run(
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
capture_output=True,
env=disable_extractors_dict,
check=True,
@@ -255,7 +255,7 @@ def test_init_ignores_unrecognized_archive_directories(tmp_path, process, disabl
(tmp_path / "archive" / "some_random_folder").mkdir(parents=True, exist_ok=True)
result = subprocess.run(
['archivebox', 'init'],
["archivebox", "init"],
capture_output=True,
text=True,
env=disable_extractors_dict,