Files
ArchiveBox/archivebox/tests/test_cli_remove.py
Nick Sweeting b749b26c5d wip
2026-03-23 03:58:32 -07:00

258 lines
8.0 KiB
Python

#!/usr/bin/env python3
"""
Comprehensive tests for archivebox remove command.
Verify remove deletes snapshots from DB and filesystem.
"""
import os
import sqlite3
import subprocess
from pathlib import Path
def _find_snapshot_dir(data_dir: Path, snapshot_id: str) -> Path | None:
candidates = {snapshot_id}
if len(snapshot_id) == 32:
candidates.add(f"{snapshot_id[:8]}-{snapshot_id[8:12]}-{snapshot_id[12:16]}-{snapshot_id[16:20]}-{snapshot_id[20:]}")
elif len(snapshot_id) == 36 and "-" in snapshot_id:
candidates.add(snapshot_id.replace("-", ""))
for needle in candidates:
for path in data_dir.rglob(needle):
if path.is_dir():
return path
return None
def test_remove_deletes_snapshot_from_db(tmp_path, process, disable_extractors_dict):
    """Test that remove command deletes snapshot from database.

    Adds one URL, confirms ``core_snapshot`` holds exactly one row, removes
    the URL with ``--yes`` and confirms the table is empty afterwards.
    """
    os.chdir(tmp_path)

    def count_snapshots() -> int:
        # Close the connection even when the query raises (e.g. missing table).
        conn = sqlite3.connect("index.sqlite3")
        try:
            return conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
        finally:
            conn.close()

    # Add a snapshot; check=True makes a broken setup fail here instead of
    # surfacing later as a confusing row-count mismatch.
    subprocess.run(
        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
    )

    # Verify it exists
    assert count_snapshots() == 1

    # Remove it
    subprocess.run(
        ["archivebox", "remove", "https://example.com", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
    )

    # Verify it's gone
    assert count_snapshots() == 0
def test_remove_deletes_archive_directory(tmp_path, process, disable_extractors_dict):
    """Test that remove --delete removes the current snapshot output directory.

    Adds one URL, reads its snapshot ID from ``core_snapshot``, locates the
    matching directory under ``tmp_path``, then runs ``remove --yes --delete``
    and asserts that the directory no longer exists.
    """
    os.chdir(tmp_path)

    # Add a snapshot; check=True so a failed add is reported directly rather
    # than as a TypeError on the empty query result below.
    subprocess.run(
        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
    )

    conn = sqlite3.connect("index.sqlite3")
    try:
        row = conn.execute("SELECT id FROM core_snapshot").fetchone()
    finally:
        # Release the database handle even if the query raises.
        conn.close()
    assert row is not None, "No snapshot row found after add"
    snapshot_id = str(row[0])

    snapshot_dir = _find_snapshot_dir(tmp_path, snapshot_id)
    assert snapshot_dir is not None, f"Snapshot output directory not found for {snapshot_id}"

    subprocess.run(
        ["archivebox", "remove", "https://example.com", "--yes", "--delete"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    assert not snapshot_dir.exists()
def test_remove_yes_flag_skips_confirmation(tmp_path, process, disable_extractors_dict):
    """Test that --yes flag skips confirmation prompt.

    The remove command runs with a 30 second timeout, so a run that blocks
    waiting for input fails with ``subprocess.TimeoutExpired`` instead of
    hanging the test session.
    """
    os.chdir(tmp_path)

    # check=True: the assertions below are only meaningful if the add worked.
    subprocess.run(
        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
    )

    # Remove with --yes should complete without interaction
    result = subprocess.run(
        ["archivebox", "remove", "https://example.com", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )

    assert result.returncode == 0
    output = result.stdout.decode("utf-8") + result.stderr.decode("utf-8")
    assert "Index now contains 0 links." in output
def test_remove_multiple_snapshots(tmp_path, process, disable_extractors_dict):
    """Test removing multiple snapshots at once.

    Adds two URLs, confirms ``core_snapshot`` holds two rows, removes both
    URLs in a single ``remove --yes`` invocation and confirms the table is
    empty afterwards.
    """
    os.chdir(tmp_path)

    def count_snapshots() -> int:
        # Close the connection even when the query raises.
        conn = sqlite3.connect("index.sqlite3")
        try:
            return conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
        finally:
            conn.close()

    # Add multiple snapshots; check=True so a failed setup is reported at its
    # source rather than as a row-count mismatch.
    for url in ["https://example.com", "https://example.org"]:
        subprocess.run(
            ["archivebox", "add", "--index-only", "--depth=0", url],
            capture_output=True,
            env=disable_extractors_dict,
            check=True,
        )

    # Verify both exist
    assert count_snapshots() == 2

    # Remove both
    subprocess.run(
        ["archivebox", "remove", "https://example.com", "https://example.org", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Verify both are gone
    assert count_snapshots() == 0
def test_remove_with_filter(tmp_path, process, disable_extractors_dict):
    """Test removing snapshots using filter.

    This is a smoke test only: it checks that the command terminates within
    30 seconds with an exit code of 0, 1 or 2, and does not verify that any
    snapshot was actually removed.
    """
    os.chdir(tmp_path)

    # Add snapshots; check=True so the smoke test never runs against a
    # silently broken setup.
    subprocess.run(
        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
    )

    # Remove using filter
    result = subprocess.run(
        ["archivebox", "remove", "--filter-type=search", "--filter=example.com", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )

    # Should complete (exit code depends on implementation)
    assert result.returncode in (0, 1, 2)
def test_remove_with_regex_filter_deletes_all_matches(tmp_path, process, disable_extractors_dict):
    """Test regex filters remove every matching snapshot.

    Adds two URLs, removes everything matching the regex ``.*`` and asserts
    that ``core_snapshot`` is empty and that the command output mentions
    "Removed" or "Found".
    """
    os.chdir(tmp_path)

    for url in ["https://example.com", "https://iana.org"]:
        subprocess.run(
            ["archivebox", "add", "--index-only", "--depth=0", url],
            capture_output=True,
            env=disable_extractors_dict,
            check=True,
        )

    result = subprocess.run(
        ["archivebox", "remove", "--filter-type=regex", ".*", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
    )

    conn = sqlite3.connect("index.sqlite3")
    try:
        count_after = conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    finally:
        # Release the database handle even if the query raises.
        conn.close()

    output = result.stdout.decode("utf-8") + result.stderr.decode("utf-8")
    assert count_after == 0
    assert "Removed" in output or "Found" in output
def test_remove_nonexistent_url_fails_gracefully(tmp_path, process, disable_extractors_dict):
    """Test that removing non-existent URL fails gracefully.

    No URL is added by this test before the remove is attempted. The test
    passes when the command exits non-zero, or when its output (stdout or
    stderr, compared case-insensitively) contains "not found" or "no matches".
    """
    os.chdir(tmp_path)

    result = subprocess.run(
        ["archivebox", "remove", "https://nonexistent-url-12345.com", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Inspect both streams, like the other tests in this file: an error
    # message written to stderr must count as a graceful failure too.
    output = (
        result.stdout.decode("utf-8", errors="replace")
        + result.stderr.decode("utf-8", errors="replace")
    ).lower()

    # Should fail or show error
    assert result.returncode != 0 or "not found" in output or "no matches" in output
def test_remove_reports_remaining_link_count_correctly(tmp_path, process, disable_extractors_dict):
    """Check the summary printed by remove after deleting one of two snapshots.

    Two URLs are added, one is removed, and the combined stdout/stderr of the
    remove command must report both the number removed and the number left.
    """
    os.chdir(tmp_path)

    # Every command in this test must succeed, so all share the same options.
    run_options = {
        "capture_output": True,
        "env": disable_extractors_dict,
        "check": True,
    }

    for target in ("https://example.com", "https://example.org"):
        add_command = ["archivebox", "add", "--index-only", "--depth=0", target]
        subprocess.run(add_command, **run_options)

    remove_command = ["archivebox", "remove", "https://example.org", "--yes"]
    removal = subprocess.run(remove_command, **run_options)

    # Decode each stream on its own, then concatenate stdout followed by stderr.
    combined = "".join(
        stream.decode("utf-8") for stream in (removal.stdout, removal.stderr)
    )

    assert "Removed 1 out of 2 links" in combined
    assert "Index now contains 1 links." in combined
def test_remove_after_flag(tmp_path, process, disable_extractors_dict):
    """Test remove --after flag removes snapshots after date.

    This is a smoke test only: it checks that ``remove --after=2020-01-01
    --yes`` terminates within 30 seconds with an exit code of 0, 1 or 2, and
    does not verify which snapshots, if any, were removed.
    """
    os.chdir(tmp_path)

    # check=True so the smoke test never runs against a silently broken setup.
    subprocess.run(
        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
    )

    # Try remove with --after flag (should work or show usage)
    result = subprocess.run(
        ["archivebox", "remove", "--after=2020-01-01", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
    )

    # Should complete
    assert result.returncode in (0, 1, 2)