This commit is contained in:
Nick Sweeting
2026-03-23 03:58:32 -07:00
parent 268856bcfb
commit b749b26c5d
286 changed files with 21704 additions and 13480 deletions

View File

@@ -14,8 +14,8 @@ def _find_snapshot_dir(data_dir: Path, snapshot_id: str) -> Path | None:
candidates = {snapshot_id}
if len(snapshot_id) == 32:
candidates.add(f"{snapshot_id[:8]}-{snapshot_id[8:12]}-{snapshot_id[12:16]}-{snapshot_id[16:20]}-{snapshot_id[20:]}")
elif len(snapshot_id) == 36 and '-' in snapshot_id:
candidates.add(snapshot_id.replace('-', ''))
elif len(snapshot_id) == 36 and "-" in snapshot_id:
candidates.add(snapshot_id.replace("-", ""))
for needle in candidates:
for path in data_dir.rglob(needle):
@@ -30,7 +30,7 @@ def test_remove_deletes_snapshot_from_db(tmp_path, process, disable_extractors_d
# Add a snapshot
subprocess.run(
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
capture_output=True,
env=disable_extractors_dict,
)
@@ -44,7 +44,7 @@ def test_remove_deletes_snapshot_from_db(tmp_path, process, disable_extractors_d
# Remove it
subprocess.run(
['archivebox', 'remove', 'https://example.com', '--yes'],
["archivebox", "remove", "https://example.com", "--yes"],
capture_output=True,
env=disable_extractors_dict,
)
@@ -64,7 +64,7 @@ def test_remove_deletes_archive_directory(tmp_path, process, disable_extractors_
# Add a snapshot
subprocess.run(
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
capture_output=True,
env=disable_extractors_dict,
)
@@ -78,7 +78,7 @@ def test_remove_deletes_archive_directory(tmp_path, process, disable_extractors_
assert snapshot_dir is not None, f"Snapshot output directory not found for {snapshot_id}"
subprocess.run(
['archivebox', 'remove', 'https://example.com', '--yes', '--delete'],
["archivebox", "remove", "https://example.com", "--yes", "--delete"],
capture_output=True,
env=disable_extractors_dict,
)
@@ -91,14 +91,14 @@ def test_remove_yes_flag_skips_confirmation(tmp_path, process, disable_extractor
os.chdir(tmp_path)
subprocess.run(
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
capture_output=True,
env=disable_extractors_dict,
)
# Remove with --yes should complete without interaction
result = subprocess.run(
['archivebox', 'remove', 'https://example.com', '--yes'],
["archivebox", "remove", "https://example.com", "--yes"],
capture_output=True,
env=disable_extractors_dict,
timeout=30,
@@ -114,9 +114,9 @@ def test_remove_multiple_snapshots(tmp_path, process, disable_extractors_dict):
os.chdir(tmp_path)
# Add multiple snapshots
for url in ['https://example.com', 'https://example.org']:
for url in ["https://example.com", "https://example.org"]:
subprocess.run(
['archivebox', 'add', '--index-only', '--depth=0', url],
["archivebox", "add", "--index-only", "--depth=0", url],
capture_output=True,
env=disable_extractors_dict,
)
@@ -130,7 +130,7 @@ def test_remove_multiple_snapshots(tmp_path, process, disable_extractors_dict):
# Remove both
subprocess.run(
['archivebox', 'remove', 'https://example.com', 'https://example.org', '--yes'],
["archivebox", "remove", "https://example.com", "https://example.org", "--yes"],
capture_output=True,
env=disable_extractors_dict,
)
@@ -150,14 +150,14 @@ def test_remove_with_filter(tmp_path, process, disable_extractors_dict):
# Add snapshots
subprocess.run(
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
capture_output=True,
env=disable_extractors_dict,
)
# Remove using filter
result = subprocess.run(
['archivebox', 'remove', '--filter-type=search', '--filter=example.com', '--yes'],
["archivebox", "remove", "--filter-type=search", "--filter=example.com", "--yes"],
capture_output=True,
env=disable_extractors_dict,
timeout=30,
@@ -171,16 +171,16 @@ def test_remove_with_regex_filter_deletes_all_matches(tmp_path, process, disable
"""Test regex filters remove every matching snapshot."""
os.chdir(tmp_path)
for url in ['https://example.com', 'https://iana.org']:
for url in ["https://example.com", "https://iana.org"]:
subprocess.run(
['archivebox', 'add', '--index-only', '--depth=0', url],
["archivebox", "add", "--index-only", "--depth=0", url],
capture_output=True,
env=disable_extractors_dict,
check=True,
)
result = subprocess.run(
['archivebox', 'remove', '--filter-type=regex', '.*', '--yes'],
["archivebox", "remove", "--filter-type=regex", ".*", "--yes"],
capture_output=True,
env=disable_extractors_dict,
check=True,
@@ -193,7 +193,7 @@ def test_remove_with_regex_filter_deletes_all_matches(tmp_path, process, disable
output = result.stdout.decode("utf-8") + result.stderr.decode("utf-8")
assert count_after == 0
assert 'Removed' in output or 'Found' in output
assert "Removed" in output or "Found" in output
def test_remove_nonexistent_url_fails_gracefully(tmp_path, process, disable_extractors_dict):
@@ -201,30 +201,30 @@ def test_remove_nonexistent_url_fails_gracefully(tmp_path, process, disable_extr
os.chdir(tmp_path)
result = subprocess.run(
['archivebox', 'remove', 'https://nonexistent-url-12345.com', '--yes'],
["archivebox", "remove", "https://nonexistent-url-12345.com", "--yes"],
capture_output=True,
env=disable_extractors_dict,
)
# Should fail or show error
stdout_text = result.stdout.decode('utf-8', errors='replace').lower()
assert result.returncode != 0 or 'not found' in stdout_text or 'no matches' in stdout_text
stdout_text = result.stdout.decode("utf-8", errors="replace").lower()
assert result.returncode != 0 or "not found" in stdout_text or "no matches" in stdout_text
def test_remove_reports_remaining_link_count_correctly(tmp_path, process, disable_extractors_dict):
"""Test remove reports the remaining snapshot count after deletion."""
os.chdir(tmp_path)
for url in ['https://example.com', 'https://example.org']:
for url in ["https://example.com", "https://example.org"]:
subprocess.run(
['archivebox', 'add', '--index-only', '--depth=0', url],
["archivebox", "add", "--index-only", "--depth=0", url],
capture_output=True,
env=disable_extractors_dict,
check=True,
)
result = subprocess.run(
['archivebox', 'remove', 'https://example.org', '--yes'],
["archivebox", "remove", "https://example.org", "--yes"],
capture_output=True,
env=disable_extractors_dict,
check=True,
@@ -240,14 +240,14 @@ def test_remove_after_flag(tmp_path, process, disable_extractors_dict):
os.chdir(tmp_path)
subprocess.run(
['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
capture_output=True,
env=disable_extractors_dict,
)
# Try remove with --after flag (should work or show usage)
result = subprocess.run(
['archivebox', 'remove', '--after=2020-01-01', '--yes'],
["archivebox", "remove", "--after=2020-01-01", "--yes"],
capture_output=True,
env=disable_extractors_dict,
timeout=30,