# Mirror of https://github.com/ArchiveBox/ArchiveBox.git
# Synced 2026-04-06 07:47:53 +10:00 (302 lines, 10 KiB, Python)
"""
Tests for archivebox snapshot CLI command.

Tests cover:
- snapshot create (from URLs, from Crawl JSONL, pass-through)
- snapshot list (with filters)
- snapshot update
- snapshot delete
"""

import json

from archivebox.tests.conftest import (
    create_test_url,
    parse_jsonl_output,
    run_archivebox_cmd,
)


class TestSnapshotCreate:
    """Tests for `archivebox snapshot create`.

    Every test runs the CLI through ``run_archivebox_cmd`` against the
    ``initialized_archive`` fixture and inspects the JSONL written to stdout
    and the messages written to stderr.
    """

    def test_create_from_url_args(self, initialized_archive):
        """Create snapshot from URL arguments."""
        url = create_test_url()

        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "create", url],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        assert "Created" in stderr

        records = parse_jsonl_output(stdout)
        assert len(records) == 1
        assert records[0]["type"] == "Snapshot"
        assert records[0]["url"] == url

    def test_create_from_crawl_jsonl(self, initialized_archive):
        """Create snapshots from Crawl JSONL input."""
        url = create_test_url()

        # First create a crawl
        crawl_stdout, crawl_stderr, _ = run_archivebox_cmd(
            ["crawl", "create", url],
            data_dir=initialized_archive,
        )
        crawl_records = parse_jsonl_output(crawl_stdout)
        # Fail with the CLI's own stderr instead of an opaque IndexError
        # when the setup command produced nothing.
        assert crawl_records, f"Command failed: {crawl_stderr}"
        crawl = crawl_records[0]

        # Pipe crawl to snapshot create
        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "create"],
            stdin=json.dumps(crawl),
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"

        records = parse_jsonl_output(stdout)
        # Should have the Crawl passed through and the Snapshot created
        types = [r.get("type") for r in records]
        assert "Crawl" in types
        assert "Snapshot" in types

        # Use .get() like the `types` list above so a record without a
        # "type" key cannot raise before the Snapshot is found.
        snapshot = next(r for r in records if r.get("type") == "Snapshot")
        assert snapshot["url"] == url

    def test_create_with_tag(self, initialized_archive):
        """Create snapshot with --tag flag."""
        url = create_test_url()

        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "create", "--tag=test-tag", url],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        records = parse_jsonl_output(stdout)
        assert records, f"Command failed: {stderr}"
        assert "test-tag" in records[0].get("tags", "")

    def test_create_pass_through_other_types(self, initialized_archive):
        """Pass-through records of other types unchanged."""
        tag_record = {"type": "Tag", "id": "fake-tag-id", "name": "test"}
        url = create_test_url()
        # Two JSONL lines: a foreign record type followed by a bare URL record.
        piped_input = json.dumps(tag_record) + "\n" + json.dumps({"url": url})

        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "create"],
            stdin=piped_input,
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        records = parse_jsonl_output(stdout)

        types = [r.get("type") for r in records]
        assert "Tag" in types
        assert "Snapshot" in types

    def test_create_multiple_urls(self, initialized_archive):
        """Create snapshots from multiple URLs."""
        urls = [create_test_url() for _ in range(3)]

        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "create", *urls],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        records = parse_jsonl_output(stdout)
        assert len(records) == 3

        created_urls = {r["url"] for r in records}
        missing = [url for url in urls if url not in created_urls]
        assert not missing, f"URLs missing from output: {missing}"


class TestSnapshotList:
    """Tests for `archivebox snapshot list`.

    Each test seeds the archive (where needed) with ``snapshot create`` and
    then checks the JSONL records emitted by ``snapshot list``.
    """

    def test_list_empty(self, initialized_archive):
        """List with no snapshots returns empty."""
        _, stderr, code = run_archivebox_cmd(
            ["snapshot", "list"],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        assert "Listed 0 snapshots" in stderr

    def test_list_returns_created(self, initialized_archive):
        """List returns previously created snapshots."""
        url = create_test_url()
        run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)

        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "list"],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        records = parse_jsonl_output(stdout)
        assert len(records) >= 1
        assert any(r.get("url") == url for r in records)

    def test_list_filter_by_status(self, initialized_archive):
        """Filter snapshots by status."""
        url = create_test_url()
        run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)

        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "list", "--status=queued"],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        records = parse_jsonl_output(stdout)
        # NOTE(review): this passes vacuously when the filter returns no
        # records; consider also asserting that the snapshot created above
        # is present once its initial status is confirmed.
        for record in records:
            assert record["status"] == "queued"

    def test_list_filter_by_url_contains(self, initialized_archive):
        """Filter snapshots by URL contains."""
        url = create_test_url(domain="unique-domain-12345.com")
        run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)

        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "list", "--url__icontains=unique-domain-12345"],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        records = parse_jsonl_output(stdout)
        assert len(records) == 1
        assert "unique-domain-12345" in records[0]["url"]

    def test_list_with_limit(self, initialized_archive):
        """Limit number of results."""
        # Seed more snapshots than the limit so truncation is observable.
        for _ in range(3):
            run_archivebox_cmd(
                ["snapshot", "create", create_test_url()],
                data_dir=initialized_archive,
            )

        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "list", "--limit=2"],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        records = parse_jsonl_output(stdout)
        assert len(records) == 2

    def test_list_with_sort_and_limit(self, initialized_archive):
        """Sorting should be applied before limiting."""
        for _ in range(3):
            run_archivebox_cmd(
                ["snapshot", "create", create_test_url()],
                data_dir=initialized_archive,
            )

        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "list", "--limit=2", "--sort=-created_at"],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        records = parse_jsonl_output(stdout)
        # NOTE(review): only the record count is checked; the ordering of the
        # returned records is not verified here.
        assert len(records) == 2

    def test_list_search_meta(self, initialized_archive):
        """snapshot list should support metadata search mode."""
        url = create_test_url(domain="meta-search-example.com")
        run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)

        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "list", "--search=meta", "meta-search-example.com"],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        records = parse_jsonl_output(stdout)
        assert len(records) == 1
        assert "meta-search-example.com" in records[0]["url"]


class TestSnapshotUpdate:
    """Tests for `archivebox snapshot update`.

    Each test creates a snapshot, pipes its JSON record back into
    ``snapshot update`` on stdin, and checks the reported result.
    """

    def test_update_status(self, initialized_archive):
        """Update snapshot status."""
        url = create_test_url()
        created_stdout, created_stderr, _ = run_archivebox_cmd(
            ["snapshot", "create", url],
            data_dir=initialized_archive,
        )
        created = parse_jsonl_output(created_stdout)
        # Fail with the CLI's own stderr instead of an opaque IndexError
        # when the setup command produced nothing.
        assert created, f"Command failed: {created_stderr}"
        snapshot = created[0]

        stdout, stderr, code = run_archivebox_cmd(
            ["snapshot", "update", "--status=started"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        assert "Updated 1 snapshots" in stderr

        records = parse_jsonl_output(stdout)
        assert records, f"Command failed: {stderr}"
        assert records[0]["status"] == "started"

    def test_update_add_tag(self, initialized_archive):
        """Update snapshot by adding tag."""
        url = create_test_url()
        created_stdout, created_stderr, _ = run_archivebox_cmd(
            ["snapshot", "create", url],
            data_dir=initialized_archive,
        )
        created = parse_jsonl_output(created_stdout)
        assert created, f"Command failed: {created_stderr}"
        snapshot = created[0]

        _, stderr, code = run_archivebox_cmd(
            ["snapshot", "update", "--tag=new-tag"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        # NOTE(review): only the summary message is checked; the presence of
        # "new-tag" on the updated record is not verified here.
        assert "Updated 1 snapshots" in stderr


class TestSnapshotDelete:
    """Tests for `archivebox snapshot delete`.

    Each test creates a snapshot, pipes its JSON record into
    ``snapshot delete`` on stdin, and checks the exit code and stderr.
    """

    def test_delete_requires_yes(self, initialized_archive):
        """Delete requires --yes flag."""
        url = create_test_url()
        created_stdout, created_stderr, _ = run_archivebox_cmd(
            ["snapshot", "create", url],
            data_dir=initialized_archive,
        )
        created = parse_jsonl_output(created_stdout)
        # Fail with the CLI's own stderr instead of an opaque IndexError
        # when the setup command produced nothing.
        assert created, f"Command failed: {created_stderr}"
        snapshot = created[0]

        _, stderr, code = run_archivebox_cmd(
            ["snapshot", "delete"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )

        assert code == 1, f"Expected exit code 1, got {code}: {stderr}"
        assert "--yes" in stderr

    def test_delete_with_yes(self, initialized_archive):
        """Delete with --yes flag works."""
        url = create_test_url()
        created_stdout, created_stderr, _ = run_archivebox_cmd(
            ["snapshot", "create", url],
            data_dir=initialized_archive,
        )
        created = parse_jsonl_output(created_stdout)
        assert created, f"Command failed: {created_stderr}"
        snapshot = created[0]

        _, stderr, code = run_archivebox_cmd(
            ["snapshot", "delete", "--yes"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        # NOTE(review): only the summary message is checked; the snapshot's
        # absence from a subsequent `snapshot list` is not verified here.
        assert "Deleted 1 snapshots" in stderr

    def test_delete_dry_run(self, initialized_archive):
        """Dry run shows what would be deleted."""
        url = create_test_url()
        created_stdout, created_stderr, _ = run_archivebox_cmd(
            ["snapshot", "create", url],
            data_dir=initialized_archive,
        )
        created = parse_jsonl_output(created_stdout)
        assert created, f"Command failed: {created_stderr}"
        snapshot = created[0]

        _, stderr, code = run_archivebox_cmd(
            ["snapshot", "delete", "--dry-run"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
        # NOTE(review): only the message is checked; that the snapshot still
        # exists after the dry run is not verified here.
        assert "Would delete" in stderr