""" Tests for archivebox archiveresult CLI command. Tests cover: - archiveresult create (from Snapshot JSONL, with --plugin, pass-through) - archiveresult list (with filters) - archiveresult update - archiveresult delete """ import json from archivebox.tests.conftest import ( run_archivebox_cmd, parse_jsonl_output, create_test_url, ) PROJECTOR_TEST_ENV = { "PLUGINS": "favicon", "SAVE_FAVICON": "True", "USE_COLOR": "False", "SHOW_PROGRESS": "False", } class TestArchiveResultCreate: """Tests for `archivebox archiveresult create`.""" def test_create_from_snapshot_jsonl(self, initialized_archive): """Create archive results from Snapshot JSONL input.""" url = create_test_url() # Create a snapshot first stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive) snapshot = parse_jsonl_output(stdout1)[0] # Pipe snapshot to archiveresult create stdout2, stderr, code = run_archivebox_cmd( ["archiveresult", "create", "--plugin=title"], stdin=json.dumps(snapshot), data_dir=initialized_archive, ) assert code == 0, f"Command failed: {stderr}" records = parse_jsonl_output(stdout2) # Should have the Snapshot passed through and an ArchiveResult request emitted types = [r.get("type") for r in records] assert "Snapshot" in types assert "ArchiveResult" in types ar = next(r for r in records if r["type"] == "ArchiveResult") assert ar["plugin"] == "title" assert "id" not in ar def test_create_with_specific_plugin(self, initialized_archive): """Create archive result for specific plugin.""" url = create_test_url() stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive) snapshot = parse_jsonl_output(stdout1)[0] stdout2, stderr, code = run_archivebox_cmd( ["archiveresult", "create", "--plugin=screenshot"], stdin=json.dumps(snapshot), data_dir=initialized_archive, ) assert code == 0 records = parse_jsonl_output(stdout2) ar_records = [r for r in records if r.get("type") == "ArchiveResult"] assert len(ar_records) >= 1 assert ar_records[0]["plugin"] == "screenshot" def test_create_pass_through_crawl(self, initialized_archive): """Pass-through Crawl records unchanged.""" url = create_test_url() # Create crawl and snapshot stdout1, _, _ = run_archivebox_cmd(["crawl", "create", url], data_dir=initialized_archive) crawl = parse_jsonl_output(stdout1)[0] stdout2, _, _ = run_archivebox_cmd( ["snapshot", "create"], stdin=json.dumps(crawl), data_dir=initialized_archive, ) # Now pipe all to archiveresult create stdout3, stderr, code = run_archivebox_cmd( ["archiveresult", "create", "--plugin=title"], stdin=stdout2, data_dir=initialized_archive, ) assert code == 0 records = parse_jsonl_output(stdout3) types = [r.get("type") for r in records] assert "Crawl" in types assert "Snapshot" in types assert "ArchiveResult" in types def test_create_pass_through_only_when_no_snapshots(self, initialized_archive): """Only pass-through records but no new snapshots returns success.""" crawl_record = {"type": "Crawl", "id": "fake-id", "urls": "https://example.com"} stdout, stderr, code = run_archivebox_cmd( ["archiveresult", "create"], stdin=json.dumps(crawl_record), data_dir=initialized_archive, ) assert code == 0 assert "Passed through" in stderr class TestArchiveResultList: """Tests for `archivebox archiveresult list`.""" def test_list_empty(self, initialized_archive): """List with no archive results returns empty.""" stdout, stderr, code = run_archivebox_cmd( ["archiveresult", "list"], data_dir=initialized_archive, ) assert code == 0 assert "Listed 0 archive results" in stderr def test_list_filter_by_status(self, initialized_archive): """Filter archive results by status.""" # Create snapshot and materialize an archive result via the runner url = create_test_url() stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive) snapshot = parse_jsonl_output(stdout1)[0] stdout2, _, _ = run_archivebox_cmd( ["archiveresult", "create", "--plugin=favicon"], stdin=json.dumps(snapshot), data_dir=initialized_archive, ) run_archivebox_cmd( ["run"], stdin=stdout2, data_dir=initialized_archive, timeout=120, env=PROJECTOR_TEST_ENV, ) created = parse_jsonl_output( run_archivebox_cmd( ["archiveresult", "list", "--plugin=favicon"], data_dir=initialized_archive, )[0], )[0] run_archivebox_cmd( ["archiveresult", "update", "--status=queued"], stdin=json.dumps(created), data_dir=initialized_archive, ) stdout, stderr, code = run_archivebox_cmd( ["archiveresult", "list", "--status=queued"], data_dir=initialized_archive, ) assert code == 0 records = parse_jsonl_output(stdout) for r in records: assert r["status"] == "queued" def test_list_filter_by_plugin(self, initialized_archive): """Filter archive results by plugin.""" url = create_test_url() stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive) snapshot = parse_jsonl_output(stdout1)[0] stdout2, _, _ = run_archivebox_cmd( ["archiveresult", "create", "--plugin=favicon"], stdin=json.dumps(snapshot), data_dir=initialized_archive, ) run_archivebox_cmd( ["run"], stdin=stdout2, data_dir=initialized_archive, timeout=120, env=PROJECTOR_TEST_ENV, ) stdout, stderr, code = run_archivebox_cmd( ["archiveresult", "list", "--plugin=favicon"], data_dir=initialized_archive, ) assert code == 0 records = parse_jsonl_output(stdout) for r in records: assert r["plugin"] == "favicon" def test_list_with_limit(self, initialized_archive): """Limit number of results.""" # Create multiple archive results for _ in range(3): url = create_test_url() stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive) snapshot = parse_jsonl_output(stdout1)[0] stdout2, _, _ = run_archivebox_cmd( ["archiveresult", "create", "--plugin=favicon"], stdin=json.dumps(snapshot), data_dir=initialized_archive, ) run_archivebox_cmd( ["run"], stdin=stdout2, data_dir=initialized_archive, timeout=120, env=PROJECTOR_TEST_ENV, ) stdout, stderr, code = run_archivebox_cmd( ["archiveresult", "list", "--limit=2"], data_dir=initialized_archive, ) assert code == 0 records = parse_jsonl_output(stdout) assert len(records) == 2 class TestArchiveResultUpdate: """Tests for `archivebox archiveresult update`.""" def test_update_status(self, initialized_archive): """Update archive result status.""" url = create_test_url() stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive) snapshot = parse_jsonl_output(stdout1)[0] stdout2, _, _ = run_archivebox_cmd( ["archiveresult", "create", "--plugin=favicon"], stdin=json.dumps(snapshot), data_dir=initialized_archive, ) stdout_run, _, _ = run_archivebox_cmd( ["run"], stdin=stdout2, data_dir=initialized_archive, timeout=120, env=PROJECTOR_TEST_ENV, ) stdout_list, _, _ = run_archivebox_cmd( ["archiveresult", "list", "--plugin=favicon"], data_dir=initialized_archive, ) ar = parse_jsonl_output(stdout_list)[0] stdout3, stderr, code = run_archivebox_cmd( ["archiveresult", "update", "--status=failed"], stdin=json.dumps(ar), data_dir=initialized_archive, ) assert code == 0 assert "Updated 1 archive results" in stderr records = parse_jsonl_output(stdout3) assert records[0]["status"] == "failed" class TestArchiveResultDelete: """Tests for `archivebox archiveresult delete`.""" def test_delete_requires_yes(self, initialized_archive): """Delete requires --yes flag.""" url = create_test_url() stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive) snapshot = parse_jsonl_output(stdout1)[0] stdout2, _, _ = run_archivebox_cmd( ["archiveresult", "create", "--plugin=favicon"], stdin=json.dumps(snapshot), data_dir=initialized_archive, ) stdout_run, _, _ = run_archivebox_cmd( ["run"], stdin=stdout2, data_dir=initialized_archive, timeout=120, env=PROJECTOR_TEST_ENV, ) stdout_list, _, _ = run_archivebox_cmd( ["archiveresult", "list", "--plugin=favicon"], data_dir=initialized_archive, ) ar = parse_jsonl_output(stdout_list)[0] stdout, stderr, code = run_archivebox_cmd( ["archiveresult", "delete"], stdin=json.dumps(ar), data_dir=initialized_archive, ) assert code == 1 assert "--yes" in stderr def test_delete_with_yes(self, initialized_archive): """Delete with --yes flag works.""" url = create_test_url() stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive) snapshot = parse_jsonl_output(stdout1)[0] stdout2, _, _ = run_archivebox_cmd( ["archiveresult", "create", "--plugin=favicon"], stdin=json.dumps(snapshot), data_dir=initialized_archive, ) stdout_run, _, _ = run_archivebox_cmd( ["run"], stdin=stdout2, data_dir=initialized_archive, timeout=120, env=PROJECTOR_TEST_ENV, ) stdout_list, _, _ = run_archivebox_cmd( ["archiveresult", "list", "--plugin=favicon"], data_dir=initialized_archive, ) ar = parse_jsonl_output(stdout_list)[0] stdout, stderr, code = run_archivebox_cmd( ["archiveresult", "delete", "--yes"], stdin=json.dumps(ar), data_dir=initialized_archive, ) assert code == 0 assert "Deleted 1 archive results" in stderr