Fix CLI tests to use subprocess and remove mocks

- Fix conftest.py: use subprocess for init, remove unused cli_env fixture
- Update all test files to use data_dir parameter instead of env
- Remove mock-based TestJSONLOutput class from tests_piping.py
- Remove unused imports (MagicMock, patch)
- Fix file permissions for cli_utils.py

All tests now use real subprocess calls per CLAUDE.md guidelines:
- NO MOCKS - tests exercise real code paths
- NO SKIPS - every test runs
2026-04-06 07:47:53 +10:00 · 2025-12-31 10:53:45 +00:00
parent bb52b5902a
commit b87bbbbecb
6 changed files with 218 additions and 279 deletions
--- a/archivebox/tests/test_cli_run.py
+++ b/archivebox/tests/test_cli_run.py
@@ -22,14 +22,14 @@ from archivebox.tests.conftest import (
 class TestRunWithCrawl:
    """Tests for `archivebox run` with Crawl input."""

-    def test_run_with_new_crawl(self, cli_env, initialized_archive):
+    def test_run_with_new_crawl(self, initialized_archive):
        """Run creates and processes a new Crawl (no id)."""
        crawl_record = create_test_crawl_json()

        stdout, stderr, code = run_archivebox_cmd(
            ['run'],
            stdin=json.dumps(crawl_record),
-            env=cli_env,
+            data_dir=initialized_archive,
            timeout=120,
        )

@@ -41,19 +41,19 @@ class TestRunWithCrawl:
        assert len(crawl_records) >= 1
        assert crawl_records[0].get('id')  # Should have an id now

-    def test_run_with_existing_crawl(self, cli_env, initialized_archive):
+    def test_run_with_existing_crawl(self, initialized_archive):
        """Run re-queues an existing Crawl (with id)."""
        url = create_test_url()

        # First create a crawl
-        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
+        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
        crawl = parse_jsonl_output(stdout1)[0]

        # Run with the existing crawl
        stdout2, stderr, code = run_archivebox_cmd(
            ['run'],
            stdin=json.dumps(crawl),
-            env=cli_env,
+            data_dir=initialized_archive,
            timeout=120,
        )

@@ -65,14 +65,14 @@ class TestRunWithCrawl:
 class TestRunWithSnapshot:
    """Tests for `archivebox run` with Snapshot input."""

-    def test_run_with_new_snapshot(self, cli_env, initialized_archive):
+    def test_run_with_new_snapshot(self, initialized_archive):
        """Run creates and processes a new Snapshot (no id, just url)."""
        snapshot_record = create_test_snapshot_json()

        stdout, stderr, code = run_archivebox_cmd(
            ['run'],
            stdin=json.dumps(snapshot_record),
-            env=cli_env,
+            data_dir=initialized_archive,
            timeout=120,
        )

@@ -83,19 +83,19 @@ class TestRunWithSnapshot:
        assert len(snapshot_records) >= 1
        assert snapshot_records[0].get('id')

-    def test_run_with_existing_snapshot(self, cli_env, initialized_archive):
+    def test_run_with_existing_snapshot(self, initialized_archive):
        """Run re-queues an existing Snapshot (with id)."""
        url = create_test_url()

        # First create a snapshot
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
        snapshot = parse_jsonl_output(stdout1)[0]

        # Run with the existing snapshot
        stdout2, stderr, code = run_archivebox_cmd(
            ['run'],
            stdin=json.dumps(snapshot),
-            env=cli_env,
+            data_dir=initialized_archive,
            timeout=120,
        )

@@ -103,7 +103,7 @@ class TestRunWithSnapshot:
        records = parse_jsonl_output(stdout2)
        assert len(records) >= 1

-    def test_run_with_plain_url(self, cli_env, initialized_archive):
+    def test_run_with_plain_url(self, initialized_archive):
        """Run accepts plain URL records (no type field)."""
        url = create_test_url()
        url_record = {'url': url}
@@ -111,7 +111,7 @@ class TestRunWithSnapshot:
        stdout, stderr, code = run_archivebox_cmd(
            ['run'],
            stdin=json.dumps(url_record),
-            env=cli_env,
+            data_dir=initialized_archive,
            timeout=120,
        )

@@ -123,18 +123,18 @@ class TestRunWithSnapshot:
 class TestRunWithArchiveResult:
    """Tests for `archivebox run` with ArchiveResult input."""

-    def test_run_requeues_failed_archiveresult(self, cli_env, initialized_archive):
+    def test_run_requeues_failed_archiveresult(self, initialized_archive):
        """Run re-queues a failed ArchiveResult."""
        url = create_test_url()

        # Create snapshot and archive result
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
        snapshot = parse_jsonl_output(stdout1)[0]

        stdout2, _, _ = run_archivebox_cmd(
            ['archiveresult', 'create', '--plugin=title'],
            stdin=json.dumps(snapshot),
-            env=cli_env,
+            data_dir=initialized_archive,
        )
        ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')

@@ -143,14 +143,14 @@ class TestRunWithArchiveResult:
        run_archivebox_cmd(
            ['archiveresult', 'update', '--status=failed'],
            stdin=json.dumps(ar),
-            env=cli_env,
+            data_dir=initialized_archive,
        )

        # Now run should re-queue it
        stdout3, stderr, code = run_archivebox_cmd(
            ['run'],
            stdin=json.dumps(ar),
-            env=cli_env,
+            data_dir=initialized_archive,
            timeout=120,
        )

@@ -163,14 +163,14 @@ class TestRunWithArchiveResult:
 class TestRunPassThrough:
    """Tests for pass-through behavior in `archivebox run`."""

-    def test_run_passes_through_unknown_types(self, cli_env, initialized_archive):
+    def test_run_passes_through_unknown_types(self, initialized_archive):
        """Run passes through records with unknown types."""
        unknown_record = {'type': 'Unknown', 'id': 'fake-id', 'data': 'test'}

        stdout, stderr, code = run_archivebox_cmd(
            ['run'],
            stdin=json.dumps(unknown_record),
-            env=cli_env,
+            data_dir=initialized_archive,
        )

        assert code == 0
@@ -179,7 +179,7 @@ class TestRunPassThrough:
        assert len(unknown_records) == 1
        assert unknown_records[0]['data'] == 'test'

-    def test_run_outputs_all_processed_records(self, cli_env, initialized_archive):
+    def test_run_outputs_all_processed_records(self, initialized_archive):
        """Run outputs all processed records for chaining."""
        url = create_test_url()
        crawl_record = create_test_crawl_json(urls=[url])
@@ -187,7 +187,7 @@ class TestRunPassThrough:
        stdout, stderr, code = run_archivebox_cmd(
            ['run'],
            stdin=json.dumps(crawl_record),
-            env=cli_env,
+            data_dir=initialized_archive,
            timeout=120,
        )

@@ -200,7 +200,7 @@ class TestRunPassThrough:
 class TestRunMixedInput:
    """Tests for `archivebox run` with mixed record types."""

-    def test_run_handles_mixed_types(self, cli_env, initialized_archive):
+    def test_run_handles_mixed_types(self, initialized_archive):
        """Run handles mixed Crawl/Snapshot/ArchiveResult input."""
        crawl = create_test_crawl_json()
        snapshot = create_test_snapshot_json()
@@ -215,7 +215,7 @@ class TestRunMixedInput:
        stdout, stderr, code = run_archivebox_cmd(
            ['run'],
            stdin=stdin,
-            env=cli_env,
+            data_dir=initialized_archive,
            timeout=120,
        )

@@ -230,24 +230,24 @@ class TestRunMixedInput:
 class TestRunEmpty:
    """Tests for `archivebox run` edge cases."""

-    def test_run_empty_stdin(self, cli_env, initialized_archive):
+    def test_run_empty_stdin(self, initialized_archive):
        """Run with empty stdin returns success."""
        stdout, stderr, code = run_archivebox_cmd(
            ['run'],
            stdin='',
-            env=cli_env,
+            data_dir=initialized_archive,
        )

        assert code == 0

-    def test_run_no_records_to_process(self, cli_env, initialized_archive):
+    def test_run_no_records_to_process(self, initialized_archive):
        """Run with only pass-through records shows message."""
        unknown = {'type': 'Unknown', 'id': 'fake'}

        stdout, stderr, code = run_archivebox_cmd(
            ['run'],
            stdin=json.dumps(unknown),
-            env=cli_env,
+            data_dir=initialized_archive,
        )

        assert code == 0