mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
Fix CLI tests to use subprocess and remove mocks
- Fix conftest.py: use subprocess for init, remove unused cli_env fixture
- Update all test files to use data_dir parameter instead of env
- Remove mock-based TestJSONLOutput class from tests_piping.py
- Remove unused imports (MagicMock, patch)
- Fix file permissions for cli_utils.py

All tests now use real subprocess calls per CLAUDE.md guidelines:
- NO MOCKS - tests exercise real code paths
- NO SKIPS - every test runs
This commit is contained in:
@@ -23,13 +23,13 @@ from archivebox.tests.conftest import (
|
||||
class TestCrawlCreate:
|
||||
"""Tests for `archivebox crawl create`."""
|
||||
|
||||
def test_create_from_url_args(self, cli_env, initialized_archive):
|
||||
def test_create_from_url_args(self, initialized_archive):
|
||||
"""Create crawl from URL arguments."""
|
||||
url = create_test_url()
|
||||
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'create', url],
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0, f"Command failed: {stderr}"
|
||||
@@ -41,7 +41,7 @@ class TestCrawlCreate:
|
||||
assert records[0]['type'] == 'Crawl'
|
||||
assert url in records[0]['urls']
|
||||
|
||||
def test_create_from_stdin_urls(self, cli_env, initialized_archive):
|
||||
def test_create_from_stdin_urls(self, initialized_archive):
|
||||
"""Create crawl from stdin URLs (one per line)."""
|
||||
urls = [create_test_url() for _ in range(3)]
|
||||
stdin = '\n'.join(urls)
|
||||
@@ -49,7 +49,7 @@ class TestCrawlCreate:
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'create'],
|
||||
stdin=stdin,
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0, f"Command failed: {stderr}"
|
||||
@@ -62,33 +62,33 @@ class TestCrawlCreate:
|
||||
for url in urls:
|
||||
assert url in crawl['urls']
|
||||
|
||||
def test_create_with_depth(self, cli_env, initialized_archive):
|
||||
def test_create_with_depth(self, initialized_archive):
|
||||
"""Create crawl with --depth flag."""
|
||||
url = create_test_url()
|
||||
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'create', '--depth=2', url],
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0
|
||||
records = parse_jsonl_output(stdout)
|
||||
assert records[0]['max_depth'] == 2
|
||||
|
||||
def test_create_with_tag(self, cli_env, initialized_archive):
|
||||
def test_create_with_tag(self, initialized_archive):
|
||||
"""Create crawl with --tag flag."""
|
||||
url = create_test_url()
|
||||
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'create', '--tag=test-tag', url],
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0
|
||||
records = parse_jsonl_output(stdout)
|
||||
assert 'test-tag' in records[0].get('tags_str', '')
|
||||
|
||||
def test_create_pass_through_other_types(self, cli_env, initialized_archive):
|
||||
def test_create_pass_through_other_types(self, initialized_archive):
|
||||
"""Pass-through records of other types unchanged."""
|
||||
tag_record = {'type': 'Tag', 'id': 'fake-tag-id', 'name': 'test'}
|
||||
url = create_test_url()
|
||||
@@ -97,7 +97,7 @@ class TestCrawlCreate:
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'create'],
|
||||
stdin=stdin,
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0
|
||||
@@ -108,18 +108,18 @@ class TestCrawlCreate:
|
||||
assert 'Tag' in types
|
||||
assert 'Crawl' in types
|
||||
|
||||
def test_create_pass_through_existing_crawl(self, cli_env, initialized_archive):
|
||||
def test_create_pass_through_existing_crawl(self, initialized_archive):
|
||||
"""Existing Crawl records (with id) are passed through."""
|
||||
# First create a crawl
|
||||
url = create_test_url()
|
||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||
crawl = parse_jsonl_output(stdout1)[0]
|
||||
|
||||
# Now pipe it back - should pass through
|
||||
stdout2, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'create'],
|
||||
stdin=json.dumps(crawl),
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0
|
||||
@@ -131,24 +131,24 @@ class TestCrawlCreate:
|
||||
class TestCrawlList:
|
||||
"""Tests for `archivebox crawl list`."""
|
||||
|
||||
def test_list_empty(self, cli_env, initialized_archive):
|
||||
def test_list_empty(self, initialized_archive):
|
||||
"""List with no crawls returns empty."""
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'list'],
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0
|
||||
assert 'Listed 0 crawls' in stderr
|
||||
|
||||
def test_list_returns_created(self, cli_env, initialized_archive):
|
||||
def test_list_returns_created(self, initialized_archive):
|
||||
"""List returns previously created crawls."""
|
||||
url = create_test_url()
|
||||
run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
||||
run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'list'],
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0
|
||||
@@ -156,14 +156,14 @@ class TestCrawlList:
|
||||
assert len(records) >= 1
|
||||
assert any(url in r.get('urls', '') for r in records)
|
||||
|
||||
def test_list_filter_by_status(self, cli_env, initialized_archive):
|
||||
def test_list_filter_by_status(self, initialized_archive):
|
||||
"""Filter crawls by status."""
|
||||
url = create_test_url()
|
||||
run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
||||
run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'list', '--status=queued'],
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0
|
||||
@@ -171,15 +171,15 @@ class TestCrawlList:
|
||||
for r in records:
|
||||
assert r['status'] == 'queued'
|
||||
|
||||
def test_list_with_limit(self, cli_env, initialized_archive):
|
||||
def test_list_with_limit(self, initialized_archive):
|
||||
"""Limit number of results."""
|
||||
# Create multiple crawls
|
||||
for _ in range(3):
|
||||
run_archivebox_cmd(['crawl', 'create', create_test_url()], env=cli_env)
|
||||
run_archivebox_cmd(['crawl', 'create', create_test_url()], data_dir=initialized_archive)
|
||||
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'list', '--limit=2'],
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0
|
||||
@@ -190,18 +190,18 @@ class TestCrawlList:
|
||||
class TestCrawlUpdate:
|
||||
"""Tests for `archivebox crawl update`."""
|
||||
|
||||
def test_update_status(self, cli_env, initialized_archive):
|
||||
def test_update_status(self, initialized_archive):
|
||||
"""Update crawl status."""
|
||||
# Create a crawl
|
||||
url = create_test_url()
|
||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||
crawl = parse_jsonl_output(stdout1)[0]
|
||||
|
||||
# Update it
|
||||
stdout2, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'update', '--status=started'],
|
||||
stdin=json.dumps(crawl),
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0
|
||||
@@ -214,46 +214,46 @@ class TestCrawlUpdate:
|
||||
class TestCrawlDelete:
|
||||
"""Tests for `archivebox crawl delete`."""
|
||||
|
||||
def test_delete_requires_yes(self, cli_env, initialized_archive):
|
||||
def test_delete_requires_yes(self, initialized_archive):
|
||||
"""Delete requires --yes flag."""
|
||||
url = create_test_url()
|
||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||
crawl = parse_jsonl_output(stdout1)[0]
|
||||
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'delete'],
|
||||
stdin=json.dumps(crawl),
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 1
|
||||
assert '--yes' in stderr
|
||||
|
||||
def test_delete_with_yes(self, cli_env, initialized_archive):
|
||||
def test_delete_with_yes(self, initialized_archive):
|
||||
"""Delete with --yes flag works."""
|
||||
url = create_test_url()
|
||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||
crawl = parse_jsonl_output(stdout1)[0]
|
||||
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'delete', '--yes'],
|
||||
stdin=json.dumps(crawl),
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0
|
||||
assert 'Deleted 1 crawls' in stderr
|
||||
|
||||
def test_delete_dry_run(self, cli_env, initialized_archive):
|
||||
def test_delete_dry_run(self, initialized_archive):
|
||||
"""Dry run shows what would be deleted."""
|
||||
url = create_test_url()
|
||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||
crawl = parse_jsonl_output(stdout1)[0]
|
||||
|
||||
stdout, stderr, code = run_archivebox_cmd(
|
||||
['crawl', 'delete', '--dry-run'],
|
||||
stdin=json.dumps(crawl),
|
||||
env=cli_env,
|
||||
data_dir=initialized_archive,
|
||||
)
|
||||
|
||||
assert code == 0
|
||||
|
||||
Reference in New Issue
Block a user