mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
Fix CLI tests to use subprocess and remove mocks
- Fix conftest.py: use subprocess for init, remove unused cli_env fixture
- Update all test files to use data_dir parameter instead of env
- Remove mock-based TestJSONLOutput class from tests_piping.py
- Remove unused imports (MagicMock, patch)
- Fix file permissions for cli_utils.py

All tests now use real subprocess calls per CLAUDE.md guidelines:
- NO MOCKS - tests exercise real code paths
- NO SKIPS - every test runs
This commit is contained in:
@@ -30,7 +30,6 @@ import tempfile
|
|||||||
import unittest
|
import unittest
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import patch, MagicMock
|
|
||||||
|
|
||||||
# Test configuration - disable slow extractors
|
# Test configuration - disable slow extractors
|
||||||
TEST_CONFIG = {
|
TEST_CONFIG = {
|
||||||
@@ -152,35 +151,8 @@ class TestJSONLParsing(unittest.TestCase):
|
|||||||
self.assertEqual(result['url'], 'file:///path/to/file.txt')
|
self.assertEqual(result['url'], 'file:///path/to/file.txt')
|
||||||
|
|
||||||
|
|
||||||
class TestJSONLOutput(unittest.TestCase):
|
# Note: JSONL output serialization is tested in TestPipingWorkflowIntegration
|
||||||
"""Test JSONL output formatting."""
|
# using real model instances, not mocks.
|
||||||
|
|
||||||
def test_crawl_to_json(self):
|
|
||||||
"""Crawl model should serialize to JSON correctly."""
|
|
||||||
from archivebox.misc.jsonl import TYPE_CRAWL
|
|
||||||
|
|
||||||
# Create a mock crawl with to_json method configured
|
|
||||||
mock_crawl = MagicMock()
|
|
||||||
mock_crawl.to_json.return_value = {
|
|
||||||
'type': TYPE_CRAWL,
|
|
||||||
'schema_version': '0.9.0',
|
|
||||||
'id': 'test-crawl-uuid',
|
|
||||||
'urls': 'https://example.com',
|
|
||||||
'status': 'queued',
|
|
||||||
'max_depth': 0,
|
|
||||||
'tags_str': 'tag1,tag2',
|
|
||||||
'label': '',
|
|
||||||
'created_at': None,
|
|
||||||
}
|
|
||||||
|
|
||||||
result = mock_crawl.to_json()
|
|
||||||
self.assertEqual(result['type'], TYPE_CRAWL)
|
|
||||||
self.assertEqual(result['id'], 'test-crawl-uuid')
|
|
||||||
self.assertEqual(result['urls'], 'https://example.com')
|
|
||||||
self.assertEqual(result['status'], 'queued')
|
|
||||||
|
|
||||||
# Note: Snapshot and ArchiveResult serialization is tested in integration tests
|
|
||||||
# (TestPipingWorkflowIntegration) using real model instances, not mocks.
|
|
||||||
|
|
||||||
|
|
||||||
class TestReadArgsOrStdin(unittest.TestCase):
|
class TestReadArgsOrStdin(unittest.TestCase):
|
||||||
|
|||||||
@@ -10,29 +10,83 @@ from typing import List, Dict, Any, Optional, Tuple
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# CLI Helpers (defined before fixtures that use them)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def run_archivebox_cmd(
|
||||||
|
args: List[str],
|
||||||
|
data_dir: Path,
|
||||||
|
stdin: Optional[str] = None,
|
||||||
|
timeout: int = 60,
|
||||||
|
env: Optional[Dict[str, str]] = None,
|
||||||
|
) -> Tuple[str, str, int]:
|
||||||
|
"""
|
||||||
|
Run archivebox command via subprocess, return (stdout, stderr, returncode).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
args: Command arguments (e.g., ['crawl', 'create', 'https://example.com'])
|
||||||
|
data_dir: The DATA_DIR to use
|
||||||
|
stdin: Optional string to pipe to stdin
|
||||||
|
timeout: Command timeout in seconds
|
||||||
|
env: Additional environment variables
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (stdout, stderr, returncode)
|
||||||
|
"""
|
||||||
|
cmd = [sys.executable, '-m', 'archivebox'] + args
|
||||||
|
|
||||||
|
base_env = os.environ.copy()
|
||||||
|
base_env['DATA_DIR'] = str(data_dir)
|
||||||
|
base_env['USE_COLOR'] = 'False'
|
||||||
|
base_env['SHOW_PROGRESS'] = 'False'
|
||||||
|
# Disable slow extractors for faster tests
|
||||||
|
base_env['SAVE_ARCHIVEDOTORG'] = 'False'
|
||||||
|
base_env['SAVE_TITLE'] = 'False'
|
||||||
|
base_env['SAVE_FAVICON'] = 'False'
|
||||||
|
base_env['SAVE_WGET'] = 'False'
|
||||||
|
base_env['SAVE_WARC'] = 'False'
|
||||||
|
base_env['SAVE_PDF'] = 'False'
|
||||||
|
base_env['SAVE_SCREENSHOT'] = 'False'
|
||||||
|
base_env['SAVE_DOM'] = 'False'
|
||||||
|
base_env['SAVE_SINGLEFILE'] = 'False'
|
||||||
|
base_env['SAVE_READABILITY'] = 'False'
|
||||||
|
base_env['SAVE_MERCURY'] = 'False'
|
||||||
|
base_env['SAVE_GIT'] = 'False'
|
||||||
|
base_env['SAVE_YTDLP'] = 'False'
|
||||||
|
base_env['SAVE_HEADERS'] = 'False'
|
||||||
|
base_env['SAVE_HTMLTOTEXT'] = 'False'
|
||||||
|
|
||||||
|
if env:
|
||||||
|
base_env.update(env)
|
||||||
|
|
||||||
|
result = subprocess.run(
|
||||||
|
cmd,
|
||||||
|
input=stdin,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
cwd=data_dir,
|
||||||
|
env=base_env,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
|
||||||
|
return result.stdout, result.stderr, result.returncode
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Fixtures
|
# Fixtures
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def isolated_data_dir(tmp_path, settings):
|
def isolated_data_dir(tmp_path):
|
||||||
"""
|
"""
|
||||||
Create isolated DATA_DIR for each test.
|
Create isolated DATA_DIR for each test.
|
||||||
|
|
||||||
Uses tmp_path for isolation, configures Django settings.
|
Uses tmp_path for complete isolation.
|
||||||
"""
|
"""
|
||||||
data_dir = tmp_path / 'archivebox_data'
|
data_dir = tmp_path / 'archivebox_data'
|
||||||
data_dir.mkdir()
|
data_dir.mkdir()
|
||||||
|
return data_dir
|
||||||
# Set environment for subprocess calls
|
|
||||||
os.environ['DATA_DIR'] = str(data_dir)
|
|
||||||
|
|
||||||
# Update Django settings
|
|
||||||
settings.DATA_DIR = data_dir
|
|
||||||
|
|
||||||
yield data_dir
|
|
||||||
|
|
||||||
# Cleanup handled by tmp_path fixture
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@@ -40,81 +94,15 @@ def initialized_archive(isolated_data_dir):
|
|||||||
"""
|
"""
|
||||||
Initialize ArchiveBox archive in isolated directory.
|
Initialize ArchiveBox archive in isolated directory.
|
||||||
|
|
||||||
Runs `archivebox init` to set up database and directories.
|
Runs `archivebox init` via subprocess to set up database and directories.
|
||||||
"""
|
"""
|
||||||
from archivebox.cli.archivebox_init import init
|
stdout, stderr, returncode = run_archivebox_cmd(
|
||||||
init(setup=True, quick=True)
|
['init', '--quick'],
|
||||||
return isolated_data_dir
|
data_dir=isolated_data_dir,
|
||||||
|
timeout=60,
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def cli_env(initialized_archive):
|
|
||||||
"""
|
|
||||||
Environment dict for CLI subprocess calls.
|
|
||||||
|
|
||||||
Includes DATA_DIR and disables slow extractors.
|
|
||||||
"""
|
|
||||||
return {
|
|
||||||
**os.environ,
|
|
||||||
'DATA_DIR': str(initialized_archive),
|
|
||||||
'USE_COLOR': 'False',
|
|
||||||
'SHOW_PROGRESS': 'False',
|
|
||||||
'SAVE_TITLE': 'True',
|
|
||||||
'SAVE_FAVICON': 'False',
|
|
||||||
'SAVE_WGET': 'False',
|
|
||||||
'SAVE_WARC': 'False',
|
|
||||||
'SAVE_PDF': 'False',
|
|
||||||
'SAVE_SCREENSHOT': 'False',
|
|
||||||
'SAVE_DOM': 'False',
|
|
||||||
'SAVE_SINGLEFILE': 'False',
|
|
||||||
'SAVE_READABILITY': 'False',
|
|
||||||
'SAVE_MERCURY': 'False',
|
|
||||||
'SAVE_GIT': 'False',
|
|
||||||
'SAVE_YTDLP': 'False',
|
|
||||||
'SAVE_HEADERS': 'False',
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# CLI Helpers
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
def run_archivebox_cmd(
|
|
||||||
args: List[str],
|
|
||||||
stdin: Optional[str] = None,
|
|
||||||
cwd: Optional[Path] = None,
|
|
||||||
env: Optional[Dict[str, str]] = None,
|
|
||||||
timeout: int = 60,
|
|
||||||
) -> Tuple[str, str, int]:
|
|
||||||
"""
|
|
||||||
Run archivebox command, return (stdout, stderr, returncode).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
args: Command arguments (e.g., ['crawl', 'create', 'https://example.com'])
|
|
||||||
stdin: Optional string to pipe to stdin
|
|
||||||
cwd: Working directory (defaults to DATA_DIR from env)
|
|
||||||
env: Environment variables (defaults to os.environ with DATA_DIR)
|
|
||||||
timeout: Command timeout in seconds
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple of (stdout, stderr, returncode)
|
|
||||||
"""
|
|
||||||
cmd = [sys.executable, '-m', 'archivebox'] + args
|
|
||||||
|
|
||||||
env = env or {**os.environ}
|
|
||||||
cwd = cwd or Path(env.get('DATA_DIR', '.'))
|
|
||||||
|
|
||||||
result = subprocess.run(
|
|
||||||
cmd,
|
|
||||||
input=stdin,
|
|
||||||
capture_output=True,
|
|
||||||
text=True,
|
|
||||||
cwd=cwd,
|
|
||||||
env=env,
|
|
||||||
timeout=timeout,
|
|
||||||
)
|
)
|
||||||
|
assert returncode == 0, f"archivebox init failed: {stderr}"
|
||||||
return result.stdout, result.stderr, result.returncode
|
return isolated_data_dir
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -162,23 +150,6 @@ def assert_record_has_fields(record: Dict[str, Any], required_fields: List[str])
|
|||||||
assert record[field] is not None, f"Record field is None: {field}"
|
assert record[field] is not None, f"Record field is None: {field}"
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# Database Assertions
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
def assert_db_count(model_class, filters: Dict[str, Any], expected: int):
|
|
||||||
"""Assert database count matches expected."""
|
|
||||||
actual = model_class.objects.filter(**filters).count()
|
|
||||||
assert actual == expected, \
|
|
||||||
f"Expected {expected} {model_class.__name__}, got {actual}"
|
|
||||||
|
|
||||||
|
|
||||||
def assert_db_exists(model_class, **filters):
|
|
||||||
"""Assert at least one record exists matching filters."""
|
|
||||||
assert model_class.objects.filter(**filters).exists(), \
|
|
||||||
f"No {model_class.__name__} found matching {filters}"
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Test Data Factories
|
# Test Data Factories
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -192,11 +163,9 @@ def create_test_url(domain: str = 'example.com', path: str = None) -> str:
|
|||||||
|
|
||||||
def create_test_crawl_json(urls: List[str] = None, **kwargs) -> Dict[str, Any]:
|
def create_test_crawl_json(urls: List[str] = None, **kwargs) -> Dict[str, Any]:
|
||||||
"""Create Crawl JSONL record for testing."""
|
"""Create Crawl JSONL record for testing."""
|
||||||
from archivebox.misc.jsonl import TYPE_CRAWL
|
|
||||||
|
|
||||||
urls = urls or [create_test_url()]
|
urls = urls or [create_test_url()]
|
||||||
return {
|
return {
|
||||||
'type': TYPE_CRAWL,
|
'type': 'Crawl',
|
||||||
'urls': '\n'.join(urls),
|
'urls': '\n'.join(urls),
|
||||||
'max_depth': kwargs.get('max_depth', 0),
|
'max_depth': kwargs.get('max_depth', 0),
|
||||||
'tags_str': kwargs.get('tags_str', ''),
|
'tags_str': kwargs.get('tags_str', ''),
|
||||||
@@ -207,10 +176,8 @@ def create_test_crawl_json(urls: List[str] = None, **kwargs) -> Dict[str, Any]:
|
|||||||
|
|
||||||
def create_test_snapshot_json(url: str = None, **kwargs) -> Dict[str, Any]:
|
def create_test_snapshot_json(url: str = None, **kwargs) -> Dict[str, Any]:
|
||||||
"""Create Snapshot JSONL record for testing."""
|
"""Create Snapshot JSONL record for testing."""
|
||||||
from archivebox.misc.jsonl import TYPE_SNAPSHOT
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'type': TYPE_SNAPSHOT,
|
'type': 'Snapshot',
|
||||||
'url': url or create_test_url(),
|
'url': url or create_test_url(),
|
||||||
'tags_str': kwargs.get('tags_str', ''),
|
'tags_str': kwargs.get('tags_str', ''),
|
||||||
'status': kwargs.get('status', 'queued'),
|
'status': kwargs.get('status', 'queued'),
|
||||||
|
|||||||
@@ -21,19 +21,19 @@ from archivebox.tests.conftest import (
|
|||||||
class TestArchiveResultCreate:
|
class TestArchiveResultCreate:
|
||||||
"""Tests for `archivebox archiveresult create`."""
|
"""Tests for `archivebox archiveresult create`."""
|
||||||
|
|
||||||
def test_create_from_snapshot_jsonl(self, cli_env, initialized_archive):
|
def test_create_from_snapshot_jsonl(self, initialized_archive):
|
||||||
"""Create archive results from Snapshot JSONL input."""
|
"""Create archive results from Snapshot JSONL input."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
|
|
||||||
# Create a snapshot first
|
# Create a snapshot first
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
# Pipe snapshot to archiveresult create
|
# Pipe snapshot to archiveresult create
|
||||||
stdout2, stderr, code = run_archivebox_cmd(
|
stdout2, stderr, code = run_archivebox_cmd(
|
||||||
['archiveresult', 'create', '--plugin=title'],
|
['archiveresult', 'create', '--plugin=title'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0, f"Command failed: {stderr}"
|
assert code == 0, f"Command failed: {stderr}"
|
||||||
@@ -47,16 +47,16 @@ class TestArchiveResultCreate:
|
|||||||
ar = next(r for r in records if r['type'] == 'ArchiveResult')
|
ar = next(r for r in records if r['type'] == 'ArchiveResult')
|
||||||
assert ar['plugin'] == 'title'
|
assert ar['plugin'] == 'title'
|
||||||
|
|
||||||
def test_create_with_specific_plugin(self, cli_env, initialized_archive):
|
def test_create_with_specific_plugin(self, initialized_archive):
|
||||||
"""Create archive result for specific plugin."""
|
"""Create archive result for specific plugin."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout2, stderr, code = run_archivebox_cmd(
|
stdout2, stderr, code = run_archivebox_cmd(
|
||||||
['archiveresult', 'create', '--plugin=screenshot'],
|
['archiveresult', 'create', '--plugin=screenshot'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -65,25 +65,25 @@ class TestArchiveResultCreate:
|
|||||||
assert len(ar_records) >= 1
|
assert len(ar_records) >= 1
|
||||||
assert ar_records[0]['plugin'] == 'screenshot'
|
assert ar_records[0]['plugin'] == 'screenshot'
|
||||||
|
|
||||||
def test_create_pass_through_crawl(self, cli_env, initialized_archive):
|
def test_create_pass_through_crawl(self, initialized_archive):
|
||||||
"""Pass-through Crawl records unchanged."""
|
"""Pass-through Crawl records unchanged."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
|
|
||||||
# Create crawl and snapshot
|
# Create crawl and snapshot
|
||||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||||
crawl = parse_jsonl_output(stdout1)[0]
|
crawl = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout2, _, _ = run_archivebox_cmd(
|
stdout2, _, _ = run_archivebox_cmd(
|
||||||
['snapshot', 'create'],
|
['snapshot', 'create'],
|
||||||
stdin=json.dumps(crawl),
|
stdin=json.dumps(crawl),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Now pipe all to archiveresult create
|
# Now pipe all to archiveresult create
|
||||||
stdout3, stderr, code = run_archivebox_cmd(
|
stdout3, stderr, code = run_archivebox_cmd(
|
||||||
['archiveresult', 'create', '--plugin=title'],
|
['archiveresult', 'create', '--plugin=title'],
|
||||||
stdin=stdout2,
|
stdin=stdout2,
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -94,14 +94,14 @@ class TestArchiveResultCreate:
|
|||||||
assert 'Snapshot' in types
|
assert 'Snapshot' in types
|
||||||
assert 'ArchiveResult' in types
|
assert 'ArchiveResult' in types
|
||||||
|
|
||||||
def test_create_pass_through_only_when_no_snapshots(self, cli_env, initialized_archive):
|
def test_create_pass_through_only_when_no_snapshots(self, initialized_archive):
|
||||||
"""Only pass-through records but no new snapshots returns success."""
|
"""Only pass-through records but no new snapshots returns success."""
|
||||||
crawl_record = {'type': 'Crawl', 'id': 'fake-id', 'urls': 'https://example.com'}
|
crawl_record = {'type': 'Crawl', 'id': 'fake-id', 'urls': 'https://example.com'}
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['archiveresult', 'create'],
|
['archiveresult', 'create'],
|
||||||
stdin=json.dumps(crawl_record),
|
stdin=json.dumps(crawl_record),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -111,31 +111,31 @@ class TestArchiveResultCreate:
|
|||||||
class TestArchiveResultList:
|
class TestArchiveResultList:
|
||||||
"""Tests for `archivebox archiveresult list`."""
|
"""Tests for `archivebox archiveresult list`."""
|
||||||
|
|
||||||
def test_list_empty(self, cli_env, initialized_archive):
|
def test_list_empty(self, initialized_archive):
|
||||||
"""List with no archive results returns empty."""
|
"""List with no archive results returns empty."""
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['archiveresult', 'list'],
|
['archiveresult', 'list'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
assert 'Listed 0 archive results' in stderr
|
assert 'Listed 0 archive results' in stderr
|
||||||
|
|
||||||
def test_list_filter_by_status(self, cli_env, initialized_archive):
|
def test_list_filter_by_status(self, initialized_archive):
|
||||||
"""Filter archive results by status."""
|
"""Filter archive results by status."""
|
||||||
# Create snapshot and archive result
|
# Create snapshot and archive result
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
run_archivebox_cmd(
|
run_archivebox_cmd(
|
||||||
['archiveresult', 'create', '--plugin=title'],
|
['archiveresult', 'create', '--plugin=title'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['archiveresult', 'list', '--status=queued'],
|
['archiveresult', 'list', '--status=queued'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -143,20 +143,20 @@ class TestArchiveResultList:
|
|||||||
for r in records:
|
for r in records:
|
||||||
assert r['status'] == 'queued'
|
assert r['status'] == 'queued'
|
||||||
|
|
||||||
def test_list_filter_by_plugin(self, cli_env, initialized_archive):
|
def test_list_filter_by_plugin(self, initialized_archive):
|
||||||
"""Filter archive results by plugin."""
|
"""Filter archive results by plugin."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
run_archivebox_cmd(
|
run_archivebox_cmd(
|
||||||
['archiveresult', 'create', '--plugin=title'],
|
['archiveresult', 'create', '--plugin=title'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['archiveresult', 'list', '--plugin=title'],
|
['archiveresult', 'list', '--plugin=title'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -164,22 +164,22 @@ class TestArchiveResultList:
|
|||||||
for r in records:
|
for r in records:
|
||||||
assert r['plugin'] == 'title'
|
assert r['plugin'] == 'title'
|
||||||
|
|
||||||
def test_list_with_limit(self, cli_env, initialized_archive):
|
def test_list_with_limit(self, initialized_archive):
|
||||||
"""Limit number of results."""
|
"""Limit number of results."""
|
||||||
# Create multiple archive results
|
# Create multiple archive results
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
run_archivebox_cmd(
|
run_archivebox_cmd(
|
||||||
['archiveresult', 'create', '--plugin=title'],
|
['archiveresult', 'create', '--plugin=title'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['archiveresult', 'list', '--limit=2'],
|
['archiveresult', 'list', '--limit=2'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -190,23 +190,23 @@ class TestArchiveResultList:
|
|||||||
class TestArchiveResultUpdate:
|
class TestArchiveResultUpdate:
|
||||||
"""Tests for `archivebox archiveresult update`."""
|
"""Tests for `archivebox archiveresult update`."""
|
||||||
|
|
||||||
def test_update_status(self, cli_env, initialized_archive):
|
def test_update_status(self, initialized_archive):
|
||||||
"""Update archive result status."""
|
"""Update archive result status."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout2, _, _ = run_archivebox_cmd(
|
stdout2, _, _ = run_archivebox_cmd(
|
||||||
['archiveresult', 'create', '--plugin=title'],
|
['archiveresult', 'create', '--plugin=title'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
|
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
|
||||||
|
|
||||||
stdout3, stderr, code = run_archivebox_cmd(
|
stdout3, stderr, code = run_archivebox_cmd(
|
||||||
['archiveresult', 'update', '--status=failed'],
|
['archiveresult', 'update', '--status=failed'],
|
||||||
stdin=json.dumps(ar),
|
stdin=json.dumps(ar),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -219,45 +219,45 @@ class TestArchiveResultUpdate:
|
|||||||
class TestArchiveResultDelete:
|
class TestArchiveResultDelete:
|
||||||
"""Tests for `archivebox archiveresult delete`."""
|
"""Tests for `archivebox archiveresult delete`."""
|
||||||
|
|
||||||
def test_delete_requires_yes(self, cli_env, initialized_archive):
|
def test_delete_requires_yes(self, initialized_archive):
|
||||||
"""Delete requires --yes flag."""
|
"""Delete requires --yes flag."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout2, _, _ = run_archivebox_cmd(
|
stdout2, _, _ = run_archivebox_cmd(
|
||||||
['archiveresult', 'create', '--plugin=title'],
|
['archiveresult', 'create', '--plugin=title'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
|
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['archiveresult', 'delete'],
|
['archiveresult', 'delete'],
|
||||||
stdin=json.dumps(ar),
|
stdin=json.dumps(ar),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 1
|
assert code == 1
|
||||||
assert '--yes' in stderr
|
assert '--yes' in stderr
|
||||||
|
|
||||||
def test_delete_with_yes(self, cli_env, initialized_archive):
|
def test_delete_with_yes(self, initialized_archive):
|
||||||
"""Delete with --yes flag works."""
|
"""Delete with --yes flag works."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout2, _, _ = run_archivebox_cmd(
|
stdout2, _, _ = run_archivebox_cmd(
|
||||||
['archiveresult', 'create', '--plugin=title'],
|
['archiveresult', 'create', '--plugin=title'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
|
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['archiveresult', 'delete', '--yes'],
|
['archiveresult', 'delete', '--yes'],
|
||||||
stdin=json.dumps(ar),
|
stdin=json.dumps(ar),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
|
|||||||
@@ -23,13 +23,13 @@ from archivebox.tests.conftest import (
|
|||||||
class TestCrawlCreate:
|
class TestCrawlCreate:
|
||||||
"""Tests for `archivebox crawl create`."""
|
"""Tests for `archivebox crawl create`."""
|
||||||
|
|
||||||
def test_create_from_url_args(self, cli_env, initialized_archive):
|
def test_create_from_url_args(self, initialized_archive):
|
||||||
"""Create crawl from URL arguments."""
|
"""Create crawl from URL arguments."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'create', url],
|
['crawl', 'create', url],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0, f"Command failed: {stderr}"
|
assert code == 0, f"Command failed: {stderr}"
|
||||||
@@ -41,7 +41,7 @@ class TestCrawlCreate:
|
|||||||
assert records[0]['type'] == 'Crawl'
|
assert records[0]['type'] == 'Crawl'
|
||||||
assert url in records[0]['urls']
|
assert url in records[0]['urls']
|
||||||
|
|
||||||
def test_create_from_stdin_urls(self, cli_env, initialized_archive):
|
def test_create_from_stdin_urls(self, initialized_archive):
|
||||||
"""Create crawl from stdin URLs (one per line)."""
|
"""Create crawl from stdin URLs (one per line)."""
|
||||||
urls = [create_test_url() for _ in range(3)]
|
urls = [create_test_url() for _ in range(3)]
|
||||||
stdin = '\n'.join(urls)
|
stdin = '\n'.join(urls)
|
||||||
@@ -49,7 +49,7 @@ class TestCrawlCreate:
|
|||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'create'],
|
['crawl', 'create'],
|
||||||
stdin=stdin,
|
stdin=stdin,
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0, f"Command failed: {stderr}"
|
assert code == 0, f"Command failed: {stderr}"
|
||||||
@@ -62,33 +62,33 @@ class TestCrawlCreate:
|
|||||||
for url in urls:
|
for url in urls:
|
||||||
assert url in crawl['urls']
|
assert url in crawl['urls']
|
||||||
|
|
||||||
def test_create_with_depth(self, cli_env, initialized_archive):
|
def test_create_with_depth(self, initialized_archive):
|
||||||
"""Create crawl with --depth flag."""
|
"""Create crawl with --depth flag."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'create', '--depth=2', url],
|
['crawl', 'create', '--depth=2', url],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
records = parse_jsonl_output(stdout)
|
records = parse_jsonl_output(stdout)
|
||||||
assert records[0]['max_depth'] == 2
|
assert records[0]['max_depth'] == 2
|
||||||
|
|
||||||
def test_create_with_tag(self, cli_env, initialized_archive):
|
def test_create_with_tag(self, initialized_archive):
|
||||||
"""Create crawl with --tag flag."""
|
"""Create crawl with --tag flag."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'create', '--tag=test-tag', url],
|
['crawl', 'create', '--tag=test-tag', url],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
records = parse_jsonl_output(stdout)
|
records = parse_jsonl_output(stdout)
|
||||||
assert 'test-tag' in records[0].get('tags_str', '')
|
assert 'test-tag' in records[0].get('tags_str', '')
|
||||||
|
|
||||||
def test_create_pass_through_other_types(self, cli_env, initialized_archive):
|
def test_create_pass_through_other_types(self, initialized_archive):
|
||||||
"""Pass-through records of other types unchanged."""
|
"""Pass-through records of other types unchanged."""
|
||||||
tag_record = {'type': 'Tag', 'id': 'fake-tag-id', 'name': 'test'}
|
tag_record = {'type': 'Tag', 'id': 'fake-tag-id', 'name': 'test'}
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
@@ -97,7 +97,7 @@ class TestCrawlCreate:
|
|||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'create'],
|
['crawl', 'create'],
|
||||||
stdin=stdin,
|
stdin=stdin,
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -108,18 +108,18 @@ class TestCrawlCreate:
|
|||||||
assert 'Tag' in types
|
assert 'Tag' in types
|
||||||
assert 'Crawl' in types
|
assert 'Crawl' in types
|
||||||
|
|
||||||
def test_create_pass_through_existing_crawl(self, cli_env, initialized_archive):
|
def test_create_pass_through_existing_crawl(self, initialized_archive):
|
||||||
"""Existing Crawl records (with id) are passed through."""
|
"""Existing Crawl records (with id) are passed through."""
|
||||||
# First create a crawl
|
# First create a crawl
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||||
crawl = parse_jsonl_output(stdout1)[0]
|
crawl = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
# Now pipe it back - should pass through
|
# Now pipe it back - should pass through
|
||||||
stdout2, stderr, code = run_archivebox_cmd(
|
stdout2, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'create'],
|
['crawl', 'create'],
|
||||||
stdin=json.dumps(crawl),
|
stdin=json.dumps(crawl),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -131,24 +131,24 @@ class TestCrawlCreate:
|
|||||||
class TestCrawlList:
|
class TestCrawlList:
|
||||||
"""Tests for `archivebox crawl list`."""
|
"""Tests for `archivebox crawl list`."""
|
||||||
|
|
||||||
def test_list_empty(self, cli_env, initialized_archive):
|
def test_list_empty(self, initialized_archive):
|
||||||
"""List with no crawls returns empty."""
|
"""List with no crawls returns empty."""
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'list'],
|
['crawl', 'list'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
assert 'Listed 0 crawls' in stderr
|
assert 'Listed 0 crawls' in stderr
|
||||||
|
|
||||||
def test_list_returns_created(self, cli_env, initialized_archive):
|
def test_list_returns_created(self, initialized_archive):
|
||||||
"""List returns previously created crawls."""
|
"""List returns previously created crawls."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'list'],
|
['crawl', 'list'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -156,14 +156,14 @@ class TestCrawlList:
|
|||||||
assert len(records) >= 1
|
assert len(records) >= 1
|
||||||
assert any(url in r.get('urls', '') for r in records)
|
assert any(url in r.get('urls', '') for r in records)
|
||||||
|
|
||||||
def test_list_filter_by_status(self, cli_env, initialized_archive):
|
def test_list_filter_by_status(self, initialized_archive):
|
||||||
"""Filter crawls by status."""
|
"""Filter crawls by status."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'list', '--status=queued'],
|
['crawl', 'list', '--status=queued'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -171,15 +171,15 @@ class TestCrawlList:
|
|||||||
for r in records:
|
for r in records:
|
||||||
assert r['status'] == 'queued'
|
assert r['status'] == 'queued'
|
||||||
|
|
||||||
def test_list_with_limit(self, cli_env, initialized_archive):
|
def test_list_with_limit(self, initialized_archive):
|
||||||
"""Limit number of results."""
|
"""Limit number of results."""
|
||||||
# Create multiple crawls
|
# Create multiple crawls
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
run_archivebox_cmd(['crawl', 'create', create_test_url()], env=cli_env)
|
run_archivebox_cmd(['crawl', 'create', create_test_url()], data_dir=initialized_archive)
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'list', '--limit=2'],
|
['crawl', 'list', '--limit=2'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -190,18 +190,18 @@ class TestCrawlList:
|
|||||||
class TestCrawlUpdate:
|
class TestCrawlUpdate:
|
||||||
"""Tests for `archivebox crawl update`."""
|
"""Tests for `archivebox crawl update`."""
|
||||||
|
|
||||||
def test_update_status(self, cli_env, initialized_archive):
|
def test_update_status(self, initialized_archive):
|
||||||
"""Update crawl status."""
|
"""Update crawl status."""
|
||||||
# Create a crawl
|
# Create a crawl
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||||
crawl = parse_jsonl_output(stdout1)[0]
|
crawl = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
# Update it
|
# Update it
|
||||||
stdout2, stderr, code = run_archivebox_cmd(
|
stdout2, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'update', '--status=started'],
|
['crawl', 'update', '--status=started'],
|
||||||
stdin=json.dumps(crawl),
|
stdin=json.dumps(crawl),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -214,46 +214,46 @@ class TestCrawlUpdate:
|
|||||||
class TestCrawlDelete:
|
class TestCrawlDelete:
|
||||||
"""Tests for `archivebox crawl delete`."""
|
"""Tests for `archivebox crawl delete`."""
|
||||||
|
|
||||||
def test_delete_requires_yes(self, cli_env, initialized_archive):
|
def test_delete_requires_yes(self, initialized_archive):
|
||||||
"""Delete requires --yes flag."""
|
"""Delete requires --yes flag."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||||
crawl = parse_jsonl_output(stdout1)[0]
|
crawl = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'delete'],
|
['crawl', 'delete'],
|
||||||
stdin=json.dumps(crawl),
|
stdin=json.dumps(crawl),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 1
|
assert code == 1
|
||||||
assert '--yes' in stderr
|
assert '--yes' in stderr
|
||||||
|
|
||||||
def test_delete_with_yes(self, cli_env, initialized_archive):
|
def test_delete_with_yes(self, initialized_archive):
|
||||||
"""Delete with --yes flag works."""
|
"""Delete with --yes flag works."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||||
crawl = parse_jsonl_output(stdout1)[0]
|
crawl = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'delete', '--yes'],
|
['crawl', 'delete', '--yes'],
|
||||||
stdin=json.dumps(crawl),
|
stdin=json.dumps(crawl),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
assert 'Deleted 1 crawls' in stderr
|
assert 'Deleted 1 crawls' in stderr
|
||||||
|
|
||||||
def test_delete_dry_run(self, cli_env, initialized_archive):
|
def test_delete_dry_run(self, initialized_archive):
|
||||||
"""Dry run shows what would be deleted."""
|
"""Dry run shows what would be deleted."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||||
crawl = parse_jsonl_output(stdout1)[0]
|
crawl = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['crawl', 'delete', '--dry-run'],
|
['crawl', 'delete', '--dry-run'],
|
||||||
stdin=json.dumps(crawl),
|
stdin=json.dumps(crawl),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
|
|||||||
@@ -22,14 +22,14 @@ from archivebox.tests.conftest import (
|
|||||||
class TestRunWithCrawl:
|
class TestRunWithCrawl:
|
||||||
"""Tests for `archivebox run` with Crawl input."""
|
"""Tests for `archivebox run` with Crawl input."""
|
||||||
|
|
||||||
def test_run_with_new_crawl(self, cli_env, initialized_archive):
|
def test_run_with_new_crawl(self, initialized_archive):
|
||||||
"""Run creates and processes a new Crawl (no id)."""
|
"""Run creates and processes a new Crawl (no id)."""
|
||||||
crawl_record = create_test_crawl_json()
|
crawl_record = create_test_crawl_json()
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['run'],
|
['run'],
|
||||||
stdin=json.dumps(crawl_record),
|
stdin=json.dumps(crawl_record),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
timeout=120,
|
timeout=120,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -41,19 +41,19 @@ class TestRunWithCrawl:
|
|||||||
assert len(crawl_records) >= 1
|
assert len(crawl_records) >= 1
|
||||||
assert crawl_records[0].get('id') # Should have an id now
|
assert crawl_records[0].get('id') # Should have an id now
|
||||||
|
|
||||||
def test_run_with_existing_crawl(self, cli_env, initialized_archive):
|
def test_run_with_existing_crawl(self, initialized_archive):
|
||||||
"""Run re-queues an existing Crawl (with id)."""
|
"""Run re-queues an existing Crawl (with id)."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
|
|
||||||
# First create a crawl
|
# First create a crawl
|
||||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||||
crawl = parse_jsonl_output(stdout1)[0]
|
crawl = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
# Run with the existing crawl
|
# Run with the existing crawl
|
||||||
stdout2, stderr, code = run_archivebox_cmd(
|
stdout2, stderr, code = run_archivebox_cmd(
|
||||||
['run'],
|
['run'],
|
||||||
stdin=json.dumps(crawl),
|
stdin=json.dumps(crawl),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
timeout=120,
|
timeout=120,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -65,14 +65,14 @@ class TestRunWithCrawl:
|
|||||||
class TestRunWithSnapshot:
|
class TestRunWithSnapshot:
|
||||||
"""Tests for `archivebox run` with Snapshot input."""
|
"""Tests for `archivebox run` with Snapshot input."""
|
||||||
|
|
||||||
def test_run_with_new_snapshot(self, cli_env, initialized_archive):
|
def test_run_with_new_snapshot(self, initialized_archive):
|
||||||
"""Run creates and processes a new Snapshot (no id, just url)."""
|
"""Run creates and processes a new Snapshot (no id, just url)."""
|
||||||
snapshot_record = create_test_snapshot_json()
|
snapshot_record = create_test_snapshot_json()
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['run'],
|
['run'],
|
||||||
stdin=json.dumps(snapshot_record),
|
stdin=json.dumps(snapshot_record),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
timeout=120,
|
timeout=120,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -83,19 +83,19 @@ class TestRunWithSnapshot:
|
|||||||
assert len(snapshot_records) >= 1
|
assert len(snapshot_records) >= 1
|
||||||
assert snapshot_records[0].get('id')
|
assert snapshot_records[0].get('id')
|
||||||
|
|
||||||
def test_run_with_existing_snapshot(self, cli_env, initialized_archive):
|
def test_run_with_existing_snapshot(self, initialized_archive):
|
||||||
"""Run re-queues an existing Snapshot (with id)."""
|
"""Run re-queues an existing Snapshot (with id)."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
|
|
||||||
# First create a snapshot
|
# First create a snapshot
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
# Run with the existing snapshot
|
# Run with the existing snapshot
|
||||||
stdout2, stderr, code = run_archivebox_cmd(
|
stdout2, stderr, code = run_archivebox_cmd(
|
||||||
['run'],
|
['run'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
timeout=120,
|
timeout=120,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -103,7 +103,7 @@ class TestRunWithSnapshot:
|
|||||||
records = parse_jsonl_output(stdout2)
|
records = parse_jsonl_output(stdout2)
|
||||||
assert len(records) >= 1
|
assert len(records) >= 1
|
||||||
|
|
||||||
def test_run_with_plain_url(self, cli_env, initialized_archive):
|
def test_run_with_plain_url(self, initialized_archive):
|
||||||
"""Run accepts plain URL records (no type field)."""
|
"""Run accepts plain URL records (no type field)."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
url_record = {'url': url}
|
url_record = {'url': url}
|
||||||
@@ -111,7 +111,7 @@ class TestRunWithSnapshot:
|
|||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['run'],
|
['run'],
|
||||||
stdin=json.dumps(url_record),
|
stdin=json.dumps(url_record),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
timeout=120,
|
timeout=120,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -123,18 +123,18 @@ class TestRunWithSnapshot:
|
|||||||
class TestRunWithArchiveResult:
|
class TestRunWithArchiveResult:
|
||||||
"""Tests for `archivebox run` with ArchiveResult input."""
|
"""Tests for `archivebox run` with ArchiveResult input."""
|
||||||
|
|
||||||
def test_run_requeues_failed_archiveresult(self, cli_env, initialized_archive):
|
def test_run_requeues_failed_archiveresult(self, initialized_archive):
|
||||||
"""Run re-queues a failed ArchiveResult."""
|
"""Run re-queues a failed ArchiveResult."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
|
|
||||||
# Create snapshot and archive result
|
# Create snapshot and archive result
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout2, _, _ = run_archivebox_cmd(
|
stdout2, _, _ = run_archivebox_cmd(
|
||||||
['archiveresult', 'create', '--plugin=title'],
|
['archiveresult', 'create', '--plugin=title'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
|
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
|
||||||
|
|
||||||
@@ -143,14 +143,14 @@ class TestRunWithArchiveResult:
|
|||||||
run_archivebox_cmd(
|
run_archivebox_cmd(
|
||||||
['archiveresult', 'update', '--status=failed'],
|
['archiveresult', 'update', '--status=failed'],
|
||||||
stdin=json.dumps(ar),
|
stdin=json.dumps(ar),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Now run should re-queue it
|
# Now run should re-queue it
|
||||||
stdout3, stderr, code = run_archivebox_cmd(
|
stdout3, stderr, code = run_archivebox_cmd(
|
||||||
['run'],
|
['run'],
|
||||||
stdin=json.dumps(ar),
|
stdin=json.dumps(ar),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
timeout=120,
|
timeout=120,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -163,14 +163,14 @@ class TestRunWithArchiveResult:
|
|||||||
class TestRunPassThrough:
|
class TestRunPassThrough:
|
||||||
"""Tests for pass-through behavior in `archivebox run`."""
|
"""Tests for pass-through behavior in `archivebox run`."""
|
||||||
|
|
||||||
def test_run_passes_through_unknown_types(self, cli_env, initialized_archive):
|
def test_run_passes_through_unknown_types(self, initialized_archive):
|
||||||
"""Run passes through records with unknown types."""
|
"""Run passes through records with unknown types."""
|
||||||
unknown_record = {'type': 'Unknown', 'id': 'fake-id', 'data': 'test'}
|
unknown_record = {'type': 'Unknown', 'id': 'fake-id', 'data': 'test'}
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['run'],
|
['run'],
|
||||||
stdin=json.dumps(unknown_record),
|
stdin=json.dumps(unknown_record),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -179,7 +179,7 @@ class TestRunPassThrough:
|
|||||||
assert len(unknown_records) == 1
|
assert len(unknown_records) == 1
|
||||||
assert unknown_records[0]['data'] == 'test'
|
assert unknown_records[0]['data'] == 'test'
|
||||||
|
|
||||||
def test_run_outputs_all_processed_records(self, cli_env, initialized_archive):
|
def test_run_outputs_all_processed_records(self, initialized_archive):
|
||||||
"""Run outputs all processed records for chaining."""
|
"""Run outputs all processed records for chaining."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
crawl_record = create_test_crawl_json(urls=[url])
|
crawl_record = create_test_crawl_json(urls=[url])
|
||||||
@@ -187,7 +187,7 @@ class TestRunPassThrough:
|
|||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['run'],
|
['run'],
|
||||||
stdin=json.dumps(crawl_record),
|
stdin=json.dumps(crawl_record),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
timeout=120,
|
timeout=120,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -200,7 +200,7 @@ class TestRunPassThrough:
|
|||||||
class TestRunMixedInput:
|
class TestRunMixedInput:
|
||||||
"""Tests for `archivebox run` with mixed record types."""
|
"""Tests for `archivebox run` with mixed record types."""
|
||||||
|
|
||||||
def test_run_handles_mixed_types(self, cli_env, initialized_archive):
|
def test_run_handles_mixed_types(self, initialized_archive):
|
||||||
"""Run handles mixed Crawl/Snapshot/ArchiveResult input."""
|
"""Run handles mixed Crawl/Snapshot/ArchiveResult input."""
|
||||||
crawl = create_test_crawl_json()
|
crawl = create_test_crawl_json()
|
||||||
snapshot = create_test_snapshot_json()
|
snapshot = create_test_snapshot_json()
|
||||||
@@ -215,7 +215,7 @@ class TestRunMixedInput:
|
|||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['run'],
|
['run'],
|
||||||
stdin=stdin,
|
stdin=stdin,
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
timeout=120,
|
timeout=120,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -230,24 +230,24 @@ class TestRunMixedInput:
|
|||||||
class TestRunEmpty:
|
class TestRunEmpty:
|
||||||
"""Tests for `archivebox run` edge cases."""
|
"""Tests for `archivebox run` edge cases."""
|
||||||
|
|
||||||
def test_run_empty_stdin(self, cli_env, initialized_archive):
|
def test_run_empty_stdin(self, initialized_archive):
|
||||||
"""Run with empty stdin returns success."""
|
"""Run with empty stdin returns success."""
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['run'],
|
['run'],
|
||||||
stdin='',
|
stdin='',
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
|
|
||||||
def test_run_no_records_to_process(self, cli_env, initialized_archive):
|
def test_run_no_records_to_process(self, initialized_archive):
|
||||||
"""Run with only pass-through records shows message."""
|
"""Run with only pass-through records shows message."""
|
||||||
unknown = {'type': 'Unknown', 'id': 'fake'}
|
unknown = {'type': 'Unknown', 'id': 'fake'}
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['run'],
|
['run'],
|
||||||
stdin=json.dumps(unknown),
|
stdin=json.dumps(unknown),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
|
|||||||
@@ -22,13 +22,13 @@ from archivebox.tests.conftest import (
|
|||||||
class TestSnapshotCreate:
|
class TestSnapshotCreate:
|
||||||
"""Tests for `archivebox snapshot create`."""
|
"""Tests for `archivebox snapshot create`."""
|
||||||
|
|
||||||
def test_create_from_url_args(self, cli_env, initialized_archive):
|
def test_create_from_url_args(self, initialized_archive):
|
||||||
"""Create snapshot from URL arguments."""
|
"""Create snapshot from URL arguments."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'create', url],
|
['snapshot', 'create', url],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0, f"Command failed: {stderr}"
|
assert code == 0, f"Command failed: {stderr}"
|
||||||
@@ -39,19 +39,19 @@ class TestSnapshotCreate:
|
|||||||
assert records[0]['type'] == 'Snapshot'
|
assert records[0]['type'] == 'Snapshot'
|
||||||
assert records[0]['url'] == url
|
assert records[0]['url'] == url
|
||||||
|
|
||||||
def test_create_from_crawl_jsonl(self, cli_env, initialized_archive):
|
def test_create_from_crawl_jsonl(self, initialized_archive):
|
||||||
"""Create snapshots from Crawl JSONL input."""
|
"""Create snapshots from Crawl JSONL input."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
|
|
||||||
# First create a crawl
|
# First create a crawl
|
||||||
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
||||||
crawl = parse_jsonl_output(stdout1)[0]
|
crawl = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
# Pipe crawl to snapshot create
|
# Pipe crawl to snapshot create
|
||||||
stdout2, stderr, code = run_archivebox_cmd(
|
stdout2, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'create'],
|
['snapshot', 'create'],
|
||||||
stdin=json.dumps(crawl),
|
stdin=json.dumps(crawl),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0, f"Command failed: {stderr}"
|
assert code == 0, f"Command failed: {stderr}"
|
||||||
@@ -65,20 +65,20 @@ class TestSnapshotCreate:
|
|||||||
snapshot = next(r for r in records if r['type'] == 'Snapshot')
|
snapshot = next(r for r in records if r['type'] == 'Snapshot')
|
||||||
assert snapshot['url'] == url
|
assert snapshot['url'] == url
|
||||||
|
|
||||||
def test_create_with_tag(self, cli_env, initialized_archive):
|
def test_create_with_tag(self, initialized_archive):
|
||||||
"""Create snapshot with --tag flag."""
|
"""Create snapshot with --tag flag."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'create', '--tag=test-tag', url],
|
['snapshot', 'create', '--tag=test-tag', url],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
records = parse_jsonl_output(stdout)
|
records = parse_jsonl_output(stdout)
|
||||||
assert 'test-tag' in records[0].get('tags_str', '')
|
assert 'test-tag' in records[0].get('tags_str', '')
|
||||||
|
|
||||||
def test_create_pass_through_other_types(self, cli_env, initialized_archive):
|
def test_create_pass_through_other_types(self, initialized_archive):
|
||||||
"""Pass-through records of other types unchanged."""
|
"""Pass-through records of other types unchanged."""
|
||||||
tag_record = {'type': 'Tag', 'id': 'fake-tag-id', 'name': 'test'}
|
tag_record = {'type': 'Tag', 'id': 'fake-tag-id', 'name': 'test'}
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
@@ -87,7 +87,7 @@ class TestSnapshotCreate:
|
|||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'create'],
|
['snapshot', 'create'],
|
||||||
stdin=stdin,
|
stdin=stdin,
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -97,13 +97,13 @@ class TestSnapshotCreate:
|
|||||||
assert 'Tag' in types
|
assert 'Tag' in types
|
||||||
assert 'Snapshot' in types
|
assert 'Snapshot' in types
|
||||||
|
|
||||||
def test_create_multiple_urls(self, cli_env, initialized_archive):
|
def test_create_multiple_urls(self, initialized_archive):
|
||||||
"""Create snapshots from multiple URLs."""
|
"""Create snapshots from multiple URLs."""
|
||||||
urls = [create_test_url() for _ in range(3)]
|
urls = [create_test_url() for _ in range(3)]
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'create'] + urls,
|
['snapshot', 'create'] + urls,
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -118,24 +118,24 @@ class TestSnapshotCreate:
|
|||||||
class TestSnapshotList:
|
class TestSnapshotList:
|
||||||
"""Tests for `archivebox snapshot list`."""
|
"""Tests for `archivebox snapshot list`."""
|
||||||
|
|
||||||
def test_list_empty(self, cli_env, initialized_archive):
|
def test_list_empty(self, initialized_archive):
|
||||||
"""List with no snapshots returns empty."""
|
"""List with no snapshots returns empty."""
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'list'],
|
['snapshot', 'list'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
assert 'Listed 0 snapshots' in stderr
|
assert 'Listed 0 snapshots' in stderr
|
||||||
|
|
||||||
def test_list_returns_created(self, cli_env, initialized_archive):
|
def test_list_returns_created(self, initialized_archive):
|
||||||
"""List returns previously created snapshots."""
|
"""List returns previously created snapshots."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'list'],
|
['snapshot', 'list'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -143,14 +143,14 @@ class TestSnapshotList:
|
|||||||
assert len(records) >= 1
|
assert len(records) >= 1
|
||||||
assert any(r.get('url') == url for r in records)
|
assert any(r.get('url') == url for r in records)
|
||||||
|
|
||||||
def test_list_filter_by_status(self, cli_env, initialized_archive):
|
def test_list_filter_by_status(self, initialized_archive):
|
||||||
"""Filter snapshots by status."""
|
"""Filter snapshots by status."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'list', '--status=queued'],
|
['snapshot', 'list', '--status=queued'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -158,14 +158,14 @@ class TestSnapshotList:
|
|||||||
for r in records:
|
for r in records:
|
||||||
assert r['status'] == 'queued'
|
assert r['status'] == 'queued'
|
||||||
|
|
||||||
def test_list_filter_by_url_contains(self, cli_env, initialized_archive):
|
def test_list_filter_by_url_contains(self, initialized_archive):
|
||||||
"""Filter snapshots by URL contains."""
|
"""Filter snapshots by URL contains."""
|
||||||
url = create_test_url(domain='unique-domain-12345.com')
|
url = create_test_url(domain='unique-domain-12345.com')
|
||||||
run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'list', '--url__icontains=unique-domain-12345'],
|
['snapshot', 'list', '--url__icontains=unique-domain-12345'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -173,14 +173,14 @@ class TestSnapshotList:
|
|||||||
assert len(records) == 1
|
assert len(records) == 1
|
||||||
assert 'unique-domain-12345' in records[0]['url']
|
assert 'unique-domain-12345' in records[0]['url']
|
||||||
|
|
||||||
def test_list_with_limit(self, cli_env, initialized_archive):
|
def test_list_with_limit(self, initialized_archive):
|
||||||
"""Limit number of results."""
|
"""Limit number of results."""
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
run_archivebox_cmd(['snapshot', 'create', create_test_url()], env=cli_env)
|
run_archivebox_cmd(['snapshot', 'create', create_test_url()], data_dir=initialized_archive)
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'list', '--limit=2'],
|
['snapshot', 'list', '--limit=2'],
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -191,16 +191,16 @@ class TestSnapshotList:
|
|||||||
class TestSnapshotUpdate:
|
class TestSnapshotUpdate:
|
||||||
"""Tests for `archivebox snapshot update`."""
|
"""Tests for `archivebox snapshot update`."""
|
||||||
|
|
||||||
def test_update_status(self, cli_env, initialized_archive):
|
def test_update_status(self, initialized_archive):
|
||||||
"""Update snapshot status."""
|
"""Update snapshot status."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout2, stderr, code = run_archivebox_cmd(
|
stdout2, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'update', '--status=started'],
|
['snapshot', 'update', '--status=started'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -209,16 +209,16 @@ class TestSnapshotUpdate:
|
|||||||
records = parse_jsonl_output(stdout2)
|
records = parse_jsonl_output(stdout2)
|
||||||
assert records[0]['status'] == 'started'
|
assert records[0]['status'] == 'started'
|
||||||
|
|
||||||
def test_update_add_tag(self, cli_env, initialized_archive):
|
def test_update_add_tag(self, initialized_archive):
|
||||||
"""Update snapshot by adding tag."""
|
"""Update snapshot by adding tag."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout2, stderr, code = run_archivebox_cmd(
|
stdout2, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'update', '--tag=new-tag'],
|
['snapshot', 'update', '--tag=new-tag'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
@@ -228,46 +228,46 @@ class TestSnapshotUpdate:
|
|||||||
class TestSnapshotDelete:
|
class TestSnapshotDelete:
|
||||||
"""Tests for `archivebox snapshot delete`."""
|
"""Tests for `archivebox snapshot delete`."""
|
||||||
|
|
||||||
def test_delete_requires_yes(self, cli_env, initialized_archive):
|
def test_delete_requires_yes(self, initialized_archive):
|
||||||
"""Delete requires --yes flag."""
|
"""Delete requires --yes flag."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'delete'],
|
['snapshot', 'delete'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 1
|
assert code == 1
|
||||||
assert '--yes' in stderr
|
assert '--yes' in stderr
|
||||||
|
|
||||||
def test_delete_with_yes(self, cli_env, initialized_archive):
|
def test_delete_with_yes(self, initialized_archive):
|
||||||
"""Delete with --yes flag works."""
|
"""Delete with --yes flag works."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'delete', '--yes'],
|
['snapshot', 'delete', '--yes'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
assert 'Deleted 1 snapshots' in stderr
|
assert 'Deleted 1 snapshots' in stderr
|
||||||
|
|
||||||
def test_delete_dry_run(self, cli_env, initialized_archive):
|
def test_delete_dry_run(self, initialized_archive):
|
||||||
"""Dry run shows what would be deleted."""
|
"""Dry run shows what would be deleted."""
|
||||||
url = create_test_url()
|
url = create_test_url()
|
||||||
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
||||||
snapshot = parse_jsonl_output(stdout1)[0]
|
snapshot = parse_jsonl_output(stdout1)[0]
|
||||||
|
|
||||||
stdout, stderr, code = run_archivebox_cmd(
|
stdout, stderr, code = run_archivebox_cmd(
|
||||||
['snapshot', 'delete', '--dry-run'],
|
['snapshot', 'delete', '--dry-run'],
|
||||||
stdin=json.dumps(snapshot),
|
stdin=json.dumps(snapshot),
|
||||||
env=cli_env,
|
data_dir=initialized_archive,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert code == 0
|
assert code == 0
|
||||||
|
|||||||
Reference in New Issue
Block a user