Mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2026-01-03 09:25:42 +10:00)
wip major changes
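Note: every test module in this commit imports `process` and `disable_extractors_dict` from a shared `tests/fixtures.py`, which is not part of this diff. Below is a minimal sketch of what such a fixtures module could look like, assuming `process` boots a fresh archive with `archivebox init` and `disable_extractors_dict` returns an environment with the extractor toggles switched off; only SAVE_TITLE is actually referenced by these tests, so the remaining flag names are assumptions, not taken from this commit.

# tests/fixtures.py -- hypothetical sketch, not included in this diff
import os
import subprocess

import pytest


@pytest.fixture
def process(tmp_path):
    """Initialize a fresh ArchiveBox data dir in tmp_path and yield the init process result."""
    os.chdir(tmp_path)
    proc = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
    yield proc


@pytest.fixture
def disable_extractors_dict():
    """Return an env dict with extractors disabled so tests stay fast and offline."""
    env = os.environ.copy()
    env.update({
        'USE_COLOR': 'False',
        'SAVE_TITLE': 'False',           # SAVE_TITLE is the only flag referenced by these tests
        'SAVE_WGET': 'False',            # the remaining SAVE_* names are assumed config flags
        'SAVE_SINGLEFILE': 'False',
        'SAVE_PDF': 'False',
        'SAVE_SCREENSHOT': 'False',
        'SAVE_DOM': 'False',
        'SAVE_GIT': 'False',
        'SAVE_MEDIA': 'False',
        'SAVE_ARCHIVE_DOT_ORG': 'False',
    })
    return env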
tests/test_config.py (new file, 143 lines)
@@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""Integration tests for archivebox config command."""

import os
import subprocess

import pytest

from .fixtures import process, disable_extractors_dict


def test_config_shows_all_config_values(tmp_path, process):
    """Test that config without args shows all config values."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'config'],
        capture_output=True,
        text=True,
    )

    # Should show various config sections
    assert 'TIMEOUT' in result.stdout or 'timeout' in result.stdout.lower()
    # Config should show some output
    assert len(result.stdout) > 100


def test_config_get_specific_key(tmp_path, process):
    """Test that --get retrieves a specific config value."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'config', '--get', 'TIMEOUT'],
        capture_output=True,
        text=True,
    )

    # Should show the TIMEOUT value
    assert 'TIMEOUT' in result.stdout or result.returncode == 0


def test_config_set_value_writes_to_config_file(tmp_path, process):
    """Test that --set writes config value to ArchiveBox.conf file."""
    os.chdir(tmp_path)

    # Set a config value
    result = subprocess.run(
        ['archivebox', 'config', '--set', 'TIMEOUT=120'],
        capture_output=True,
        text=True,
    )

    # Read the config file directly to verify it was written
    config_file = tmp_path / 'ArchiveBox.conf'
    if config_file.exists():
        config_content = config_file.read_text()
        # Config should contain the set value
        assert 'TIMEOUT' in config_content or 'timeout' in config_content.lower()


def test_config_set_and_get_roundtrip(tmp_path, process):
    """Test that a value set with --set can be retrieved with --get."""
    os.chdir(tmp_path)

    # Set a value
    set_result = subprocess.run(
        ['archivebox', 'config', '--set', 'TIMEOUT=999'],
        capture_output=True,
        text=True,
    )

    # Verify set was successful
    assert set_result.returncode == 0 or '999' in set_result.stdout

    # Read the config file directly to verify
    config_file = tmp_path / 'ArchiveBox.conf'
    if config_file.exists():
        config_content = config_file.read_text()
        assert '999' in config_content or 'TIMEOUT' in config_content


def test_config_search_finds_matching_keys(tmp_path, process):
    """Test that --search finds config keys matching a pattern."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'config', '--search', 'TIMEOUT'],
        capture_output=True,
        text=True,
    )

    # Should find TIMEOUT-related config
    assert 'TIMEOUT' in result.stdout or result.returncode == 0


def test_config_invalid_key_fails(tmp_path, process):
    """Test that setting an invalid config key fails."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'config', '--set', 'INVALID_KEY_THAT_DOES_NOT_EXIST=value'],
        capture_output=True,
        text=True,
    )

    # Should fail
    assert result.returncode != 0 or 'failed' in result.stdout.lower()


def test_config_set_requires_equals_sign(tmp_path, process):
    """Test that --set requires KEY=VALUE format."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'config', '--set', 'TIMEOUT'],
        capture_output=True,
        text=True,
    )

    # Should fail because there's no = sign
    assert result.returncode != 0


class TestConfigCLI:
    """Test the CLI interface for config command."""

    def test_cli_help(self, tmp_path, process):
        """Test that --help works for config command."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'config', '--help'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
        assert '--get' in result.stdout
        assert '--set' in result.stdout


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
tests/test_crawl.py (new file, 185 lines)
@@ -0,0 +1,185 @@
#!/usr/bin/env python3
"""Integration tests for archivebox crawl command."""

import os
import subprocess
import sqlite3
import json

import pytest

from .fixtures import process, disable_extractors_dict


def test_crawl_creates_crawl_object(tmp_path, process, disable_extractors_dict):
    """Test that crawl command creates a Crawl object."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'crawl', '--no-wait', 'https://example.com'],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    crawl = c.execute("SELECT id, max_depth FROM crawls_crawl ORDER BY created_at DESC LIMIT 1").fetchone()
    conn.close()

    assert crawl is not None, "Crawl object should be created"


def test_crawl_depth_sets_max_depth_in_crawl(tmp_path, process, disable_extractors_dict):
    """Test that --depth option sets max_depth in the Crawl object."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'crawl', '--depth=2', '--no-wait', 'https://example.com'],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    crawl = c.execute("SELECT max_depth FROM crawls_crawl ORDER BY created_at DESC LIMIT 1").fetchone()
    conn.close()

    assert crawl is not None
    assert crawl[0] == 2, "Crawl max_depth should match --depth=2"


def test_crawl_creates_snapshot_for_url(tmp_path, process, disable_extractors_dict):
    """Test that crawl creates a Snapshot for the input URL."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'crawl', '--no-wait', 'https://example.com'],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    snapshot = c.execute("SELECT url FROM core_snapshot WHERE url = ?",
                         ('https://example.com',)).fetchone()
    conn.close()

    assert snapshot is not None, "Snapshot should be created for input URL"


def test_crawl_links_snapshot_to_crawl(tmp_path, process, disable_extractors_dict):
    """Test that Snapshot is linked to Crawl via crawl_id."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'crawl', '--no-wait', 'https://example.com'],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()

    # Get the crawl ID
    crawl = c.execute("SELECT id FROM crawls_crawl ORDER BY created_at DESC LIMIT 1").fetchone()
    assert crawl is not None
    crawl_id = crawl[0]

    # Check snapshot has correct crawl_id
    snapshot = c.execute("SELECT crawl_id FROM core_snapshot WHERE url = ?",
                         ('https://example.com',)).fetchone()
    conn.close()

    assert snapshot is not None
    assert snapshot[0] == crawl_id, "Snapshot should be linked to Crawl"


def test_crawl_multiple_urls_creates_multiple_snapshots(tmp_path, process, disable_extractors_dict):
    """Test that crawling multiple URLs creates multiple snapshots."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'crawl', '--no-wait',
         'https://example.com',
         'https://iana.org'],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    urls = c.execute("SELECT url FROM core_snapshot ORDER BY url").fetchall()
    conn.close()

    urls = [u[0] for u in urls]
    assert 'https://example.com' in urls
    assert 'https://iana.org' in urls


def test_crawl_from_file_creates_snapshot(tmp_path, process, disable_extractors_dict):
    """Test that crawl can create snapshots from a file of URLs."""
    os.chdir(tmp_path)

    # Write URLs to a file
    urls_file = tmp_path / 'urls.txt'
    urls_file.write_text('https://example.com\n')

    subprocess.run(
        ['archivebox', 'crawl', '--no-wait', str(urls_file)],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    snapshot = c.execute("SELECT url FROM core_snapshot").fetchone()
    conn.close()

    # Should create at least one snapshot (the source file or the URL)
    assert snapshot is not None, "Should create at least one snapshot"


def test_crawl_creates_seed_for_input(tmp_path, process, disable_extractors_dict):
    """Test that crawl creates a Seed object for input."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'crawl', '--no-wait', 'https://example.com'],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    seed = c.execute("SELECT id FROM crawls_seed").fetchone()
    conn.close()

    assert seed is not None, "Seed should be created for crawl input"


class TestCrawlCLI:
    """Test the CLI interface for crawl command."""

    def test_cli_help(self, tmp_path, process):
        """Test that --help works for crawl command."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'crawl', '--help'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
        assert '--depth' in result.stdout or '-d' in result.stdout


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
tests/test_extract.py (new file, 277 lines)
@@ -0,0 +1,277 @@
#!/usr/bin/env python3
"""Integration tests for archivebox extract command."""

import os
import subprocess
import sqlite3
import json

import pytest

from .fixtures import process, disable_extractors_dict


def test_extract_runs_on_snapshot_id(tmp_path, process, disable_extractors_dict):
    """Test that extract command accepts a snapshot ID."""
    os.chdir(tmp_path)

    # First create a snapshot
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get the snapshot ID
    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    snapshot_id = c.execute("SELECT id FROM core_snapshot LIMIT 1").fetchone()[0]
    conn.close()

    # Run extract on the snapshot
    result = subprocess.run(
        ['archivebox', 'extract', '--no-wait', str(snapshot_id)],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    # Should not error about invalid snapshot ID
    assert 'not found' not in result.stderr.lower()


def test_extract_with_enabled_extractor_creates_archiveresult(tmp_path, process, disable_extractors_dict):
    """Test that extract creates ArchiveResult when extractor is enabled."""
    os.chdir(tmp_path)

    # First create a snapshot
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get the snapshot ID
    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    snapshot_id = c.execute("SELECT id FROM core_snapshot LIMIT 1").fetchone()[0]
    conn.close()

    # Run extract with title extractor enabled
    env = disable_extractors_dict.copy()
    env['SAVE_TITLE'] = 'true'

    subprocess.run(
        ['archivebox', 'extract', '--no-wait', str(snapshot_id)],
        capture_output=True,
        text=True,
        env=env,
    )

    # Check for archiveresults (may be queued, not completed with --no-wait)
    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    count = c.execute("SELECT COUNT(*) FROM core_archiveresult WHERE snapshot_id = ?",
                      (snapshot_id,)).fetchone()[0]
    conn.close()

    # May or may not have results depending on timing
    assert count >= 0


def test_extract_plugin_option_accepted(tmp_path, process, disable_extractors_dict):
    """Test that --plugin option is accepted."""
    os.chdir(tmp_path)

    # First create a snapshot
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get the snapshot ID
    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    snapshot_id = c.execute("SELECT id FROM core_snapshot LIMIT 1").fetchone()[0]
    conn.close()

    result = subprocess.run(
        ['archivebox', 'extract', '--plugin=title', '--no-wait', str(snapshot_id)],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    assert 'unrecognized arguments: --plugin' not in result.stderr


def test_extract_stdin_snapshot_id(tmp_path, process, disable_extractors_dict):
    """Test that extract reads snapshot IDs from stdin."""
    os.chdir(tmp_path)

    # First create a snapshot
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get the snapshot ID
    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    snapshot_id = c.execute("SELECT id FROM core_snapshot LIMIT 1").fetchone()[0]
    conn.close()

    result = subprocess.run(
        ['archivebox', 'extract', '--no-wait'],
        input=f'{snapshot_id}\n',
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    # Should not show "not found" error
    assert 'not found' not in result.stderr.lower() or result.returncode == 0


def test_extract_stdin_jsonl_input(tmp_path, process, disable_extractors_dict):
    """Test that extract reads JSONL records from stdin."""
    os.chdir(tmp_path)

    # First create a snapshot
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get the snapshot ID
    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    snapshot_id = c.execute("SELECT id FROM core_snapshot LIMIT 1").fetchone()[0]
    conn.close()

    jsonl_input = json.dumps({"type": "Snapshot", "id": str(snapshot_id)}) + '\n'

    result = subprocess.run(
        ['archivebox', 'extract', '--no-wait'],
        input=jsonl_input,
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    # Should not show "not found" error
    assert 'not found' not in result.stderr.lower() or result.returncode == 0


def test_extract_pipeline_from_snapshot(tmp_path, process, disable_extractors_dict):
    """Test piping snapshot output to extract."""
    os.chdir(tmp_path)

    # Create snapshot and pipe to extract
    snapshot_proc = subprocess.Popen(
        ['archivebox', 'snapshot', 'https://example.com'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=disable_extractors_dict,
    )

    subprocess.run(
        ['archivebox', 'extract', '--no-wait'],
        stdin=snapshot_proc.stdout,
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    snapshot_proc.wait()

    # Check database for snapshot
    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    snapshot = c.execute("SELECT id, url FROM core_snapshot WHERE url = ?",
                         ('https://example.com',)).fetchone()
    conn.close()

    assert snapshot is not None, "Snapshot should be created by pipeline"


def test_extract_multiple_snapshots(tmp_path, process, disable_extractors_dict):
    """Test extracting from multiple snapshots."""
    os.chdir(tmp_path)

    # Create multiple snapshots one at a time to avoid deduplication issues
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://iana.org'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get all snapshot IDs
    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    snapshot_ids = c.execute("SELECT id FROM core_snapshot").fetchall()
    conn.close()

    assert len(snapshot_ids) >= 2, "Should have at least 2 snapshots"

    # Extract from all snapshots
    ids_input = '\n'.join(str(s[0]) for s in snapshot_ids) + '\n'
    result = subprocess.run(
        ['archivebox', 'extract', '--no-wait'],
        input=ids_input,
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    # Should not error
    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    count = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    conn.close()

    assert count >= 2, "Both snapshots should still exist after extraction"


class TestExtractCLI:
    """Test the CLI interface for extract command."""

    def test_cli_help(self, tmp_path, process):
        """Test that --help works for extract command."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'extract', '--help'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
        assert '--plugin' in result.stdout or '-p' in result.stdout
        assert '--wait' in result.stdout or '--no-wait' in result.stdout

    def test_cli_no_snapshots_shows_warning(self, tmp_path, process):
        """Test that running without snapshots shows a warning."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'extract', '--no-wait'],
            input='',
            capture_output=True,
            text=True,
        )

        # Should show warning about no snapshots or exit normally (empty input)
        assert result.returncode == 0 or 'No' in result.stderr


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
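Note: the two stdin-driven tests above exercise both input formats that `archivebox extract` is expected to accept, bare snapshot IDs and JSONL records. A minimal illustration of the payloads they construct (the ID value below is a placeholder, not taken from this diff):

# Illustrative only: stdin payloads fed to `archivebox extract --no-wait` by the tests above.
import json

snapshot_id = '<snapshot-id>'                                             # placeholder core_snapshot primary key
plain_input = f'{snapshot_id}\n'                                          # format 1: bare IDs, one per line
jsonl_input = json.dumps({'type': 'Snapshot', 'id': snapshot_id}) + '\n'  # format 2: one JSON object per line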
tests/test_install.py (new file, 129 lines)
@@ -0,0 +1,129 @@
#!/usr/bin/env python3
"""Integration tests for archivebox install command."""

import os
import subprocess
import sqlite3

import pytest

from .fixtures import process, disable_extractors_dict


class TestInstallDryRun:
    """Test the dry-run mode of install command."""

    def test_dry_run_prints_message(self, tmp_path, process):
        """Test that dry-run mode prints appropriate message."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'install', '--dry-run'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
        assert 'Dry run' in result.stdout

    def test_dry_run_does_not_create_crawl(self, tmp_path, process):
        """Test that dry-run mode doesn't create a crawl."""
        os.chdir(tmp_path)

        # Get initial crawl count
        conn = sqlite3.connect('index.sqlite3')
        c = conn.cursor()
        c.execute("SELECT COUNT(*) FROM crawls_crawl")
        initial_count = c.fetchone()[0]
        conn.close()

        # Run install with dry-run
        result = subprocess.run(
            ['archivebox', 'install', '--dry-run'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0

        # Check crawl count unchanged
        conn = sqlite3.connect('index.sqlite3')
        c = conn.cursor()
        c.execute("SELECT COUNT(*) FROM crawls_crawl")
        final_count = c.fetchone()[0]
        conn.close()

        assert final_count == initial_count


class TestInstallOutput:
    """Test the output/messages from install command."""

    def test_install_prints_detecting_message(self, tmp_path, process, disable_extractors_dict):
        """Test that install prints detecting dependencies message."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'install', '--dry-run'],
            capture_output=True,
            text=True,
            env=disable_extractors_dict,
        )

        assert result.returncode == 0
        # Should mention detecting or dependencies
        output = result.stdout.lower()
        assert 'detect' in output or 'dependenc' in output or 'dry run' in output


class TestInstallCLI:
    """Test the CLI interface for install command."""

    def test_cli_help(self, tmp_path):
        """Test that --help works for install command."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'install', '--help'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
        assert '--dry-run' in result.stdout or '-d' in result.stdout

    def test_cli_invalid_option(self, tmp_path):
        """Test that invalid options are handled."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'install', '--invalid-option'],
            capture_output=True,
            text=True,
        )

        # Should fail with non-zero exit code
        assert result.returncode != 0


class TestInstallInitialization:
    """Test that install initializes the data directory if needed."""

    def test_install_from_empty_dir(self, tmp_path):
        """Test that install from empty dir initializes first."""
        os.chdir(tmp_path)

        # Don't use process fixture - start from empty dir
        result = subprocess.run(
            ['archivebox', 'install', '--dry-run'],
            capture_output=True,
            text=True,
        )

        # Should either initialize or show dry run message
        output = result.stdout
        assert 'Initializing' in output or 'Dry run' in output or 'init' in output.lower()


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
tests/test_schedule.py (new file, 75 lines)
@@ -0,0 +1,75 @@
#!/usr/bin/env python3
"""Integration tests for archivebox schedule command."""

import os
import subprocess

import pytest

from .fixtures import process, disable_extractors_dict


def test_schedule_show_lists_jobs(tmp_path, process):
    """Test that --show lists current scheduled jobs."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'schedule', '--show'],
        capture_output=True,
        text=True,
    )

    # Should either show jobs or indicate no jobs
    assert 'no' in result.stdout.lower() or 'archivebox' in result.stdout.lower() or result.returncode == 0


def test_schedule_clear_removes_jobs(tmp_path, process):
    """Test that --clear removes scheduled jobs."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'schedule', '--clear'],
        capture_output=True,
        text=True,
    )

    # Should complete successfully (may have no jobs to clear)
    assert result.returncode == 0


def test_schedule_every_requires_valid_period(tmp_path, process):
    """Test that --every requires a valid time period."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'schedule', '--every=invalid_period', 'https://example.com/feed.xml'],
        capture_output=True,
        text=True,
    )

    # Should fail with invalid period
    assert result.returncode != 0 or 'invalid' in result.stdout.lower()


class TestScheduleCLI:
    """Test the CLI interface for schedule command."""

    def test_cli_help(self, tmp_path, process):
        """Test that --help works for schedule command."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'schedule', '--help'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
        assert '--every' in result.stdout
        assert '--show' in result.stdout
        assert '--clear' in result.stdout
        assert '--depth' in result.stdout


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
tests/test_search.py (new file, 145 lines)
@@ -0,0 +1,145 @@
#!/usr/bin/env python3
"""Integration tests for archivebox search command."""

import os
import subprocess
import sqlite3
import json

import pytest

from .fixtures import process, disable_extractors_dict


def test_search_returns_snapshots(tmp_path, process, disable_extractors_dict):
    """Test that search returns snapshots."""
    os.chdir(tmp_path)

    # Add some snapshots
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    result = subprocess.run(
        ['archivebox', 'search'],
        capture_output=True,
        text=True,
    )

    # Should return some output (path or URL info)
    assert result.stdout.strip() != '' or result.returncode == 0


def test_search_filter_by_substring(tmp_path, process, disable_extractors_dict):
    """Test that substring filter works."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Search with filter - may not find if URL isn't stored as expected
    result = subprocess.run(
        ['archivebox', 'search', '--filter-type=substring', 'example'],
        capture_output=True,
        text=True,
    )

    # Should run without error
    assert result.returncode == 0 or 'No Snapshots' in result.stderr


def test_search_sort_option(tmp_path, process, disable_extractors_dict):
    """Test that --sort option works."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    result = subprocess.run(
        ['archivebox', 'search', '--sort=url'],
        capture_output=True,
        text=True,
    )

    # Should run without error
    assert result.returncode == 0


def test_search_with_headers_requires_format(tmp_path, process):
    """Test that --with-headers requires --json, --html, or --csv."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'search', '--with-headers'],
        capture_output=True,
        text=True,
    )

    # Should fail with error message
    assert result.returncode != 0
    assert 'requires' in result.stderr.lower() or 'json' in result.stderr.lower()


def test_search_status_option(tmp_path, process, disable_extractors_dict):
    """Test that --status option filters by status."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    result = subprocess.run(
        ['archivebox', 'search', '--status=indexed'],
        capture_output=True,
        text=True,
    )

    # Should run without error
    assert result.returncode == 0


def test_search_no_snapshots_message(tmp_path, process):
    """Test that searching an empty archive shows appropriate output."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'search'],
        capture_output=True,
        text=True,
    )

    # Should complete (empty results are OK)
    assert result.returncode == 0


class TestSearchCLI:
    """Test the CLI interface for search command."""

    def test_cli_help(self, tmp_path, process):
        """Test that --help works for search command."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'search', '--help'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
        assert '--filter-type' in result.stdout or '-f' in result.stdout
        assert '--status' in result.stdout
        assert '--sort' in result.stdout


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
tests/test_snapshot.py (new file, 194 lines)
@@ -0,0 +1,194 @@
#!/usr/bin/env python3
"""Integration tests for archivebox snapshot command."""

import os
import subprocess
import sqlite3
import json

import pytest

from .fixtures import process, disable_extractors_dict


def test_snapshot_creates_snapshot_with_correct_url(tmp_path, process, disable_extractors_dict):
    """Test that snapshot stores the exact URL in the database."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'snapshot', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    result = c.execute("SELECT url FROM core_snapshot WHERE url = ?",
                       ('https://example.com',)).fetchone()
    conn.close()

    assert result is not None
    assert result[0] == 'https://example.com'


def test_snapshot_multiple_urls_creates_multiple_records(tmp_path, process, disable_extractors_dict):
    """Test that multiple URLs each get their own snapshot record."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'snapshot',
         'https://example.com',
         'https://iana.org'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    urls = c.execute("SELECT url FROM core_snapshot ORDER BY url").fetchall()
    conn.close()

    urls = [u[0] for u in urls]
    assert 'https://example.com' in urls
    assert 'https://iana.org' in urls
    assert len(urls) >= 2


def test_snapshot_tag_creates_tag_and_links_to_snapshot(tmp_path, process, disable_extractors_dict):
    """Test that --tag creates tag record and links it to the snapshot."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'snapshot', '--tag=mytesttag',
         'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()

    # Verify tag was created
    tag = c.execute("SELECT id, name FROM core_tag WHERE name = ?", ('mytesttag',)).fetchone()
    assert tag is not None, "Tag 'mytesttag' should exist in core_tag"
    tag_id = tag[0]

    # Verify snapshot exists
    snapshot = c.execute("SELECT id FROM core_snapshot WHERE url = ?",
                         ('https://example.com',)).fetchone()
    assert snapshot is not None
    snapshot_id = snapshot[0]

    # Verify tag is linked to snapshot via join table
    link = c.execute("""
        SELECT * FROM core_snapshot_tags
        WHERE snapshot_id = ? AND tag_id = ?
    """, (snapshot_id, tag_id)).fetchone()
    conn.close()

    assert link is not None, "Tag should be linked to snapshot via core_snapshot_tags"


def test_snapshot_jsonl_output_has_correct_structure(tmp_path, process, disable_extractors_dict):
    """Test that JSONL output contains required fields with correct types."""
    os.chdir(tmp_path)

    # Pass URL as argument instead of stdin for more reliable behavior
    result = subprocess.run(
        ['archivebox', 'snapshot', 'https://example.com'],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    # Parse JSONL output lines
    snapshot_records = []
    for line in result.stdout.strip().split('\n'):
        if line:
            try:
                record = json.loads(line)
                if record.get('type') == 'Snapshot':
                    snapshot_records.append(record)
            except json.JSONDecodeError:
                continue

    assert len(snapshot_records) >= 1, "Should output at least one Snapshot JSONL record"

    record = snapshot_records[0]
    assert record.get('type') == 'Snapshot'
    assert 'id' in record, "Snapshot record should have 'id' field"
    assert 'url' in record, "Snapshot record should have 'url' field"
    assert record['url'] == 'https://example.com'


def test_snapshot_with_tag_stores_tag_name(tmp_path, process, disable_extractors_dict):
    """Test that the tag name is stored when provided via the --tag option."""
    os.chdir(tmp_path)

    # Use command line args instead of stdin
    subprocess.run(
        ['archivebox', 'snapshot', '--tag=customtag', 'https://example.com'],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()

    # Verify tag was created with correct name
    tag = c.execute("SELECT name FROM core_tag WHERE name = ?",
                    ('customtag',)).fetchone()
    conn.close()

    assert tag is not None
    assert tag[0] == 'customtag'


def test_snapshot_with_depth_creates_crawl_object(tmp_path, process, disable_extractors_dict):
    """Test that --depth > 0 creates a Crawl object with correct max_depth."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'snapshot', '--depth=1',
         'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    crawl = c.execute("SELECT max_depth FROM crawls_crawl ORDER BY created_at DESC LIMIT 1").fetchone()
    conn.close()

    assert crawl is not None, "Crawl object should be created when depth > 0"
    assert crawl[0] == 1, "Crawl max_depth should match --depth value"


def test_snapshot_deduplicates_urls(tmp_path, process, disable_extractors_dict):
    """Test that adding the same URL twice doesn't create duplicate snapshots."""
    os.chdir(tmp_path)

    # Add same URL twice
    subprocess.run(
        ['archivebox', 'snapshot', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )
    subprocess.run(
        ['archivebox', 'snapshot', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    conn = sqlite3.connect('index.sqlite3')
    c = conn.cursor()
    count = c.execute("SELECT COUNT(*) FROM core_snapshot WHERE url = ?",
                      ('https://example.com',)).fetchone()[0]
    conn.close()

    assert count == 1, "Same URL should not create duplicate snapshots"


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
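Note: per test_snapshot_jsonl_output_has_correct_structure above, each stdout line from `archivebox snapshot` that parses as JSON with type == 'Snapshot' must carry at least `id` and `url`. An illustrative record consistent with those assertions (the id value is a placeholder; real output may include additional fields):

# Illustrative Snapshot JSONL record; only the fields asserted by the test are shown.
example_record = {
    "type": "Snapshot",            # record type checked by the test
    "id": "<snapshot-uuid>",       # placeholder; the test only requires the key to exist
    "url": "https://example.com",  # must echo the input URL exactly
}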
tests/test_status.py (new file, 197 lines)
@@ -0,0 +1,197 @@
#!/usr/bin/env python3
"""Integration tests for archivebox status command."""

import os
import subprocess
import sqlite3

import pytest

from .fixtures import process, disable_extractors_dict


def test_status_shows_index_info(tmp_path, process):
    """Test that status shows index information."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'status'],
        capture_output=True,
        text=True,
    )

    # Should show index scanning info
    assert 'index' in result.stdout.lower() or 'Index' in result.stdout


def test_status_shows_snapshot_count(tmp_path, process, disable_extractors_dict):
    """Test that status shows snapshot count."""
    os.chdir(tmp_path)

    # Add some snapshots
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://iana.org'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    result = subprocess.run(
        ['archivebox', 'status'],
        capture_output=True,
        text=True,
    )

    # Should show link/snapshot count
    assert '2' in result.stdout or 'links' in result.stdout.lower()


def test_status_shows_archive_size(tmp_path, process, disable_extractors_dict):
    """Test that status shows archive size information."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    result = subprocess.run(
        ['archivebox', 'status'],
        capture_output=True,
        text=True,
    )

    # Should show size info (bytes, KB, MB, etc)
    assert 'Size' in result.stdout or 'size' in result.stdout or 'B' in result.stdout


def test_status_shows_indexed_count(tmp_path, process, disable_extractors_dict):
    """Test that status shows indexed folder count."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    result = subprocess.run(
        ['archivebox', 'status'],
        capture_output=True,
        text=True,
    )

    # Should show indexed count
    assert 'indexed' in result.stdout.lower()


def test_status_shows_archived_vs_unarchived(tmp_path, process, disable_extractors_dict):
    """Test that status shows archived vs unarchived counts."""
    os.chdir(tmp_path)

    # Add index-only snapshot (unarchived)
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    result = subprocess.run(
        ['archivebox', 'status'],
        capture_output=True,
        text=True,
    )

    # Should show archived/unarchived categories
    assert 'archived' in result.stdout.lower() or 'unarchived' in result.stdout.lower()


def test_status_shows_data_directory_info(tmp_path, process):
    """Test that status shows data directory path."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'status'],
        capture_output=True,
        text=True,
    )

    # Should show data directory or archive path
    assert 'archive' in result.stdout.lower() or str(tmp_path) in result.stdout


def test_status_shows_user_info(tmp_path, process):
    """Test that status shows user information."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'status'],
        capture_output=True,
        text=True,
    )

    # Should show user info section
    assert 'user' in result.stdout.lower() or 'login' in result.stdout.lower()


def test_status_empty_archive(tmp_path, process):
    """Test status on empty archive shows zero counts."""
    os.chdir(tmp_path)

    result = subprocess.run(
        ['archivebox', 'status'],
        capture_output=True,
        text=True,
    )

    # Should still run successfully
    assert result.returncode == 0 or 'index' in result.stdout.lower()
    # Should show 0 links
    assert '0' in result.stdout or 'links' in result.stdout.lower()


def test_status_shows_valid_vs_invalid(tmp_path, process, disable_extractors_dict):
    """Test that status shows valid vs invalid folder counts."""
    os.chdir(tmp_path)

    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    result = subprocess.run(
        ['archivebox', 'status'],
        capture_output=True,
        text=True,
    )

    # Should show valid/invalid categories
    assert 'valid' in result.stdout.lower() or 'present' in result.stdout.lower()


class TestStatusCLI:
    """Test the CLI interface for status command."""

    def test_cli_help(self, tmp_path, process):
        """Test that --help works for status command."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'status', '--help'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
        # Help should show some info about the command
        assert 'status' in result.stdout.lower() or 'statistic' in result.stdout.lower()


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
tests/test_version.py (new file, 160 lines)
@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""Integration tests for archivebox version command."""

import os
import subprocess
import json

import pytest

from .fixtures import process, disable_extractors_dict


class TestVersionQuiet:
    """Test the quiet/minimal version output."""

    def test_version_prints_version_number(self, tmp_path):
        """Test that version prints the version number."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'version', '--quiet'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
        # Should contain a version string like "0.8.0" or similar
        version = result.stdout.strip()
        assert version
        # Version should be a valid semver-ish format
        parts = version.split('.')
        assert len(parts) >= 2  # At least major.minor

    def test_version_flag_prints_version_number(self, tmp_path):
        """Test that --version flag prints the version number."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', '--version'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
        version = result.stdout.strip()
        assert version
        parts = version.split('.')
        assert len(parts) >= 2


class TestVersionFull:
    """Test the full version output."""

    def test_version_shows_system_info(self, tmp_path, process):
        """Test that version shows system information."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'version'],
            capture_output=True,
            text=True,
        )

        output = result.stdout

        # Should show basic system info (exit code may be 1 if binaries missing)
        assert 'ArchiveBox' in output

    def test_version_shows_binary_section(self, tmp_path, process):
        """Test that version shows binary dependencies section."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'version'],
            capture_output=True,
            text=True,
        )

        output = result.stdout

        # Should show binary dependencies section
        assert 'Binary' in output or 'Dependenc' in output

    def test_version_shows_data_locations(self, tmp_path, process):
        """Test that version shows data locations."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'version'],
            capture_output=True,
            text=True,
        )

        output = result.stdout

        # Should show data/code locations
        assert 'Data' in output or 'location' in output.lower() or 'DIR' in output or 'Code' in output


class TestVersionWithInstalledBinaries:
    """Test version output after running install."""

    def test_version_shows_binary_status(self, tmp_path, process, disable_extractors_dict):
        """Test that version shows binary status (installed or not)."""
        os.chdir(tmp_path)

        # First run install (with dry-run to speed up)
        subprocess.run(
            ['archivebox', 'install', '--dry-run'],
            capture_output=True,
            text=True,
            env=disable_extractors_dict,
        )

        # Now check version
        result = subprocess.run(
            ['archivebox', 'version'],
            capture_output=True,
            text=True,
            env=disable_extractors_dict,
        )

        output = result.stdout

        # Should show binary status (either installed or not installed)
        assert 'installed' in output.lower() or 'Binary' in output


class TestVersionCLI:
    """Test the CLI interface for version command."""

    def test_cli_help(self, tmp_path):
        """Test that --help works for version command."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'version', '--help'],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
        assert '--quiet' in result.stdout or '-q' in result.stdout

    def test_cli_invalid_option(self, tmp_path):
        """Test that invalid options are handled."""
        os.chdir(tmp_path)

        result = subprocess.run(
            ['archivebox', 'version', '--invalid-option'],
            capture_output=True,
            text=True,
        )

        # Should fail with non-zero exit code
        assert result.returncode != 0


if __name__ == '__main__':
    pytest.main([__file__, '-v'])