Fix update orphan import and host-aware tests

This commit is contained in:
Nick Sweeting
2026-03-15 04:51:06 -07:00
parent ea94029759
commit 58f801c220
4 changed files with 56 additions and 34 deletions

View File

@@ -17,6 +17,8 @@ pytestmark = pytest.mark.django_db
User = get_user_model()
ADMIN_HOST = 'admin.archivebox.localhost:8000'
PUBLIC_HOST = 'public.archivebox.localhost:8000'
@pytest.fixture
@@ -120,7 +122,7 @@ class TestAdminSnapshotListView:
"""Test that the list view renders successfully."""
client.login(username='testadmin', password='testpassword')
url = reverse('admin:core_snapshot_changelist')
response = client.get(url)
response = client.get(url, HTTP_HOST=ADMIN_HOST)
assert response.status_code == 200
@@ -128,7 +130,7 @@ class TestAdminSnapshotListView:
"""Test list view with snapshots displays them."""
client.login(username='testadmin', password='testpassword')
url = reverse('admin:core_snapshot_changelist')
response = client.get(url)
response = client.get(url, HTTP_HOST=ADMIN_HOST)
assert response.status_code == 200
assert b'example.com' in response.content
@@ -137,7 +139,7 @@ class TestAdminSnapshotListView:
"""Test that the grid view renders successfully."""
client.login(username='testadmin', password='testpassword')
url = reverse('admin:grid')
response = client.get(url)
response = client.get(url, HTTP_HOST=ADMIN_HOST)
assert response.status_code == 200
@@ -145,7 +147,7 @@ class TestAdminSnapshotListView:
"""Test that view mode switcher is present."""
client.login(username='testadmin', password='testpassword')
url = reverse('admin:core_snapshot_changelist')
response = client.get(url)
response = client.get(url, HTTP_HOST=ADMIN_HOST)
assert response.status_code == 200
# Check for view mode toggle elements
@@ -161,7 +163,7 @@ class TestAdminSnapshotSearch:
"""Test searching snapshots by URL."""
client.login(username='testadmin', password='testpassword')
url = reverse('admin:core_snapshot_changelist')
response = client.get(url, {'q': 'example.com'})
response = client.get(url, {'q': 'example.com'}, HTTP_HOST=ADMIN_HOST)
assert response.status_code == 200
# The search should find the example.com snapshot
@@ -178,7 +180,7 @@ class TestAdminSnapshotSearch:
client.login(username='testadmin', password='testpassword')
url = reverse('admin:core_snapshot_changelist')
response = client.get(url, {'q': 'Unique Title'})
response = client.get(url, {'q': 'Unique Title'}, HTTP_HOST=ADMIN_HOST)
assert response.status_code == 200
@@ -190,7 +192,7 @@ class TestAdminSnapshotSearch:
client.login(username='testadmin', password='testpassword')
url = reverse('admin:core_snapshot_changelist')
response = client.get(url, {'q': 'test-search-tag'})
response = client.get(url, {'q': 'test-search-tag'}, HTTP_HOST=ADMIN_HOST)
assert response.status_code == 200
@@ -198,7 +200,7 @@ class TestAdminSnapshotSearch:
"""Test empty search returns all snapshots."""
client.login(username='testadmin', password='testpassword')
url = reverse('admin:core_snapshot_changelist')
response = client.get(url, {'q': ''})
response = client.get(url, {'q': ''}, HTTP_HOST=ADMIN_HOST)
assert response.status_code == 200
@@ -206,7 +208,7 @@ class TestAdminSnapshotSearch:
"""Test search with no results."""
client.login(username='testadmin', password='testpassword')
url = reverse('admin:core_snapshot_changelist')
response = client.get(url, {'q': 'nonexistent-url-xyz789'})
response = client.get(url, {'q': 'nonexistent-url-xyz789'}, HTTP_HOST=ADMIN_HOST)
assert response.status_code == 200
@@ -228,29 +230,29 @@ class TestPublicIndexSearch:
@override_settings(PUBLIC_INDEX=True)
def test_public_search_by_url(self, client, public_snapshot):
"""Test public search by URL."""
# NOTE(review): this looks like a diff hunk whose -/+ markers were stripped; the
# first request below is presumably the removed line and the second its replacement
# (the only difference is the added HTTP_HOST=PUBLIC_HOST) — confirm against the commit.
response = client.get('/public/', {'q': 'public-example.com'})
# Same query, sent with an explicit Host header taken from the PUBLIC_HOST constant.
response = client.get('/public/', {'q': 'public-example.com'}, HTTP_HOST=PUBLIC_HOST)
# Only the status code is checked; the response body is not inspected here.
assert response.status_code == 200
@override_settings(PUBLIC_INDEX=True)
def test_public_search_by_title(self, client, public_snapshot):
"""Test public search by title."""
# NOTE(review): this looks like a diff hunk whose -/+ markers were stripped; the
# first request below is presumably the removed line and the second its replacement
# (the only difference is the added HTTP_HOST=PUBLIC_HOST) — confirm against the commit.
response = client.get('/public/', {'q': 'Public Example'})
# Same query, sent with an explicit Host header taken from the PUBLIC_HOST constant.
response = client.get('/public/', {'q': 'Public Example'}, HTTP_HOST=PUBLIC_HOST)
# Only the status code is checked; the response body is not inspected here.
assert response.status_code == 200
@override_settings(PUBLIC_INDEX=True)
def test_public_search_query_type_meta(self, client, public_snapshot):
"""Test public search with query_type=meta."""
# NOTE(review): this looks like a diff hunk whose -/+ markers were stripped; the
# first request below is presumably the removed line and the second its replacement
# (the only difference is the added HTTP_HOST=PUBLIC_HOST) — confirm against the commit.
response = client.get('/public/', {'q': 'example', 'query_type': 'meta'})
# Same query parameters, sent with an explicit Host header taken from PUBLIC_HOST.
response = client.get('/public/', {'q': 'example', 'query_type': 'meta'}, HTTP_HOST=PUBLIC_HOST)
# Only the status code is checked; the response body is not inspected here.
assert response.status_code == 200
@override_settings(PUBLIC_INDEX=True)
def test_public_search_query_type_url(self, client, public_snapshot):
"""Test public search with query_type=url."""
# NOTE(review): this looks like a diff hunk whose -/+ markers were stripped; the
# first request below is presumably the removed line and the second its replacement
# (the only difference is the added HTTP_HOST=PUBLIC_HOST) — confirm against the commit.
response = client.get('/public/', {'q': 'public-example.com', 'query_type': 'url'})
# Same query parameters, sent with an explicit Host header taken from PUBLIC_HOST.
response = client.get('/public/', {'q': 'public-example.com', 'query_type': 'url'}, HTTP_HOST=PUBLIC_HOST)
# Only the status code is checked; the response body is not inspected here.
assert response.status_code == 200
@override_settings(PUBLIC_INDEX=True)
def test_public_search_query_type_title(self, client, public_snapshot):
"""Test public search with query_type=title."""
# NOTE(review): this looks like a diff hunk whose -/+ markers were stripped; the
# first request below is presumably the removed line and the second its replacement
# (the only difference is the added HTTP_HOST=PUBLIC_HOST) — confirm against the commit.
response = client.get('/public/', {'q': 'Website', 'query_type': 'title'})
# Same query parameters, sent with an explicit Host header taken from PUBLIC_HOST.
response = client.get('/public/', {'q': 'Website', 'query_type': 'title'}, HTTP_HOST=PUBLIC_HOST)
# Only the status code is checked; the response body is not inspected here.
assert response.status_code == 200

View File

@@ -167,10 +167,12 @@ class TestArchiveBoxWithLDAP(unittest.TestCase):
# Run archivebox version with LDAP config env vars
result = subprocess.run(
[sys.executable, '-m', 'archivebox', 'version'],
cwd=self.work_dir,
capture_output=True,
timeout=10,
env={
**os.environ,
'DATA_DIR': self.work_dir,
'LDAP_ENABLED': 'False',
'LDAP_SERVER_URI': 'ldap://ldap-test.localhost:389',
}

View File

@@ -1,33 +1,42 @@
import json
import sqlite3
from .fixtures import *
def test_update_imports_orphaned_snapshots(tmp_path, process, disable_extractors_dict):
"""Test that archivebox update imports orphaned snapshot directories."""
# NOTE(review): this span appears to be a whole-file diff hunk whose -/+ markers were
# stripped, so statements from the old and the new version of this test are interleaved
# (two docstrings, two `archivebox update` runs, and both `assert link is None` and
# `assert row == (...)`). Confirm against the commit which lines survive before treating
# it as one runnable function.
# NOTE(review): `subprocess` is not imported explicitly in the visible imports; presumably
# it arrives through `from .fixtures import *` — verify.
# Add a snapshot
subprocess.run(['archivebox', 'add', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
assert list((tmp_path / "archive").iterdir()) != []
"""Test that archivebox update imports real legacy archive directories."""
# Hand-build an archive directory named after a fixed timestamp under tmp_path/archive,
# containing a singlefile.html and an index.json that describes https://example.com
# with fs_version '0.8.0'.
legacy_timestamp = '1710000000'
legacy_dir = tmp_path / 'archive' / legacy_timestamp
legacy_dir.mkdir(parents=True, exist_ok=True)
(legacy_dir / 'singlefile.html').write_text('<html>example</html>')
(legacy_dir / 'index.json').write_text(json.dumps({
'url': 'https://example.com',
'timestamp': legacy_timestamp,
'title': 'Example Domain',
'fs_version': '0.8.0',
'archive_results': [],
}))
# Remove from DB but leave directory intact
subprocess.run(['archivebox', 'remove', 'https://example.com', '--yes'], capture_output=True)
# Run update without filters - should import and migrate the legacy directory.
update_process = subprocess.run(
['archivebox', 'update'],
capture_output=True,
text=True,
env=disable_extractors_dict,
timeout=60,
)
# Fail with the command's stderr as the assertion message if the exit code is non-zero.
assert update_process.returncode == 0, update_process.stderr
# Verify snapshot removed from DB
conn = sqlite3.connect(str(tmp_path / "index.sqlite3"))
c = conn.cursor()
link = c.execute("SELECT * FROM core_snapshot").fetchone()
# Reads the first core_snapshot row's url and fs_version for the comparison below.
row = c.execute("SELECT url, fs_version FROM core_snapshot").fetchone()
conn.commit()
conn.close()
assert link is None
# The row is expected to carry the original URL with fs_version '0.9.0' (the index.json
# written above says '0.8.0').
assert row == ('https://example.com', '0.9.0')
# The timestamp-named path is expected to be a symlink after the update.
assert legacy_dir.is_symlink()
# Run update without filters - should scan filesystem and import orphaned directory
update_process = subprocess.run(['archivebox', 'update'], capture_output=True, env=disable_extractors_dict)
# Verify snapshot was re-imported from orphaned directory
conn = sqlite3.connect(str(tmp_path / "index.sqlite3"))
c = conn.cursor()
url = c.execute("SELECT url FROM core_snapshot").fetchone()[0]
conn.commit()
conn.close()
assert url == 'https://example.com'
# Follow the symlink and check that the target directory holds an index.jsonl plus
# the singlefile.html written at the start of the test.
migrated_dir = legacy_dir.resolve()
assert migrated_dir.exists()
assert (migrated_dir / 'index.jsonl').exists()
assert (migrated_dir / 'singlefile.html').exists()