mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
Fix update orphan import and host-aware tests
This commit is contained in:
@@ -17,6 +17,8 @@ pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
User = get_user_model()
|
||||
ADMIN_HOST = 'admin.archivebox.localhost:8000'
|
||||
PUBLIC_HOST = 'public.archivebox.localhost:8000'
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -120,7 +122,7 @@ class TestAdminSnapshotListView:
|
||||
"""Test that the list view renders successfully."""
|
||||
client.login(username='testadmin', password='testpassword')
|
||||
url = reverse('admin:core_snapshot_changelist')
|
||||
response = client.get(url)
|
||||
response = client.get(url, HTTP_HOST=ADMIN_HOST)
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
@@ -128,7 +130,7 @@ class TestAdminSnapshotListView:
|
||||
"""Test list view with snapshots displays them."""
|
||||
client.login(username='testadmin', password='testpassword')
|
||||
url = reverse('admin:core_snapshot_changelist')
|
||||
response = client.get(url)
|
||||
response = client.get(url, HTTP_HOST=ADMIN_HOST)
|
||||
|
||||
assert response.status_code == 200
|
||||
assert b'example.com' in response.content
|
||||
@@ -137,7 +139,7 @@ class TestAdminSnapshotListView:
|
||||
"""Test that the grid view renders successfully."""
|
||||
client.login(username='testadmin', password='testpassword')
|
||||
url = reverse('admin:grid')
|
||||
response = client.get(url)
|
||||
response = client.get(url, HTTP_HOST=ADMIN_HOST)
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
@@ -145,7 +147,7 @@ class TestAdminSnapshotListView:
|
||||
"""Test that view mode switcher is present."""
|
||||
client.login(username='testadmin', password='testpassword')
|
||||
url = reverse('admin:core_snapshot_changelist')
|
||||
response = client.get(url)
|
||||
response = client.get(url, HTTP_HOST=ADMIN_HOST)
|
||||
|
||||
assert response.status_code == 200
|
||||
# Check for view mode toggle elements
|
||||
@@ -161,7 +163,7 @@ class TestAdminSnapshotSearch:
|
||||
"""Test searching snapshots by URL."""
|
||||
client.login(username='testadmin', password='testpassword')
|
||||
url = reverse('admin:core_snapshot_changelist')
|
||||
response = client.get(url, {'q': 'example.com'})
|
||||
response = client.get(url, {'q': 'example.com'}, HTTP_HOST=ADMIN_HOST)
|
||||
|
||||
assert response.status_code == 200
|
||||
# The search should find the example.com snapshot
|
||||
@@ -178,7 +180,7 @@ class TestAdminSnapshotSearch:
|
||||
|
||||
client.login(username='testadmin', password='testpassword')
|
||||
url = reverse('admin:core_snapshot_changelist')
|
||||
response = client.get(url, {'q': 'Unique Title'})
|
||||
response = client.get(url, {'q': 'Unique Title'}, HTTP_HOST=ADMIN_HOST)
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
@@ -190,7 +192,7 @@ class TestAdminSnapshotSearch:
|
||||
|
||||
client.login(username='testadmin', password='testpassword')
|
||||
url = reverse('admin:core_snapshot_changelist')
|
||||
response = client.get(url, {'q': 'test-search-tag'})
|
||||
response = client.get(url, {'q': 'test-search-tag'}, HTTP_HOST=ADMIN_HOST)
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
@@ -198,7 +200,7 @@ class TestAdminSnapshotSearch:
|
||||
"""Test empty search returns all snapshots."""
|
||||
client.login(username='testadmin', password='testpassword')
|
||||
url = reverse('admin:core_snapshot_changelist')
|
||||
response = client.get(url, {'q': ''})
|
||||
response = client.get(url, {'q': ''}, HTTP_HOST=ADMIN_HOST)
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
@@ -206,7 +208,7 @@ class TestAdminSnapshotSearch:
|
||||
"""Test search with no results."""
|
||||
client.login(username='testadmin', password='testpassword')
|
||||
url = reverse('admin:core_snapshot_changelist')
|
||||
response = client.get(url, {'q': 'nonexistent-url-xyz789'})
|
||||
response = client.get(url, {'q': 'nonexistent-url-xyz789'}, HTTP_HOST=ADMIN_HOST)
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
@@ -228,29 +230,29 @@ class TestPublicIndexSearch:
|
||||
@override_settings(PUBLIC_INDEX=True)
|
||||
def test_public_search_by_url(self, client, public_snapshot):
|
||||
"""Test public search by URL."""
|
||||
response = client.get('/public/', {'q': 'public-example.com'})
|
||||
response = client.get('/public/', {'q': 'public-example.com'}, HTTP_HOST=PUBLIC_HOST)
|
||||
assert response.status_code == 200
|
||||
|
||||
@override_settings(PUBLIC_INDEX=True)
|
||||
def test_public_search_by_title(self, client, public_snapshot):
|
||||
"""Test public search by title."""
|
||||
response = client.get('/public/', {'q': 'Public Example'})
|
||||
response = client.get('/public/', {'q': 'Public Example'}, HTTP_HOST=PUBLIC_HOST)
|
||||
assert response.status_code == 200
|
||||
|
||||
@override_settings(PUBLIC_INDEX=True)
|
||||
def test_public_search_query_type_meta(self, client, public_snapshot):
|
||||
"""Test public search with query_type=meta."""
|
||||
response = client.get('/public/', {'q': 'example', 'query_type': 'meta'})
|
||||
response = client.get('/public/', {'q': 'example', 'query_type': 'meta'}, HTTP_HOST=PUBLIC_HOST)
|
||||
assert response.status_code == 200
|
||||
|
||||
@override_settings(PUBLIC_INDEX=True)
|
||||
def test_public_search_query_type_url(self, client, public_snapshot):
|
||||
"""Test public search with query_type=url."""
|
||||
response = client.get('/public/', {'q': 'public-example.com', 'query_type': 'url'})
|
||||
response = client.get('/public/', {'q': 'public-example.com', 'query_type': 'url'}, HTTP_HOST=PUBLIC_HOST)
|
||||
assert response.status_code == 200
|
||||
|
||||
@override_settings(PUBLIC_INDEX=True)
|
||||
def test_public_search_query_type_title(self, client, public_snapshot):
|
||||
"""Test public search with query_type=title."""
|
||||
response = client.get('/public/', {'q': 'Website', 'query_type': 'title'})
|
||||
response = client.get('/public/', {'q': 'Website', 'query_type': 'title'}, HTTP_HOST=PUBLIC_HOST)
|
||||
assert response.status_code == 200
|
||||
|
||||
@@ -167,10 +167,12 @@ class TestArchiveBoxWithLDAP(unittest.TestCase):
|
||||
# Run archivebox version with LDAP config env vars
|
||||
result = subprocess.run(
|
||||
[sys.executable, '-m', 'archivebox', 'version'],
|
||||
cwd=self.work_dir,
|
||||
capture_output=True,
|
||||
timeout=10,
|
||||
env={
|
||||
**os.environ,
|
||||
'DATA_DIR': self.work_dir,
|
||||
'LDAP_ENABLED': 'False',
|
||||
'LDAP_SERVER_URI': 'ldap://ldap-test.localhost:389',
|
||||
}
|
||||
|
||||
@@ -1,33 +1,42 @@
|
||||
import json
|
||||
import sqlite3
|
||||
|
||||
from .fixtures import *
|
||||
|
||||
def test_update_imports_orphaned_snapshots(tmp_path, process, disable_extractors_dict):
|
||||
"""Test that archivebox update imports orphaned snapshot directories."""
|
||||
# Add a snapshot
|
||||
subprocess.run(['archivebox', 'add', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
|
||||
assert list((tmp_path / "archive").iterdir()) != []
|
||||
"""Test that archivebox update imports real legacy archive directories."""
|
||||
legacy_timestamp = '1710000000'
|
||||
legacy_dir = tmp_path / 'archive' / legacy_timestamp
|
||||
legacy_dir.mkdir(parents=True, exist_ok=True)
|
||||
(legacy_dir / 'singlefile.html').write_text('<html>example</html>')
|
||||
(legacy_dir / 'index.json').write_text(json.dumps({
|
||||
'url': 'https://example.com',
|
||||
'timestamp': legacy_timestamp,
|
||||
'title': 'Example Domain',
|
||||
'fs_version': '0.8.0',
|
||||
'archive_results': [],
|
||||
}))
|
||||
|
||||
# Remove from DB but leave directory intact
|
||||
subprocess.run(['archivebox', 'remove', 'https://example.com', '--yes'], capture_output=True)
|
||||
# Run update without filters - should import and migrate the legacy directory.
|
||||
update_process = subprocess.run(
|
||||
['archivebox', 'update'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=disable_extractors_dict,
|
||||
timeout=60,
|
||||
)
|
||||
assert update_process.returncode == 0, update_process.stderr
|
||||
|
||||
# Verify snapshot removed from DB
|
||||
conn = sqlite3.connect(str(tmp_path / "index.sqlite3"))
|
||||
c = conn.cursor()
|
||||
link = c.execute("SELECT * FROM core_snapshot").fetchone()
|
||||
row = c.execute("SELECT url, fs_version FROM core_snapshot").fetchone()
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
assert link is None
|
||||
assert row == ('https://example.com', '0.9.0')
|
||||
assert legacy_dir.is_symlink()
|
||||
|
||||
# Run update without filters - should scan filesystem and import orphaned directory
|
||||
update_process = subprocess.run(['archivebox', 'update'], capture_output=True, env=disable_extractors_dict)
|
||||
|
||||
# Verify snapshot was re-imported from orphaned directory
|
||||
conn = sqlite3.connect(str(tmp_path / "index.sqlite3"))
|
||||
c = conn.cursor()
|
||||
url = c.execute("SELECT url FROM core_snapshot").fetchone()[0]
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
assert url == 'https://example.com'
|
||||
migrated_dir = legacy_dir.resolve()
|
||||
assert migrated_dir.exists()
|
||||
assert (migrated_dir / 'index.jsonl').exists()
|
||||
assert (migrated_dir / 'singlefile.html').exists()
|
||||
|
||||
Reference in New Issue
Block a user