Review notes (free text ahead of the first file header is skipped by patch tools):
  * Line structure and indentation were rebuilt from a whitespace-collapsed copy;
    context-line indentation and blank-line placement are a best guess.
  * HTML markup inside the format_html()/output_template string literals was already
    missing from that copy; those context lines carry only the surviving placeholders.
  * models.py: the timestamp fallback in archive_path_from_db now also catches OverflowError.
  * conftest.py: create_test_url() takes the last 8 hex digits of the UUIDv7, not the first 8.
  * The "index" blob hashes of those two files predate these two changes.

diff --git a/archivebox/core/admin_snapshots.py b/archivebox/core/admin_snapshots.py
index 6d01c25b..bc1093c9 100644
--- a/archivebox/core/admin_snapshots.py
+++ b/archivebox/core/admin_snapshots.py
@@ -241,6 +241,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
         qs = (
             super()
             .get_queryset(request)
+            .select_related('crawl__created_by')
             .defer('config', 'notes')
             .prefetch_related('tags')
             .prefetch_related(Prefetch('archiveresult_set', queryset=prefetch_qs))
@@ -403,7 +404,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
         show_title = bool(title_raw) and title_normalized != 'pending...' and title_normalized != url_normalized
         css_class = 'fetched' if show_title else 'pending'
 
-        detail_url = build_web_url(f'/{obj.archive_path}/index.html')
+        detail_url = build_web_url(f'/{obj.archive_path_from_db}/index.html')
         title_html = ''
         if show_title:
             title_html = format_html(
@@ -489,7 +490,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
     )
     def files(self, obj):
         # return '-'
-        return obj.icons()
+        return obj.icons(path=obj.archive_path_from_db)
 
 
     @admin.display(
@@ -595,7 +596,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
                 '{}'
                 ''
                 '{}/{} hooks',
-                build_web_url(f'/{obj.archive_path}'),
+                build_web_url(f'/{obj.archive_path_from_db}'),
                 size_txt,
                 stats['succeeded'],
                 stats['total'],
@@ -603,7 +604,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
 
         return format_html(
             '{}',
-            build_web_url(f'/{obj.archive_path}'),
+            build_web_url(f'/{obj.archive_path_from_db}'),
             size_txt,
         )
 
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index a8ea9c01..193e13be 100755
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -1280,7 +1280,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
         cache_key = f'{self.pk}-tags'
         return cache.get_or_set(cache_key, calc_tags_str) if not nocache else calc_tags_str()
 
-    def icons(self) -> str:
+    def icons(self, path: Optional[str] = None) -> str:
         """Generate HTML icons showing which extractor plugins have succeeded for this snapshot"""
         from django.utils.html import format_html, mark_safe
 
@@ -1296,7 +1296,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
                     Q(status="succeeded") & (Q(output_files__isnull=False) | ~Q(output_str=''))
                 )}
 
-            path = self.archive_path
+            archive_path = path or self.archive_path
 
             output = ""
             output_template = '{}'
@@ -1316,7 +1316,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
                 embed_path = result.embed_path() if result else f'{plugin}/'
                 output += format_html(
                     output_template,
-                    path,
+                    archive_path,
                     embed_path,
                     str(bool(existing)),
                     plugin,
@@ -1435,6 +1435,34 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
     def legacy_archive_path(self) -> str:
         return f'{CONSTANTS.ARCHIVE_DIR_NAME}/{self.timestamp}'
 
+    @cached_property
+    def archive_path_from_db(self) -> str:
+        """Best-effort public URL path derived from DB fields only."""
+        if self.fs_version in ('0.7.0', '0.8.0'):
+            return self.legacy_archive_path
+
+        if self.fs_version in ('0.9.0', '1.0.0'):
+            username = 'web'
+            crawl = getattr(self, 'crawl', None)
+            if crawl and getattr(crawl, 'created_by_id', None):
+                username = crawl.created_by.username
+            if username == 'system':
+                username = 'web'
+
+            date_base = self.created_at or self.bookmarked_at
+            if date_base:
+                date_str = date_base.strftime('%Y%m%d')
+            else:
+                try:
+                    date_str = datetime.fromtimestamp(float(self.timestamp)).strftime('%Y%m%d')
+                except (TypeError, ValueError, OverflowError, OSError):  # fromtimestamp() raises OverflowError when out of range
+                    return self.legacy_archive_path
+
+            domain = self.extract_domain_from_url(self.url)
+            return f'{username}/{date_str}/{domain}/{self.id}'
+
+        return self.legacy_archive_path
+
     @cached_property
     def url_path(self) -> str:
         """URL path matching the current snapshot output_dir layout."""
diff --git a/archivebox/tests/conftest.py b/archivebox/tests/conftest.py
index d8c38172..69740e16 100644
--- a/archivebox/tests/conftest.py
+++ b/archivebox/tests/conftest.py
@@ -11,6 +11,8 @@ from typing import List, Dict, Any, Optional, Tuple
 
 import pytest
 
+from archivebox.uuid_compat import uuid7
+
 
 # =============================================================================
 # CLI Helpers (defined before fixtures that use them)
@@ -399,8 +401,7 @@ def assert_record_has_fields(record: Dict[str, Any], required_fields: List[str])
 
 def create_test_url(domain: str = 'example.com', path: str = None) -> str:
     """Generate unique test URL."""
-    import uuid
-    path = path or uuid.uuid4().hex[:8]
+    path = path or uuid7().hex[-8:]  # random tail; the leading hex digits of a UUIDv7 are its timestamp
     return f'https://{domain}/{path}'
 
 
diff --git a/archivebox/tests/migrations_helpers.py b/archivebox/tests/migrations_helpers.py
index ffdf1b4d..5c620186 100644
--- a/archivebox/tests/migrations_helpers.py
+++ b/archivebox/tests/migrations_helpers.py
@@ -16,7 +16,8 @@ import subprocess
 from pathlib import Path
 from datetime import datetime, timezone
 from typing import Dict, List, Tuple
-from uuid import uuid4
+
+from archivebox.uuid_compat import uuid7
 
 
 # =============================================================================
@@ -495,7 +496,7 @@ INSERT INTO django_content_type (app_label, model) VALUES
 
 def generate_uuid() -> str:
     """Generate a UUID string without dashes for SQLite."""
-    return uuid4().hex
+    return uuid7().hex
 
 
 def generate_timestamp() -> str:
diff --git a/archivebox/tests/test_admin_views.py b/archivebox/tests/test_admin_views.py
index 99bbe244..707822cb 100644
--- a/archivebox/tests/test_admin_views.py
+++ b/archivebox/tests/test_admin_views.py
@@ -135,6 +135,44 @@ class TestAdminSnapshotListView:
         assert response.status_code == 200
         assert b'example.com' in response.content
 
+    def test_list_view_avoids_legacy_title_fallbacks(self, client, admin_user, snapshot, monkeypatch):
+        """Title-less snapshots should render without touching history-based fallback paths."""
+        from archivebox.core.models import Snapshot
+
+        Snapshot.objects.filter(pk=snapshot.pk).update(title='')
+
+        def _latest_title_should_not_be_used(self):
+            raise AssertionError('admin changelist should not access Snapshot.latest_title')
+
+        def _history_should_not_be_used(self):
+            raise AssertionError('admin changelist should not access Snapshot.history')
+
+        monkeypatch.setattr(Snapshot, 'latest_title', property(_latest_title_should_not_be_used), raising=False)
+        monkeypatch.setattr(Snapshot, 'history', property(_history_should_not_be_used), raising=False)
+
+        client.login(username='testadmin', password='testpassword')
+        url = reverse('admin:core_snapshot_changelist')
+        response = client.get(url, HTTP_HOST=ADMIN_HOST)
+
+        assert response.status_code == 200
+        assert b'example.com' in response.content
+
+    def test_list_view_avoids_output_dir_lookups(self, client, admin_user, snapshot, monkeypatch):
+        """Changelist links should render without probing snapshot paths on disk."""
+        from archivebox.core.models import Snapshot
+
+        def _output_dir_should_not_be_used(self):
+            raise AssertionError('admin changelist should not access Snapshot.output_dir')
+
+        monkeypatch.setattr(Snapshot, 'output_dir', property(_output_dir_should_not_be_used), raising=False)
+
+        client.login(username='testadmin', password='testpassword')
+        url = reverse('admin:core_snapshot_changelist')
+        response = client.get(url, HTTP_HOST=ADMIN_HOST)
+
+        assert response.status_code == 200
+        assert b'example.com' in response.content
+
     def test_grid_view_renders(self, client, admin_user):
         """Test that the grid view renders successfully."""
         client.login(username='testadmin', password='testpassword')