diff --git a/archivebox/core/admin_snapshots.py b/archivebox/core/admin_snapshots.py
index 6d01c25b..bc1093c9 100644
--- a/archivebox/core/admin_snapshots.py
+++ b/archivebox/core/admin_snapshots.py
@@ -241,6 +241,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
qs = (
super()
.get_queryset(request)
+ .select_related('crawl__created_by')
.defer('config', 'notes')
.prefetch_related('tags')
.prefetch_related(Prefetch('archiveresult_set', queryset=prefetch_qs))
@@ -403,7 +404,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
show_title = bool(title_raw) and title_normalized != 'pending...' and title_normalized != url_normalized
css_class = 'fetched' if show_title else 'pending'
- detail_url = build_web_url(f'/{obj.archive_path}/index.html')
+ detail_url = build_web_url(f'/{obj.archive_path_from_db}/index.html')
title_html = ''
if show_title:
title_html = format_html(
@@ -489,7 +490,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
)
def files(self, obj):
# return '-'
- return obj.icons()
+ return obj.icons(path=obj.archive_path_from_db)
@admin.display(
@@ -595,7 +596,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
'{}'
'
'
'{}/{} hooks
',
- build_web_url(f'/{obj.archive_path}'),
+ build_web_url(f'/{obj.archive_path_from_db}'),
size_txt,
stats['succeeded'],
stats['total'],
@@ -603,7 +604,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
return format_html(
'{}',
- build_web_url(f'/{obj.archive_path}'),
+ build_web_url(f'/{obj.archive_path_from_db}'),
size_txt,
)
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index a8ea9c01..193e13be 100755
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -1280,7 +1280,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
cache_key = f'{self.pk}-tags'
return cache.get_or_set(cache_key, calc_tags_str) if not nocache else calc_tags_str()
- def icons(self) -> str:
+ def icons(self, path: Optional[str] = None) -> str:
"""Generate HTML icons showing which extractor plugins have succeeded for this snapshot"""
from django.utils.html import format_html, mark_safe
@@ -1296,7 +1296,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
Q(status="succeeded") & (Q(output_files__isnull=False) | ~Q(output_str=''))
)}
- path = self.archive_path
+ archive_path = path or self.archive_path
output = ""
output_template = '{}'
@@ -1316,7 +1316,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
embed_path = result.embed_path() if result else f'{plugin}/'
output += format_html(
output_template,
- path,
+ archive_path,
embed_path,
str(bool(existing)),
plugin,
@@ -1435,6 +1435,34 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
def legacy_archive_path(self) -> str:
return f'{CONSTANTS.ARCHIVE_DIR_NAME}/{self.timestamp}'
+    @cached_property
+    def archive_path_from_db(self) -> str:
+        """Best-effort public URL path derived from DB fields only; else legacy_archive_path."""
+        if self.fs_version not in ('0.9.0', '1.0.0'):
+            # '0.7.0', '0.8.0' and any unrecognised fs_version use the legacy path.
+            return self.legacy_archive_path
+
+        # Path owner: the crawl creator's username; 'web' when absent or 'system'.
+        username = 'web'
+        crawl = getattr(self, 'crawl', None)
+        if crawl and getattr(crawl, 'created_by_id', None):
+            username = crawl.created_by.username
+        if username == 'system':
+            username = 'web'
+
+        date_base = self.created_at or self.bookmarked_at
+        if date_base:
+            date_str = date_base.strftime('%Y%m%d')
+        else:
+            try:
+                date_str = datetime.fromtimestamp(float(self.timestamp)).strftime('%Y%m%d')
+            except (TypeError, ValueError, OverflowError, OSError):
+                # fromtimestamp() raises OverflowError for inf/out-of-range timestamps.
+                return self.legacy_archive_path
+
+        domain = self.extract_domain_from_url(self.url)
+        return f'{username}/{date_str}/{domain}/{self.id}'
+
@cached_property
def url_path(self) -> str:
"""URL path matching the current snapshot output_dir layout."""
diff --git a/archivebox/tests/conftest.py b/archivebox/tests/conftest.py
index d8c38172..69740e16 100644
--- a/archivebox/tests/conftest.py
+++ b/archivebox/tests/conftest.py
@@ -11,6 +11,8 @@ from typing import List, Dict, Any, Optional, Tuple
import pytest
+from archivebox.uuid_compat import uuid7
+
# =============================================================================
# CLI Helpers (defined before fixtures that use them)
@@ -399,8 +401,7 @@ def assert_record_has_fields(record: Dict[str, Any], required_fields: List[str])
def create_test_url(domain: str = 'example.com', path: str = None) -> str:
"""Generate unique test URL."""
- import uuid
- path = path or uuid.uuid4().hex[:8]
+ path = path or uuid7().hex[-8:]  # random tail: a UUIDv7's leading hex digits are its ms timestamp and repeat for ~65s
return f'https://{domain}/{path}'
diff --git a/archivebox/tests/migrations_helpers.py b/archivebox/tests/migrations_helpers.py
index ffdf1b4d..5c620186 100644
--- a/archivebox/tests/migrations_helpers.py
+++ b/archivebox/tests/migrations_helpers.py
@@ -16,7 +16,8 @@ import subprocess
from pathlib import Path
from datetime import datetime, timezone
from typing import Dict, List, Tuple
-from uuid import uuid4
+
+from archivebox.uuid_compat import uuid7
# =============================================================================
@@ -495,7 +496,7 @@ INSERT INTO django_content_type (app_label, model) VALUES
def generate_uuid() -> str:
"""Generate a UUID string without dashes for SQLite."""
- return uuid4().hex
+ return uuid7().hex  # NOTE(review): assumes uuid7() returns a uuid.UUID, whose .hex has no dashes — confirm
def generate_timestamp() -> str:
diff --git a/archivebox/tests/test_admin_views.py b/archivebox/tests/test_admin_views.py
index 99bbe244..707822cb 100644
--- a/archivebox/tests/test_admin_views.py
+++ b/archivebox/tests/test_admin_views.py
@@ -135,6 +135,44 @@ class TestAdminSnapshotListView:
assert response.status_code == 200
assert b'example.com' in response.content
+    def test_list_view_avoids_legacy_title_fallbacks(self, client, admin_user, snapshot, monkeypatch):
+        """Changelist must render a title-less snapshot without reading latest_title or history."""
+        from archivebox.core.models import Snapshot
+
+        def _forbid(message):
+            """Build a property whose getter raises AssertionError(message)."""
+            def _raise(self):
+                raise AssertionError(message)
+            return property(_raise)
+
+        # Blank the stored title (queryset update).
+        Snapshot.objects.filter(pk=snapshot.pk).update(title='')
+        monkeypatch.setattr(Snapshot, 'latest_title', _forbid('admin changelist should not access Snapshot.latest_title'), raising=False)
+        monkeypatch.setattr(Snapshot, 'history', _forbid('admin changelist should not access Snapshot.history'), raising=False)
+
+        client.login(username='testadmin', password='testpassword')
+        changelist_url = reverse('admin:core_snapshot_changelist')
+        response = client.get(changelist_url, HTTP_HOST=ADMIN_HOST)
+
+        assert response.status_code == 200
+        assert b'example.com' in response.content
+
+    def test_list_view_avoids_output_dir_lookups(self, client, admin_user, snapshot, monkeypatch):
+        """Changelist must render with Snapshot.output_dir replaced by a property that fails on access."""
+        from archivebox.core.models import Snapshot
+
+        def _fail_on_access(self):
+            raise AssertionError('admin changelist should not access Snapshot.output_dir')
+
+        # Instance reads of output_dir during the request now raise AssertionError.
+        monkeypatch.setattr(Snapshot, 'output_dir', property(_fail_on_access), raising=False)
+        client.login(username='testadmin', password='testpassword')
+        response = client.get(reverse('admin:core_snapshot_changelist'), HTTP_HOST=ADMIN_HOST)
+
+        # Expect a normal page that still contains the 'example.com' bytes.
+        assert response.status_code == 200
+        assert b'example.com' in response.content
+
def test_grid_view_renders(self, client, admin_user):
"""Test that the grid view renders successfully."""
client.login(username='testadmin', password='testpassword')