Avoid filesystem lookups in snapshot admin list

This commit is contained in:
Nick Sweeting
2026-03-15 17:18:53 -07:00
parent 21a0a27091
commit e598614b05
5 changed files with 80 additions and 11 deletions

View File

@@ -241,6 +241,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
qs = (
super()
.get_queryset(request)
.select_related('crawl__created_by')
.defer('config', 'notes')
.prefetch_related('tags')
.prefetch_related(Prefetch('archiveresult_set', queryset=prefetch_qs))
@@ -403,7 +404,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
show_title = bool(title_raw) and title_normalized != 'pending...' and title_normalized != url_normalized
css_class = 'fetched' if show_title else 'pending'
detail_url = build_web_url(f'/{obj.archive_path}/index.html')
detail_url = build_web_url(f'/{obj.archive_path_from_db}/index.html')
title_html = ''
if show_title:
title_html = format_html(
@@ -489,7 +490,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
)
def files(self, obj):
# return '-'
return obj.icons()
return obj.icons(path=obj.archive_path_from_db)
@admin.display(
@@ -595,7 +596,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
'{}</a>'
'<div style="font-size: 10px; color: #94a3b8; margin-top: 2px;">'
'{}/{} hooks</div>',
build_web_url(f'/{obj.archive_path}'),
build_web_url(f'/{obj.archive_path_from_db}'),
size_txt,
stats['succeeded'],
stats['total'],
@@ -603,7 +604,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
return format_html(
'<a href="{}" title="View all files">{}</a>',
build_web_url(f'/{obj.archive_path}'),
build_web_url(f'/{obj.archive_path_from_db}'),
size_txt,
)

View File

@@ -1280,7 +1280,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
cache_key = f'{self.pk}-tags'
return cache.get_or_set(cache_key, calc_tags_str) if not nocache else calc_tags_str()
def icons(self) -> str:
def icons(self, path: Optional[str] = None) -> str:
"""Generate HTML icons showing which extractor plugins have succeeded for this snapshot"""
from django.utils.html import format_html, mark_safe
@@ -1296,7 +1296,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
Q(status="succeeded") & (Q(output_files__isnull=False) | ~Q(output_str=''))
)}
path = self.archive_path
archive_path = path or self.archive_path
output = ""
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a>'
@@ -1316,7 +1316,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
embed_path = result.embed_path() if result else f'{plugin}/'
output += format_html(
output_template,
path,
archive_path,
embed_path,
str(bool(existing)),
plugin,
@@ -1435,6 +1435,34 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
def legacy_archive_path(self) -> str:
return f'{CONSTANTS.ARCHIVE_DIR_NAME}/{self.timestamp}'
@cached_property
def archive_path_from_db(self) -> str:
    """Best-effort public URL path derived from DB fields only.

    Returns:
        ``self.legacy_archive_path`` when ``fs_version`` is anything other
        than ``'0.9.0'`` or ``'1.0.0'``, or when no usable date can be
        derived. Otherwise ``'{username}/{YYYYMMDD}/{domain}/{id}'`` where:

        * ``username`` is ``crawl.created_by.username`` when the snapshot has
          a crawl with a ``created_by_id``; it defaults to ``'web'``, and
          ``'system'`` is also mapped to ``'web'``.
        * the date comes from ``created_at``, then ``bookmarked_at``, then
          ``timestamp`` parsed as epoch seconds.
        * ``domain`` is ``self.extract_domain_from_url(self.url)``.
    """
    # Every fs_version other than the two handled below (including the
    # explicit legacy ones, '0.7.0' and '0.8.0') uses the legacy path.
    if self.fs_version not in ('0.9.0', '1.0.0'):
        return self.legacy_archive_path

    username = 'web'
    crawl = getattr(self, 'crawl', None)
    # NOTE(review): reading crawl.created_by presumably triggers a query per
    # row unless the queryset used select_related('crawl__created_by') - confirm.
    if crawl and getattr(crawl, 'created_by_id', None):
        username = crawl.created_by.username
    if username == 'system':
        username = 'web'

    date_base = self.created_at or self.bookmarked_at
    if date_base:
        date_str = date_base.strftime('%Y%m%d')
    else:
        try:
            date_str = datetime.fromtimestamp(float(self.timestamp)).strftime('%Y%m%d')
        except (TypeError, ValueError, OverflowError, OSError):
            # fromtimestamp() raises OverflowError (not ValueError) when the
            # value is outside the platform time_t range; treat it like any
            # other unparseable timestamp instead of crashing.
            return self.legacy_archive_path

    domain = self.extract_domain_from_url(self.url)
    return f'{username}/{date_str}/{domain}/{self.id}'
@cached_property
def url_path(self) -> str:
"""URL path matching the current snapshot output_dir layout."""