mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
Avoid filesystem lookups in snapshot admin list
This commit is contained in:
@@ -241,6 +241,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
|||||||
qs = (
|
qs = (
|
||||||
super()
|
super()
|
||||||
.get_queryset(request)
|
.get_queryset(request)
|
||||||
|
.select_related('crawl__created_by')
|
||||||
.defer('config', 'notes')
|
.defer('config', 'notes')
|
||||||
.prefetch_related('tags')
|
.prefetch_related('tags')
|
||||||
.prefetch_related(Prefetch('archiveresult_set', queryset=prefetch_qs))
|
.prefetch_related(Prefetch('archiveresult_set', queryset=prefetch_qs))
|
||||||
@@ -403,7 +404,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
|||||||
show_title = bool(title_raw) and title_normalized != 'pending...' and title_normalized != url_normalized
|
show_title = bool(title_raw) and title_normalized != 'pending...' and title_normalized != url_normalized
|
||||||
css_class = 'fetched' if show_title else 'pending'
|
css_class = 'fetched' if show_title else 'pending'
|
||||||
|
|
||||||
detail_url = build_web_url(f'/{obj.archive_path}/index.html')
|
detail_url = build_web_url(f'/{obj.archive_path_from_db}/index.html')
|
||||||
title_html = ''
|
title_html = ''
|
||||||
if show_title:
|
if show_title:
|
||||||
title_html = format_html(
|
title_html = format_html(
|
||||||
@@ -489,7 +490,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
|||||||
)
|
)
|
||||||
def files(self, obj):
|
def files(self, obj):
|
||||||
# return '-'
|
# return '-'
|
||||||
return obj.icons()
|
return obj.icons(path=obj.archive_path_from_db)
|
||||||
|
|
||||||
|
|
||||||
@admin.display(
|
@admin.display(
|
||||||
@@ -595,7 +596,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
|||||||
'{}</a>'
|
'{}</a>'
|
||||||
'<div style="font-size: 10px; color: #94a3b8; margin-top: 2px;">'
|
'<div style="font-size: 10px; color: #94a3b8; margin-top: 2px;">'
|
||||||
'{}/{} hooks</div>',
|
'{}/{} hooks</div>',
|
||||||
build_web_url(f'/{obj.archive_path}'),
|
build_web_url(f'/{obj.archive_path_from_db}'),
|
||||||
size_txt,
|
size_txt,
|
||||||
stats['succeeded'],
|
stats['succeeded'],
|
||||||
stats['total'],
|
stats['total'],
|
||||||
@@ -603,7 +604,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
|||||||
|
|
||||||
return format_html(
|
return format_html(
|
||||||
'<a href="{}" title="View all files">{}</a>',
|
'<a href="{}" title="View all files">{}</a>',
|
||||||
build_web_url(f'/{obj.archive_path}'),
|
build_web_url(f'/{obj.archive_path_from_db}'),
|
||||||
size_txt,
|
size_txt,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1280,7 +1280,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
|||||||
cache_key = f'{self.pk}-tags'
|
cache_key = f'{self.pk}-tags'
|
||||||
return cache.get_or_set(cache_key, calc_tags_str) if not nocache else calc_tags_str()
|
return cache.get_or_set(cache_key, calc_tags_str) if not nocache else calc_tags_str()
|
||||||
|
|
||||||
def icons(self) -> str:
|
def icons(self, path: Optional[str] = None) -> str:
|
||||||
"""Generate HTML icons showing which extractor plugins have succeeded for this snapshot"""
|
"""Generate HTML icons showing which extractor plugins have succeeded for this snapshot"""
|
||||||
from django.utils.html import format_html, mark_safe
|
from django.utils.html import format_html, mark_safe
|
||||||
|
|
||||||
@@ -1296,7 +1296,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
|||||||
Q(status="succeeded") & (Q(output_files__isnull=False) | ~Q(output_str=''))
|
Q(status="succeeded") & (Q(output_files__isnull=False) | ~Q(output_str=''))
|
||||||
)}
|
)}
|
||||||
|
|
||||||
path = self.archive_path
|
archive_path = path or self.archive_path
|
||||||
output = ""
|
output = ""
|
||||||
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a>'
|
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a>'
|
||||||
|
|
||||||
@@ -1316,7 +1316,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
|||||||
embed_path = result.embed_path() if result else f'{plugin}/'
|
embed_path = result.embed_path() if result else f'{plugin}/'
|
||||||
output += format_html(
|
output += format_html(
|
||||||
output_template,
|
output_template,
|
||||||
path,
|
archive_path,
|
||||||
embed_path,
|
embed_path,
|
||||||
str(bool(existing)),
|
str(bool(existing)),
|
||||||
plugin,
|
plugin,
|
||||||
@@ -1435,6 +1435,34 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
|||||||
def legacy_archive_path(self) -> str:
|
def legacy_archive_path(self) -> str:
|
||||||
return f'{CONSTANTS.ARCHIVE_DIR_NAME}/{self.timestamp}'
|
return f'{CONSTANTS.ARCHIVE_DIR_NAME}/{self.timestamp}'
|
||||||
|
|
||||||
|
@cached_property
def archive_path_from_db(self) -> str:
    """Return a best-effort public URL path derived from DB fields only.

    For ``fs_version`` ``'0.9.0'`` or ``'1.0.0'`` the result has the form
    ``<username>/<YYYYMMDD>/<domain>/<id>``. Every other ``fs_version``
    (including ``'0.7.0'`` and ``'0.8.0'``) returns ``legacy_archive_path``.

    The username comes from ``crawl.created_by.username`` when the crawl has
    a ``created_by_id``; otherwise, or when that username is ``'system'``,
    it is ``'web'``. The date comes from ``created_at``, then
    ``bookmarked_at``, then the numeric ``timestamp`` field. If none of
    those yields a usable date, ``legacy_archive_path`` is returned.
    """
    if self.fs_version not in ('0.9.0', '1.0.0'):
        # Legacy layouts ('0.7.0', '0.8.0') and unrecognised versions alike.
        return self.legacy_archive_path

    username = 'web'
    crawl = getattr(self, 'crawl', None)
    if crawl and getattr(crawl, 'created_by_id', None):
        username = crawl.created_by.username
    if username == 'system':
        username = 'web'

    date_base = self.created_at or self.bookmarked_at
    if date_base:
        date_str = date_base.strftime('%Y%m%d')
    else:
        try:
            # NOTE(review): fromtimestamp() without tz yields naive local time;
            # confirm this matches how the on-disk date folder is derived.
            date_str = datetime.fromtimestamp(float(self.timestamp)).strftime('%Y%m%d')
        except (TypeError, ValueError, OverflowError, OSError):
            # OverflowError covers out-of-range / infinite timestamps, so a
            # corrupt value degrades to the legacy path instead of raising.
            return self.legacy_archive_path

    domain = self.extract_domain_from_url(self.url)
    return f'{username}/{date_str}/{domain}/{self.id}'
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def url_path(self) -> str:
|
def url_path(self) -> str:
|
||||||
"""URL path matching the current snapshot output_dir layout."""
|
"""URL path matching the current snapshot output_dir layout."""
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ from typing import List, Dict, Any, Optional, Tuple
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from archivebox.uuid_compat import uuid7
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# CLI Helpers (defined before fixtures that use them)
|
# CLI Helpers (defined before fixtures that use them)
|
||||||
@@ -399,8 +401,7 @@ def assert_record_has_fields(record: Dict[str, Any], required_fields: List[str])
|
|||||||
|
|
||||||
def create_test_url(domain: str = 'example.com', path: str = None) -> str:
|
def create_test_url(domain: str = 'example.com', path: str = None) -> str:
|
||||||
"""Generate unique test URL."""
|
"""Generate unique test URL."""
|
||||||
import uuid
|
path = path or uuid7().hex[:8]
|
||||||
path = path or uuid.uuid4().hex[:8]
|
|
||||||
return f'https://{domain}/{path}'
|
return f'https://{domain}/{path}'
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,8 @@ import subprocess
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Dict, List, Tuple
|
from typing import Dict, List, Tuple
|
||||||
from uuid import uuid4
|
|
||||||
|
from archivebox.uuid_compat import uuid7
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -495,7 +496,7 @@ INSERT INTO django_content_type (app_label, model) VALUES
|
|||||||
|
|
||||||
def generate_uuid() -> str:
|
def generate_uuid() -> str:
|
||||||
"""Generate a UUID string without dashes for SQLite."""
|
"""Generate a UUID string without dashes for SQLite."""
|
||||||
return uuid4().hex
|
return uuid7().hex
|
||||||
|
|
||||||
|
|
||||||
def generate_timestamp() -> str:
|
def generate_timestamp() -> str:
|
||||||
|
|||||||
@@ -135,6 +135,44 @@ class TestAdminSnapshotListView:
|
|||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert b'example.com' in response.content
|
assert b'example.com' in response.content
|
||||||
|
|
||||||
|
def test_list_view_avoids_legacy_title_fallbacks(self, client, admin_user, snapshot, monkeypatch):
    """A snapshot with an empty title must render in the changelist without
    reading ``Snapshot.latest_title`` or ``Snapshot.history``."""
    from archivebox.core.models import Snapshot

    # Blank the title directly in the DB so the list view has no stored title to show.
    Snapshot.objects.filter(pk=snapshot.pk).update(title='')

    def _tripwire(message):
        """Build a property whose getter fails the test with ``message``."""
        def _getter(self):
            raise AssertionError(message)
        return property(_getter)

    # raising=False: install the tripwire even if the attribute is not defined on the model.
    monkeypatch.setattr(
        Snapshot,
        'latest_title',
        _tripwire('admin changelist should not access Snapshot.latest_title'),
        raising=False,
    )
    monkeypatch.setattr(
        Snapshot,
        'history',
        _tripwire('admin changelist should not access Snapshot.history'),
        raising=False,
    )

    client.login(username='testadmin', password='testpassword')
    changelist_url = reverse('admin:core_snapshot_changelist')
    response = client.get(changelist_url, HTTP_HOST=ADMIN_HOST)

    assert response.status_code == 200
    assert b'example.com' in response.content
|
||||||
|
|
||||||
|
def test_list_view_avoids_output_dir_lookups(self, client, admin_user, snapshot, monkeypatch):
    """The changelist must render successfully without reading ``Snapshot.output_dir``."""
    from archivebox.core.models import Snapshot

    def _fail_on_output_dir(self):
        raise AssertionError('admin changelist should not access Snapshot.output_dir')

    # Any read of Snapshot.output_dir while rendering now fails the test.
    tripwire = property(_fail_on_output_dir)
    monkeypatch.setattr(Snapshot, 'output_dir', tripwire, raising=False)

    client.login(username='testadmin', password='testpassword')
    response = client.get(
        reverse('admin:core_snapshot_changelist'),
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200
    assert b'example.com' in response.content
|
||||||
|
|
||||||
def test_grid_view_renders(self, client, admin_user):
|
def test_grid_view_renders(self, client, admin_user):
|
||||||
"""Test that the grid view renders successfully."""
|
"""Test that the grid view renders successfully."""
|
||||||
client.login(username='testadmin', password='testpassword')
|
client.login(username='testadmin', password='testpassword')
|
||||||
|
|||||||
Reference in New Issue
Block a user