mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
1145 lines
46 KiB
Python
1145 lines
46 KiB
Python
__package__ = "archivebox.core"
|
||
|
||
import json
|
||
from functools import lru_cache
|
||
|
||
from django.contrib import admin, messages
|
||
from django.urls import path
|
||
from django.shortcuts import get_object_or_404, redirect
|
||
from django.utils.html import format_html
|
||
from django.utils.safestring import mark_safe
|
||
from django.db.models import Q, Sum, Count, Prefetch
|
||
from django.db.models.functions import Coalesce
|
||
from django import forms
|
||
from django.template import Template, RequestContext
|
||
from django.contrib.admin.helpers import ActionForm
|
||
|
||
from archivebox.config import DATA_DIR
|
||
from archivebox.config.common import SERVER_CONFIG
|
||
from archivebox.misc.util import htmldecode, urldecode
|
||
from archivebox.misc.paginators import AcceleratedPaginator
|
||
from archivebox.misc.logging_util import printable_filesize
|
||
from archivebox.search.admin import SearchResultsAdminMixin
|
||
from archivebox.core.host_utils import build_snapshot_url, build_web_url
|
||
from archivebox.hooks import get_plugin_icon, get_plugin_name, get_plugins
|
||
|
||
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
|
||
from archivebox.workers.tasks import bg_archive_snapshots, bg_add
|
||
|
||
from archivebox.core.models import Tag, Snapshot, ArchiveResult
|
||
from archivebox.core.admin_archiveresults import render_archiveresults_list
|
||
from archivebox.core.widgets import TagEditorWidget, InlineTagEditorWidget
|
||
|
||
|
||
# GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}
|
||
GLOBAL_CONTEXT = {}
|
||
|
||
|
||
@lru_cache(maxsize=1)
def _plugin_sort_order() -> dict[str, int]:
    """Map each plugin name to its index in the ``get_plugins()`` listing.

    The result is memoized by ``lru_cache``, so ``get_plugins()`` is only
    enumerated on the first call.
    """
    order: dict[str, int] = {}
    for position, plugin in enumerate(get_plugins()):
        order[get_plugin_name(plugin)] = position
    return order
|
||
|
||
|
||
@lru_cache(maxsize=256)
def _expected_snapshot_hook_total(config_json: str) -> int:
    """Count the hooks ``discover_hooks`` reports for "Snapshot" under a JSON config.

    Args:
        config_json: JSON-encoded config; an empty string is treated as ``{}``.

    Returns:
        The number of discovered hooks, or ``0`` when ``config_json`` cannot
        be decoded. Results are memoized per distinct ``config_json`` string.
    """
    # Imported inside the function, as in the rest of this module's lazy lookups.
    from archivebox.hooks import discover_hooks

    if config_json:
        try:
            parsed_config = json.loads(config_json)
        except Exception:
            return 0
    else:
        parsed_config = {}

    hooks = discover_hooks("Snapshot", config=parsed_config)
    return len(hooks)
|
||
|
||
|
||
class SnapshotActionForm(ActionForm):
    """Admin action form extended with a free-text ``tags`` field."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Define tags field in __init__ to avoid database access during app initialization
        self.fields["tags"] = forms.CharField(
            label="",
            required=False,
            widget=TagEditorWidget(),
        )

    def clean_tags(self):
        """Parse comma-separated tag names into Tag objects.

        Names are stripped and blank entries are dropped. Each name is matched
        case-insensitively against existing tags; a tag is created only when
        no case-insensitive match exists.

        Returns:
            A list of ``Tag`` instances in input order (empty when the field
            is blank).
        """
        tags_str = self.cleaned_data.get("tags", "")
        if not tags_str:
            return []

        tags = []
        for raw_name in tags_str.split(","):
            name = raw_name.strip()
            if not name:
                continue
            # Look up first with .first() rather than get_or_create(name__iexact=...):
            # the latter raises MultipleObjectsReturned when several tags differ
            # only by case, and it needed a second query to pick the match anyway.
            tag = Tag.objects.filter(name__iexact=name).first()
            if tag is None:
                tag, _ = Tag.objects.get_or_create(name=name)
            tags.append(tag)
        return tags
|
||
|
||
# TODO: allow selecting actions for specific extractor plugins? is this useful?
|
||
# plugin = forms.ChoiceField(
|
||
# choices=ArchiveResult.PLUGIN_CHOICES,
|
||
# required=False,
|
||
# widget=forms.MultileChoiceField(attrs={'class': "form-control"})
|
||
# )
|
||
|
||
|
||
class TagNameListFilter(admin.SimpleListFilter):
    """Changelist sidebar filter that narrows rows to a single tag."""

    title = "By tag name"
    parameter_name = "tag"

    def lookups(self, request, model_admin):
        """Return ``(pk-as-string, name)`` choices for every tag, ordered by name."""
        choices = []
        for tag in Tag.objects.order_by("name"):
            choices.append((str(tag.pk), tag.name))
        return choices

    def queryset(self, request, queryset):
        """Filter ``queryset`` by the selected tag id, or return it untouched."""
        selected_tag_id = self.value()
        if not selected_tag_id:
            return queryset
        return queryset.filter(tags__id=selected_tag_id)
|
||
|
||
|
||
class SnapshotAdminForm(forms.ModelForm):
    """Custom form for Snapshot admin with tag editor widget."""

    # Comma-separated tag names edited through TagEditorWidget; not a model field.
    tags_editor = forms.CharField(
        label="Tags",
        required=False,
        widget=TagEditorWidget(),
        help_text="Type tag names and press Enter or Space to add. Click × to remove.",
    )

    class Meta:
        model = Snapshot
        fields = "__all__"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Initialize tags_editor with current tags
        if self.instance and self.instance.pk:
            self.initial["tags_editor"] = ",".join(
                sorted(tag.name for tag in self.instance.tags.all()),
            )

    @staticmethod
    def _resolve_tags(tags_str):
        """Turn a comma-separated string of names into a list of Tag objects."""
        tags = []
        for raw_name in tags_str.split(","):
            name = raw_name.strip()
            if not name:
                continue
            # Look up first with .first() rather than get_or_create(name__iexact=...):
            # the latter raises MultipleObjectsReturned when several tags differ
            # only by case, and it needed a second query to pick the match anyway.
            tag = Tag.objects.filter(name__iexact=name).first()
            if tag is None:
                tag, _ = Tag.objects.get_or_create(name=name)
            tags.append(tag)
        return tags

    def save(self, commit=True):
        """Save the snapshot and, when committing, sync its tags from ``tags_editor``.

        Args:
            commit: When true, the instance is saved, pending many-to-many data
                is written, and the snapshot's tags are replaced by the names
                in ``tags_editor`` (or cleared if that field is blank). When
                false, nothing is written and tags are left untouched.

        Returns:
            The (possibly unsaved) Snapshot instance.
        """
        instance = super().save(commit=False)

        # Handle tags_editor field
        if commit:
            instance.save()
            save_m2m = getattr(self, "_save_m2m", None)
            if callable(save_m2m):
                save_m2m()

            # Parse and save tags from tags_editor
            tags_str = self.cleaned_data.get("tags_editor", "")
            if tags_str:
                instance.tags.set(self._resolve_tags(tags_str))
            else:
                instance.tags.clear()

        return instance
|
||
|
||
|
||
class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||
form = SnapshotAdminForm
|
||
list_display = ("created_at", "preview_icon", "title_str", "tags_inline", "status_with_progress", "files", "size_with_stats")
|
||
sort_fields = ("title_str", "created_at", "status", "crawl")
|
||
readonly_fields = (
|
||
"admin_actions",
|
||
"snapshot_summary",
|
||
"url_favicon",
|
||
"tags_badges",
|
||
"imported_timestamp",
|
||
"created_at",
|
||
"modified_at",
|
||
"downloaded_at",
|
||
"output_dir",
|
||
"archiveresults_list",
|
||
)
|
||
search_fields = ("id", "url", "timestamp", "title", "tags__name")
|
||
list_filter = ("created_at", "downloaded_at", "archiveresult__status", "crawl__created_by", TagNameListFilter)
|
||
|
||
fieldsets = (
|
||
(
|
||
"Actions",
|
||
{
|
||
"fields": ("admin_actions",),
|
||
"classes": ("card", "actions-card"),
|
||
},
|
||
),
|
||
(
|
||
"Snapshot",
|
||
{
|
||
"fields": ("snapshot_summary",),
|
||
"classes": ("card",),
|
||
},
|
||
),
|
||
(
|
||
"URL",
|
||
{
|
||
"fields": (("url_favicon", "url"), ("title", "tags_badges")),
|
||
"classes": ("card", "wide"),
|
||
},
|
||
),
|
||
(
|
||
"Tags",
|
||
{
|
||
"fields": ("tags_editor",),
|
||
"classes": ("card",),
|
||
},
|
||
),
|
||
(
|
||
"Status",
|
||
{
|
||
"fields": ("status", "retry_at"),
|
||
"classes": ("card",),
|
||
},
|
||
),
|
||
(
|
||
"Timestamps",
|
||
{
|
||
"fields": ("bookmarked_at", "created_at", "modified_at", "downloaded_at"),
|
||
"classes": ("card",),
|
||
},
|
||
),
|
||
(
|
||
"Relations",
|
||
{
|
||
"fields": ("crawl",),
|
||
"classes": ("card",),
|
||
},
|
||
),
|
||
(
|
||
"Config",
|
||
{
|
||
"fields": ("config",),
|
||
"description": '<span style="display:block; margin:-4px 0 6px; font-size:11px; line-height:1.35; color:#94a3b8;">Uses <code>Crawl.config</code> by default. Only set per-snapshot overrides here when needed.</span>',
|
||
"classes": ("card",),
|
||
},
|
||
),
|
||
(
|
||
"Files",
|
||
{
|
||
"fields": ("output_dir",),
|
||
"classes": ("card",),
|
||
},
|
||
),
|
||
(
|
||
"Archive Results",
|
||
{
|
||
"fields": ("archiveresults_list",),
|
||
"classes": ("card", "wide"),
|
||
},
|
||
),
|
||
)
|
||
|
||
ordering = ["-created_at"]
|
||
actions = ["add_tags", "remove_tags", "resnapshot_snapshot", "update_snapshots", "overwrite_snapshots", "delete_snapshots"]
|
||
inlines = [] # Removed TagInline, using TagEditorWidget instead
|
||
list_per_page = min(max(5, SERVER_CONFIG.SNAPSHOTS_PER_PAGE), 5000)
|
||
|
||
action_form = SnapshotActionForm
|
||
paginator = AcceleratedPaginator
|
||
|
||
save_on_top = True
|
||
show_full_result_count = False
|
||
|
||
def changelist_view(self, request, extra_context=None):
    """Render the changelist, falling back to a bare render if the first attempt fails.

    The request is stashed on ``self.request`` for the display helpers. Any
    exception from the first render is reported via ``message_user`` and the
    view is rendered again with only ``GLOBAL_CONTEXT``.
    """
    self.request = request
    base_context = extra_context if extra_context else {}
    try:
        response = super().changelist_view(request, base_context | GLOBAL_CONTEXT)
    except Exception as e:
        self.message_user(request, f"Error occurred while loading the page: {str(e)} {request.GET} {request.POST}")
        response = super().changelist_view(request, GLOBAL_CONTEXT)
    return response
|
||
|
||
def get_actions(self, request):
    """Return the parent's actions minus ``delete_selected`` (``{}`` if there are none)."""
    available = super().get_actions(request)
    if available:
        available.pop("delete_selected", None)
        return available
    return {}
|
||
|
||
def get_snapshot_view_url(self, obj: Snapshot) -> str:
    """Build the snapshot URL for ``obj``, passing along the stashed request if any."""
    current_request = getattr(self, "request", None)
    snapshot_id = str(obj.id)
    return build_snapshot_url(snapshot_id, request=current_request)
|
||
|
||
def get_snapshot_files_url(self, obj: Snapshot) -> str:
    """Return the snapshot URL for ``obj`` with ``/?files=1`` appended."""
    current_request = getattr(self, "request", None)
    base_url = build_snapshot_url(str(obj.id), request=current_request)
    return f"{base_url}/?files=1"
|
||
|
||
def get_snapshot_zip_url(self, obj: Snapshot) -> str:
    """Return the files URL for ``obj`` with ``&download=zip`` appended."""
    files_url = self.get_snapshot_files_url(obj)
    return f"{files_url}&download=zip"
|
||
|
||
def get_urls(self):
    """Prepend the grid and redo-failed routes to the parent's admin URLs."""
    inherited_urls = super().get_urls()
    grid_route = path(
        "grid/",
        self.admin_site.admin_view(self.grid_view),
        name="grid",
    )
    redo_failed_route = path(
        "<path:object_id>/redo-failed/",
        self.admin_site.admin_view(self.redo_failed_view),
        name="core_snapshot_redo_failed",
    )
    # Custom routes go first so they are matched before the inherited patterns.
    return [grid_route, redo_failed_route] + inherited_urls
|
||
|
||
def redo_failed_view(self, request, object_id):
    """Retry a snapshot's failed archive results on POST, then redirect to its change page.

    Non-POST requests skip the retry and only redirect. A missing snapshot
    yields a 404 via ``get_object_or_404``.
    """
    snapshot = get_object_or_404(Snapshot, pk=object_id)

    if request.method != "POST":
        return redirect(snapshot.admin_change_url)

    retried = snapshot.retry_failed_archiveresults()
    if not retried:
        messages.info(
            request,
            "No failed/skipped extractors were found on this snapshot.",
        )
    else:
        messages.success(
            request,
            f"Queued {retried} failed/skipped extractors for retry on this snapshot.",
        )

    return redirect(snapshot.admin_change_url)
|
||
|
||
# def get_queryset(self, request):
|
||
# # tags_qs = SnapshotTag.objects.all().select_related('tag')
|
||
# # prefetch = Prefetch('snapshottag_set', queryset=tags_qs)
|
||
|
||
# self.request = request
|
||
# return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult'))
|
||
def get_queryset(self, request):
    """Build the Snapshot queryset with prefetches and sort-only annotations.

    Archive results are prefetched with a restricted column set; outside the
    change view only ``succeeded`` results are prefetched. The aggregate
    annotations (``output_size_sum``, ``ar_succeeded_count``, ``tag_count``)
    are added only when the matching column appears in the ordering fields.
    """
    self.request = request
    ordering_fields = self._get_ordering_fields(request)
    sort_by_size = "size_with_stats" in ordering_fields
    sort_by_files = "files" in ordering_fields
    sort_by_tags = "tags_inline" in ordering_fields

    resolver_match = getattr(request, "resolver_match", None)
    on_change_view = getattr(resolver_match, "url_name", "") == "core_snapshot_change"

    result_columns = (
        "id",
        "snapshot_id",
        "plugin",
        "status",
        "output_size",
        "output_files",
        "output_str",
    )
    results_qs = ArchiveResult.objects.only(*result_columns)
    if not on_change_view:
        results_qs = results_qs.filter(Q(status="succeeded"))

    snapshots = super().get_queryset(request)
    snapshots = snapshots.select_related("crawl__created_by")
    snapshots = snapshots.defer("config", "notes")
    snapshots = snapshots.prefetch_related("tags")
    snapshots = snapshots.prefetch_related(Prefetch("archiveresult_set", queryset=results_qs))

    if sort_by_size:
        total_output_size = Coalesce(Sum("archiveresult__output_size"), 0)
        snapshots = snapshots.annotate(output_size_sum=total_output_size)

    if sort_by_files:
        succeeded_results = Count(
            "archiveresult",
            filter=Q(archiveresult__status="succeeded"),
        )
        snapshots = snapshots.annotate(ar_succeeded_count=succeeded_results)

    if sort_by_tags:
        snapshots = snapshots.annotate(tag_count=Count("tags", distinct=True))

    return snapshots
|
||
|
||
@admin.display(description="Imported Timestamp")
def imported_timestamp(self, obj):
    """Render ``bookmarked_at`` followed by the raw timestamp in a ``<code>`` tag."""
    values = {
        "bookmarked_date": obj.bookmarked_at,
        "timestamp": obj.timestamp,
    }
    context = RequestContext(self.request, values)
    template = Template("""{{bookmarked_date}} (<code>{{timestamp}}</code>)""")
    rendered = template.render(context)
    return mark_safe(rendered)
|
||
|
||
# pretty_time = obj.bookmarked.strftime('%Y-%m-%d %H:%M:%S')
|
||
# return f'{pretty_time} ({obj.timestamp})'
|
||
|
||
# TODO: figure out a different way to do this; you can't nest forms, so this doesn't work
|
||
# def action(self, obj):
|
||
# # csrfmiddlewaretoken: Wa8UcQ4fD3FJibzxqHN3IYrrjLo4VguWynmbzzcPYoebfVUnDovon7GEMYFRgsh0
|
||
# # action: update_snapshots
|
||
# # select_across: 0
|
||
# # _selected_action: 76d29b26-2a88-439e-877c-a7cca1b72bb3
|
||
# return format_html(
|
||
# '''
|
||
# <form action="/admin/core/snapshot/" method="post" onsubmit="e => e.stopPropagation()">
|
||
# <input type="hidden" name="csrfmiddlewaretoken" value="{}">
|
||
# <input type="hidden" name="_selected_action" value="{}">
|
||
# <button name="update_snapshots">Check</button>
|
||
# <button name="update_titles">Pull title + favicon</button>
|
||
# <button name="update_snapshots">Update</button>
|
||
# <button name="overwrite_snapshots">Re-Archive (overwrite)</button>
|
||
# <button name="delete_snapshots">Permanently delete</button>
|
||
# </form>
|
||
# ''',
|
||
# csrf.get_token(self.request),
|
||
# obj.pk,
|
||
# )
|
||
|
||
@admin.display(description="")
def admin_actions(self, obj):
    """Render the row of action buttons for a single snapshot.

    The row contains links to the snapshot view, its file listing, a zip
    download and the original URL, followed by a POST button targeting the
    redo-failed route and three links that open the snapshot changelist
    filtered to this snapshot (``?id__exact=<pk>``).

    Args:
        obj: The Snapshot being displayed.

    Returns:
        Safe HTML produced by ``format_html`` (all interpolated values are escaped).
    """
    summary_url = self.get_snapshot_view_url(obj)
    files_url = self.get_snapshot_files_url(obj)
    zip_url = self.get_snapshot_zip_url(obj)
    redo_failed_url = f"/admin/core/snapshot/{obj.pk}/redo-failed/"
    return format_html(
        """
        <div style="display: flex; flex-wrap: wrap; gap: 12px; align-items: center;">
            <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
               href="{}"
               onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
               onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
                📄 View Snapshot
            </a>
            <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
               href="{}"
               onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
               onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
                📁 All files
            </a>
            <a class="btn archivebox-zip-button" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #eff6ff; border: 1px solid #bfdbfe; border-radius: 8px; color: #1d4ed8; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
               href="{}"
               data-loading-label="Preparing..."
               onclick="return window.archiveboxHandleZipClick(this, event);"
               onmouseover="this.style.background='#dbeafe'; this.style.borderColor='#93c5fd';"
               onmouseout="this.style.background='#eff6ff'; this.style.borderColor='#bfdbfe';">
                <span class="archivebox-zip-spinner" aria-hidden="true"></span>
                <span class="archivebox-zip-label">⬇ Download Zip</span>
            </a>
            <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
               href="{}"
               target="_blank"
               rel="noopener noreferrer"
               onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
               onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
                🔗 Original URL
            </a>

            <span style="border-left: 1px solid #e2e8f0; height: 24px; margin: 0 4px;"></span>

            <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #eff6ff; border: 1px solid #bfdbfe; border-radius: 8px; color: #1e40af; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
               href="/admin/core/snapshot/?id__exact={}"
               title="Create a fresh new snapshot of this URL"
               onmouseover="this.style.background='#dbeafe';"
               onmouseout="this.style.background='#eff6ff';">
                🆕 Snapshot Again
            </a>
            <button type="submit"
                    formaction="{}"
                    formmethod="post"
                    formnovalidate
                    class="btn"
                    style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #ecfdf5; border: 1px solid #a7f3d0; border-radius: 8px; color: #065f46; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s; cursor: pointer;"
                    title="Redo failed extractors (missing outputs)"
                    onmouseover="this.style.background='#d1fae5';"
                    onmouseout="this.style.background='#ecfdf5';">
                🔁 Retry Failed Extractors
            </button>
            <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #fffbeb; border: 1px solid #fde68a; border-radius: 8px; color: #92400e; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
               href="/admin/core/snapshot/?id__exact={}"
               title="Re-run all extractors (overwrite existing)"
               onmouseover="this.style.background='#fef3c7';"
               onmouseout="this.style.background='#fffbeb';">
                🔄 Reset &amp; Retry All Extractors
            </a>
            <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #fef2f2; border: 1px solid #fecaca; border-radius: 8px; color: #991b1b; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
               href="/admin/core/snapshot/?id__exact={}"
               title="Permanently delete this snapshot"
               onmouseover="this.style.background='#fee2e2';"
               onmouseout="this.style.background='#fef2f2';">
                ☠️ Delete
            </a>
        </div>
        """,
        summary_url,
        files_url,
        zip_url,
        # The original URL is third-party content: the link carries
        # rel="noopener noreferrer" so the opened page gets neither a
        # window.opener handle nor the admin URL as referrer.
        obj.url,
        obj.pk,
        redo_failed_url,
        obj.pk,
        obj.pk,
    )
|
||
|
||
def status_info(self, obj):
    """Render a short archived / favicon / extension summary for ``obj``."""
    favicon_url = build_snapshot_url(str(obj.id), "favicon.ico")
    archived_mark = "✅" if obj.is_archived else "❌"
    output_count = obj.num_outputs
    size_html = self.size(obj) or "0kb"
    extension = obj.extension or "-"
    return format_html(
        """
        Archived: {} ({} files {})
        Favicon: <img src="{}" style="height: 20px"/>
        Extension: {}
        """,
        archived_mark,
        output_count,
        size_html,
        favicon_url,
        extension,
    )
|
||
|
||
@admin.display(description="Archive Results")
def archiveresults_list(self, obj):
    """Render all of the snapshot's archive results via ``render_archiveresults_list``."""
    snapshot_results = obj.archiveresult_set.all()
    return render_archiveresults_list(snapshot_results)
|
||
|
||
@admin.display(
    description="Title",
    ordering="title",
)
def title_str(self, obj):
    """Render the snapshot title (when meaningful) above its URL.

    The title is shown only if it is non-empty, is not the placeholder
    ``pending...`` and does not merely repeat the URL (both compared
    case-insensitively). Title and visible URL text are cut to 128 characters.
    """
    title_raw = (obj.title or "").strip()
    url_raw = (obj.url or "").strip()
    is_placeholder = title_raw.lower() in ("pending...", url_raw.lower())
    show_title = bool(title_raw) and not is_placeholder
    css_class = "pending" if not show_title else "fetched"

    detail_url = build_web_url(f"/{obj.archive_path_from_db}/index.html")
    if show_title:
        decoded_title = urldecode(htmldecode(title_raw))
        title_html = format_html(
            '<a href="{}"><b class="status-{}">{}</b></a>',
            detail_url,
            css_class,
            decoded_title[:128],
        )
    else:
        title_html = ""

    url_block = (
        "{}"
        '<div style="font-size: 11px; color: #64748b; margin-top: 2px;">'
        '<a href="{}"><code style="user-select: all;">{}</code></a>'
        "</div>"
    )
    return format_html(
        url_block,
        title_html,
        url_raw or obj.url,
        (url_raw or obj.url)[:128],
    )
|
||
|
||
@admin.display(description="Tags", ordering="tag_count")
def tags_inline(self, obj):
    """Render the inline tag editor for ``obj``, wrapped in a ``tags-inline-editor`` span."""
    snapshot_id = str(obj.pk)
    field_name = f"tags_{obj.pk}"
    widget = InlineTagEditorWidget(snapshot_id=snapshot_id)

    # Use the prefetched tags when the helper provides them; otherwise query.
    tags = self._get_prefetched_tags(obj)
    if tags is None:
        tags = obj.tags.all()

    tags_html = widget.render(
        name=field_name,
        value=tags,
        attrs={"id": field_name},
        snapshot_id=snapshot_id,
    )
    return mark_safe(f'<span class="tags-inline-editor">{tags_html}</span>')
|
||
|
||
@admin.display(description="Tags")
def tags_badges(self, obj):
    """Render the tag widget for ``obj`` with ``editable=False``."""
    snapshot_id = str(obj.pk)
    field_name = f"tags_readonly_{obj.pk}"
    widget = InlineTagEditorWidget(snapshot_id=snapshot_id, editable=False)

    # Use the prefetched tags when the helper provides them; otherwise query.
    tags = self._get_prefetched_tags(obj)
    if tags is None:
        tags = obj.tags.all()

    tags_html = widget.render(
        name=field_name,
        value=tags,
        attrs={"id": field_name},
        snapshot_id=snapshot_id,
    )
    return mark_safe(f'<span class="tags-inline-editor">{tags_html}</span>')
|
||
|
||
def _get_preview_data(self, obj):
    """Work out which preview image to show for ``obj``.

    Returns:
        ``None`` when the snapshot has neither a ``screenshot`` nor a
        ``favicon`` result. Otherwise a dict with ``img_url``, ``img_alt``,
        ``preview_class``, ``onerror_js`` and ``fallback_list`` (a
        comma-joined list of URLs that the ``onerror`` handler tries in turn
        before removing the image). A screenshot is preferred over a favicon.
    """
    results = self._get_prefetched_results(obj)
    if results is None:
        plugins_present = set(
            obj.archiveresult_set.filter(plugin__in=("screenshot", "favicon")).values_list("plugin", flat=True),
        )
    else:
        plugins_present = {r.plugin for r in results}

    has_screenshot = "screenshot" in plugins_present
    has_favicon = "favicon" in plugins_present
    if not (has_screenshot or has_favicon):
        return None

    snapshot_id = str(obj.id)
    if not has_screenshot:
        img_url = build_snapshot_url(snapshot_id, "favicon/favicon.ico")
        fallbacks = [
            build_snapshot_url(snapshot_id, "favicon.ico"),
        ]
        img_alt = "Favicon"
        preview_class = "favicon"
    else:
        img_url = build_snapshot_url(snapshot_id, "screenshot/screenshot.png")
        fallbacks = [
            build_snapshot_url(snapshot_id, path)
            for path in ("screenshot.png", "favicon/favicon.ico", "favicon.ico")
        ]
        img_alt = "Screenshot"
        preview_class = "screenshot"

    # Inline handler: pop the next fallback URL into src, or drop the <img>.
    onerror_js = "".join(
        [
            "this.dataset.fallbacks && this.dataset.fallbacks.length ? ",
            "(this.src=this.dataset.fallbacks.split(',').shift(), ",
            "this.dataset.fallbacks=this.dataset.fallbacks.split(',').slice(1).join(',')) : ",
            "this.remove()",
        ],
    )

    return {
        "img_url": img_url,
        "img_alt": img_alt,
        "preview_class": preview_class,
        "onerror_js": onerror_js,
        "fallback_list": ",".join(fallbacks),
    }
|
||
|
||
@admin.display(description="", empty_value="")
def url_favicon(self, obj):
    """Render a small linked favicon for ``obj``, or ``""`` when no preview data exists."""
    if not self._get_preview_data(obj):
        return ""

    snapshot_id = str(obj.id)
    favicon_url = build_snapshot_url(snapshot_id, "favicon/favicon.ico")
    fallback_list = ",".join([build_snapshot_url(snapshot_id, "favicon.ico")])
    # Inline handler: try the next fallback URL, or remove the enclosing link.
    onerror_js = "".join(
        [
            "this.dataset.fallbacks && this.dataset.fallbacks.length ? ",
            "(this.src=this.dataset.fallbacks.split(',').shift(), ",
            "this.dataset.fallbacks=this.dataset.fallbacks.split(',').slice(1).join(',')) : ",
            "this.closest('a') && this.closest('a').remove()",
        ],
    )

    markup = (
        '<a href="{}" title="Open favicon" style="display:inline-flex; align-items:center; justify-content:center; width:32px; height:32px;">'
        '<img src="{}" alt="Favicon" decoding="async" loading="lazy" onerror="{}" data-fallbacks="{}" '
        'style="display:block; width:24px; height:24px; border-radius:6px; border:1px solid #e2e8f0; background:#fff; object-fit:contain; padding:2px;">'
        "</a>"
    )
    return format_html(markup, favicon_url, favicon_url, onerror_js, fallback_list)
|
||
|
||
@admin.display(description="Preview", empty_value="")
def preview_icon(self, obj):
    """Render the preview ``<img>`` for ``obj``, or ``None`` when there is no preview data."""
    preview = self._get_preview_data(obj)
    if preview:
        ordered_keys = ("img_url", "img_alt", "preview_class", "onerror_js", "fallback_list")
        return format_html(
            '<img src="{}" alt="{}" class="snapshot-preview {}" decoding="async" loading="lazy" onerror="{}" data-fallbacks="{}">',
            *(preview[key] for key in ordered_keys),
        )
    return None
|
||
|
||
@admin.display(description=" ", empty_value="")
def snapshot_summary(self, obj):
    """Render the summary card: optional preview image, total size and a link to the files.

    The size reads ``pending`` while the reported output size is zero or missing.
    """
    preview = self._get_preview_data(obj)
    stats = self._get_progress_stats(obj)
    archive_size = stats["output_size"] or 0
    if archive_size:
        size_txt = printable_filesize(archive_size)
    else:
        size_txt = "pending"

    if not preview:
        screenshot_html = ""
    else:
        screenshot_html = format_html(
            '<a href="{href}" title="Open snapshot live view" style="display:block; flex:0 0 220px; width:220px;">'
            '<img src="{src}" alt="{alt}" decoding="async" loading="lazy" onerror="{onerror}" data-fallbacks="{fallbacks}" '
            'style="display:block; width:100%; max-width:220px; aspect-ratio: 16 / 10; object-fit: cover; object-position: top; '
            'border-radius: 10px; border: 1px solid #e2e8f0; background: #f8fafc;">'
            "</a>",
            href=build_web_url(f"/{obj.archive_path}"),
            src=preview["img_url"],
            alt=preview["img_alt"],
            onerror=preview["onerror_js"],
            fallbacks=preview["fallback_list"],
        )

    card_markup = (
        '<div style="display:flex; gap:16px; align-items:flex-start;">'
        "{}"
        '<div style="min-width:0; flex:1;">'
        '<div style="font: 600 12px/1.4 -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,sans-serif; color:#64748b; text-transform:uppercase; letter-spacing:0.04em; margin-bottom:4px;">snap_dir size</div>'
        '<div style="font: 700 28px/1.1 -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,sans-serif; color:#0f172a; margin-bottom:8px;">{}</div>'
        '<div style="font-size:13px; line-height:1.5; color:#64748b;">'
        'Open <a href="{}"><code>{}</code></a> to inspect files.'
        "</div>"
        "</div>"
        "</div>"
    )
    return format_html(
        card_markup,
        screenshot_html,
        size_txt,
        build_web_url(f"/{obj.archive_path}"),
        obj.archive_path,
    )
|
||
|
||
@admin.display(
    description="Files Saved",
    ordering="ar_succeeded_count",
)
def files(self, obj):
    """Render one icon link per plugin that succeeded and produced output.

    Only the first qualifying result per plugin is kept. Icons are ordered by
    ``_plugin_sort_order()`` (unknown plugins last), then by plugin name. A
    dimmed ``...`` placeholder is returned when nothing qualifies.
    """
    results = self._get_prefetched_results(obj)
    if results is None:
        results = obj.archiveresult_set.only("plugin", "status", "output_files", "output_str")

    first_result_by_plugin: dict[str, ArchiveResult] = {}
    for result in results:
        succeeded = result.status == ArchiveResult.StatusChoices.SUCCEEDED
        if succeeded and (result.output_files or str(result.output_str or "").strip()):
            if result.plugin not in first_result_by_plugin:
                first_result_by_plugin[result.plugin] = result

    if not first_result_by_plugin:
        return mark_safe('<span style="opacity: 0.35;">...</span>')

    plugin_order = _plugin_sort_order()

    def sort_key(result):
        return (plugin_order.get(result.plugin, 9999), result.plugin)

    icon_links = []
    for result in sorted(first_result_by_plugin.values(), key=sort_key):
        icon_links.append(
            format_html(
                '<a href="{}" class="exists-True" title="{}">{}</a>',
                self._result_output_href(obj, result),
                result.plugin,
                get_plugin_icon(result.plugin),
            ),
        )

    # Each link is already escaped by format_html, so the joined string is safe.
    return format_html(
        '<span class="files-icons files-icons--compact" style="font-size: 1em; opacity: 0.8;">{}</span>',
        mark_safe("".join(icon_links)),
    )
|
||
|
||
@admin.display(
    # ordering='archiveresult_count'
)
def size(self, obj):
    """Render the snapshot's output size as a link to its archive path.

    Sizes above 52428800 bytes are bolded; a zero or missing size shows a
    dimmed ``...`` placeholder.
    """
    archive_size = self._get_progress_stats(obj)["output_size"] or 0
    if not archive_size:
        size_txt = mark_safe('<span style="opacity: 0.3">...</span>')
    else:
        size_txt = printable_filesize(archive_size)
        if archive_size > 52428800:
            size_txt = mark_safe(f"<b>{size_txt}</b>")

    files_href = build_web_url(f"/{obj.archive_path}")
    return format_html('<a href="{}" title="View all files">{}</a>', files_href, size_txt)
|
||
|
||
@admin.display(
    description="Status",
    ordering="status",
)
def status_with_progress(self, obj):
    """Show status with progress bar for in-progress snapshots.

    Snapshots whose status is ``started`` and that have at least one hook
    counted get a progress bar: a ``done/total hooks`` counter, a bar whose
    width is the overall percent and whose gradient marks the succeeded and
    failed shares, and a succeeded / failed / running tally. Every other
    snapshot gets a coloured badge with the upper-cased status.

    Args:
        obj: The Snapshot being displayed.

    Returns:
        Safe HTML produced by ``format_html``.
    """
    stats = self._get_progress_stats(obj)
    total = stats["total"]

    # For started snapshots, show progress bar
    if obj.status == "started" and total > 0:
        succeeded = stats["succeeded"]
        failed = stats["failed"]
        # "skipped" is read with .get(), matching how _get_progress_stats
        # treats that key as optional in the stats it receives.
        skipped = stats.get("skipped", 0)

        # total > 0 is guaranteed here, so the divisions need no zero guard.
        succeeded_pct = int(succeeded / total * 100)
        settled_pct = int((succeeded + failed) / total * 100)

        return format_html(
            """<div style="min-width: 90px;">
                <div style="display: flex; align-items: center; gap: 6px; margin-bottom: 4px;">
                    <span class="snapshot-progress-spinner"></span>
                    <span style="font-size: 11px; color: #64748b;">{}/{} hooks</span>
                </div>
                <div style="background: #e2e8f0; border-radius: 4px; height: 6px; overflow: hidden;">
                    <div style="background: linear-gradient(90deg, #10b981 0%, #10b981 {}%, #ef4444 {}%, #ef4444 {}%, #3b82f6 {}%, #3b82f6 100%);
                                width: {}%; height: 100%; transition: width 0.3s;"></div>
                </div>
                <div style="font-size: 10px; color: #94a3b8; margin-top: 2px;">
                    ✓{} ✗{} ⏳{}
                </div>
            </div>""",
            succeeded + failed + skipped,
            total,
            succeeded_pct,
            succeeded_pct,
            settled_pct,
            settled_pct,
            stats["percent"],
            succeeded,
            failed,
            stats["running"],
        )

    # Status badge colors: (foreground, background)
    status_colors = {
        "queued": ("#f59e0b", "#fef3c7"),  # amber
        "started": ("#3b82f6", "#dbeafe"),  # blue
        "sealed": ("#10b981", "#d1fae5"),  # green
        "succeeded": ("#10b981", "#d1fae5"),  # green
        "failed": ("#ef4444", "#fee2e2"),  # red
        "backoff": ("#f59e0b", "#fef3c7"),  # amber
        "skipped": ("#6b7280", "#f3f4f6"),  # gray
    }
    fg_color, bg_color = status_colors.get(obj.status, ("#6b7280", "#f3f4f6"))

    # For other statuses, show simple badge
    return format_html(
        '<span style="display: inline-block; padding: 2px 8px; border-radius: 12px; '
        'font-size: 11px; font-weight: 500; background: {}; color: {};">{}</span>',
        bg_color,
        fg_color,
        obj.status.upper(),
    )
|
||
|
||
@admin.display(
    description="Size",
    ordering="output_size_sum",
)
def size_with_stats(self, obj):
    """Render the output size, a succeeded/total hook count and a ZIP download pill.

    Sizes above 50MB are bolded; a zero or missing size shows a dimmed ``...``.
    The hook count line is only included when at least one hook is counted.
    """
    stats = self._get_progress_stats(obj)
    size_bytes = stats["output_size"] or 0
    zip_link = format_html(
        '<a href="{}" class="archivebox-zip-button" data-loading-mode="spinner-only" onclick="return window.archiveboxHandleZipClick(this, event);" style="display:inline-flex; align-items:center; justify-content:center; gap:4px; width:48px; min-width:48px; height:22px; margin-top:4px; padding:0; box-sizing:border-box; border-radius:999px; border:1px solid #cbd5e1; background:#f8fafc; color:#64748b; font-size:10px; font-weight:600; line-height:1; text-decoration:none; transition:all 0.15s;" onmouseover="this.style.color=\'#1d4ed8\'; this.style.borderColor=\'#93c5fd\'; this.style.background=\'#eff6ff\';" onmouseout="this.style.color=\'#64748b\'; this.style.borderColor=\'#cbd5e1\'; this.style.background=\'#f8fafc\';"><span class="archivebox-zip-spinner" aria-hidden="true"></span><span class="archivebox-zip-label">⬇ ZIP</span></a>',
        self.get_snapshot_zip_url(obj),
    )

    if not size_bytes:
        size_txt = mark_safe('<span style="opacity: 0.3">...</span>')
    else:
        size_txt = printable_filesize(size_bytes)
        if size_bytes > 52428800:  # 50MB
            size_txt = mark_safe(f"<b>{size_txt}</b>")

    # Show hook statistics
    if stats["total"] > 0:
        with_hooks_markup = (
            '<a href="{}" title="View all files" style="white-space: nowrap;">'
            "{}</a>"
            '<div style="font-size: 10px; color: #94a3b8; margin-top: 2px;">'
            "{}/{} hooks</div>"
            "{}"
        )
        return format_html(
            with_hooks_markup,
            build_web_url(f"/{obj.archive_path_from_db}"),
            size_txt,
            stats["succeeded"],
            stats["total"],
            zip_link,
        )

    files_href = build_web_url(f"/{obj.archive_path_from_db}")
    return format_html('<a href="{}" title="View all files">{}</a>{}', files_href, size_txt, zip_link)
|
||
|
||
def _get_progress_stats(self, obj):
    """Return hook progress figures for a snapshot as a dict.

    When the snapshot's archive results were prefetched they are tallied in
    memory; otherwise the figures come from ``obj.get_progress_stats()``.
    In both cases ``total`` is raised to at least the expected hook count,
    and ``pending`` / ``percent`` are derived from that adjusted total.
    """
    prefetched = self._get_prefetched_results(obj)

    if prefetched is None:
        # Fallback path: start from the model's own stats and re-derive the
        # fields that depend on the (possibly larger) expected total.
        stats = obj.get_progress_stats()
        total = max(stats["total"], self._get_expected_hook_total(obj))
        finished = stats["succeeded"] + stats["failed"] + stats.get("skipped", 0) + stats.get("noresults", 0)
        stats["total"] = total
        stats["pending"] = max(total - finished - stats["running"], 0)
        stats["percent"] = int((finished / total * 100) if total > 0 else 0)
        return stats

    total = max(len(prefetched), self._get_expected_hook_total(obj))

    # One pass over the results; statuses outside this set are not counted
    # individually and therefore end up in "pending".
    tally = dict.fromkeys(("succeeded", "failed", "started", "skipped", "noresults"), 0)
    for result in prefetched:
        if result.status in tally:
            tally[result.status] += 1

    running = tally["started"]
    finished = tally["succeeded"] + tally["failed"] + tally["skipped"] + tally["noresults"]

    # Prefer an output_size_sum attribute on the snapshot when one is present;
    # otherwise add up the per-result sizes.
    if hasattr(obj, "output_size_sum"):
        output_size = obj.output_size_sum or 0
    else:
        output_size = sum(result.output_size or 0 for result in prefetched)

    return {
        "total": total,
        "succeeded": tally["succeeded"],
        "failed": tally["failed"],
        "running": running,
        "pending": max(total - finished - running, 0),
        "skipped": tally["skipped"],
        "noresults": tally["noresults"],
        "percent": int((finished / total * 100) if total > 0 else 0),
        "output_size": output_size or 0,
        # Sealed means the snapshot is neither queued nor started.
        "is_sealed": obj.status not in (obj.StatusChoices.QUEUED, obj.StatusChoices.STARTED),
    }
|
||
|
||
def _get_prefetched_results(self, obj):
    """Return ``obj.archiveresult_set.all()`` if that relation was prefetched, else ``None``."""
    # An absent cache is treated like an empty one.
    prefetch_cache = getattr(obj, "_prefetched_objects_cache", {})
    if "archiveresult_set" not in prefetch_cache:
        return None
    return obj.archiveresult_set.all()
|
||
|
||
def _get_expected_hook_total(self, obj) -> int:
    """Return the expected number of hooks for a snapshot, or 0 if it cannot be determined.

    The config resolved for the snapshot and its crawl is serialised to a
    canonical JSON string (sorted keys, compact separators, non-JSON values
    stringified) and passed to ``_expected_snapshot_hook_total``.
    """
    from archivebox.config.configset import get_config

    try:
        serialized = json.dumps(
            get_config(crawl=obj.crawl, snapshot=obj),
            sort_keys=True,
            default=str,
            separators=(",", ":"),
        )
        return _expected_snapshot_hook_total(serialized)
    except Exception:
        # Best-effort: any failure while resolving or serialising the config
        # is reported as "no expectation" rather than raised to the caller.
        return 0
|
||
|
||
def _get_prefetched_tags(self, obj):
    """Return the prefetched tags of ``obj`` as a list, or ``None`` if tags were not prefetched."""
    # An absent cache is treated like an empty one.
    prefetch_cache = getattr(obj, "_prefetched_objects_cache", {})
    if "tags" not in prefetch_cache:
        return None
    return list(prefetch_cache["tags"])
|
||
|
||
def _result_output_href(self, obj, result: "ArchiveResult") -> str:
    """Build the site-relative href for the primary output of an archive result.

    Resolution order:
      1. the first entry of ``result.output_file_paths()`` that is not a
         log / pid / shell bookkeeping file;
      2. ``result.output_str`` when it looks like a relative path (non-empty,
         not ``.`` or ``./``, not a URL, not an absolute path);
      3. the plugin's directory inside the snapshot.

    Paths are placed under ``<plugin>/`` unless they already start with it.
    """
    snapshot_root = f"/{obj.archive_path_from_db}"
    plugin_prefix = f"{result.plugin}/"
    ignored = {"stdout.log", "stderr.log", "hook.pid", "listener.pid", "cmd.sh"}

    for rel_path in result.output_file_paths():
        raw_path = str(rel_path or "").strip().lstrip("/")
        if not raw_path:
            continue
        basename = raw_path.rsplit("/", 1)[-1]
        if basename in ignored or raw_path.endswith((".pid", ".log", ".sh")):
            continue
        relative_path = raw_path if raw_path.startswith(plugin_prefix) else f"{plugin_prefix}{raw_path}"
        return f"{snapshot_root}/{relative_path}"

    # Test for an absolute path BEFORE touching leading slashes. Stripping
    # them first (as was done previously) made the startswith("/") guard
    # unreachable, so absolute paths were linked as if they were relative.
    raw_output = str(result.output_str or "").strip()
    if raw_output and raw_output not in {".", "./"} and "://" not in raw_output and not raw_output.startswith("/"):
        relative_path = raw_output if raw_output.startswith(plugin_prefix) else f"{plugin_prefix}{raw_output}"
        return f"{snapshot_root}/{relative_path}"

    return f"{snapshot_root}/{plugin_prefix}"
|
||
|
||
def _get_ordering_fields(self, request):
    """Return the set of ``list_display`` entries referenced by the ``o`` query parameter.

    ``o`` is read as dot-separated 1-based column numbers, optionally
    negative; the sign is ignored here. Empty, non-numeric and out-of-range
    parts are skipped.
    """
    selected = set()
    raw_ordering = request.GET.get("o")
    if not raw_ordering:
        return selected

    for token in raw_ordering.split("."):
        if not token:
            continue
        try:
            position = abs(int(token))
        except ValueError:
            continue
        # position is 1-based; 0 and anything past the last column is ignored.
        if 1 <= position <= len(self.list_display):
            selected.add(self.list_display[position - 1])
    return selected
|
||
|
||
@admin.display(description="Original URL", ordering="url")
def url_str(self, obj):
    """Render the snapshot URL as a link whose visible text is cut to 128 characters."""
    full_url = obj.url
    shown_url = full_url[:128]
    return format_html(
        '<a href="{}"><code style="user-select: all;">{}</code></a>',
        full_url,
        shown_url,
    )
|
||
|
||
@admin.display(description="Health", ordering="health")
def health_display(self, obj):
    """Render ``obj.health`` coloured green (>= 80), orange (>= 50) or red (below 50)."""
    score = obj.health
    if score >= 80:
        color = "green"
    elif score >= 50:
        color = "orange"
    else:
        color = "red"
    return format_html('<span style="color: {};">{}</span>', color, score)
|
||
|
||
def grid_view(self, request, extra_context=None):
    """Render the changelist using the grid template.

    The admin class's ``change_list_template``, ``list_per_page`` and
    ``list_max_show_all`` are temporarily overridden, ``changelist_view`` is
    called, and the original values are restored afterwards, even when the
    view raises.

    Returns whatever ``self.changelist_view`` returns.
    """
    # Save before monkey patching to restore for changelist list view
    admin_cls = type(self)
    saved_change_list_template = admin_cls.change_list_template
    saved_list_per_page = admin_cls.list_per_page
    saved_list_max_show_all = admin_cls.list_max_show_all

    # Monkey patch here plus core_tags.py
    # NOTE(review): these are class-level attributes, so the override is
    # visible to any other request served by this admin class meanwhile.
    admin_cls.change_list_template = "private_index_grid.html"
    admin_cls.list_per_page = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
    admin_cls.list_max_show_all = admin_cls.list_per_page

    try:
        # Call monkey patched view
        return self.changelist_view(request, extra_context=extra_context)
    finally:
        # Always restore, otherwise an exception in the view would leave the
        # regular list view permanently rendering in grid mode.
        admin_cls.change_list_template = saved_change_list_template
        admin_cls.list_per_page = saved_list_per_page
        admin_cls.list_max_show_all = saved_list_max_show_all
|
||
|
||
# for debugging, uncomment this to print all requests:
|
||
# def changelist_view(self, request, extra_context=None):
|
||
# print('[*] Got request', request.method, request.POST)
|
||
# return super().changelist_view(request, extra_context=None)
|
||
|
||
@admin.action(description="🔁 Redo Failed")
def update_snapshots(self, request, queryset):
    """Admin action: queue the selected snapshots for background archiving with ``overwrite=False``."""
    archive_options = {"overwrite": False, "out_dir": DATA_DIR}
    queued = bg_archive_snapshots(queryset, kwargs=archive_options)

    summary = f"Queued {queued} snapshots for re-archiving. The background runner will process them."
    messages.success(request, summary)
|
||
|
||
@admin.action(
    description="🆕 Archive Now",
)
def resnapshot_snapshot(self, request, queryset):
    """Admin action: submit the URLs of the selected snapshots as one new background add job.

    Snapshots without a URL are skipped. An info message is shown, and
    nothing is queued, when the selection is empty or no selected snapshot
    has a URL.
    """
    snapshots = list(queryset)
    if not snapshots:
        messages.info(request, "No snapshots selected.")
        return

    urls = [snapshot.url for snapshot in snapshots if snapshot.url]
    if not urls:
        messages.info(request, "No valid snapshot URLs were found to archive.")
        return

    # bg_add receives the URLs as a single newline-separated string.
    bg_add({"urls": "\n".join(urls)})

    # Report the number of URLs actually submitted, not the number of rows
    # selected (rows without a URL were skipped above).
    messages.success(
        request,
        f"Creating 1 new crawl with {len(urls)} fresh snapshots. The background runner will process them.",
    )
|
||
|
||
@admin.action(description="🔄 Redo")
def overwrite_snapshots(self, request, queryset):
    """Admin action: queue the selected snapshots for background archiving with ``overwrite=True``."""
    archive_options = {"overwrite": True, "out_dir": DATA_DIR}
    queued = bg_archive_snapshots(queryset, kwargs=archive_options)

    summary = f"Queued {queued} snapshots for full re-archive (overwriting existing). The background runner will process them."
    messages.success(request, summary)
|
||
|
||
@admin.action(
    description="🗑️ Delete",
)
def delete_snapshots(self, request, queryset):
    """Delete snapshots in a single transaction to avoid SQLite concurrency issues.

    The primary keys of the selection are read first, then every matching
    Snapshot is deleted inside one ``transaction.atomic()`` block. The success
    message reports both the number of selected snapshots and the total
    number of objects removed by the delete.
    """
    from django.db import transaction

    # Get list of IDs to delete first (outside transaction)
    ids_to_delete = list(queryset.values_list("pk", flat=True))
    # Count from the fetched IDs instead of issuing a separate COUNT query,
    # so the reported figure always matches the rows targeted below.
    total = len(ids_to_delete)

    # Delete everything in a single atomic transaction
    with transaction.atomic():
        deleted_count, _ = Snapshot.objects.filter(pk__in=ids_to_delete).delete()

    messages.success(
        request,
        mark_safe(
            f"Successfully deleted {total} Snapshots ({deleted_count} total objects including related records). Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed.",
        ),
    )
|
||
|
||
@admin.action(
    description="+",
)
def add_tags(self, request, queryset):
    """Admin action: attach the tags named in the POSTed ``tags`` field to every selected snapshot.

    ``tags`` is a comma-separated string of tag names. Each name is matched
    case-insensitively against existing tags and created when missing.
    A warning is shown, and nothing is changed, when no usable tag name is
    supplied.
    """
    from archivebox.core.models import SnapshotTag

    # Get tags from the form - now comma-separated string
    tags_str = request.POST.get("tags", "")
    tag_names = [name.strip() for name in tags_str.split(",") if name.strip()]
    if not tag_names:
        # Covers both a missing field and input made only of commas/blanks.
        messages.warning(request, "No tags specified.")
        return

    # Get/create Tag objects, keyed by pk so names that differ only in case
    # (or are repeated) resolve to one tag and are counted once.
    tags_by_pk = {}
    for name in tag_names:
        tag, _ = Tag.objects.get_or_create(
            name__iexact=name,
            defaults={"name": name},
        )
        # NOTE(review): the tag is re-read by case-insensitive name after
        # get_or_create; kept as-is, confirm whether it is still required.
        tag = Tag.objects.filter(name__iexact=name).first() or tag
        tags_by_pk.setdefault(tag.pk, tag)
    tags = list(tags_by_pk.values())

    # Get snapshot IDs efficiently (works with select_across for all pages)
    snapshot_ids = list(queryset.values_list("id", flat=True))
    num_snapshots = len(snapshot_ids)

    print("[+] Adding tags", [t.name for t in tags], "to", num_snapshots, "Snapshots")

    # Bulk create M2M relationships (one bulk_create call per tag, not per snapshot)
    for tag in tags:
        SnapshotTag.objects.bulk_create(
            [SnapshotTag(snapshot_id=sid, tag=tag) for sid in snapshot_ids],
            ignore_conflicts=True,  # Skip if relationship already exists
        )

    messages.success(
        request,
        f"Added {len(tags)} tag(s) to {num_snapshots} Snapshot(s).",
    )
|
||
|
||
@admin.action(description="–")
def remove_tags(self, request, queryset):
    """Admin action: detach the tags named in the POSTed ``tags`` field from every selected snapshot.

    ``tags`` is a comma-separated string of tag names, matched
    case-insensitively; names with no matching tag are ignored. A warning is
    shown, and nothing is changed, when the field is empty or no tag matches.
    """
    from archivebox.core.models import SnapshotTag

    # Tags arrive from the form as one comma-separated string.
    raw_tags = request.POST.get("tags", "")
    if not raw_tags:
        messages.warning(request, "No tags specified.")
        return

    # Resolve each non-blank name to an existing Tag (case-insensitive).
    wanted_names = [piece.strip() for piece in raw_tags.split(",") if piece.strip()]
    matched = []
    for wanted in wanted_names:
        found = Tag.objects.filter(name__iexact=wanted).first()
        if found:
            matched.append(found)

    if not matched:
        messages.warning(request, "No matching tags found.")
        return

    # Collect snapshot IDs up front (works with select_across for all pages).
    snapshot_ids = list(queryset.values_list("id", flat=True))
    num_snapshots = len(snapshot_ids)
    tag_ids = [tag.pk for tag in matched]

    print("[-] Removing tags", [t.name for t in matched], "from", num_snapshots, "Snapshots")

    # One bulk delete over the through table rather than one per snapshot.
    links = SnapshotTag.objects.filter(snapshot_id__in=snapshot_ids, tag_id__in=tag_ids)
    deleted_count, _ = links.delete()

    messages.success(
        request,
        f"Removed {len(matched)} tag(s) from {num_snapshots} Snapshot(s) ({deleted_count} associations deleted).",
    )
|