mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
wip
This commit is contained in:
@@ -14,6 +14,7 @@ from archivebox.config.common import SERVER_CONFIG
|
||||
from archivebox.misc.paginators import AccelleratedPaginator
|
||||
from archivebox.base_models.admin import BaseModelAdmin
|
||||
from archivebox.hooks import get_plugin_icon
|
||||
from archivebox.core.host_utils import build_snapshot_url
|
||||
|
||||
|
||||
from archivebox.core.models import ArchiveResult, Snapshot
|
||||
@@ -57,7 +58,11 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
|
||||
|
||||
# Build output link - use embed_path() which checks output_files first
|
||||
embed_path = result.embed_path() if hasattr(result, 'embed_path') else None
|
||||
output_link = f'/{result.snapshot.archive_path}/{embed_path}' if embed_path and result.status == 'succeeded' else f'/{result.snapshot.archive_path}/'
|
||||
snapshot_id = str(getattr(result, 'snapshot_id', ''))
|
||||
if embed_path and result.status == 'succeeded':
|
||||
output_link = build_snapshot_url(snapshot_id, embed_path)
|
||||
else:
|
||||
output_link = build_snapshot_url(snapshot_id, '')
|
||||
|
||||
# Get version - try cmd_version field
|
||||
version = result.cmd_version if result.cmd_version else '-'
|
||||
@@ -252,7 +257,7 @@ class ArchiveResultInline(admin.TabularInline):
|
||||
class ArchiveResultAdmin(BaseModelAdmin):
|
||||
list_display = ('id', 'created_at', 'snapshot_info', 'tags_str', 'status', 'plugin_with_icon', 'cmd_str', 'output_str')
|
||||
sort_fields = ('id', 'created_at', 'plugin', 'status')
|
||||
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon')
|
||||
readonly_fields = ('cmd', 'cmd_version', 'pwd', 'cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon')
|
||||
search_fields = ('id', 'snapshot__url', 'plugin', 'output_str', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
||||
autocomplete_fields = ['snapshot']
|
||||
|
||||
@@ -300,10 +305,11 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
description='Snapshot Info'
|
||||
)
|
||||
def snapshot_info(self, result):
|
||||
snapshot_id = str(result.snapshot_id)
|
||||
return format_html(
|
||||
'<a href="/{}/index.html"><b><code>[{}]</code></b> {} {}</a><br/>',
|
||||
result.snapshot.archive_path,
|
||||
str(result.snapshot.id)[:8],
|
||||
'<a href="{}"><b><code>[{}]</code></b> {} {}</a><br/>',
|
||||
build_snapshot_url(snapshot_id, "index.html"),
|
||||
snapshot_id[:8],
|
||||
result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
|
||||
result.snapshot.url[:128],
|
||||
)
|
||||
@@ -335,10 +341,10 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
# Determine output link path - use embed_path() which checks output_files
|
||||
embed_path = result.embed_path() if hasattr(result, 'embed_path') else None
|
||||
output_path = embed_path if (result.status == 'succeeded' and embed_path) else 'index.html'
|
||||
snapshot_id = str(result.snapshot_id)
|
||||
return format_html(
|
||||
'<a href="/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
|
||||
result.snapshot.archive_path,
|
||||
output_path,
|
||||
'<a href="{}" class="output-link">↗️</a><pre>{}</pre>',
|
||||
build_snapshot_url(snapshot_id, output_path),
|
||||
result.output_str,
|
||||
)
|
||||
|
||||
@@ -348,7 +354,11 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
'<pre style="display: inline-block">{}</pre><br/>',
|
||||
result.output_str,
|
||||
)
|
||||
output_html += format_html('<a href="/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.archive_path))
|
||||
snapshot_id = str(result.snapshot_id)
|
||||
output_html += format_html(
|
||||
'<a href="{}#all">See result files ...</a><br/><pre><code>',
|
||||
build_snapshot_url(snapshot_id, "index.html"),
|
||||
)
|
||||
embed_path = result.embed_path() if hasattr(result, 'embed_path') else ''
|
||||
path_from_embed = (snapshot_dir / (embed_path or ''))
|
||||
output_html += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(embed_path))
|
||||
|
||||
@@ -8,6 +8,8 @@ from django.contrib import admin, messages
|
||||
from django.urls import path
|
||||
from django.utils.html import format_html, mark_safe
|
||||
from django.utils import timezone
|
||||
from django.db.models import Q, Sum, Count, Prefetch
|
||||
from django.db.models.functions import Coalesce
|
||||
from django import forms
|
||||
from django.template import Template, RequestContext
|
||||
from django.contrib.admin.helpers import ActionForm
|
||||
@@ -18,11 +20,12 @@ from archivebox.misc.util import htmldecode, urldecode
|
||||
from archivebox.misc.paginators import AccelleratedPaginator
|
||||
from archivebox.misc.logging_util import printable_filesize
|
||||
from archivebox.search.admin import SearchResultsAdminMixin
|
||||
from archivebox.core.host_utils import build_snapshot_url, build_web_url
|
||||
|
||||
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
|
||||
from archivebox.workers.tasks import bg_archive_snapshots, bg_add
|
||||
|
||||
from archivebox.core.models import Tag, Snapshot
|
||||
from archivebox.core.models import Tag, Snapshot, ArchiveResult
|
||||
from archivebox.core.admin_archiveresults import ArchiveResultInline, render_archiveresults_list
|
||||
from archivebox.core.widgets import TagEditorWidget, InlineTagEditorWidget
|
||||
|
||||
@@ -36,7 +39,7 @@ class SnapshotActionForm(ActionForm):
|
||||
super().__init__(*args, **kwargs)
|
||||
# Define tags field in __init__ to avoid database access during app initialization
|
||||
self.fields['tags'] = forms.CharField(
|
||||
label='Edit tags',
|
||||
label='',
|
||||
required=False,
|
||||
widget=TagEditorWidget(),
|
||||
)
|
||||
@@ -67,6 +70,19 @@ class SnapshotActionForm(ActionForm):
|
||||
# )
|
||||
|
||||
|
||||
class TagNameListFilter(admin.SimpleListFilter):
|
||||
title = 'By tag name'
|
||||
parameter_name = 'tag'
|
||||
|
||||
def lookups(self, request, model_admin):
|
||||
return [(str(tag.pk), tag.name) for tag in Tag.objects.order_by('name')]
|
||||
|
||||
def queryset(self, request, queryset):
|
||||
if self.value():
|
||||
return queryset.filter(tags__id=self.value())
|
||||
return queryset
|
||||
|
||||
|
||||
class SnapshotAdminForm(forms.ModelForm):
|
||||
"""Custom form for Snapshot admin with tag editor widget."""
|
||||
tags_editor = forms.CharField(
|
||||
@@ -117,11 +133,11 @@ class SnapshotAdminForm(forms.ModelForm):
|
||||
|
||||
class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
form = SnapshotAdminForm
|
||||
list_display = ('created_at', 'title_str', 'status_with_progress', 'files', 'size_with_stats', 'health_display', 'url_str')
|
||||
sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
|
||||
list_display = ('created_at', 'preview_icon', 'title_str', 'tags_inline', 'status_with_progress', 'files', 'size_with_stats')
|
||||
sort_fields = ('title_str', 'created_at', 'status', 'crawl')
|
||||
readonly_fields = ('admin_actions', 'status_info', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'output_dir', 'archiveresults_list')
|
||||
search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
|
||||
list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'crawl__created_by', 'tags__name')
|
||||
list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'crawl__created_by', TagNameListFilter)
|
||||
|
||||
fieldsets = (
|
||||
('URL', {
|
||||
@@ -163,7 +179,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
)
|
||||
|
||||
ordering = ['-created_at']
|
||||
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
|
||||
actions = ['add_tags', 'remove_tags', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
|
||||
inlines = [] # Removed TagInline, using TagEditorWidget instead
|
||||
list_per_page = min(max(5, SERVER_CONFIG.SNAPSHOTS_PER_PAGE), 5000)
|
||||
|
||||
@@ -182,6 +198,13 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
self.message_user(request, f'Error occurred while loading the page: {str(e)} {request.GET} {request.POST}')
|
||||
return super().changelist_view(request, GLOBAL_CONTEXT)
|
||||
|
||||
def get_actions(self, request):
|
||||
actions = super().get_actions(request)
|
||||
if 'delete_selected' in actions:
|
||||
func, name, _desc = actions['delete_selected']
|
||||
actions['delete_selected'] = (func, name, 'Delete')
|
||||
return actions
|
||||
|
||||
|
||||
def get_urls(self):
|
||||
urls = super().get_urls()
|
||||
@@ -196,6 +219,52 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
|
||||
# self.request = request
|
||||
# return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult'))
|
||||
def get_queryset(self, request):
|
||||
self.request = request
|
||||
ordering_fields = self._get_ordering_fields(request)
|
||||
needs_size_sort = 'size_with_stats' in ordering_fields
|
||||
needs_files_sort = 'files' in ordering_fields
|
||||
needs_tags_sort = 'tags_inline' in ordering_fields
|
||||
|
||||
prefetch_qs = ArchiveResult.objects.filter(
|
||||
Q(status='succeeded')
|
||||
).only(
|
||||
'id',
|
||||
'snapshot_id',
|
||||
'plugin',
|
||||
'status',
|
||||
'output_size',
|
||||
'output_files',
|
||||
'output_str',
|
||||
)
|
||||
|
||||
qs = (
|
||||
super()
|
||||
.get_queryset(request)
|
||||
.defer('config', 'notes')
|
||||
.prefetch_related('tags')
|
||||
.prefetch_related(Prefetch('archiveresult_set', queryset=prefetch_qs))
|
||||
)
|
||||
|
||||
if needs_size_sort:
|
||||
qs = qs.annotate(
|
||||
output_size_sum=Coalesce(Sum(
|
||||
'archiveresult__output_size',
|
||||
filter=Q(archiveresult__status='succeeded'),
|
||||
), 0),
|
||||
)
|
||||
|
||||
if needs_files_sort:
|
||||
qs = qs.annotate(
|
||||
ar_succeeded_count=Count(
|
||||
'archiveresult',
|
||||
filter=Q(archiveresult__status='succeeded'),
|
||||
),
|
||||
)
|
||||
if needs_tags_sort:
|
||||
qs = qs.annotate(tag_count=Count('tags', distinct=True))
|
||||
|
||||
return qs
|
||||
|
||||
@admin.display(description="Imported Timestamp")
|
||||
def imported_timestamp(self, obj):
|
||||
@@ -233,17 +302,19 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
# )
|
||||
|
||||
def admin_actions(self, obj):
|
||||
summary_url = build_web_url(f'/{obj.archive_path}')
|
||||
results_url = build_web_url(f'/{obj.archive_path}/index.html#all')
|
||||
return format_html(
|
||||
'''
|
||||
<div style="display: flex; flex-wrap: wrap; gap: 12px; align-items: center;">
|
||||
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
|
||||
href="/{}"
|
||||
href="{}"
|
||||
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
|
||||
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
|
||||
📄 Summary Page
|
||||
</a>
|
||||
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
|
||||
href="/{}/index.html#all"
|
||||
href="{}"
|
||||
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
|
||||
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
|
||||
📁 Result Files
|
||||
@@ -263,7 +334,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
title="Get missing extractors"
|
||||
onmouseover="this.style.background='#d1fae5';"
|
||||
onmouseout="this.style.background='#ecfdf5';">
|
||||
⬇️ Get Missing
|
||||
⬇️ Finish
|
||||
</a>
|
||||
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #eff6ff; border: 1px solid #bfdbfe; border-radius: 8px; color: #1e40af; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
|
||||
href="/admin/core/snapshot/?id__exact={}"
|
||||
@@ -291,8 +362,8 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
<b>Tip:</b> Action buttons link to the list view with this snapshot pre-selected. Select it and use the action dropdown to execute.
|
||||
</p>
|
||||
''',
|
||||
obj.archive_path,
|
||||
obj.archive_path,
|
||||
summary_url,
|
||||
results_url,
|
||||
obj.url,
|
||||
obj.pk,
|
||||
obj.pk,
|
||||
@@ -301,6 +372,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
)
|
||||
|
||||
def status_info(self, obj):
|
||||
favicon_url = build_snapshot_url(str(obj.id), 'favicon.ico')
|
||||
return format_html(
|
||||
'''
|
||||
Archived: {} ({} files {})
|
||||
@@ -310,7 +382,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
'✅' if obj.is_archived else '❌',
|
||||
obj.num_outputs,
|
||||
self.size(obj) or '0kb',
|
||||
f'/{obj.archive_path}/favicon.ico',
|
||||
favicon_url,
|
||||
obj.extension or '-',
|
||||
)
|
||||
|
||||
@@ -323,7 +395,37 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
ordering='title',
|
||||
)
|
||||
def title_str(self, obj):
|
||||
# Render inline tag editor widget
|
||||
title_raw = (obj.title or '').strip()
|
||||
url_raw = (obj.url or '').strip()
|
||||
title_normalized = title_raw.lower()
|
||||
url_normalized = url_raw.lower()
|
||||
show_title = bool(title_raw) and title_normalized != 'pending...' and title_normalized != url_normalized
|
||||
css_class = 'fetched' if show_title else 'pending'
|
||||
|
||||
detail_url = build_web_url(f'/{obj.archive_path}/index.html')
|
||||
title_html = ''
|
||||
if show_title:
|
||||
title_html = format_html(
|
||||
'<a href="{}">'
|
||||
'<b class="status-{}">{}</b>'
|
||||
'</a>',
|
||||
detail_url,
|
||||
css_class,
|
||||
urldecode(htmldecode(title_raw))[:128],
|
||||
)
|
||||
|
||||
return format_html(
|
||||
'{}'
|
||||
'<div style="font-size: 11px; color: #64748b; margin-top: 2px;">'
|
||||
'<a href="{}"><code style="user-select: all;">{}</code></a>'
|
||||
'</div>',
|
||||
title_html,
|
||||
url_raw or obj.url,
|
||||
(url_raw or obj.url)[:128],
|
||||
)
|
||||
|
||||
@admin.display(description='Tags', ordering='tag_count')
|
||||
def tags_inline(self, obj):
|
||||
widget = InlineTagEditorWidget(snapshot_id=str(obj.pk))
|
||||
tags_html = widget.render(
|
||||
name=f'tags_{obj.pk}',
|
||||
@@ -331,28 +433,58 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
attrs={'id': f'tags_{obj.pk}'},
|
||||
snapshot_id=str(obj.pk),
|
||||
)
|
||||
return mark_safe(f'<span class="tags-inline-editor">{tags_html}</span>')
|
||||
|
||||
# Show title if available, otherwise show URL
|
||||
display_text = obj.title or obj.url
|
||||
css_class = 'fetched' if obj.title else 'pending'
|
||||
@admin.display(description='Preview', empty_value='')
|
||||
def preview_icon(self, obj):
|
||||
results = self._get_prefetched_results(obj)
|
||||
has_screenshot = False
|
||||
has_favicon = False
|
||||
if results is not None:
|
||||
has_screenshot = any(r.plugin == 'screenshot' for r in results)
|
||||
has_favicon = any(r.plugin == 'favicon' for r in results)
|
||||
|
||||
if not has_screenshot and not has_favicon:
|
||||
return None
|
||||
|
||||
if has_screenshot:
|
||||
img_url = build_snapshot_url(str(obj.id), 'screenshot/screenshot.png')
|
||||
fallbacks = [
|
||||
build_snapshot_url(str(obj.id), 'screenshot.png'),
|
||||
build_snapshot_url(str(obj.id), 'favicon/favicon.ico'),
|
||||
build_snapshot_url(str(obj.id), 'favicon.ico'),
|
||||
]
|
||||
img_alt = 'Screenshot'
|
||||
preview_class = 'screenshot'
|
||||
else:
|
||||
img_url = build_snapshot_url(str(obj.id), 'favicon/favicon.ico')
|
||||
fallbacks = [
|
||||
build_snapshot_url(str(obj.id), 'favicon.ico'),
|
||||
]
|
||||
img_alt = 'Favicon'
|
||||
preview_class = 'favicon'
|
||||
|
||||
fallback_list = ','.join(fallbacks)
|
||||
onerror_js = (
|
||||
"this.dataset.fallbacks && this.dataset.fallbacks.length ? "
|
||||
"(this.src=this.dataset.fallbacks.split(',').shift(), "
|
||||
"this.dataset.fallbacks=this.dataset.fallbacks.split(',').slice(1).join(',')) : "
|
||||
"this.remove()"
|
||||
)
|
||||
|
||||
return format_html(
|
||||
'<a href="/{}">'
|
||||
'<img src="/{}/favicon.ico" class="favicon" onerror="this.remove()">'
|
||||
'</a>'
|
||||
'<a href="/{}/index.html">'
|
||||
'<b class="status-{}">{}</b>'
|
||||
'</a>',
|
||||
obj.archive_path,
|
||||
obj.archive_path,
|
||||
obj.archive_path,
|
||||
css_class,
|
||||
urldecode(htmldecode(display_text))[:128]
|
||||
) + mark_safe(f' <span class="tags-inline-editor">{tags_html}</span>')
|
||||
'<img src="{}" alt="{}" class="snapshot-preview {}" decoding="async" loading="lazy" '
|
||||
'onerror="{}" data-fallbacks="{}">',
|
||||
img_url,
|
||||
img_alt,
|
||||
preview_class,
|
||||
onerror_js,
|
||||
fallback_list,
|
||||
)
|
||||
|
||||
@admin.display(
|
||||
description='Files Saved',
|
||||
# ordering='archiveresult_count',
|
||||
ordering='ar_succeeded_count',
|
||||
)
|
||||
def files(self, obj):
|
||||
# return '-'
|
||||
@@ -371,8 +503,8 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
else:
|
||||
size_txt = mark_safe('<span style="opacity: 0.3">...</span>')
|
||||
return format_html(
|
||||
'<a href="/{}" title="View all files">{}</a>',
|
||||
obj.archive_path,
|
||||
'<a href="{}" title="View all files">{}</a>',
|
||||
build_web_url(f'/{obj.archive_path}'),
|
||||
size_txt,
|
||||
)
|
||||
|
||||
@@ -382,7 +514,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
)
|
||||
def status_with_progress(self, obj):
|
||||
"""Show status with progress bar for in-progress snapshots."""
|
||||
stats = obj.get_progress_stats()
|
||||
stats = self._get_progress_stats(obj)
|
||||
|
||||
# Status badge colors
|
||||
status_colors = {
|
||||
@@ -440,16 +572,13 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
|
||||
@admin.display(
|
||||
description='Size',
|
||||
ordering='output_size_sum',
|
||||
)
|
||||
def size_with_stats(self, obj):
|
||||
"""Show archive size with output size from archive results."""
|
||||
stats = obj.get_progress_stats()
|
||||
|
||||
# Use output_size from archive results if available, fallback to disk size
|
||||
stats = self._get_progress_stats(obj)
|
||||
output_size = stats['output_size']
|
||||
archive_size = os.access(Path(obj.output_dir) / 'index.html', os.F_OK) and obj.archive_size
|
||||
|
||||
size_bytes = output_size or archive_size or 0
|
||||
size_bytes = output_size or 0
|
||||
|
||||
if size_bytes:
|
||||
size_txt = printable_filesize(size_bytes)
|
||||
@@ -461,22 +590,76 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
# Show hook statistics
|
||||
if stats['total'] > 0:
|
||||
return format_html(
|
||||
'<a href="/{}" title="View all files" style="white-space: nowrap;">'
|
||||
'<a href="{}" title="View all files" style="white-space: nowrap;">'
|
||||
'{}</a>'
|
||||
'<div style="font-size: 10px; color: #94a3b8; margin-top: 2px;">'
|
||||
'{}/{} hooks</div>',
|
||||
obj.archive_path,
|
||||
build_web_url(f'/{obj.archive_path}'),
|
||||
size_txt,
|
||||
stats['succeeded'],
|
||||
stats['total'],
|
||||
)
|
||||
|
||||
return format_html(
|
||||
'<a href="/{}" title="View all files">{}</a>',
|
||||
obj.archive_path,
|
||||
'<a href="{}" title="View all files">{}</a>',
|
||||
build_web_url(f'/{obj.archive_path}'),
|
||||
size_txt,
|
||||
)
|
||||
|
||||
def _get_progress_stats(self, obj):
|
||||
results = self._get_prefetched_results(obj)
|
||||
if results is None:
|
||||
return obj.get_progress_stats()
|
||||
|
||||
total = len(results)
|
||||
succeeded = sum(1 for r in results if r.status == 'succeeded')
|
||||
failed = sum(1 for r in results if r.status == 'failed')
|
||||
running = sum(1 for r in results if r.status == 'started')
|
||||
skipped = sum(1 for r in results if r.status == 'skipped')
|
||||
pending = max(total - succeeded - failed - running - skipped, 0)
|
||||
completed = succeeded + failed + skipped
|
||||
percent = int((completed / total * 100) if total > 0 else 0)
|
||||
is_sealed = obj.status not in (obj.StatusChoices.QUEUED, obj.StatusChoices.STARTED)
|
||||
output_size = None
|
||||
|
||||
if hasattr(obj, 'output_size_sum'):
|
||||
output_size = obj.output_size_sum or 0
|
||||
else:
|
||||
output_size = sum(r.output_size or 0 for r in results if r.status == 'succeeded')
|
||||
|
||||
return {
|
||||
'total': total,
|
||||
'succeeded': succeeded,
|
||||
'failed': failed,
|
||||
'running': running,
|
||||
'pending': pending,
|
||||
'skipped': skipped,
|
||||
'percent': percent,
|
||||
'output_size': output_size or 0,
|
||||
'is_sealed': is_sealed,
|
||||
}
|
||||
|
||||
def _get_prefetched_results(self, obj):
|
||||
if hasattr(obj, '_prefetched_objects_cache') and 'archiveresult_set' in obj._prefetched_objects_cache:
|
||||
return obj.archiveresult_set.all()
|
||||
return None
|
||||
|
||||
def _get_ordering_fields(self, request):
|
||||
ordering = request.GET.get('o')
|
||||
if not ordering:
|
||||
return set()
|
||||
fields = set()
|
||||
for part in ordering.split('.'):
|
||||
if not part:
|
||||
continue
|
||||
try:
|
||||
idx = abs(int(part)) - 1
|
||||
except ValueError:
|
||||
continue
|
||||
if 0 <= idx < len(self.list_display):
|
||||
fields.add(self.list_display[idx])
|
||||
return fields
|
||||
|
||||
@admin.display(
|
||||
description='Original URL',
|
||||
ordering='url',
|
||||
@@ -524,20 +707,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
# return super().changelist_view(request, extra_context=None)
|
||||
|
||||
@admin.action(
|
||||
description="ℹ️ Get Title"
|
||||
)
|
||||
def update_titles(self, request, queryset):
|
||||
count = queryset.count()
|
||||
|
||||
# Queue snapshots for archiving via the state machine system
|
||||
queued = bg_archive_snapshots(queryset, kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR})
|
||||
messages.success(
|
||||
request,
|
||||
f"Queued {queued} snapshots for title/favicon update. The orchestrator will process them in the background.",
|
||||
)
|
||||
|
||||
@admin.action(
|
||||
description="⬇️ Get Missing"
|
||||
description="⏯️ Finish"
|
||||
)
|
||||
def update_snapshots(self, request, queryset):
|
||||
count = queryset.count()
|
||||
@@ -551,7 +721,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
|
||||
|
||||
@admin.action(
|
||||
description="🆕 Archive Again"
|
||||
description="⬇️ Fresh"
|
||||
)
|
||||
def resnapshot_snapshot(self, request, queryset):
|
||||
for snapshot in queryset:
|
||||
@@ -579,7 +749,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
)
|
||||
|
||||
@admin.action(
|
||||
description="☠️ Delete"
|
||||
description="🗑️ Delete"
|
||||
)
|
||||
def delete_snapshots(self, request, queryset):
|
||||
"""Delete snapshots in a single transaction to avoid SQLite concurrency issues."""
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
__package__ = 'archivebox.core'
|
||||
|
||||
from django.apps import AppConfig
|
||||
import os
|
||||
|
||||
_ORCHESTRATOR_BOOTSTRAPPED = False
|
||||
|
||||
|
||||
class CoreConfig(AppConfig):
|
||||
@@ -10,6 +13,7 @@ class CoreConfig(AppConfig):
|
||||
def ready(self):
|
||||
"""Register the archivebox.core.admin_site as the main django admin site"""
|
||||
import sys
|
||||
from django.utils.autoreload import DJANGO_AUTORELOAD_ENV
|
||||
|
||||
from archivebox.core.admin_site import register_admin_site
|
||||
register_admin_site()
|
||||
@@ -18,3 +22,45 @@ class CoreConfig(AppConfig):
|
||||
# Skip during makemigrations to avoid premature state machine access
|
||||
if 'makemigrations' not in sys.argv:
|
||||
from archivebox.core import models # noqa: F401
|
||||
|
||||
pidfile = os.environ.get('ARCHIVEBOX_RUNSERVER_PIDFILE')
|
||||
if pidfile:
|
||||
should_write_pid = True
|
||||
if os.environ.get('ARCHIVEBOX_AUTORELOAD') == '1':
|
||||
should_write_pid = os.environ.get(DJANGO_AUTORELOAD_ENV) == 'true'
|
||||
if should_write_pid:
|
||||
try:
|
||||
with open(pidfile, 'w') as handle:
|
||||
handle.write(str(os.getpid()))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _should_manage_orchestrator() -> bool:
|
||||
if os.environ.get('ARCHIVEBOX_ORCHESTRATOR_MANAGED_BY_WATCHER') == '1':
|
||||
return False
|
||||
if os.environ.get('ARCHIVEBOX_ORCHESTRATOR_PROCESS') == '1':
|
||||
return False
|
||||
if os.environ.get('ARCHIVEBOX_RUNSERVER') == '1':
|
||||
if os.environ.get('ARCHIVEBOX_AUTORELOAD') == '1':
|
||||
return os.environ.get(DJANGO_AUTORELOAD_ENV) == 'true'
|
||||
return True
|
||||
|
||||
argv = ' '.join(sys.argv).lower()
|
||||
if 'orchestrator' in argv:
|
||||
return False
|
||||
return 'daphne' in argv and '--reload' in sys.argv
|
||||
|
||||
if _should_manage_orchestrator():
|
||||
global _ORCHESTRATOR_BOOTSTRAPPED
|
||||
if _ORCHESTRATOR_BOOTSTRAPPED:
|
||||
return
|
||||
_ORCHESTRATOR_BOOTSTRAPPED = True
|
||||
|
||||
from archivebox.machine.models import Process, Machine
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
Process.cleanup_stale_running()
|
||||
machine = Machine.current()
|
||||
|
||||
if not Orchestrator.is_running():
|
||||
Orchestrator(exit_on_idle=False).start()
|
||||
|
||||
189
archivebox/core/host_utils.py
Normal file
189
archivebox/core/host_utils.py
Normal file
@@ -0,0 +1,189 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
|
||||
|
||||
_SNAPSHOT_ID_RE = re.compile(r"^[0-9a-fA-F-]{8,36}$")
|
||||
|
||||
|
||||
def split_host_port(host: str) -> tuple[str, str | None]:
|
||||
parsed = urlparse(f"//{host}")
|
||||
hostname = (parsed.hostname or host or "").lower()
|
||||
port = str(parsed.port) if parsed.port else None
|
||||
return hostname, port
|
||||
|
||||
|
||||
def _normalize_base_url(value: str | None) -> str:
|
||||
if not value:
|
||||
return ""
|
||||
base = value.strip()
|
||||
if not base:
|
||||
return ""
|
||||
if "://" not in base:
|
||||
base = f"http://{base}"
|
||||
parsed = urlparse(base)
|
||||
if not parsed.netloc:
|
||||
return ""
|
||||
return f"{parsed.scheme}://{parsed.netloc}"
|
||||
|
||||
|
||||
def normalize_base_url(value: str | None) -> str:
|
||||
return _normalize_base_url(value)
|
||||
|
||||
|
||||
def get_listen_host() -> str:
|
||||
return (SERVER_CONFIG.LISTEN_HOST or "").strip()
|
||||
|
||||
|
||||
def get_listen_parts() -> tuple[str, str | None]:
|
||||
return split_host_port(get_listen_host())
|
||||
|
||||
|
||||
def _build_listen_host(subdomain: str | None) -> str:
|
||||
host, port = get_listen_parts()
|
||||
if not host:
|
||||
return ""
|
||||
full_host = f"{subdomain}.{host}" if subdomain else host
|
||||
if port:
|
||||
return f"{full_host}:{port}"
|
||||
return full_host
|
||||
|
||||
|
||||
def get_admin_host() -> str:
|
||||
override = _normalize_base_url(SERVER_CONFIG.ADMIN_BASE_URL)
|
||||
if override:
|
||||
return urlparse(override).netloc.lower()
|
||||
return _build_listen_host("admin")
|
||||
|
||||
|
||||
def get_web_host() -> str:
|
||||
override = _normalize_base_url(SERVER_CONFIG.ARCHIVE_BASE_URL)
|
||||
if override:
|
||||
return urlparse(override).netloc.lower()
|
||||
return _build_listen_host("web")
|
||||
|
||||
def get_api_host() -> str:
|
||||
return _build_listen_host("api")
|
||||
|
||||
def get_public_host() -> str:
|
||||
return _build_listen_host("public")
|
||||
|
||||
|
||||
def get_snapshot_host(snapshot_id: str) -> str:
|
||||
return _build_listen_host(snapshot_id)
|
||||
|
||||
|
||||
def get_original_host(domain: str) -> str:
|
||||
return _build_listen_host(domain)
|
||||
|
||||
|
||||
def is_snapshot_subdomain(subdomain: str) -> bool:
|
||||
return bool(_SNAPSHOT_ID_RE.match(subdomain or ""))
|
||||
|
||||
|
||||
def get_listen_subdomain(request_host: str) -> str:
|
||||
req_host, req_port = split_host_port(request_host)
|
||||
listen_host, listen_port = get_listen_parts()
|
||||
if not listen_host:
|
||||
return ""
|
||||
if listen_port and req_port and listen_port != req_port:
|
||||
return ""
|
||||
if req_host == listen_host:
|
||||
return ""
|
||||
suffix = f".{listen_host}"
|
||||
if req_host.endswith(suffix):
|
||||
return req_host[: -len(suffix)]
|
||||
return ""
|
||||
|
||||
|
||||
def host_matches(request_host: str, target_host: str) -> bool:
|
||||
if not request_host or not target_host:
|
||||
return False
|
||||
req_host, req_port = split_host_port(request_host)
|
||||
target_host_only, target_port = split_host_port(target_host)
|
||||
if req_host != target_host_only:
|
||||
return False
|
||||
if target_port and req_port and target_port != req_port:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _scheme_from_request(request=None) -> str:
|
||||
if request:
|
||||
return request.scheme
|
||||
return "http"
|
||||
|
||||
|
||||
def _build_base_url_for_host(host: str, request=None) -> str:
|
||||
if not host:
|
||||
return ""
|
||||
scheme = _scheme_from_request(request)
|
||||
return f"{scheme}://{host}"
|
||||
|
||||
|
||||
def get_admin_base_url(request=None) -> str:
|
||||
override = _normalize_base_url(SERVER_CONFIG.ADMIN_BASE_URL)
|
||||
if override:
|
||||
return override
|
||||
return _build_base_url_for_host(get_admin_host(), request=request)
|
||||
|
||||
|
||||
def get_web_base_url(request=None) -> str:
|
||||
override = _normalize_base_url(SERVER_CONFIG.ARCHIVE_BASE_URL)
|
||||
if override:
|
||||
return override
|
||||
return _build_base_url_for_host(get_web_host(), request=request)
|
||||
|
||||
def get_api_base_url(request=None) -> str:
|
||||
return _build_base_url_for_host(get_api_host(), request=request)
|
||||
|
||||
|
||||
# Backwards-compat aliases (archive == web)
|
||||
def get_archive_base_url(request=None) -> str:
|
||||
return get_web_base_url(request=request)
|
||||
|
||||
|
||||
def get_snapshot_base_url(snapshot_id: str, request=None) -> str:
|
||||
return _build_base_url_for_host(get_snapshot_host(snapshot_id), request=request)
|
||||
|
||||
|
||||
def get_original_base_url(domain: str, request=None) -> str:
|
||||
return _build_base_url_for_host(get_original_host(domain), request=request)
|
||||
|
||||
|
||||
def build_admin_url(path: str = "", request=None) -> str:
|
||||
return _build_url(get_admin_base_url(request), path)
|
||||
|
||||
|
||||
def build_web_url(path: str = "", request=None) -> str:
|
||||
return _build_url(get_web_base_url(request), path)
|
||||
|
||||
def build_api_url(path: str = "", request=None) -> str:
|
||||
return _build_url(get_api_base_url(request), path)
|
||||
|
||||
|
||||
def build_archive_url(path: str = "", request=None) -> str:
|
||||
return _build_url(get_archive_base_url(request), path)
|
||||
|
||||
|
||||
def build_snapshot_url(snapshot_id: str, path: str = "", request=None) -> str:
|
||||
return _build_url(get_snapshot_base_url(snapshot_id, request=request), path)
|
||||
|
||||
|
||||
def build_original_url(domain: str, path: str = "", request=None) -> str:
    """URL for `path` on the original-domain passthrough host for `domain`."""
    base = get_original_base_url(domain, request=request)
    return _build_url(base, path)
|
||||
|
||||
|
||||
def _build_url(base_url: str, path: str) -> str:
|
||||
if not base_url:
|
||||
if not path:
|
||||
return ""
|
||||
return path if path.startswith("/") else f"/{path}"
|
||||
if not path:
|
||||
return base_url
|
||||
return f"{base_url}{path if path.startswith('/') else f'/{path}'}"
|
||||
@@ -2,11 +2,33 @@ __package__ = 'archivebox.core'
|
||||
|
||||
import ipaddress
|
||||
import re
|
||||
from pathlib import Path
|
||||
from django.utils import timezone
|
||||
from django.contrib.auth.middleware import RemoteUserMiddleware
|
||||
from django.contrib.auth.models import AnonymousUser
|
||||
from django.core.exceptions import ImproperlyConfigured
|
||||
from django.shortcuts import redirect
|
||||
from django.contrib.staticfiles import finders
|
||||
from django.utils.http import http_date
|
||||
from django.http import HttpResponseNotModified
|
||||
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
from archivebox.config import VERSION
|
||||
from archivebox.config.version import get_COMMIT_HASH
|
||||
from archivebox.core.host_utils import (
|
||||
build_admin_url,
|
||||
build_api_url,
|
||||
build_web_url,
|
||||
get_api_host,
|
||||
get_admin_host,
|
||||
get_listen_host,
|
||||
get_listen_subdomain,
|
||||
get_public_host,
|
||||
get_web_host,
|
||||
host_matches,
|
||||
is_snapshot_subdomain,
|
||||
)
|
||||
from archivebox.core.views import SnapshotHostView, OriginalDomainHostView
|
||||
|
||||
|
||||
def detect_timezone(request, activate: bool=True):
|
||||
@@ -30,17 +52,112 @@ def TimezoneMiddleware(get_response):
|
||||
|
||||
def CacheControlMiddleware(get_response):
    """Set Cache-Control (and ETag for /static/) headers on responses.

    - /static/ assets: served as immutable with a version-keyed ETag
      (commit hash or VERSION, plus the file's mtime) and conditional
      If-None-Match handling that returns 304s.
    - everything else: a short public/private default policy (depending on
      PUBLIC_SNAPSHOTS) applied only if the view did not already set one.

    NOTE(review): a merge left both the old unconditional '/archive/' branch
    and the new guarded fallback in place, which made the
    `if not response.get('Cache-Control')` guard dead code and set the header
    twice; only the guarded fallback is kept here. The now-unused
    `snapshot_path_re` (whose raw-string `\\d` also matched a literal
    backslash rather than a digit) is removed with it.
    """
    # Cache-buster: changes with every release/commit so clients refetch assets.
    static_cache_key = (get_COMMIT_HASH() or VERSION or "dev").strip()

    def middleware(request):
        response = get_response(request)

        if request.path.startswith('/static/'):
            rel_path = request.path[len('/static/'):]
            static_path = finders.find(rel_path)
            if static_path:
                try:
                    mtime = Path(static_path).stat().st_mtime
                except OSError:
                    mtime = None
                etag = f'"{static_cache_key}:{int(mtime) if mtime else 0}"'

                # Honor conditional requests: compare against every ETag the
                # client sent (quoted or unquoted) and short-circuit with 304.
                inm = request.META.get("HTTP_IF_NONE_MATCH")
                if inm:
                    inm_list = [item.strip() for item in inm.split(",")]
                    if etag in inm_list or etag.strip('"') in [i.strip('"') for i in inm_list]:
                        not_modified = HttpResponseNotModified()
                        not_modified.headers["ETag"] = etag
                        not_modified.headers["Cache-Control"] = "public, max-age=31536000, immutable"
                        if mtime:
                            not_modified.headers["Last-Modified"] = http_date(mtime)
                        return not_modified

                response.headers["ETag"] = etag
                response.headers["Cache-Control"] = "public, max-age=31536000, immutable"
                if mtime and not response.headers.get("Last-Modified"):
                    response.headers["Last-Modified"] = http_date(mtime)
            return response

        # Fallback: only apply the default policy if the view didn't set one.
        if not response.get('Cache-Control'):
            policy = 'public' if SERVER_CONFIG.PUBLIC_SNAPSHOTS else 'private'
            response['Cache-Control'] = f'{policy}, max-age=60, stale-while-revalidate=300'
        return response

    return middleware
|
||||
|
||||
|
||||
def HostRoutingMiddleware(get_response):
    """Dispatch requests based on which configured host they arrived on.

    Routing order: admin host (pass through) -> api host (force anonymous,
    redirect /admin to admin host, prefix non-/api/ paths) -> web host
    (force anonymous, redirect /admin to admin host) -> public host (force
    anonymous) -> snapshot/original-domain subdomains -> listen host
    (redirect to web host) -> generic redirect to web host if one is
    configured -> fall through to the normal view.
    """

    def _with_query(request, target):
        # Preserve the original query string across redirects.
        qs = request.META.get("QUERY_STRING")
        return f"{target}?{qs}" if qs else target

    def middleware(request):
        request_host = (request.get_host() or "").lower()
        admin_host = get_admin_host()
        web_host = get_web_host()
        api_host = get_api_host()
        public_host = get_public_host()
        listen_host = get_listen_host()
        subdomain = get_listen_subdomain(request_host)

        if host_matches(request_host, admin_host):
            return get_response(request)

        if host_matches(request_host, api_host):
            # API host is unauthenticated: force AnonymousUser.
            request.user = AnonymousUser()
            request._cached_user = request.user
            if request.path.startswith("/admin"):
                return redirect(_with_query(request, build_admin_url(request.path, request=request)))
            if not request.path.startswith("/api/"):
                path = request.path if request.path.startswith('/') else f"/{request.path}"
                return redirect(_with_query(request, f"/api{path}"))
            return get_response(request)

        if host_matches(request_host, web_host):
            request.user = AnonymousUser()
            request._cached_user = request.user
            if request.path.startswith("/admin"):
                return redirect(_with_query(request, build_admin_url(request.path, request=request)))
            return get_response(request)

        if host_matches(request_host, public_host):
            request.user = AnonymousUser()
            request._cached_user = request.user
            return get_response(request)

        if subdomain:
            # Per-snapshot and original-domain hosts are handled by views.
            if is_snapshot_subdomain(subdomain):
                view = SnapshotHostView.as_view()
                return view(request, snapshot_id=subdomain, path=request.path.lstrip("/"))
            view = OriginalDomainHostView.as_view()
            return view(request, domain=subdomain, path=request.path.lstrip("/"))

        if host_matches(request_host, listen_host):
            return redirect(_with_query(request, build_web_url(request.path, request=request)))

        if admin_host or web_host:
            target = build_web_url(request.path, request=request)
            if target:
                return redirect(_with_query(request, target))

        return get_response(request)

    return middleware
|
||||
|
||||
class ReverseProxyAuthMiddleware(RemoteUserMiddleware):
    """RemoteUserMiddleware variant reading the username from the
    reverse-proxy header configured in SERVER_CONFIG."""
    # Django exposes HTTP headers in request.META as HTTP_<NAME>,
    # with dashes replaced by underscores and uppercased.
    header = f"HTTP_{SERVER_CONFIG.REVERSE_PROXY_USER_HEADER.replace('-', '_').upper()}"
|
||||
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
# Generated by Codex on 2026-01-21
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Adds a composite DB index on ArchiveResult(snapshot, status) to speed
    # up per-snapshot status lookups.

    dependencies = [
        ('core', '0030_alter_archiveresult_id'),
    ]

    operations = [
        migrations.AddIndex(
            model_name='archiveresult',
            index=models.Index(
                fields=['snapshot', 'status'],
                name='archiveresult_snap_status_idx',
            ),
        ),
    ]
|
||||
@@ -1297,7 +1297,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
|
||||
path = self.archive_path
|
||||
output = ""
|
||||
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a> '
|
||||
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a>'
|
||||
|
||||
# Get all plugins from hooks system (sorted by numeric prefix)
|
||||
all_plugins = [get_plugin_name(e) for e in get_plugins()]
|
||||
@@ -1322,7 +1322,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
icon
|
||||
)
|
||||
|
||||
return format_html('<span class="files-icons" style="font-size: 1.1em; opacity: 0.8; min-width: 240px; display: inline-block">{}</span>', mark_safe(output))
|
||||
return format_html('<span class="files-icons" style="font-size: 1em; opacity: 0.8; display: inline-grid; grid-auto-flow: column; grid-auto-columns: auto; grid-template-rows: repeat(4, auto); gap: 0 0; justify-content: start; align-content: start;">{}</span>', mark_safe(output))
|
||||
|
||||
cache_result = cache.get(cache_key)
|
||||
if cache_result:
|
||||
@@ -1789,7 +1789,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
)['total_size'] or 0
|
||||
|
||||
# Check if sealed
|
||||
is_sealed = self.status in (self.StatusChoices.SEALED, self.StatusChoices.FAILED, self.StatusChoices.BACKOFF)
|
||||
is_sealed = self.status not in (self.StatusChoices.QUEUED, self.StatusChoices.STARTED)
|
||||
|
||||
return {
|
||||
'total': total,
|
||||
@@ -1992,6 +1992,14 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
size = sum(p.stat().st_size for p in abs_path.rglob('*') if p.is_file())
|
||||
else:
|
||||
size = abs_path.stat().st_size
|
||||
plugin_lower = (result.plugin or '').lower()
|
||||
if plugin_lower in ('ytdlp', 'yt-dlp', 'youtube-dl'):
|
||||
plugin_dir = snap_dir / result.plugin
|
||||
if plugin_dir.exists():
|
||||
try:
|
||||
size = sum(p.stat().st_size for p in plugin_dir.rglob('*') if p.is_file())
|
||||
except OSError:
|
||||
pass
|
||||
outputs.append({
|
||||
'name': result.plugin,
|
||||
'path': embed_path,
|
||||
@@ -2057,6 +2065,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
def to_dict(self, extended: bool = False) -> Dict[str, Any]:
|
||||
"""Convert Snapshot to a dictionary (replacement for Link._asdict())"""
|
||||
from archivebox.misc.util import ts_to_date_str
|
||||
from archivebox.core.host_utils import build_snapshot_url
|
||||
|
||||
result = {
|
||||
'TYPE': 'core.models.Snapshot',
|
||||
@@ -2078,6 +2087,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
'is_static': self.is_static,
|
||||
'is_archived': self.is_archived,
|
||||
'archive_path': self.archive_path,
|
||||
'archive_url': build_snapshot_url(str(self.id), 'index.html'),
|
||||
'output_dir': self.output_dir,
|
||||
'link_dir': self.output_dir, # backwards compatibility alias
|
||||
'archive_size': self.archive_size,
|
||||
@@ -2129,14 +2139,17 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
outputs_by_plugin = {out['name']: out for out in outputs}
|
||||
|
||||
best_preview_path = 'about:blank'
|
||||
best_result = {'path': 'about:blank', 'result': None}
|
||||
for plugin in preview_priority:
|
||||
out = outputs_by_plugin.get(plugin)
|
||||
if out and out.get('path'):
|
||||
best_preview_path = out['path']
|
||||
best_result = out
|
||||
break
|
||||
|
||||
if best_preview_path == 'about:blank' and outputs:
|
||||
best_preview_path = outputs[0].get('path') or 'about:blank'
|
||||
best_result = outputs[0]
|
||||
context = {
|
||||
**self.to_dict(extended=True),
|
||||
'title': htmlencode(self.title or (self.base_url if self.is_archived else TITLE_LOADING_MSG)),
|
||||
@@ -2151,6 +2164,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
|
||||
'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
|
||||
'best_preview_path': best_preview_path,
|
||||
'best_result': best_result,
|
||||
'archiveresults': outputs,
|
||||
}
|
||||
rendered_html = render_to_string('snapshot.html', context)
|
||||
@@ -2326,6 +2340,9 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
app_label = 'core'
|
||||
verbose_name = 'Archive Result'
|
||||
verbose_name_plural = 'Archive Results Log'
|
||||
indexes = [
|
||||
models.Index(fields=['snapshot', 'status'], name='archiveresult_snap_status_idx'),
|
||||
]
|
||||
|
||||
def __str__(self):
|
||||
return f'[{self.id}] {self.snapshot.url[:64]} -> {self.plugin}'
|
||||
@@ -2487,6 +2504,20 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
plugin_lower = (plugin_name or '').lower()
|
||||
prefer_media = plugin_lower in ('ytdlp', 'yt-dlp', 'youtube-dl')
|
||||
|
||||
preferred_text = []
|
||||
if plugin_lower:
|
||||
preferred_text.extend([
|
||||
f'{plugin_lower}.jsonl',
|
||||
f'{plugin_lower}.json',
|
||||
f'{plugin_lower}.txt',
|
||||
f'{plugin_lower}.log',
|
||||
])
|
||||
preferred_text.extend(['index.jsonl', 'index.json'])
|
||||
for name in preferred_text:
|
||||
candidate = dir_path / name
|
||||
if candidate.exists() and candidate.is_file():
|
||||
return candidate
|
||||
|
||||
if not prefer_media:
|
||||
for name in ('index.html', 'index.htm'):
|
||||
candidate = dir_path / name
|
||||
@@ -2504,6 +2535,8 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
if file_path.is_dir() or file_path.name.startswith('.'):
|
||||
continue
|
||||
ext = file_path.suffix.lstrip('.').lower()
|
||||
if ext in ('pid', 'log', 'sh'):
|
||||
continue
|
||||
if ext not in embeddable_exts:
|
||||
continue
|
||||
try:
|
||||
@@ -2547,20 +2580,44 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
# Fallback: treat output_str as a file path only if it exists on disk
|
||||
if self.output_str:
|
||||
try:
|
||||
output_path = Path(self.output_str)
|
||||
raw_output = str(self.output_str).strip()
|
||||
if raw_output in ('.', './', ''):
|
||||
best_file = self._find_best_output_file(plugin_dir, self.plugin)
|
||||
if best_file:
|
||||
return str(best_file.relative_to(snapshot_dir))
|
||||
output_path = None
|
||||
else:
|
||||
output_path = Path(raw_output)
|
||||
|
||||
if output_path.is_absolute():
|
||||
if output_path and output_path.is_absolute():
|
||||
# If absolute and within snapshot dir, normalize to relative
|
||||
if snapshot_dir in output_path.parents and output_path.exists():
|
||||
return str(output_path.relative_to(snapshot_dir))
|
||||
else:
|
||||
if output_path.is_file():
|
||||
return str(output_path.relative_to(snapshot_dir))
|
||||
if output_path.is_dir():
|
||||
best_file = self._find_best_output_file(output_path, self.plugin)
|
||||
if best_file:
|
||||
return str(best_file.relative_to(snapshot_dir))
|
||||
elif output_path:
|
||||
# If relative, prefer plugin-prefixed path, then direct path
|
||||
if (plugin_dir / output_path).exists():
|
||||
return f'{self.plugin}/{output_path}'
|
||||
plugin_candidate = plugin_dir / output_path
|
||||
if plugin_candidate.exists():
|
||||
if plugin_candidate.is_file():
|
||||
return f'{self.plugin}/{output_path}'
|
||||
if plugin_candidate.is_dir():
|
||||
best_file = self._find_best_output_file(plugin_candidate, self.plugin)
|
||||
if best_file:
|
||||
return str(best_file.relative_to(snapshot_dir))
|
||||
if output_path.name in ('index.html', 'index.json') and output_path.parent == Path('.'):
|
||||
return None
|
||||
if (snapshot_dir / output_path).exists():
|
||||
return str(output_path)
|
||||
snapshot_candidate = snapshot_dir / output_path
|
||||
if snapshot_candidate.exists():
|
||||
if snapshot_candidate.is_file():
|
||||
return str(output_path)
|
||||
if snapshot_candidate.is_dir():
|
||||
best_file = self._find_best_output_file(snapshot_candidate, self.plugin)
|
||||
if best_file:
|
||||
return str(best_file.relative_to(snapshot_dir))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -2569,7 +2626,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
ignored = {'stdout.log', 'stderr.log', 'hook.pid', 'listener.pid', 'cmd.sh'}
|
||||
output_candidates = [
|
||||
f for f in self.output_files.keys()
|
||||
if Path(f).name not in ignored
|
||||
if Path(f).name not in ignored and Path(f).suffix not in ('.pid', '.log', '.sh')
|
||||
]
|
||||
first_file = output_candidates[0] if output_candidates else None
|
||||
if first_file and (plugin_dir / first_file).exists():
|
||||
|
||||
@@ -12,6 +12,7 @@ import archivebox
|
||||
|
||||
from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS # noqa
|
||||
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG, STORAGE_CONFIG # noqa
|
||||
from archivebox.core.host_utils import normalize_base_url, get_admin_base_url, get_api_base_url
|
||||
|
||||
|
||||
IS_MIGRATING = "makemigrations" in sys.argv[:3] or "migrate" in sys.argv[:3]
|
||||
@@ -77,9 +78,11 @@ MIDDLEWARE = [
|
||||
"django.middleware.security.SecurityMiddleware",
|
||||
"django.contrib.sessions.middleware.SessionMiddleware",
|
||||
"django.middleware.common.CommonMiddleware",
|
||||
"archivebox.api.middleware.ApiCorsMiddleware",
|
||||
"django.middleware.csrf.CsrfViewMiddleware",
|
||||
"django.contrib.auth.middleware.AuthenticationMiddleware",
|
||||
"archivebox.core.middleware.ReverseProxyAuthMiddleware",
|
||||
"archivebox.core.middleware.HostRoutingMiddleware",
|
||||
"django.contrib.messages.middleware.MessageMiddleware",
|
||||
"archivebox.core.middleware.CacheControlMiddleware",
|
||||
# Additional middlewares from plugins (if any)
|
||||
@@ -347,6 +350,14 @@ SECRET_KEY = SERVER_CONFIG.SECRET_KEY or get_random_string(50, "abcdefghijklmnop
|
||||
ALLOWED_HOSTS = SERVER_CONFIG.ALLOWED_HOSTS.split(",")
|
||||
CSRF_TRUSTED_ORIGINS = list(set(SERVER_CONFIG.CSRF_TRUSTED_ORIGINS.split(",")))
|
||||
|
||||
admin_base_url = normalize_base_url(get_admin_base_url())
|
||||
if admin_base_url and admin_base_url not in CSRF_TRUSTED_ORIGINS:
|
||||
CSRF_TRUSTED_ORIGINS.append(admin_base_url)
|
||||
|
||||
api_base_url = normalize_base_url(get_api_base_url())
|
||||
if api_base_url and api_base_url not in CSRF_TRUSTED_ORIGINS:
|
||||
CSRF_TRUSTED_ORIGINS.append(api_base_url)
|
||||
|
||||
# automatically fix case when user sets ALLOWED_HOSTS (e.g. to archivebox.example.com)
|
||||
# but forgets to add https://archivebox.example.com to CSRF_TRUSTED_ORIGINS
|
||||
for hostname in ALLOWED_HOSTS:
|
||||
@@ -363,6 +374,7 @@ CSRF_COOKIE_SECURE = False
|
||||
SESSION_COOKIE_SECURE = False
|
||||
SESSION_COOKIE_HTTPONLY = True
|
||||
SESSION_COOKIE_DOMAIN = None
|
||||
CSRF_COOKIE_DOMAIN = None
|
||||
SESSION_COOKIE_AGE = 1209600 # 2 weeks
|
||||
SESSION_EXPIRE_AT_BROWSER_CLOSE = False
|
||||
SESSION_SAVE_EVERY_REQUEST = False
|
||||
|
||||
@@ -15,6 +15,6 @@ def get_config(key: str) -> any:
|
||||
Usage: {% get_config "ARCHIVEDOTORG_ENABLED" as enabled %}
|
||||
"""
|
||||
try:
|
||||
return _get_config(key)
|
||||
return _get_config().get(key)
|
||||
except (KeyError, AttributeError):
|
||||
return None
|
||||
|
||||
@@ -9,10 +9,114 @@ from pathlib import Path
|
||||
from archivebox.hooks import (
|
||||
get_plugin_icon, get_plugin_template, get_plugin_name,
|
||||
)
|
||||
from archivebox.core.host_utils import (
|
||||
get_admin_base_url,
|
||||
get_web_base_url,
|
||||
get_snapshot_base_url,
|
||||
build_snapshot_url,
|
||||
)
|
||||
|
||||
|
||||
register = template.Library()
|
||||
|
||||
_MEDIA_FILE_EXTS = {
|
||||
'.mp4', '.webm', '.mkv', '.avi', '.mov', '.flv', '.wmv', '.m4v', '.mpg', '.mpeg', '.ts', '.m2ts', '.mts',
|
||||
'.3gp', '.3g2', '.ogv',
|
||||
'.mp3', '.m4a', '.aac', '.ogg', '.oga', '.opus', '.wav', '.flac', '.alac', '.aiff', '.wma', '.mka', '.ac3', '.eac3', '.dts',
|
||||
}
|
||||
|
||||
|
||||
def _count_media_files(result) -> int:
    """Count audio/video output files for an ArchiveResult.

    Prefers the recorded `output_files` index; if it lists fewer than two
    media files, also scans the plugin's output directory (bounded to 500
    entries) and returns the larger of the two counts.
    """
    try:
        recorded = getattr(result, 'output_files', None) or {}
    except Exception:
        recorded = {}

    recorded_count = sum(
        1 for p in recorded if Path(p).suffix.lower() in _MEDIA_FILE_EXTS
    )
    if recorded_count >= 2:
        return recorded_count

    try:
        plugin_dir = Path(result.snapshot_dir) / result.plugin
    except Exception:
        return 0
    if not plugin_dir.exists():
        return 0

    on_disk = 0
    for idx, entry in enumerate(plugin_dir.rglob('*')):
        if idx >= 500:  # bound the walk for very large output dirs
            break
        if entry.is_file() and entry.suffix.lower() in _MEDIA_FILE_EXTS:
            on_disk += 1
    return max(recorded_count, on_disk)
|
||||
|
||||
|
||||
def _list_media_files(result) -> list[dict]:
    """Collect audio/video files for an ArchiveResult.

    Returns dicts with 'name', 'path' (snapshot-relative href), and 'size'
    (bytes, or None if unstat-able), sorted case-insensitively by filename.
    Candidates come from the recorded `output_files` index, falling back to
    a bounded scan (2000 entries) of the plugin's output directory.
    """
    try:
        snapshot_dir = Path(result.snapshot_dir)
        plugin_dir = snapshot_dir / result.plugin
    except Exception:
        return []

    recorded = getattr(result, 'output_files', None) or {}
    candidates = [
        Path(p) for p in recorded
        if Path(p).suffix.lower() in _MEDIA_FILE_EXTS
    ]

    if not candidates and plugin_dir.exists():
        for idx, entry in enumerate(plugin_dir.rglob('*')):
            if idx >= 2000:  # cap the walk for very large output dirs
                break
            if not entry.is_file() or entry.suffix.lower() not in _MEDIA_FILE_EXTS:
                continue
            try:
                candidates.append(entry.relative_to(plugin_dir))
            except ValueError:
                continue

    entries: list[dict] = []
    for rel in candidates:
        abs_path = plugin_dir / rel
        if not (abs_path.exists() and abs_path.is_file()):
            continue
        try:
            size = abs_path.stat().st_size
        except OSError:
            size = None
        try:
            href = str(abs_path.relative_to(snapshot_dir))
        except ValueError:
            href = str(Path(result.plugin) / rel)
        entries.append({
            'name': abs_path.name,
            'path': href,
            'size': size,
        })

    entries.sort(key=lambda item: item['name'].lower())
    return entries
|
||||
|
||||
@register.filter(name='split')
def split(value, separator: str = ','):
    """Template filter: split a string on `separator` (default comma).

    None/falsy input is treated as the empty string.
    """
    text = value if value else ''
    return text.split(separator)
|
||||
@@ -52,6 +156,28 @@ def url_replace(context, **kwargs):
|
||||
return dict_.urlencode()
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
def admin_base_url(context) -> str:
    """Template tag: base URL of the admin host for the current request."""
    req = context.get('request')
    return get_admin_base_url(request=req)
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
def web_base_url(context) -> str:
    """Template tag: base URL of the web host for the current request."""
    req = context.get('request')
    return get_web_base_url(request=req)
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
def snapshot_base_url(context, snapshot) -> str:
    """Template tag: base URL for a snapshot's host.

    Accepts either a Snapshot instance or a raw snapshot id.
    """
    sid = getattr(snapshot, 'id', snapshot)
    return get_snapshot_base_url(str(sid), request=context.get('request'))
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
def snapshot_url(context, snapshot, path: str = "") -> str:
    """Template tag: full URL to `path` within a snapshot.

    Accepts either a Snapshot instance or a raw snapshot id.
    """
    sid = getattr(snapshot, 'id', snapshot)
    return build_snapshot_url(str(sid), path, request=context.get('request'))
|
||||
|
||||
|
||||
@register.simple_tag
|
||||
def plugin_icon(plugin: str) -> str:
|
||||
"""
|
||||
@@ -82,24 +208,41 @@ def plugin_card(context, result) -> str:
|
||||
template_str = get_plugin_template(plugin, 'card')
|
||||
|
||||
# Use embed_path() for the display path
|
||||
output_path = result.embed_path() if hasattr(result, 'embed_path') else ''
|
||||
raw_output_path = result.embed_path() if hasattr(result, 'embed_path') else ''
|
||||
output_url = build_snapshot_url(
|
||||
str(getattr(result, 'snapshot_id', '')),
|
||||
raw_output_path or '',
|
||||
request=context.get('request'),
|
||||
)
|
||||
|
||||
icon_html = get_plugin_icon(plugin)
|
||||
plugin_lower = (plugin or '').lower()
|
||||
media_file_count = _count_media_files(result) if plugin_lower in ('ytdlp', 'yt-dlp', 'youtube-dl') else 0
|
||||
media_files = _list_media_files(result) if plugin_lower in ('ytdlp', 'yt-dlp', 'youtube-dl') else []
|
||||
if media_files:
|
||||
snapshot_id = str(getattr(result, 'snapshot_id', ''))
|
||||
request = context.get('request')
|
||||
for item in media_files:
|
||||
path = item.get('path') or ''
|
||||
item['url'] = build_snapshot_url(snapshot_id, path, request=request) if path else ''
|
||||
|
||||
output_lower = (output_path or '').lower()
|
||||
output_lower = (raw_output_path or '').lower()
|
||||
text_preview_exts = ('.json', '.jsonl', '.txt', '.csv', '.tsv', '.xml', '.yml', '.yaml', '.md', '.log')
|
||||
force_text_preview = output_lower.endswith(text_preview_exts)
|
||||
|
||||
# Create a mini template and render it with context
|
||||
try:
|
||||
if template_str and output_path and str(output_path).strip() not in ('.', '/', './') and not force_text_preview:
|
||||
if template_str and raw_output_path and str(raw_output_path).strip() not in ('.', '/', './') and not force_text_preview:
|
||||
tpl = template.Template(template_str)
|
||||
ctx = template.Context({
|
||||
'result': result,
|
||||
'snapshot': result.snapshot,
|
||||
'output_path': output_path,
|
||||
'output_path': output_url,
|
||||
'output_path_raw': raw_output_path,
|
||||
'plugin': plugin,
|
||||
'plugin_icon': icon_html,
|
||||
'media_file_count': media_file_count,
|
||||
'media_files': media_files,
|
||||
})
|
||||
rendered = tpl.render(ctx)
|
||||
# Only return non-empty content (strip whitespace to check)
|
||||
@@ -108,10 +251,10 @@ def plugin_card(context, result) -> str:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if force_text_preview and output_path and str(output_path).strip() not in ('.', '/', './'):
|
||||
output_file = Path(output_path)
|
||||
if force_text_preview and raw_output_path and str(raw_output_path).strip() not in ('.', '/', './'):
|
||||
output_file = Path(raw_output_path)
|
||||
if not output_file.is_absolute():
|
||||
output_file = Path(result.snapshot_dir) / output_path
|
||||
output_file = Path(result.snapshot_dir) / raw_output_path
|
||||
try:
|
||||
output_file = output_file.resolve()
|
||||
snap_dir = Path(result.snapshot_dir).resolve()
|
||||
@@ -169,14 +312,20 @@ def plugin_full(context, result) -> str:
|
||||
if not template_str:
|
||||
return ''
|
||||
|
||||
output_path = result.embed_path() if hasattr(result, 'embed_path') else ''
|
||||
raw_output_path = result.embed_path() if hasattr(result, 'embed_path') else ''
|
||||
output_url = build_snapshot_url(
|
||||
str(getattr(result, 'snapshot_id', '')),
|
||||
raw_output_path or '',
|
||||
request=context.get('request'),
|
||||
)
|
||||
|
||||
try:
|
||||
tpl = template.Template(template_str)
|
||||
ctx = template.Context({
|
||||
'result': result,
|
||||
'snapshot': result.snapshot,
|
||||
'output_path': output_path,
|
||||
'output_path': output_url,
|
||||
'output_path_raw': raw_output_path,
|
||||
'plugin': plugin,
|
||||
})
|
||||
rendered = tpl.render(ctx)
|
||||
@@ -198,3 +347,30 @@ def plugin_name(value: str) -> str:
|
||||
Usage: {{ result.plugin|plugin_name }}
|
||||
"""
|
||||
return get_plugin_name(value)
|
||||
|
||||
|
||||
@register.filter
def plugin_display_name(value: str) -> str:
    """Human-friendly plugin name overrides for UI display.

    Currently only 'merkletree' is renamed (shown as 'hashes').
    """
    name = get_plugin_name(value)
    return 'hashes' if name == 'merkletree' else name
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
def api_token(context) -> str:
    """Return an API token string for the logged-in user, creating one if
    needed; empty string for anonymous/missing users."""
    from archivebox.api.auth import get_or_create_api_token

    request = context.get('request')
    user = getattr(request, 'user', None)
    if user and user.is_authenticated:
        token = get_or_create_api_token(user)
        if token:
            return token.token
    return ''
|
||||
|
||||
@@ -8,7 +8,7 @@ from django.views.generic.base import RedirectView
|
||||
from archivebox.misc.serve_static import serve_static
|
||||
|
||||
from archivebox.core.admin_site import archivebox_admin
|
||||
from archivebox.core.views import HomepageView, SnapshotView, SnapshotPathView, PublicIndexView, AddView, HealthCheckView, live_progress_view
|
||||
from archivebox.core.views import HomepageView, SnapshotView, SnapshotPathView, PublicIndexView, AddView, WebAddView, HealthCheckView, live_progress_view
|
||||
|
||||
from archivebox.workers.views import JobsDashboardView
|
||||
|
||||
@@ -29,11 +29,15 @@ urlpatterns = [
|
||||
path('docs/', RedirectView.as_view(url='https://github.com/ArchiveBox/ArchiveBox/wiki'), name='Docs'),
|
||||
|
||||
path('public/', PublicIndexView.as_view(), name='public-index'),
|
||||
path('public.html', RedirectView.as_view(url='/public/'), name='public-index-html'),
|
||||
|
||||
path('archive/', RedirectView.as_view(url='/')),
|
||||
path('archive/<path:path>', SnapshotView.as_view(), name='Snapshot'),
|
||||
re_path(r'^web/(?P<url>(?!\d{4}(?:\d{2})?(?:\d{2})?(?:/|$)).+)$', WebAddView.as_view(), name='web-add'),
|
||||
re_path(r'^(?P<username>[^/]+)/(?P<date>\d{4}(?:\d{2})?(?:\d{2})?)/(?P<url>https?://.*)$', SnapshotPathView.as_view(), name='snapshot-path-url'),
|
||||
re_path(r'^(?P<username>[^/]+)/(?P<date>\d{4}(?:\d{2})?(?:\d{2})?)/(?P<domain>[^/]+)(?:/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:/(?P<path>.*))?)?$', SnapshotPathView.as_view(), name='snapshot-path'),
|
||||
re_path(r'^(?P<username>[^/]+)/(?P<url>https?://.*)$', SnapshotPathView.as_view(), name='snapshot-path-url-nodate'),
|
||||
re_path(r'^(?P<username>[^/]+)/(?P<domain>[^/]+)(?:/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:/(?P<path>.*))?)?$', SnapshotPathView.as_view(), name='snapshot-path-nodate'),
|
||||
|
||||
path('admin/core/snapshot/add/', RedirectView.as_view(url='/add/')),
|
||||
path('add/', AddView.as_view(), name='add'),
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
__package__ = 'archivebox.core'
|
||||
|
||||
import os
|
||||
import posixpath
|
||||
from glob import glob, escape
|
||||
from django.utils import timezone
|
||||
import inspect
|
||||
from typing import Callable, get_type_hints
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from django.shortcuts import render, redirect
|
||||
from django.http import HttpRequest, HttpResponse, Http404
|
||||
from django.http import HttpRequest, HttpResponse, Http404, HttpResponseForbidden
|
||||
from django.utils.html import format_html, mark_safe
|
||||
from django.views import View
|
||||
from django.views.generic.list import ListView
|
||||
@@ -31,6 +34,21 @@ from archivebox.misc.logging_util import printable_filesize
|
||||
from archivebox.search import query_search_index
|
||||
|
||||
from archivebox.core.models import Snapshot
|
||||
from archivebox.core.host_utils import build_snapshot_url
|
||||
|
||||
|
||||
def _files_index_target(snapshot: Snapshot, archivefile: str | None) -> str:
    """Normalize an archivefile path into the directory the files index
    should open at ('' means the snapshot root).

    'index.html' maps to the root; if the path points at an actual file on
    disk, fall back to that file's parent directory.
    """
    target = archivefile or ''
    if target == 'index.html':
        target = ''
    if (Path(snapshot.output_dir) / target).is_file():
        parent = str(Path(target).parent)
        target = '' if parent == '.' else parent
    return target
|
||||
|
||||
|
||||
from archivebox.core.forms import AddLinkForm
|
||||
from archivebox.crawls.models import Crawl
|
||||
from archivebox.hooks import get_enabled_plugins, get_plugin_name
|
||||
@@ -86,13 +104,95 @@ class SnapshotView(View):
|
||||
def render_live_index(request, snapshot):
|
||||
TITLE_LOADING_MSG = 'Not yet archived...'
|
||||
|
||||
outputs = snapshot.discover_outputs()
|
||||
hidden_card_plugins = {'archivedotorg', 'favicon', 'title'}
|
||||
outputs = [
|
||||
out for out in snapshot.discover_outputs()
|
||||
if (out.get('size') or 0) > 0 and out.get('name') not in hidden_card_plugins
|
||||
]
|
||||
archiveresults = {out['name']: out for out in outputs}
|
||||
snap_dir = Path(snapshot.output_dir)
|
||||
|
||||
# Get available extractor plugins from hooks (sorted by numeric prefix for ordering)
|
||||
# Convert to base names for display ordering
|
||||
all_plugins = [get_plugin_name(e) for e in get_enabled_plugins()]
|
||||
accounted_entries: set[str] = set()
|
||||
for output in outputs:
|
||||
output_name = output.get('name') or ''
|
||||
if output_name:
|
||||
accounted_entries.add(output_name)
|
||||
output_path = output.get('path') or ''
|
||||
if not output_path:
|
||||
continue
|
||||
parts = Path(output_path).parts
|
||||
if parts:
|
||||
accounted_entries.add(parts[0])
|
||||
|
||||
ignore_names = {
|
||||
'.DS_Store',
|
||||
'index.html',
|
||||
'index.json',
|
||||
'index.jsonl',
|
||||
'favicon.ico',
|
||||
}
|
||||
ignored_suffixes = {'.log', '.pid', '.sh'}
|
||||
max_loose_scan = 300
|
||||
|
||||
def has_meaningful_files(dir_path: Path) -> bool:
|
||||
scanned = 0
|
||||
for file_path in dir_path.rglob('*'):
|
||||
scanned += 1
|
||||
if scanned > max_loose_scan:
|
||||
return True
|
||||
if file_path.is_dir() or file_path.name.startswith('.'):
|
||||
continue
|
||||
if file_path.suffix.lower() in ignored_suffixes:
|
||||
continue
|
||||
try:
|
||||
if file_path.stat().st_size == 0:
|
||||
continue
|
||||
except OSError:
|
||||
continue
|
||||
return True
|
||||
return False
|
||||
|
||||
unaccounted_entries = []
|
||||
if snap_dir.exists():
|
||||
for entry in snap_dir.iterdir():
|
||||
name = entry.name
|
||||
if name.startswith('.') or name in ignore_names or name in accounted_entries:
|
||||
continue
|
||||
is_dir = entry.is_dir()
|
||||
is_meaningful = False
|
||||
size = None
|
||||
if is_dir:
|
||||
is_meaningful = has_meaningful_files(entry)
|
||||
elif entry.is_file():
|
||||
if entry.suffix.lower() not in ignored_suffixes:
|
||||
try:
|
||||
size = entry.stat().st_size
|
||||
is_meaningful = size > 0
|
||||
except OSError:
|
||||
size = None
|
||||
is_meaningful = False
|
||||
|
||||
unaccounted_entries.append({
|
||||
'name': name,
|
||||
'path': name,
|
||||
'is_dir': is_dir,
|
||||
'size': size,
|
||||
'is_meaningful': is_meaningful,
|
||||
})
|
||||
|
||||
unaccounted_entries.sort(key=lambda item: item['name'].lower())
|
||||
loose_items = [item for item in unaccounted_entries if item['is_meaningful']]
|
||||
failed_exclude_suffixes = {'.json', '.jsonl', '.sh', '.log'}
|
||||
failed_items = [
|
||||
item for item in unaccounted_entries
|
||||
if not item['is_meaningful']
|
||||
and not (
|
||||
not item['is_dir']
|
||||
and Path(item['name']).suffix.lower() in failed_exclude_suffixes
|
||||
)
|
||||
]
|
||||
preview_priority = [
|
||||
'singlefile',
|
||||
'screenshot',
|
||||
@@ -111,12 +211,48 @@ class SnapshotView(View):
|
||||
break
|
||||
|
||||
snapshot_info = snapshot.to_dict(extended=True)
|
||||
related_snapshots_qs = SnapshotView.find_snapshots_for_url(snapshot.url)
|
||||
related_snapshots = list(
|
||||
related_snapshots_qs.exclude(id=snapshot.id).order_by('-bookmarked_at', '-created_at', '-timestamp')[:25]
|
||||
)
|
||||
related_years_map: dict[int, list[Snapshot]] = {}
|
||||
for snap in [snapshot, *related_snapshots]:
|
||||
snap_dt = snap.bookmarked_at or snap.created_at or snap.downloaded_at
|
||||
if not snap_dt:
|
||||
continue
|
||||
related_years_map.setdefault(snap_dt.year, []).append(snap)
|
||||
related_years = []
|
||||
for year, snaps in related_years_map.items():
|
||||
snaps_sorted = sorted(
|
||||
snaps,
|
||||
key=lambda s: (s.bookmarked_at or s.created_at or s.downloaded_at or timezone.now()),
|
||||
reverse=True,
|
||||
)
|
||||
related_years.append({
|
||||
'year': year,
|
||||
'latest': snaps_sorted[0],
|
||||
'snapshots': snaps_sorted,
|
||||
})
|
||||
related_years.sort(key=lambda item: item['year'], reverse=True)
|
||||
|
||||
try:
|
||||
warc_path = 'warc/' + list(Path(snap_dir).glob('warc/*.warc.*'))[0].name
|
||||
except IndexError:
|
||||
warc_path = 'warc/'
|
||||
|
||||
ordered_outputs = sorted(
|
||||
archiveresults.values(),
|
||||
key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size'],
|
||||
)
|
||||
non_compact_outputs = [
|
||||
out for out in ordered_outputs
|
||||
if not out.get('is_compact') and not out.get('is_metadata')
|
||||
]
|
||||
compact_outputs = [
|
||||
out for out in ordered_outputs
|
||||
if out.get('is_compact') or out.get('is_metadata')
|
||||
]
|
||||
|
||||
context = {
|
||||
**snapshot_info,
|
||||
'title': htmlencode(
|
||||
@@ -131,9 +267,13 @@ class SnapshotView(View):
|
||||
'oldest_archive_date': ts_to_date_str(snapshot.oldest_archive_date),
|
||||
'warc_path': warc_path,
|
||||
'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
|
||||
'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
|
||||
'archiveresults': [*non_compact_outputs, *compact_outputs],
|
||||
'best_result': best_result,
|
||||
'snapshot': snapshot, # Pass the snapshot object for template tags
|
||||
'related_snapshots': related_snapshots,
|
||||
'related_years': related_years,
|
||||
'loose_items': loose_items,
|
||||
'failed_items': failed_items,
|
||||
}
|
||||
return render(template_name='core/snapshot_live.html', request=request, context=context)
|
||||
|
||||
@@ -168,13 +308,20 @@ class SnapshotView(View):
|
||||
target_path = f'{target_path}?{query}'
|
||||
return redirect(target_path)
|
||||
|
||||
if archivefile == 'index.html':
|
||||
if request.GET.get('files'):
|
||||
target_path = _files_index_target(snapshot, archivefile)
|
||||
response = serve_static_with_byterange_support(
|
||||
request, target_path, document_root=snapshot.output_dir, show_indexes=True,
|
||||
)
|
||||
elif archivefile == 'index.html':
|
||||
# if they requested snapshot index, serve live rendered template instead of static html
|
||||
response = self.render_live_index(request, snapshot)
|
||||
else:
|
||||
response = serve_static_with_byterange_support(
|
||||
request, archivefile, document_root=snapshot.output_dir, show_indexes=True,
|
||||
)
|
||||
target = build_snapshot_url(str(snapshot.id), archivefile, request=request)
|
||||
query = request.META.get('QUERY_STRING')
|
||||
if query:
|
||||
target = f'{target}?{query}'
|
||||
return redirect(target)
|
||||
response["Link"] = f'<{snapshot.url}>; rel="canonical"'
|
||||
return response
|
||||
except Snapshot.DoesNotExist:
|
||||
@@ -328,13 +475,16 @@ class SnapshotView(View):
|
||||
class SnapshotPathView(View):
|
||||
"""Serve snapshots by the new URL scheme: /<username>/<YYYYMMDD>/<domain>/<uuid>/..."""
|
||||
|
||||
def get(self, request, username: str, date: str, domain: str | None = None, snapshot_id: str | None = None, path: str = "", url: str | None = None):
|
||||
def get(self, request, username: str, date: str | None = None, domain: str | None = None, snapshot_id: str | None = None, path: str = "", url: str | None = None):
|
||||
if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
|
||||
return redirect(f'/admin/login/?next={request.path}')
|
||||
|
||||
if username == 'system':
|
||||
return redirect(request.path.replace('/system/', '/web/', 1))
|
||||
|
||||
if date and domain and domain == date:
|
||||
raise Http404
|
||||
|
||||
requested_url = url
|
||||
if not requested_url and domain and domain.startswith(('http://', 'https://')):
|
||||
requested_url = domain
|
||||
@@ -358,19 +508,20 @@ class SnapshotPathView(View):
|
||||
else:
|
||||
qs = Snapshot.objects.filter(crawl__created_by__username=username_lookup)
|
||||
|
||||
try:
|
||||
if len(date) == 4:
|
||||
qs = qs.filter(created_at__year=int(date))
|
||||
elif len(date) == 6:
|
||||
qs = qs.filter(created_at__year=int(date[:4]), created_at__month=int(date[4:6]))
|
||||
elif len(date) == 8:
|
||||
qs = qs.filter(
|
||||
created_at__year=int(date[:4]),
|
||||
created_at__month=int(date[4:6]),
|
||||
created_at__day=int(date[6:8]),
|
||||
)
|
||||
except ValueError:
|
||||
pass
|
||||
if date:
|
||||
try:
|
||||
if len(date) == 4:
|
||||
qs = qs.filter(created_at__year=int(date))
|
||||
elif len(date) == 6:
|
||||
qs = qs.filter(created_at__year=int(date[:4]), created_at__month=int(date[4:6]))
|
||||
elif len(date) == 8:
|
||||
qs = qs.filter(
|
||||
created_at__year=int(date[:4]),
|
||||
created_at__month=int(date[4:6]),
|
||||
created_at__day=int(date[6:8]),
|
||||
)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if requested_url:
|
||||
snapshot = qs.order_by('-created_at', '-bookmarked_at', '-timestamp').first()
|
||||
@@ -401,7 +552,10 @@ class SnapshotPathView(View):
|
||||
)
|
||||
|
||||
canonical_base = snapshot.url_path
|
||||
requested_base = f'{username}/{date}/{domain or url or ""}'
|
||||
if date:
|
||||
requested_base = f'{username}/{date}/{domain or url or ""}'
|
||||
else:
|
||||
requested_base = f'{username}/{domain or url or ""}'
|
||||
if snapshot_id:
|
||||
requested_base = f'{requested_base}/{snapshot_id}'
|
||||
if canonical_base != requested_base:
|
||||
@@ -412,6 +566,18 @@ class SnapshotPathView(View):
|
||||
return redirect(target)
|
||||
|
||||
archivefile = path or "index.html"
|
||||
if archivefile != "index.html" and not request.GET.get('files'):
|
||||
target = build_snapshot_url(str(snapshot.id), archivefile, request=request)
|
||||
query = request.META.get('QUERY_STRING')
|
||||
if query:
|
||||
target = f'{target}?{query}'
|
||||
return redirect(target)
|
||||
|
||||
if request.GET.get('files'):
|
||||
target_path = _files_index_target(snapshot, archivefile)
|
||||
return serve_static_with_byterange_support(
|
||||
request, target_path, document_root=snapshot.output_dir, show_indexes=True,
|
||||
)
|
||||
|
||||
if archivefile == "index.html":
|
||||
return SnapshotView.render_live_index(request, snapshot)
|
||||
@@ -421,6 +587,202 @@ class SnapshotPathView(View):
|
||||
)
|
||||
|
||||
|
||||
def _safe_archive_relpath(path: str) -> str | None:
|
||||
if not path:
|
||||
return ""
|
||||
cleaned = posixpath.normpath(path)
|
||||
cleaned = cleaned.lstrip("/")
|
||||
if cleaned.startswith("..") or "/../" in f"/{cleaned}/":
|
||||
return None
|
||||
return cleaned
|
||||
|
||||
|
||||
def _latest_response_match(domain: str, rel_path: str) -> tuple[Path, Path] | None:
|
||||
if not domain or not rel_path:
|
||||
return None
|
||||
domain = domain.split(":", 1)[0].lower()
|
||||
# TODO: optimize by querying output_files in DB instead of globbing filesystem
|
||||
data_root = DATA_DIR / "users"
|
||||
escaped_domain = escape(domain)
|
||||
escaped_path = escape(rel_path)
|
||||
pattern = str(data_root / "*" / "snapshots" / "*" / escaped_domain / "*" / "responses" / escaped_domain / escaped_path)
|
||||
matches = glob(pattern)
|
||||
if not matches:
|
||||
return None
|
||||
|
||||
def sort_key(match_path: str) -> tuple[str, str]:
|
||||
parts = Path(match_path).parts
|
||||
date_str = ""
|
||||
try:
|
||||
idx = parts.index("snapshots")
|
||||
date_str = parts[idx + 1]
|
||||
except Exception:
|
||||
date_str = ""
|
||||
return (date_str, match_path)
|
||||
|
||||
best = max(matches, key=sort_key)
|
||||
best_path = Path(best)
|
||||
parts = best_path.parts
|
||||
try:
|
||||
responses_idx = parts.index("responses")
|
||||
except ValueError:
|
||||
return None
|
||||
responses_root = Path(*parts[: responses_idx + 1])
|
||||
rel_to_root = Path(*parts[responses_idx + 1 :])
|
||||
return responses_root, rel_to_root
|
||||
|
||||
|
||||
def _latest_responses_root(domain: str) -> Path | None:
|
||||
if not domain:
|
||||
return None
|
||||
domain = domain.split(":", 1)[0].lower()
|
||||
data_root = DATA_DIR / "users"
|
||||
escaped_domain = escape(domain)
|
||||
pattern = str(data_root / "*" / "snapshots" / "*" / escaped_domain / "*" / "responses" / escaped_domain)
|
||||
matches = glob(pattern)
|
||||
if not matches:
|
||||
return None
|
||||
|
||||
def sort_key(match_path: str) -> tuple[str, str]:
|
||||
parts = Path(match_path).parts
|
||||
date_str = ""
|
||||
try:
|
||||
idx = parts.index("snapshots")
|
||||
date_str = parts[idx + 1]
|
||||
except Exception:
|
||||
date_str = ""
|
||||
return (date_str, match_path)
|
||||
|
||||
best = max(matches, key=sort_key)
|
||||
return Path(best)
|
||||
|
||||
|
||||
def _serve_responses_path(request, responses_root: Path, rel_path: str, show_indexes: bool):
|
||||
candidates: list[str] = []
|
||||
rel_path = rel_path or ""
|
||||
if rel_path.endswith("/"):
|
||||
rel_path = f"{rel_path}index.html"
|
||||
if "." not in Path(rel_path).name:
|
||||
candidates.append(f"{rel_path.rstrip('/')}/index.html")
|
||||
candidates.append(rel_path)
|
||||
|
||||
for candidate in candidates:
|
||||
try:
|
||||
return serve_static_with_byterange_support(
|
||||
request,
|
||||
candidate,
|
||||
document_root=str(responses_root),
|
||||
show_indexes=show_indexes,
|
||||
)
|
||||
except Http404:
|
||||
pass
|
||||
|
||||
if rel_path.endswith("index.html"):
|
||||
rel_dir = rel_path[: -len("index.html")]
|
||||
try:
|
||||
return serve_static_with_byterange_support(
|
||||
request,
|
||||
rel_dir,
|
||||
document_root=str(responses_root),
|
||||
show_indexes=True,
|
||||
)
|
||||
except Http404:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
class SnapshotHostView(View):
|
||||
"""Serve snapshot directory contents on <snapshot_id>.<listen_host>/<path>."""
|
||||
|
||||
def get(self, request, snapshot_id: str, path: str = ""):
|
||||
if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
|
||||
return HttpResponseForbidden("Public snapshots are disabled.")
|
||||
snapshot = None
|
||||
if snapshot_id:
|
||||
try:
|
||||
snapshot = Snapshot.objects.get(pk=snapshot_id)
|
||||
except Snapshot.DoesNotExist:
|
||||
try:
|
||||
snapshot = Snapshot.objects.get(id__startswith=snapshot_id)
|
||||
except Snapshot.DoesNotExist:
|
||||
snapshot = None
|
||||
except Snapshot.MultipleObjectsReturned:
|
||||
snapshot = Snapshot.objects.filter(id__startswith=snapshot_id).first()
|
||||
|
||||
if not snapshot:
|
||||
raise Http404
|
||||
|
||||
rel_path = path or ""
|
||||
show_indexes = bool(request.GET.get("files"))
|
||||
if not rel_path or rel_path.endswith("/"):
|
||||
if show_indexes:
|
||||
rel_path = rel_path.rstrip("/")
|
||||
else:
|
||||
rel_path = f"{rel_path}index.html"
|
||||
rel_path = _safe_archive_relpath(rel_path)
|
||||
if rel_path is None:
|
||||
raise Http404
|
||||
|
||||
try:
|
||||
return serve_static_with_byterange_support(
|
||||
request,
|
||||
rel_path,
|
||||
document_root=snapshot.output_dir,
|
||||
show_indexes=show_indexes,
|
||||
)
|
||||
except Http404:
|
||||
pass
|
||||
|
||||
# Fallback to responses/<domain>/<path>
|
||||
host = urlparse(snapshot.url).hostname or snapshot.domain
|
||||
responses_root = Path(snapshot.output_dir) / "responses" / host
|
||||
if responses_root.exists():
|
||||
response = _serve_responses_path(request, responses_root, rel_path, show_indexes)
|
||||
if response is not None:
|
||||
return response
|
||||
|
||||
raise Http404
|
||||
|
||||
|
||||
class OriginalDomainHostView(View):
|
||||
"""Serve responses from the most recent snapshot when using <domain>.<listen_host>/<path>."""
|
||||
|
||||
def get(self, request, domain: str, path: str = ""):
|
||||
if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
|
||||
return HttpResponseForbidden("Public snapshots are disabled.")
|
||||
rel_path = path or ""
|
||||
if not rel_path or rel_path.endswith("/"):
|
||||
rel_path = f"{rel_path}index.html"
|
||||
rel_path = _safe_archive_relpath(rel_path)
|
||||
if rel_path is None:
|
||||
raise Http404
|
||||
|
||||
domain = domain.lower()
|
||||
match = _latest_response_match(domain, rel_path)
|
||||
if not match and "." not in Path(rel_path).name:
|
||||
index_path = f"{rel_path.rstrip('/')}/index.html"
|
||||
match = _latest_response_match(domain, index_path)
|
||||
if not match and "." not in Path(rel_path).name:
|
||||
html_path = f"{rel_path}.html"
|
||||
match = _latest_response_match(domain, html_path)
|
||||
|
||||
show_indexes = bool(request.GET.get("files"))
|
||||
if match:
|
||||
responses_root, rel_to_root = match
|
||||
response = _serve_responses_path(request, responses_root, str(rel_to_root), show_indexes)
|
||||
if response is not None:
|
||||
return response
|
||||
|
||||
# If no direct match, try serving directory index from latest responses root
|
||||
responses_root = _latest_responses_root(domain)
|
||||
if responses_root:
|
||||
response = _serve_responses_path(request, responses_root, rel_path, show_indexes)
|
||||
if response is not None:
|
||||
return response
|
||||
|
||||
raise Http404
|
||||
|
||||
|
||||
class PublicIndexView(ListView):
|
||||
template_name = 'public_index.html'
|
||||
model = Snapshot
|
||||
@@ -508,7 +870,7 @@ class AddView(UserPassesTestMixin, FormView):
|
||||
'available_tags': list(Tag.objects.all().order_by('name').values_list('name', flat=True)),
|
||||
}
|
||||
|
||||
def form_valid(self, form):
|
||||
def _create_crawl_from_form(self, form, *, created_by_id=None) -> Crawl:
|
||||
urls = form.cleaned_data["url"]
|
||||
print(f'[+] Adding URL: {urls}')
|
||||
|
||||
@@ -522,13 +884,21 @@ class AddView(UserPassesTestMixin, FormView):
|
||||
update = form.cleaned_data.get("update", False)
|
||||
index_only = form.cleaned_data.get("index_only", False)
|
||||
notes = form.cleaned_data.get("notes", "")
|
||||
custom_config = form.cleaned_data.get("config", {})
|
||||
custom_config = form.cleaned_data.get("config") or {}
|
||||
|
||||
from archivebox.config.permissions import HOSTNAME
|
||||
|
||||
if created_by_id is None:
|
||||
if self.request.user.is_authenticated:
|
||||
created_by_id = self.request.user.pk
|
||||
else:
|
||||
from archivebox.base_models.models import get_or_create_system_user_pk
|
||||
created_by_id = get_or_create_system_user_pk()
|
||||
|
||||
created_by_name = self.request.user.username if self.request.user.is_authenticated else 'web'
|
||||
|
||||
# 1. save the provided urls to sources/2024-11-05__23-59-59__web_ui_add_by_user_<user_pk>.txt
|
||||
sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__web_ui_add_by_user_{self.request.user.pk}.txt'
|
||||
sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__web_ui_add_by_user_{created_by_id}.txt'
|
||||
sources_file.write_text(urls if isinstance(urls, str) else '\n'.join(urls))
|
||||
|
||||
# 2. create a new Crawl with the URLs from the file
|
||||
@@ -552,8 +922,8 @@ class AddView(UserPassesTestMixin, FormView):
|
||||
max_depth=depth,
|
||||
tags_str=tag,
|
||||
notes=notes,
|
||||
label=f'{self.request.user.username}@{HOSTNAME}{self.request.path} {timestamp}',
|
||||
created_by_id=self.request.user.pk,
|
||||
label=f'{created_by_name}@{HOSTNAME}{self.request.path} {timestamp}',
|
||||
created_by_id=created_by_id,
|
||||
config=config
|
||||
)
|
||||
|
||||
@@ -566,7 +936,7 @@ class AddView(UserPassesTestMixin, FormView):
|
||||
is_enabled=True,
|
||||
label=crawl.label,
|
||||
notes=f"Auto-created from add page. {notes}".strip(),
|
||||
created_by_id=self.request.user.pk,
|
||||
created_by_id=created_by_id,
|
||||
)
|
||||
crawl.schedule = crawl_schedule
|
||||
crawl.save(update_fields=['schedule'])
|
||||
@@ -576,7 +946,13 @@ class AddView(UserPassesTestMixin, FormView):
|
||||
# from archivebox.crawls.actors import CrawlActor
|
||||
# from archivebox.core.actors import SnapshotActor, ArchiveResultActor
|
||||
|
||||
return crawl
|
||||
|
||||
def form_valid(self, form):
|
||||
crawl = self._create_crawl_from_form(form)
|
||||
|
||||
urls = form.cleaned_data["url"]
|
||||
schedule = form.cleaned_data.get("schedule", "").strip()
|
||||
rough_url_count = urls.count('://')
|
||||
|
||||
# Build success message with schedule link if created
|
||||
@@ -593,6 +969,74 @@ class AddView(UserPassesTestMixin, FormView):
|
||||
return redirect(crawl.admin_change_url)
|
||||
|
||||
|
||||
class WebAddView(AddView):
|
||||
def _latest_snapshot_for_url(self, requested_url: str):
|
||||
return SnapshotView.find_snapshots_for_url(requested_url).order_by(
|
||||
'-created_at', '-bookmarked_at', '-timestamp'
|
||||
).first()
|
||||
|
||||
def _normalize_add_url(self, requested_url: str) -> str:
|
||||
if requested_url.startswith(('http://', 'https://')):
|
||||
return requested_url
|
||||
return f'https://{requested_url}'
|
||||
|
||||
def dispatch(self, request, *args, **kwargs):
|
||||
requested_url = urldecode(kwargs.get('url', '') or '')
|
||||
if requested_url:
|
||||
snapshot = self._latest_snapshot_for_url(requested_url)
|
||||
if snapshot:
|
||||
return redirect(f'/{snapshot.url_path}')
|
||||
|
||||
if not self.test_func():
|
||||
return HttpResponse(
|
||||
format_html(
|
||||
(
|
||||
'<center><br/><br/><br/>'
|
||||
'No Snapshots match the given url: <code>{}</code><br/><br/><br/>'
|
||||
'Return to the <a href="/" target="_top">Main Index</a>'
|
||||
'</center>'
|
||||
),
|
||||
requested_url or '',
|
||||
),
|
||||
content_type="text/html",
|
||||
status=404,
|
||||
)
|
||||
|
||||
return super().dispatch(request, *args, **kwargs)
|
||||
|
||||
def get(self, request, url: str):
|
||||
requested_url = urldecode(url)
|
||||
if not requested_url:
|
||||
raise Http404
|
||||
|
||||
snapshot = self._latest_snapshot_for_url(requested_url)
|
||||
if snapshot:
|
||||
return redirect(f'/{snapshot.url_path}')
|
||||
|
||||
add_url = self._normalize_add_url(requested_url)
|
||||
defaults_form = self.form_class()
|
||||
form_data = {
|
||||
'url': add_url,
|
||||
'depth': defaults_form.fields['depth'].initial or '0',
|
||||
'persona': defaults_form.fields['persona'].initial or 'Default',
|
||||
'config': {},
|
||||
}
|
||||
if defaults_form.fields['update'].initial:
|
||||
form_data['update'] = 'on'
|
||||
if defaults_form.fields['overwrite'].initial:
|
||||
form_data['overwrite'] = 'on'
|
||||
if defaults_form.fields['index_only'].initial:
|
||||
form_data['index_only'] = 'on'
|
||||
|
||||
form = self.form_class(data=form_data)
|
||||
if not form.is_valid():
|
||||
return self.form_invalid(form)
|
||||
|
||||
crawl = self._create_crawl_from_form(form)
|
||||
snapshot = Snapshot.from_json({'url': add_url, 'tags': form.cleaned_data.get('tag', '')}, overrides={'crawl': crawl})
|
||||
return redirect(f'/{snapshot.url_path}')
|
||||
|
||||
|
||||
class HealthCheckView(View):
|
||||
"""
|
||||
A Django view that renders plain text "OK" for service discovery tools
|
||||
@@ -617,11 +1061,19 @@ def live_progress_view(request):
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
from archivebox.crawls.models import Crawl
|
||||
from archivebox.core.models import Snapshot, ArchiveResult
|
||||
from archivebox.machine.models import Process, Machine
|
||||
from django.db.models import Case, When, Value, IntegerField
|
||||
|
||||
# Get orchestrator status
|
||||
orchestrator_running = Orchestrator.is_running()
|
||||
total_workers = Orchestrator().get_total_worker_count() if orchestrator_running else 0
|
||||
machine = Machine.current()
|
||||
orchestrator_proc = Process.objects.filter(
|
||||
machine=machine,
|
||||
process_type=Process.TypeChoices.ORCHESTRATOR,
|
||||
status=Process.StatusChoices.RUNNING,
|
||||
).order_by('-started_at').first()
|
||||
orchestrator_pid = orchestrator_proc.pid if orchestrator_proc else None
|
||||
|
||||
# Get model counts by status
|
||||
crawls_pending = Crawl.objects.filter(status=Crawl.StatusChoices.QUEUED).count()
|
||||
@@ -653,24 +1105,47 @@ def live_progress_view(request):
|
||||
ext = embed.lower().split('.')[-1] if '.' in embed else ''
|
||||
is_embeddable = ext in ('png', 'jpg', 'jpeg', 'gif', 'webp', 'svg', 'ico', 'pdf', 'html')
|
||||
if is_embeddable or ar.plugin in ('screenshot', 'favicon', 'dom'):
|
||||
archive_path = embed or ''
|
||||
recent_thumbnails.append({
|
||||
'id': str(ar.id),
|
||||
'plugin': ar.plugin,
|
||||
'snapshot_id': str(ar.snapshot_id),
|
||||
'snapshot_url': ar.snapshot.url[:60] if ar.snapshot else '',
|
||||
'embed_path': embed,
|
||||
'archive_path': f'/{ar.snapshot.archive_path}/{embed}' if ar.snapshot else '',
|
||||
'archive_path': archive_path,
|
||||
'archive_url': build_snapshot_url(str(ar.snapshot_id), archive_path, request=request) if archive_path else '',
|
||||
'end_ts': ar.end_ts.isoformat() if ar.end_ts else None,
|
||||
})
|
||||
|
||||
# Build hierarchical active crawls with nested snapshots and archive results
|
||||
from django.db.models import Prefetch
|
||||
|
||||
running_workers = Process.objects.filter(
|
||||
machine=machine,
|
||||
process_type=Process.TypeChoices.WORKER,
|
||||
status=Process.StatusChoices.RUNNING,
|
||||
)
|
||||
crawl_worker_pids: dict[str, int] = {}
|
||||
snapshot_worker_pids: dict[str, int] = {}
|
||||
for proc in running_workers:
|
||||
env = proc.env or {}
|
||||
if not isinstance(env, dict):
|
||||
continue
|
||||
if proc.worker_type == 'crawl':
|
||||
crawl_id = env.get('CRAWL_ID')
|
||||
if crawl_id:
|
||||
crawl_worker_pids[str(crawl_id)] = proc.pid
|
||||
elif proc.worker_type == 'snapshot':
|
||||
snapshot_id = env.get('SNAPSHOT_ID')
|
||||
if snapshot_id:
|
||||
snapshot_worker_pids[str(snapshot_id)] = proc.pid
|
||||
|
||||
active_crawls_qs = Crawl.objects.filter(
|
||||
status__in=[Crawl.StatusChoices.QUEUED, Crawl.StatusChoices.STARTED]
|
||||
).prefetch_related(
|
||||
'snapshot_set',
|
||||
'snapshot_set__archiveresult_set',
|
||||
'snapshot_set__archiveresult_set__process',
|
||||
).distinct().order_by('-modified_at')[:10]
|
||||
|
||||
active_crawls = []
|
||||
@@ -710,8 +1185,9 @@ def live_progress_view(request):
|
||||
failed_plugins = sum(1 for ar in snapshot_results if ar.status == ArchiveResult.StatusChoices.FAILED)
|
||||
pending_plugins = sum(1 for ar in snapshot_results if ar.status == ArchiveResult.StatusChoices.QUEUED)
|
||||
|
||||
# Calculate snapshot progress
|
||||
snapshot_progress = int(((completed_plugins + failed_plugins) / total_plugins) * 100) if total_plugins > 0 else 0
|
||||
# Calculate snapshot progress using per-plugin progress
|
||||
now = timezone.now()
|
||||
plugin_progress_values: list[int] = []
|
||||
|
||||
# Get all extractor plugins for this snapshot (already prefetched, sort in Python)
|
||||
# Order: started first, then queued, then completed
|
||||
@@ -724,14 +1200,42 @@ def live_progress_view(request):
|
||||
}
|
||||
return (status_order.get(ar.status, 4), ar.plugin)
|
||||
|
||||
all_plugins = [
|
||||
{
|
||||
all_plugins = []
|
||||
for ar in sorted(snapshot_results, key=plugin_sort_key):
|
||||
status = ar.status
|
||||
progress_value = 0
|
||||
if status in (
|
||||
ArchiveResult.StatusChoices.SUCCEEDED,
|
||||
ArchiveResult.StatusChoices.FAILED,
|
||||
ArchiveResult.StatusChoices.SKIPPED,
|
||||
):
|
||||
progress_value = 100
|
||||
elif status == ArchiveResult.StatusChoices.STARTED:
|
||||
started_at = ar.start_ts or (ar.process.started_at if ar.process_id and ar.process else None)
|
||||
timeout = ar.timeout or 120
|
||||
if started_at and timeout:
|
||||
elapsed = max(0.0, (now - started_at).total_seconds())
|
||||
progress_value = int(min(99, max(1, (elapsed / float(timeout)) * 100)))
|
||||
else:
|
||||
progress_value = 1
|
||||
else:
|
||||
progress_value = 0
|
||||
|
||||
plugin_progress_values.append(progress_value)
|
||||
|
||||
plugin_payload = {
|
||||
'id': str(ar.id),
|
||||
'plugin': ar.plugin,
|
||||
'status': ar.status,
|
||||
'status': status,
|
||||
}
|
||||
for ar in sorted(snapshot_results, key=plugin_sort_key)
|
||||
]
|
||||
if ar.process_id and ar.process and ar.process.status == Process.StatusChoices.RUNNING:
|
||||
plugin_payload['pid'] = ar.process.pid
|
||||
if status == ArchiveResult.StatusChoices.STARTED:
|
||||
plugin_payload['progress'] = progress_value
|
||||
plugin_payload['timeout'] = ar.timeout or 120
|
||||
all_plugins.append(plugin_payload)
|
||||
|
||||
snapshot_progress = int(sum(plugin_progress_values) / total_plugins) if total_plugins > 0 else 0
|
||||
|
||||
active_snapshots_for_crawl.append({
|
||||
'id': str(snapshot.id),
|
||||
@@ -744,6 +1248,7 @@ def live_progress_view(request):
|
||||
'failed_plugins': failed_plugins,
|
||||
'pending_plugins': pending_plugins,
|
||||
'all_plugins': all_plugins,
|
||||
'worker_pid': snapshot_worker_pids.get(str(snapshot.id)),
|
||||
})
|
||||
|
||||
# Check if crawl can start (for debugging stuck crawls)
|
||||
@@ -772,10 +1277,12 @@ def live_progress_view(request):
|
||||
'urls_preview': urls_preview,
|
||||
'retry_at_future': retry_at_future,
|
||||
'seconds_until_retry': seconds_until_retry,
|
||||
'worker_pid': crawl_worker_pids.get(str(crawl.id)),
|
||||
})
|
||||
|
||||
return JsonResponse({
|
||||
'orchestrator_running': orchestrator_running,
|
||||
'orchestrator_pid': orchestrator_pid,
|
||||
'total_workers': total_workers,
|
||||
'crawls_pending': crawls_pending,
|
||||
'crawls_started': crawls_started,
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
__package__ = 'archivebox.core'
|
||||
|
||||
import json
|
||||
import re
|
||||
import hashlib
|
||||
from django import forms
|
||||
from django.utils.html import escape
|
||||
from django.utils.safestring import mark_safe
|
||||
|
||||
|
||||
class TagEditorWidget(forms.Widget):
|
||||
@@ -27,6 +30,23 @@ class TagEditorWidget(forms.Widget):
|
||||
"""Escape HTML entities in value."""
|
||||
return escape(str(value)) if value else ''
|
||||
|
||||
def _normalize_id(self, value):
|
||||
"""Normalize IDs for HTML + JS usage (letters, digits, underscore; JS-safe start)."""
|
||||
normalized = re.sub(r'[^A-Za-z0-9_]', '_', str(value))
|
||||
if not normalized or not re.match(r'[A-Za-z_]', normalized):
|
||||
normalized = f't_{normalized}'
|
||||
return normalized
|
||||
|
||||
def _tag_style(self, value):
|
||||
"""Compute a stable pastel color style for a tag value."""
|
||||
tag = (value or '').strip().lower()
|
||||
digest = hashlib.md5(tag.encode('utf-8')).hexdigest()
|
||||
hue = int(digest[:4], 16) % 360
|
||||
bg = f'hsl({hue}, 70%, 92%)'
|
||||
border = f'hsl({hue}, 60%, 82%)'
|
||||
fg = f'hsl({hue}, 35%, 28%)'
|
||||
return f'--tag-bg: {bg}; --tag-border: {border}; --tag-fg: {fg};'
|
||||
|
||||
def render(self, name, value, attrs=None, renderer=None):
|
||||
"""
|
||||
Render the tag editor widget.
|
||||
@@ -67,13 +87,14 @@ class TagEditorWidget(forms.Widget):
|
||||
elif isinstance(value, str):
|
||||
tags = sorted([t.strip() for t in value.split(',') if t.strip()])
|
||||
|
||||
widget_id = attrs.get('id', name) if attrs else name
|
||||
widget_id_raw = attrs.get('id', name) if attrs else name
|
||||
widget_id = self._normalize_id(widget_id_raw)
|
||||
|
||||
# Build pills HTML
|
||||
pills_html = ''
|
||||
for tag in tags:
|
||||
pills_html += f'''
|
||||
<span class="tag-pill" data-tag="{self._escape(tag)}">
|
||||
<span class="tag-pill" data-tag="{self._escape(tag)}" style="{self._tag_style(tag)}">
|
||||
{self._escape(tag)}
|
||||
<button type="button" class="tag-remove-btn" data-tag-name="{self._escape(tag)}">×</button>
|
||||
</span>
|
||||
@@ -92,6 +113,7 @@ class TagEditorWidget(forms.Widget):
|
||||
placeholder="Add tag..."
|
||||
autocomplete="off"
|
||||
onkeydown="handleTagKeydown_{widget_id}(event)"
|
||||
onkeypress="if(event.key==='Enter' || event.keyCode===13){{event.preventDefault(); event.stopPropagation();}}"
|
||||
oninput="fetchTagAutocomplete_{widget_id}(this.value)"
|
||||
>
|
||||
<datalist id="{widget_id}_datalist"></datalist>
|
||||
@@ -112,6 +134,47 @@ class TagEditorWidget(forms.Widget):
|
||||
document.getElementById('{widget_id}').value = currentTags_{widget_id}.join(',');
|
||||
}};
|
||||
|
||||
function computeTagStyle_{widget_id}(tagName) {{
|
||||
var hash = 0;
|
||||
var name = String(tagName || '').toLowerCase();
|
||||
for (var i = 0; i < name.length; i++) {{
|
||||
hash = (hash * 31 + name.charCodeAt(i)) % 360;
|
||||
}}
|
||||
var bg = 'hsl(' + hash + ', 70%, 92%)';
|
||||
var border = 'hsl(' + hash + ', 60%, 82%)';
|
||||
var fg = 'hsl(' + hash + ', 35%, 28%)';
|
||||
return {{ bg: bg, border: border, fg: fg }};
|
||||
}}
|
||||
|
||||
function applyTagStyle_{widget_id}(el, tagName) {{
|
||||
var colors = computeTagStyle_{widget_id}(tagName);
|
||||
el.style.setProperty('--tag-bg', colors.bg);
|
||||
el.style.setProperty('--tag-border', colors.border);
|
||||
el.style.setProperty('--tag-fg', colors.fg);
|
||||
}}
|
||||
|
||||
function getApiKey() {{
|
||||
return (window.ARCHIVEBOX_API_KEY || '').trim();
|
||||
}}
|
||||
|
||||
function buildApiUrl(path) {{
|
||||
var apiKey = getApiKey();
|
||||
if (!apiKey) return path;
|
||||
var sep = path.indexOf('?') !== -1 ? '&' : '?';
|
||||
return path + sep + 'api_key=' + encodeURIComponent(apiKey);
|
||||
}}
|
||||
|
||||
function buildApiHeaders() {{
|
||||
var headers = {{
|
||||
'Content-Type': 'application/json',
|
||||
}};
|
||||
var apiKey = getApiKey();
|
||||
if (apiKey) headers['X-ArchiveBox-API-Key'] = apiKey;
|
||||
var csrfToken = getCSRFToken();
|
||||
if (csrfToken) headers['X-CSRFToken'] = csrfToken;
|
||||
return headers;
|
||||
}}
|
||||
|
||||
window.addTag_{widget_id} = function(tagName) {{
|
||||
tagName = tagName.trim();
|
||||
if (!tagName) return;
|
||||
@@ -139,12 +202,9 @@ class TagEditorWidget(forms.Widget):
|
||||
document.getElementById('{widget_id}_input').value = '';
|
||||
|
||||
// Create tag via API if it doesn't exist (fire and forget)
|
||||
fetch('/api/v1/core/tags/create/', {{
|
||||
fetch(buildApiUrl('/api/v1/core/tags/create/'), {{
|
||||
method: 'POST',
|
||||
headers: {{
|
||||
'Content-Type': 'application/json',
|
||||
'X-CSRFToken': getCSRFToken()
|
||||
}},
|
||||
headers: buildApiHeaders(),
|
||||
body: JSON.stringify({{ name: tagName }})
|
||||
}}).catch(function(err) {{
|
||||
console.log('Tag creation note:', err);
|
||||
@@ -166,6 +226,7 @@ class TagEditorWidget(forms.Widget):
|
||||
var pill = document.createElement('span');
|
||||
pill.className = 'tag-pill';
|
||||
pill.setAttribute('data-tag', tag);
|
||||
applyTagStyle_{widget_id}(pill, tag);
|
||||
|
||||
var tagText = document.createTextNode(tag);
|
||||
pill.appendChild(tagText);
|
||||
@@ -195,14 +256,16 @@ class TagEditorWidget(forms.Widget):
|
||||
var input = event.target;
|
||||
var value = input.value.trim();
|
||||
|
||||
if (event.key === 'Enter' || event.key === ' ' || event.key === ',') {{
|
||||
if (event.key === 'Enter' || event.keyCode === 13 || event.key === ' ' || event.key === ',') {{
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
if (value) {{
|
||||
// Handle comma-separated values
|
||||
value.split(',').forEach(function(tag) {{
|
||||
addTag_{widget_id}(tag.trim());
|
||||
}});
|
||||
}}
|
||||
return false;
|
||||
}} else if (event.key === 'Backspace' && !value && currentTags_{widget_id}.length > 0) {{
|
||||
// Remove last tag on backspace when input is empty
|
||||
var lastTag = currentTags_{widget_id}.pop();
|
||||
@@ -222,7 +285,7 @@ class TagEditorWidget(forms.Widget):
|
||||
return;
|
||||
}}
|
||||
|
||||
fetch('/api/v1/core/tags/autocomplete/?q=' + encodeURIComponent(query))
|
||||
fetch(buildApiUrl('/api/v1/core/tags/autocomplete/?q=' + encodeURIComponent(query)))
|
||||
.then(function(response) {{ return response.json(); }})
|
||||
.then(function(data) {{
|
||||
var datalist = document.getElementById('{widget_id}_datalist');
|
||||
@@ -261,7 +324,7 @@ class TagEditorWidget(forms.Widget):
|
||||
</script>
|
||||
'''
|
||||
|
||||
return html
|
||||
return mark_safe(html)
|
||||
|
||||
|
||||
class InlineTagEditorWidget(TagEditorWidget):
|
||||
@@ -295,20 +358,23 @@ class InlineTagEditorWidget(TagEditorWidget):
|
||||
tag_data.sort(key=lambda x: x['name'].lower())
|
||||
tags = [t['name'] for t in tag_data]
|
||||
|
||||
widget_id = f"inline_tags_{snapshot_id}" if snapshot_id else (attrs.get('id', name) if attrs else name)
|
||||
widget_id_raw = f"inline_tags_{snapshot_id}" if snapshot_id else (attrs.get('id', name) if attrs else name)
|
||||
widget_id = self._normalize_id(widget_id_raw)
|
||||
|
||||
# Build pills HTML with filter links
|
||||
pills_html = ''
|
||||
for td in tag_data:
|
||||
pills_html += f'''
|
||||
<span class="tag-pill" data-tag="{self._escape(td['name'])}" data-tag-id="{td['id']}">
|
||||
<span class="tag-pill" data-tag="{self._escape(td['name'])}" data-tag-id="{td['id']}" style="{self._tag_style(td['name'])}">
|
||||
<a href="/admin/core/snapshot/?tags__id__exact={td['id']}" class="tag-link">{self._escape(td['name'])}</a>
|
||||
<button type="button" class="tag-remove-btn" data-tag-id="{td['id']}" data-tag-name="{self._escape(td['name'])}">×</button>
|
||||
</span>
|
||||
'''
|
||||
|
||||
tags_json = escape(json.dumps(tag_data))
|
||||
|
||||
html = f'''
|
||||
<span id="{widget_id}_container" class="tag-editor-inline" onclick="focusInlineTagInput_{widget_id}(event)">
|
||||
<span id="{widget_id}_container" class="tag-editor-inline" data-snapshot-id="{snapshot_id}" data-tags="{tags_json}">
|
||||
<span id="{widget_id}_pills" class="tag-pills-inline">
|
||||
{pills_html}
|
||||
</span>
|
||||
@@ -318,195 +384,10 @@ class InlineTagEditorWidget(TagEditorWidget):
|
||||
list="{widget_id}_datalist"
|
||||
placeholder="+"
|
||||
autocomplete="off"
|
||||
onkeydown="handleInlineTagKeydown_{widget_id}(event)"
|
||||
oninput="fetchInlineTagAutocomplete_{widget_id}(this.value)"
|
||||
onfocus="this.placeholder='add tag...'"
|
||||
onblur="this.placeholder='+'"
|
||||
data-inline-tag-input="1"
|
||||
>
|
||||
<datalist id="{widget_id}_datalist"></datalist>
|
||||
</span>
|
||||
|
||||
<script>
|
||||
(function() {{
|
||||
var snapshotId_{widget_id} = '{snapshot_id}';
|
||||
var currentTagData_{widget_id} = {json.dumps(tag_data)};
|
||||
var autocompleteTimeout_{widget_id} = null;
|
||||
|
||||
window.focusInlineTagInput_{widget_id} = function(event) {{
|
||||
event.stopPropagation();
|
||||
if (event.target.classList.contains('tag-remove-btn') || event.target.classList.contains('tag-link')) return;
|
||||
document.getElementById('{widget_id}_input').focus();
|
||||
}};
|
||||
|
||||
window.addInlineTag_{widget_id} = function(tagName) {{
|
||||
tagName = tagName.trim();
|
||||
if (!tagName) return;
|
||||
|
||||
// Check if tag already exists
|
||||
var exists = currentTagData_{widget_id}.some(function(t) {{
|
||||
return t.name.toLowerCase() === tagName.toLowerCase();
|
||||
}});
|
||||
if (exists) {{
|
||||
document.getElementById('{widget_id}_input').value = '';
|
||||
return;
|
||||
}}
|
||||
|
||||
// Add via API
|
||||
fetch('/api/v1/core/tags/add-to-snapshot/', {{
|
||||
method: 'POST',
|
||||
headers: {{
|
||||
'Content-Type': 'application/json',
|
||||
'X-CSRFToken': getCSRFToken()
|
||||
}},
|
||||
body: JSON.stringify({{
|
||||
snapshot_id: snapshotId_{widget_id},
|
||||
tag_name: tagName
|
||||
}})
|
||||
}})
|
||||
.then(function(response) {{ return response.json(); }})
|
||||
.then(function(data) {{
|
||||
if (data.success) {{
|
||||
currentTagData_{widget_id}.push({{ id: data.tag_id, name: data.tag_name }});
|
||||
currentTagData_{widget_id}.sort(function(a, b) {{
|
||||
return a.name.toLowerCase().localeCompare(b.name.toLowerCase());
|
||||
}});
|
||||
rebuildInlinePills_{widget_id}();
|
||||
}}
|
||||
}})
|
||||
.catch(function(err) {{
|
||||
console.error('Error adding tag:', err);
|
||||
}});
|
||||
|
||||
document.getElementById('{widget_id}_input').value = '';
|
||||
}};
|
||||
|
||||
window.removeInlineTag_{widget_id} = function(tagId) {{
|
||||
fetch('/api/v1/core/tags/remove-from-snapshot/', {{
|
||||
method: 'POST',
|
||||
headers: {{
|
||||
'Content-Type': 'application/json',
|
||||
'X-CSRFToken': getCSRFToken()
|
||||
}},
|
||||
body: JSON.stringify({{
|
||||
snapshot_id: snapshotId_{widget_id},
|
||||
tag_id: tagId
|
||||
}})
|
||||
}})
|
||||
.then(function(response) {{ return response.json(); }})
|
||||
.then(function(data) {{
|
||||
if (data.success) {{
|
||||
currentTagData_{widget_id} = currentTagData_{widget_id}.filter(function(t) {{
|
||||
return t.id !== tagId;
|
||||
}});
|
||||
rebuildInlinePills_{widget_id}();
|
||||
}}
|
||||
}})
|
||||
.catch(function(err) {{
|
||||
console.error('Error removing tag:', err);
|
||||
}});
|
||||
}};
|
||||
|
||||
window.rebuildInlinePills_{widget_id} = function() {{
|
||||
var container = document.getElementById('{widget_id}_pills');
|
||||
container.innerHTML = '';
|
||||
currentTagData_{widget_id}.forEach(function(td) {{
|
||||
var pill = document.createElement('span');
|
||||
pill.className = 'tag-pill';
|
||||
pill.setAttribute('data-tag', td.name);
|
||||
pill.setAttribute('data-tag-id', td.id);
|
||||
|
||||
var link = document.createElement('a');
|
||||
link.href = '/admin/core/snapshot/?tags__id__exact=' + td.id;
|
||||
link.className = 'tag-link';
|
||||
link.textContent = td.name;
|
||||
pill.appendChild(link);
|
||||
|
||||
var removeBtn = document.createElement('button');
|
||||
removeBtn.type = 'button';
|
||||
removeBtn.className = 'tag-remove-btn';
|
||||
removeBtn.setAttribute('data-tag-id', td.id);
|
||||
removeBtn.setAttribute('data-tag-name', td.name);
|
||||
removeBtn.innerHTML = '×';
|
||||
pill.appendChild(removeBtn);
|
||||
|
||||
container.appendChild(pill);
|
||||
}});
|
||||
}};
|
||||
|
||||
// Add event delegation for remove buttons
|
||||
document.getElementById('{widget_id}_pills').addEventListener('click', function(event) {{
|
||||
if (event.target.classList.contains('tag-remove-btn')) {{
|
||||
event.stopPropagation();
|
||||
event.preventDefault();
|
||||
var tagId = parseInt(event.target.getAttribute('data-tag-id'), 10);
|
||||
if (tagId) {{
|
||||
removeInlineTag_{widget_id}(tagId);
|
||||
}}
|
||||
}}
|
||||
}});
|
||||
|
||||
window.handleInlineTagKeydown_{widget_id} = function(event) {{
|
||||
event.stopPropagation();
|
||||
var input = event.target;
|
||||
var value = input.value.trim();
|
||||
|
||||
if (event.key === 'Enter' || event.key === ',') {{
|
||||
event.preventDefault();
|
||||
if (value) {{
|
||||
value.split(',').forEach(function(tag) {{
|
||||
addInlineTag_{widget_id}(tag.trim());
|
||||
}});
|
||||
}}
|
||||
}}
|
||||
}};
|
||||
|
||||
window.fetchInlineTagAutocomplete_{widget_id} = function(query) {{
|
||||
if (autocompleteTimeout_{widget_id}) {{
|
||||
clearTimeout(autocompleteTimeout_{widget_id});
|
||||
}}
|
||||
|
||||
autocompleteTimeout_{widget_id} = setTimeout(function() {{
|
||||
if (!query || query.length < 1) {{
|
||||
document.getElementById('{widget_id}_datalist').innerHTML = '';
|
||||
return;
|
||||
}}
|
||||
|
||||
fetch('/api/v1/core/tags/autocomplete/?q=' + encodeURIComponent(query))
|
||||
.then(function(response) {{ return response.json(); }})
|
||||
.then(function(data) {{
|
||||
var datalist = document.getElementById('{widget_id}_datalist');
|
||||
datalist.innerHTML = '';
|
||||
(data.tags || []).forEach(function(tag) {{
|
||||
var option = document.createElement('option');
|
||||
option.value = tag.name;
|
||||
datalist.appendChild(option);
|
||||
}});
|
||||
}})
|
||||
.catch(function(err) {{
|
||||
console.log('Autocomplete error:', err);
|
||||
}});
|
||||
}}, 150);
|
||||
}};
|
||||
|
||||
function escapeHtml(text) {{
|
||||
var div = document.createElement('div');
|
||||
div.textContent = text;
|
||||
return div.innerHTML;
|
||||
}}
|
||||
|
||||
function getCSRFToken() {{
|
||||
var cookies = document.cookie.split(';');
|
||||
for (var i = 0; i < cookies.length; i++) {{
|
||||
var cookie = cookies[i].trim();
|
||||
if (cookie.startsWith('csrftoken=')) {{
|
||||
return cookie.substring('csrftoken='.length);
|
||||
}}
|
||||
}}
|
||||
var input = document.querySelector('input[name="csrfmiddlewaretoken"]');
|
||||
return input ? input.value : '';
|
||||
}}
|
||||
}})();
|
||||
</script>
|
||||
'''
|
||||
|
||||
return html
|
||||
return mark_safe(html)
|
||||
|
||||
Reference in New Issue
Block a user