mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
wip
This commit is contained in:
@@ -1,10 +1,11 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
__order__ = 100
|
||||
|
||||
|
||||
def register_admin(admin_site):
|
||||
"""Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) with the admin site"""
|
||||
from archivebox.core.admin import register_admin as do_register
|
||||
|
||||
do_register(admin_site)
|
||||
|
||||
|
||||
@@ -17,11 +18,12 @@ def get_CONFIG():
|
||||
ARCHIVING_CONFIG,
|
||||
SEARCH_BACKEND_CONFIG,
|
||||
)
|
||||
|
||||
return {
|
||||
'SHELL_CONFIG': SHELL_CONFIG,
|
||||
'STORAGE_CONFIG': STORAGE_CONFIG,
|
||||
'GENERAL_CONFIG': GENERAL_CONFIG,
|
||||
'SERVER_CONFIG': SERVER_CONFIG,
|
||||
'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
|
||||
'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
|
||||
"SHELL_CONFIG": SHELL_CONFIG,
|
||||
"STORAGE_CONFIG": STORAGE_CONFIG,
|
||||
"GENERAL_CONFIG": GENERAL_CONFIG,
|
||||
"SERVER_CONFIG": SERVER_CONFIG,
|
||||
"ARCHIVING_CONFIG": ARCHIVING_CONFIG,
|
||||
"SEARCHBACKEND_CONFIG": SEARCH_BACKEND_CONFIG,
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
|
||||
from django.contrib.auth import get_user_model
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
|
||||
import html
|
||||
import json
|
||||
@@ -21,57 +21,45 @@ from django.utils.text import smart_split
|
||||
|
||||
from archivebox.config import DATA_DIR
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
from archivebox.misc.paginators import AccelleratedPaginator
|
||||
from archivebox.misc.paginators import AcceleratedPaginator
|
||||
from archivebox.base_models.admin import BaseModelAdmin
|
||||
from archivebox.hooks import get_plugin_icon
|
||||
from archivebox.core.host_utils import build_snapshot_url
|
||||
from archivebox.core.widgets import InlineTagEditorWidget
|
||||
from archivebox.core.views import LIVE_PLUGIN_BASE_URL
|
||||
from archivebox.machine.env_utils import env_to_shell_exports
|
||||
|
||||
|
||||
from archivebox.core.models import ArchiveResult, Snapshot
|
||||
|
||||
|
||||
def _stringify_env_value(value) -> str:
|
||||
if value is None:
|
||||
return ''
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
return json.dumps(value, separators=(',', ':'))
|
||||
|
||||
|
||||
def _quote_shell_string(value: str) -> str:
|
||||
return "'" + str(value).replace("'", "'\"'\"'") + "'"
|
||||
|
||||
|
||||
def _get_replay_source_url(result: ArchiveResult) -> str:
|
||||
process_env = getattr(getattr(result, 'process', None), 'env', None) or {}
|
||||
return str(process_env.get('SOURCE_URL') or result.snapshot.url or '')
|
||||
process_env = getattr(getattr(result, "process", None), "env", None) or {}
|
||||
return str(process_env.get("SOURCE_URL") or result.snapshot.url or "")
|
||||
|
||||
|
||||
def build_abx_dl_display_command(result: ArchiveResult) -> str:
|
||||
source_url = _get_replay_source_url(result)
|
||||
plugin_name = str(result.plugin or '').strip()
|
||||
plugin_name = str(result.plugin or "").strip()
|
||||
if not plugin_name and not source_url:
|
||||
return 'abx-dl'
|
||||
return "abx-dl"
|
||||
if not source_url:
|
||||
return f'abx-dl --plugins={plugin_name}'
|
||||
return f'abx-dl --plugins={plugin_name} {_quote_shell_string(source_url)}'
|
||||
return f"abx-dl --plugins={plugin_name}"
|
||||
return f"abx-dl --plugins={plugin_name} {_quote_shell_string(source_url)}"
|
||||
|
||||
|
||||
def build_abx_dl_replay_command(result: ArchiveResult) -> str:
|
||||
display_command = build_abx_dl_display_command(result)
|
||||
process = getattr(result, 'process', None)
|
||||
env = getattr(process, 'env', None) or {}
|
||||
env_items = ' '.join(
|
||||
f'{key}={shlex.quote(_stringify_env_value(value))}'
|
||||
for key, value in sorted(env.items())
|
||||
if value is not None
|
||||
)
|
||||
process = getattr(result, "process", None)
|
||||
env_items = env_to_shell_exports(getattr(process, "env", None) or {})
|
||||
snapshot_dir = shlex.quote(str(result.snapshot_dir))
|
||||
if env_items:
|
||||
return f'cd {snapshot_dir}; env {env_items} {display_command}'
|
||||
return f'cd {snapshot_dir}; {display_command}'
|
||||
return f"cd {snapshot_dir}; env {env_items} {display_command}"
|
||||
return f"cd {snapshot_dir}; {display_command}"
|
||||
|
||||
|
||||
def get_plugin_admin_url(plugin_name: str) -> str:
|
||||
@@ -81,50 +69,87 @@ def get_plugin_admin_url(plugin_name: str) -> str:
|
||||
if plugin_dir:
|
||||
builtin_root = BUILTIN_PLUGINS_DIR.resolve()
|
||||
if plugin_dir.is_relative_to(builtin_root):
|
||||
return f'{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/'
|
||||
return f"{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/"
|
||||
|
||||
user_root = USER_PLUGINS_DIR.resolve()
|
||||
if plugin_dir.is_relative_to(user_root):
|
||||
return f'{LIVE_PLUGIN_BASE_URL}user.{quote(plugin_name)}/'
|
||||
return f"{LIVE_PLUGIN_BASE_URL}user.{quote(plugin_name)}/"
|
||||
|
||||
return f'{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/'
|
||||
return f"{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/"
|
||||
|
||||
|
||||
def render_archiveresults_list(archiveresults_qs, limit=50):
|
||||
"""Render a nice inline list view of archive results with status, plugin, output, and actions."""
|
||||
|
||||
results = list(archiveresults_qs.order_by('plugin').select_related('snapshot')[:limit])
|
||||
result_ids = list(archiveresults_qs.order_by("plugin").values_list("pk", flat=True)[:limit])
|
||||
if not result_ids:
|
||||
return mark_safe('<div style="color: #64748b; font-style: italic; padding: 16px 0;">No Archive Results yet...</div>')
|
||||
|
||||
results_by_id = {
|
||||
result.pk: result
|
||||
for result in ArchiveResult.objects.filter(pk__in=result_ids).select_related("snapshot", "process", "process__machine")
|
||||
}
|
||||
results = [results_by_id[result_id] for result_id in result_ids if result_id in results_by_id]
|
||||
|
||||
if not results:
|
||||
return mark_safe('<div style="color: #64748b; font-style: italic; padding: 16px 0;">No Archive Results yet...</div>')
|
||||
|
||||
# Status colors
|
||||
status_colors = {
|
||||
'succeeded': ('#166534', '#dcfce7'), # green
|
||||
'failed': ('#991b1b', '#fee2e2'), # red
|
||||
'queued': ('#6b7280', '#f3f4f6'), # gray
|
||||
'started': ('#92400e', '#fef3c7'), # amber
|
||||
'backoff': ('#92400e', '#fef3c7'),
|
||||
'skipped': ('#475569', '#f1f5f9'),
|
||||
'noresults': ('#475569', '#f1f5f9'),
|
||||
"succeeded": ("#166534", "#dcfce7"), # green
|
||||
"failed": ("#991b1b", "#fee2e2"), # red
|
||||
"queued": ("#6b7280", "#f3f4f6"), # gray
|
||||
"started": ("#92400e", "#fef3c7"), # amber
|
||||
"backoff": ("#92400e", "#fef3c7"),
|
||||
"skipped": ("#475569", "#f1f5f9"),
|
||||
"noresults": ("#475569", "#f1f5f9"),
|
||||
}
|
||||
|
||||
rows = []
|
||||
for idx, result in enumerate(results):
|
||||
status = result.status or 'queued'
|
||||
color, bg = status_colors.get(status, ('#6b7280', '#f3f4f6'))
|
||||
status = result.status or "queued"
|
||||
color, bg = status_colors.get(status, ("#6b7280", "#f3f4f6"))
|
||||
output_files = result.output_files or {}
|
||||
if isinstance(output_files, dict):
|
||||
output_file_count = len(output_files)
|
||||
elif isinstance(output_files, (list, tuple, set)):
|
||||
output_file_count = len(output_files)
|
||||
elif isinstance(output_files, str):
|
||||
try:
|
||||
parsed = json.loads(output_files)
|
||||
output_file_count = len(parsed) if isinstance(parsed, (dict, list, tuple, set)) else 0
|
||||
except Exception:
|
||||
output_file_count = 0
|
||||
else:
|
||||
output_file_count = 0
|
||||
|
||||
# Get plugin icon
|
||||
icon = get_plugin_icon(result.plugin)
|
||||
|
||||
# Format timestamp
|
||||
end_time = result.end_ts.strftime('%Y-%m-%d %H:%M:%S') if result.end_ts else '-'
|
||||
end_time = result.end_ts.strftime("%Y-%m-%d %H:%M:%S") if result.end_ts else "-"
|
||||
|
||||
process_display = "-"
|
||||
if result.process_id and result.process:
|
||||
process_display = f'''
|
||||
<a href="{reverse("admin:machine_process_change", args=[result.process_id])}"
|
||||
style="color: #2563eb; text-decoration: none; font-family: ui-monospace, monospace; font-size: 12px;"
|
||||
title="View process">{result.process.pid or "-"}</a>
|
||||
'''
|
||||
|
||||
machine_display = "-"
|
||||
if result.process_id and result.process and result.process.machine_id:
|
||||
machine_display = f'''
|
||||
<a href="{reverse("admin:machine_machine_change", args=[result.process.machine_id])}"
|
||||
style="color: #2563eb; text-decoration: none; font-size: 12px;"
|
||||
title="View machine">{result.process.machine.hostname}</a>
|
||||
'''
|
||||
|
||||
# Truncate output for display
|
||||
full_output = result.output_str or '-'
|
||||
full_output = result.output_str or "-"
|
||||
output_display = full_output[:60]
|
||||
if len(full_output) > 60:
|
||||
output_display += '...'
|
||||
output_display += "..."
|
||||
|
||||
display_cmd = build_abx_dl_display_command(result)
|
||||
replay_cmd = build_abx_dl_replay_command(result)
|
||||
@@ -132,23 +157,23 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
|
||||
cmd_attr = html.escape(replay_cmd, quote=True)
|
||||
|
||||
# Build output link - use embed_path() which checks output_files first
|
||||
embed_path = result.embed_path() if hasattr(result, 'embed_path') else None
|
||||
snapshot_id = str(getattr(result, 'snapshot_id', ''))
|
||||
if embed_path and result.status == 'succeeded':
|
||||
embed_path = result.embed_path() if hasattr(result, "embed_path") else None
|
||||
snapshot_id = str(getattr(result, "snapshot_id", ""))
|
||||
if embed_path and result.status == "succeeded":
|
||||
output_link = build_snapshot_url(snapshot_id, embed_path)
|
||||
else:
|
||||
output_link = build_snapshot_url(snapshot_id, '')
|
||||
output_link = build_snapshot_url(snapshot_id, "")
|
||||
|
||||
# Get version - try cmd_version field
|
||||
version = result.cmd_version if result.cmd_version else '-'
|
||||
version = result.cmd_version if result.cmd_version else "-"
|
||||
|
||||
# Unique ID for this row's expandable output
|
||||
row_id = f'output_{idx}_{str(result.id)[:8]}'
|
||||
row_id = f"output_{idx}_{str(result.id)[:8]}"
|
||||
|
||||
rows.append(f'''
|
||||
<tr style="border-bottom: 1px solid #f1f5f9; transition: background 0.15s;" onmouseover="this.style.background='#f8fafc'" onmouseout="this.style.background='transparent'">
|
||||
<td style="padding: 10px 12px; white-space: nowrap;">
|
||||
<a href="{reverse('admin:core_archiveresult_change', args=[result.id])}"
|
||||
<a href="{reverse("admin:core_archiveresult_change", args=[result.id])}"
|
||||
style="color: #2563eb; text-decoration: none; font-family: ui-monospace, monospace; font-size: 11px;"
|
||||
title="View/edit archive result">
|
||||
<code>{str(result.id)[-8:]}</code>
|
||||
@@ -178,9 +203,18 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
|
||||
{output_display}
|
||||
</span>
|
||||
</td>
|
||||
<td style="padding: 10px 12px; white-space: nowrap; color: #64748b; font-size: 12px; text-align: right;">
|
||||
{output_file_count}
|
||||
</td>
|
||||
<td style="padding: 10px 12px; white-space: nowrap; color: #64748b; font-size: 12px;">
|
||||
{end_time}
|
||||
</td>
|
||||
<td style="padding: 10px 12px; white-space: nowrap;">
|
||||
{process_display}
|
||||
</td>
|
||||
<td style="padding: 10px 12px; white-space: nowrap;">
|
||||
{machine_display}
|
||||
</td>
|
||||
<td style="padding: 10px 12px; white-space: nowrap; font-family: ui-monospace, monospace; font-size: 11px; color: #64748b;">
|
||||
{version}
|
||||
</td>
|
||||
@@ -189,14 +223,14 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
|
||||
<a href="{output_link}" target="_blank"
|
||||
style="padding: 4px 8px; background: #f1f5f9; border-radius: 4px; color: #475569; text-decoration: none; font-size: 11px;"
|
||||
title="View output">📄</a>
|
||||
<a href="{reverse('admin:core_archiveresult_change', args=[result.id])}"
|
||||
<a href="{reverse("admin:core_archiveresult_change", args=[result.id])}"
|
||||
style="padding: 4px 8px; background: #f1f5f9; border-radius: 4px; color: #475569; text-decoration: none; font-size: 11px;"
|
||||
title="Edit">✏️</a>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr style="border-bottom: 1px solid #e2e8f0;">
|
||||
<td colspan="8" style="padding: 0 12px 10px 12px;">
|
||||
<td colspan="11" style="padding: 0 12px 10px 12px;">
|
||||
<details id="{row_id}" style="margin: 0;">
|
||||
<summary style="cursor: pointer; font-size: 11px; color: #94a3b8; user-select: none;">
|
||||
Details & Output
|
||||
@@ -205,7 +239,7 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
|
||||
<div style="font-size: 11px; color: #64748b; margin-bottom: 8px;">
|
||||
<span style="margin-right: 16px;"><b>ID:</b> <code>{str(result.id)}</code></span>
|
||||
<span style="margin-right: 16px;"><b>Version:</b> <code>{version}</code></span>
|
||||
<span style="margin-right: 16px;"><b>PWD:</b> <code>{result.pwd or '-'}</code></span>
|
||||
<span style="margin-right: 16px;"><b>PWD:</b> <code>{result.pwd or "-"}</code></span>
|
||||
</div>
|
||||
<div style="font-size: 11px; color: #64748b; margin-bottom: 8px;">
|
||||
<b>Output:</b>
|
||||
@@ -230,19 +264,19 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
|
||||
''')
|
||||
|
||||
total_count = archiveresults_qs.count()
|
||||
footer = ''
|
||||
footer = ""
|
||||
if total_count > limit:
|
||||
footer = f'''
|
||||
footer = f"""
|
||||
<tr>
|
||||
<td colspan="8" style="padding: 12px; text-align: center; color: #64748b; font-size: 13px; background: #f8fafc;">
|
||||
<td colspan="11" style="padding: 12px; text-align: center; color: #64748b; font-size: 13px; background: #f8fafc;">
|
||||
Showing {limit} of {total_count} results
|
||||
<a href="/admin/core/archiveresult/?snapshot__id__exact={results[0].snapshot_id if results else ''}"
|
||||
<a href="/admin/core/archiveresult/?snapshot__id__exact={results[0].snapshot_id if results else ""}"
|
||||
style="color: #2563eb;">View all →</a>
|
||||
</td>
|
||||
</tr>
|
||||
'''
|
||||
"""
|
||||
|
||||
return mark_safe(f'''
|
||||
return mark_safe(f"""
|
||||
<div style="border: 1px solid #e2e8f0; border-radius: 8px; overflow: hidden; background: #fff; width: 100%;">
|
||||
<table style="width: 100%; border-collapse: collapse; font-size: 14px;">
|
||||
<thead>
|
||||
@@ -252,86 +286,92 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
|
||||
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; width: 32px;"></th>
|
||||
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Plugin</th>
|
||||
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Output</th>
|
||||
<th style="padding: 10px 12px; text-align: right; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Files</th>
|
||||
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Completed</th>
|
||||
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Process</th>
|
||||
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Machine</th>
|
||||
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Version</th>
|
||||
<th style="padding: 10px 8px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Actions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{''.join(rows)}
|
||||
{"".join(rows)}
|
||||
{footer}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
''')
|
||||
|
||||
""")
|
||||
|
||||
|
||||
class ArchiveResultInline(admin.TabularInline):
|
||||
name = 'Archive Results Log'
|
||||
name = "Archive Results Log"
|
||||
model = ArchiveResult
|
||||
parent_model = Snapshot
|
||||
# fk_name = 'snapshot'
|
||||
extra = 0
|
||||
sort_fields = ('end_ts', 'plugin', 'output_str', 'status', 'cmd_version')
|
||||
readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
|
||||
fields = ('start_ts', 'end_ts', *readonly_fields, 'plugin', 'cmd', 'cmd_version', 'pwd', 'status', 'output_str')
|
||||
sort_fields = ("end_ts", "plugin", "output_str", "status", "cmd_version")
|
||||
readonly_fields = ("id", "result_id", "completed", "command", "version")
|
||||
fields = ("start_ts", "end_ts", *readonly_fields, "plugin", "cmd", "cmd_version", "pwd", "status", "output_str")
|
||||
# exclude = ('id',)
|
||||
ordering = ('end_ts',)
|
||||
ordering = ("end_ts",)
|
||||
show_change_link = True
|
||||
# # classes = ['collapse']
|
||||
|
||||
def get_parent_object_from_request(self, request):
|
||||
resolved = resolve(request.path_info)
|
||||
try:
|
||||
return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
|
||||
return self.parent_model.objects.get(pk=resolved.kwargs["object_id"])
|
||||
except (self.parent_model.DoesNotExist, ValidationError):
|
||||
return None
|
||||
|
||||
@admin.display(
|
||||
description='Completed',
|
||||
ordering='end_ts',
|
||||
description="Completed",
|
||||
ordering="end_ts",
|
||||
)
|
||||
def completed(self, obj):
|
||||
return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
|
||||
return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime("%Y-%m-%d %H:%M:%S"))
|
||||
|
||||
def result_id(self, obj):
|
||||
return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), str(obj.id)[:8])
|
||||
|
||||
return format_html(
|
||||
'<a href="{}"><code style="font-size: 10px">[{}]</code></a>',
|
||||
reverse("admin:core_archiveresult_change", args=(obj.id,)),
|
||||
str(obj.id)[:8],
|
||||
)
|
||||
|
||||
def command(self, obj):
|
||||
return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
|
||||
|
||||
return format_html("<small><code>{}</code></small>", " ".join(obj.cmd or []))
|
||||
|
||||
def version(self, obj):
|
||||
return format_html('<small><code>{}</code></small>', obj.cmd_version or '-')
|
||||
|
||||
return format_html("<small><code>{}</code></small>", obj.cmd_version or "-")
|
||||
|
||||
def get_formset(self, request, obj=None, **kwargs):
|
||||
formset = super().get_formset(request, obj, **kwargs)
|
||||
snapshot = self.get_parent_object_from_request(request)
|
||||
form_class = getattr(formset, 'form', None)
|
||||
base_fields = getattr(form_class, 'base_fields', {})
|
||||
snapshot_output_dir = str(snapshot.output_dir) if snapshot else ''
|
||||
form_class = getattr(formset, "form", None)
|
||||
base_fields = getattr(form_class, "base_fields", {})
|
||||
snapshot_output_dir = str(snapshot.output_dir) if snapshot else ""
|
||||
|
||||
# import ipdb; ipdb.set_trace()
|
||||
# formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget()
|
||||
|
||||
|
||||
# default values for new entries
|
||||
base_fields['status'].initial = 'succeeded'
|
||||
base_fields['start_ts'].initial = timezone.now()
|
||||
base_fields['end_ts'].initial = timezone.now()
|
||||
base_fields['cmd_version'].initial = '-'
|
||||
base_fields['pwd'].initial = snapshot_output_dir
|
||||
base_fields['cmd'].initial = '["-"]'
|
||||
base_fields['output_str'].initial = 'Manually recorded cmd output...'
|
||||
base_fields["status"].initial = "succeeded"
|
||||
base_fields["start_ts"].initial = timezone.now()
|
||||
base_fields["end_ts"].initial = timezone.now()
|
||||
base_fields["cmd_version"].initial = "-"
|
||||
base_fields["pwd"].initial = snapshot_output_dir
|
||||
base_fields["cmd"].initial = '["-"]'
|
||||
base_fields["output_str"].initial = "Manually recorded cmd output..."
|
||||
|
||||
if obj is not None:
|
||||
# hidden values for existing entries and new entries
|
||||
base_fields['start_ts'].widget = base_fields['start_ts'].hidden_widget()
|
||||
base_fields['end_ts'].widget = base_fields['end_ts'].hidden_widget()
|
||||
base_fields['cmd'].widget = base_fields['cmd'].hidden_widget()
|
||||
base_fields['pwd'].widget = base_fields['pwd'].hidden_widget()
|
||||
base_fields['cmd_version'].widget = base_fields['cmd_version'].hidden_widget()
|
||||
base_fields["start_ts"].widget = base_fields["start_ts"].hidden_widget()
|
||||
base_fields["end_ts"].widget = base_fields["end_ts"].hidden_widget()
|
||||
base_fields["cmd"].widget = base_fields["cmd"].hidden_widget()
|
||||
base_fields["pwd"].widget = base_fields["pwd"].hidden_widget()
|
||||
base_fields["cmd_version"].widget = base_fields["cmd_version"].hidden_widget()
|
||||
return formset
|
||||
|
||||
|
||||
def get_readonly_fields(self, request, obj=None):
|
||||
if obj is not None:
|
||||
return self.readonly_fields
|
||||
@@ -339,62 +379,122 @@ class ArchiveResultInline(admin.TabularInline):
|
||||
return []
|
||||
|
||||
|
||||
|
||||
class ArchiveResultAdmin(BaseModelAdmin):
|
||||
list_display = ('details_link', 'created_at', 'snapshot_info', 'tags_inline', 'status_badge', 'plugin_with_icon', 'process_link', 'machine_link', 'cmd_str', 'output_str_display')
|
||||
list_display = (
|
||||
"details_link",
|
||||
"zip_link",
|
||||
"created_at",
|
||||
"snapshot_info",
|
||||
"tags_inline",
|
||||
"status_badge",
|
||||
"plugin_with_icon",
|
||||
"process_link",
|
||||
"machine_link",
|
||||
"cmd_str",
|
||||
"output_str_display",
|
||||
)
|
||||
list_display_links = None
|
||||
sort_fields = ('id', 'created_at', 'plugin', 'status')
|
||||
readonly_fields = ('cmd', 'cmd_version', 'pwd', 'cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon', 'process_link')
|
||||
search_fields = ()
|
||||
autocomplete_fields = ['snapshot']
|
||||
sort_fields = ("id", "created_at", "plugin", "status")
|
||||
readonly_fields = (
|
||||
"admin_actions",
|
||||
"cmd",
|
||||
"cmd_version",
|
||||
"pwd",
|
||||
"cmd_str",
|
||||
"snapshot_info",
|
||||
"tags_str",
|
||||
"created_at",
|
||||
"modified_at",
|
||||
"output_summary",
|
||||
"plugin_with_icon",
|
||||
"process_link",
|
||||
)
|
||||
search_fields = (
|
||||
"snapshot__id",
|
||||
"snapshot__url",
|
||||
"snapshot__tags__name",
|
||||
"snapshot__crawl_id",
|
||||
"plugin",
|
||||
"hook_name",
|
||||
"output_str",
|
||||
"output_json",
|
||||
"process__cmd",
|
||||
)
|
||||
autocomplete_fields = ["snapshot"]
|
||||
|
||||
fieldsets = (
|
||||
('Snapshot', {
|
||||
'fields': ('snapshot', 'snapshot_info', 'tags_str'),
|
||||
'classes': ('card', 'wide'),
|
||||
}),
|
||||
('Plugin', {
|
||||
'fields': ('plugin_with_icon', 'process_link', 'status'),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
('Timing', {
|
||||
'fields': ('start_ts', 'end_ts', 'created_at', 'modified_at'),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
('Command', {
|
||||
'fields': ('cmd', 'cmd_str', 'cmd_version', 'pwd'),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
('Output', {
|
||||
'fields': ('output_str', 'output_json', 'output_files', 'output_size', 'output_mimetypes', 'output_summary'),
|
||||
'classes': ('card', 'wide'),
|
||||
}),
|
||||
(
|
||||
"Actions",
|
||||
{
|
||||
"fields": ("admin_actions",),
|
||||
"classes": ("card", "wide"),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Snapshot",
|
||||
{
|
||||
"fields": ("snapshot", "snapshot_info", "tags_str"),
|
||||
"classes": ("card", "wide"),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Plugin",
|
||||
{
|
||||
"fields": ("plugin_with_icon", "process_link", "status"),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Timing",
|
||||
{
|
||||
"fields": ("start_ts", "end_ts", "created_at", "modified_at"),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Command",
|
||||
{
|
||||
"fields": ("cmd", "cmd_str", "cmd_version", "pwd"),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Output",
|
||||
{
|
||||
"fields": ("output_str", "output_json", "output_files", "output_size", "output_mimetypes", "output_summary"),
|
||||
"classes": ("card", "wide"),
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
list_filter = ('status', 'plugin', 'start_ts')
|
||||
ordering = ['-start_ts']
|
||||
list_filter = ("status", "plugin", "start_ts")
|
||||
ordering = ["-start_ts"]
|
||||
list_per_page = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
|
||||
|
||||
paginator = AccelleratedPaginator
|
||||
paginator = AcceleratedPaginator
|
||||
save_on_top = True
|
||||
|
||||
actions = ['delete_selected']
|
||||
actions = ["delete_selected"]
|
||||
|
||||
class Meta:
|
||||
verbose_name = 'Archive Result'
|
||||
verbose_name_plural = 'Archive Results'
|
||||
verbose_name = "Archive Result"
|
||||
verbose_name_plural = "Archive Results"
|
||||
|
||||
def change_view(self, request, object_id, form_url="", extra_context=None):
|
||||
self.request = request
|
||||
return super().change_view(request, object_id, form_url, extra_context)
|
||||
|
||||
def changelist_view(self, request, extra_context=None):
|
||||
self.request = request
|
||||
return super().changelist_view(request, extra_context)
|
||||
|
||||
def get_queryset(self, request):
|
||||
return (
|
||||
super()
|
||||
.get_queryset(request)
|
||||
.select_related('snapshot', 'process')
|
||||
.prefetch_related('snapshot__tags')
|
||||
.annotate(snapshot_first_tag=Min('snapshot__tags__name'))
|
||||
.select_related("snapshot", "process")
|
||||
.prefetch_related("snapshot__tags")
|
||||
.annotate(snapshot_first_tag=Min("snapshot__tags__name"))
|
||||
)
|
||||
|
||||
def get_search_results(self, request, queryset, search_term):
|
||||
@@ -402,15 +502,14 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
return queryset, False
|
||||
|
||||
queryset = queryset.annotate(
|
||||
snapshot_id_text=Cast('snapshot__id', output_field=TextField()),
|
||||
snapshot_crawl_id_text=Cast('snapshot__crawl_id', output_field=TextField()),
|
||||
output_json_text=Cast('output_json', output_field=TextField()),
|
||||
cmd_text=Cast('process__cmd', output_field=TextField()),
|
||||
snapshot_id_text=Cast("snapshot__id", output_field=TextField()),
|
||||
snapshot_crawl_id_text=Cast("snapshot__crawl_id", output_field=TextField()),
|
||||
output_json_text=Cast("output_json", output_field=TextField()),
|
||||
cmd_text=Cast("process__cmd", output_field=TextField()),
|
||||
)
|
||||
|
||||
search_bits = [
|
||||
bit[1:-1] if len(bit) >= 2 and bit[0] == bit[-1] and bit[0] in {'"', "'"} else bit
|
||||
for bit in smart_split(search_term)
|
||||
bit[1:-1] if len(bit) >= 2 and bit[0] == bit[-1] and bit[0] in {'"', "'"} else bit for bit in smart_split(search_term)
|
||||
]
|
||||
search_bits = [bit.strip() for bit in search_bits if bit.strip()]
|
||||
if not search_bits:
|
||||
@@ -427,22 +526,44 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
| Q(hook_name__icontains=bit)
|
||||
| Q(output_str__icontains=bit)
|
||||
| Q(output_json_text__icontains=bit)
|
||||
| Q(cmd_text__icontains=bit)
|
||||
| Q(cmd_text__icontains=bit),
|
||||
)
|
||||
|
||||
return queryset.filter(reduce(and_, filters)).distinct(), True
|
||||
|
||||
@admin.display(description='Details', ordering='id')
|
||||
def get_snapshot_view_url(self, result: ArchiveResult) -> str:
|
||||
return build_snapshot_url(str(result.snapshot_id), request=getattr(self, "request", None))
|
||||
|
||||
def get_output_view_url(self, result: ArchiveResult) -> str:
|
||||
output_path = result.embed_path() if hasattr(result, "embed_path") else None
|
||||
if not output_path:
|
||||
output_path = result.plugin or ""
|
||||
return build_snapshot_url(str(result.snapshot_id), output_path, request=getattr(self, "request", None))
|
||||
|
||||
def get_output_files_url(self, result: ArchiveResult) -> str:
|
||||
return f"{build_snapshot_url(str(result.snapshot_id), result.plugin, request=getattr(self, 'request', None))}/?files=1"
|
||||
|
||||
def get_output_zip_url(self, result: ArchiveResult) -> str:
|
||||
return f"{self.get_output_files_url(result)}&download=zip"
|
||||
|
||||
@admin.display(description="Details", ordering="id")
|
||||
def details_link(self, result):
|
||||
return format_html(
|
||||
'<a href="{}"><code>{}</code></a>',
|
||||
reverse('admin:core_archiveresult_change', args=[result.id]),
|
||||
reverse("admin:core_archiveresult_change", args=[result.id]),
|
||||
str(result.id)[-8:],
|
||||
)
|
||||
|
||||
@admin.display(description="Zip")
|
||||
def zip_link(self, result):
|
||||
return format_html(
|
||||
'<a href="{}" class="archivebox-zip-button" data-loading-mode="spinner-only" onclick="return window.archiveboxHandleZipClick(this, event);" style="display:inline-flex; align-items:center; justify-content:center; gap:4px; width:48px; min-width:48px; height:24px; padding:0; box-sizing:border-box; border-radius:999px; border:1px solid #bfdbfe; background:#eff6ff; color:#1d4ed8; font-size:11px; font-weight:600; line-height:1; text-decoration:none;"><span class="archivebox-zip-spinner" aria-hidden="true"></span><span class="archivebox-zip-label">⬇ ZIP</span></a>',
|
||||
self.get_output_zip_url(result),
|
||||
)
|
||||
|
||||
@admin.display(
|
||||
description='Snapshot',
|
||||
ordering='snapshot__url',
|
||||
description="Snapshot",
|
||||
ordering="snapshot__url",
|
||||
)
|
||||
def snapshot_info(self, result):
|
||||
snapshot_id = str(result.snapshot_id)
|
||||
@@ -450,29 +571,28 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
'<a href="{}"><b><code>[{}]</code></b> {} {}</a><br/>',
|
||||
build_snapshot_url(snapshot_id, "index.html"),
|
||||
snapshot_id[:8],
|
||||
result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
|
||||
result.snapshot.bookmarked_at.strftime("%Y-%m-%d %H:%M"),
|
||||
result.snapshot.url[:128],
|
||||
)
|
||||
|
||||
|
||||
@admin.display(
|
||||
description='Snapshot Tags'
|
||||
description="Snapshot Tags",
|
||||
)
|
||||
def tags_str(self, result):
|
||||
return result.snapshot.tags_str()
|
||||
|
||||
@admin.display(description='Tags', ordering='snapshot_first_tag')
|
||||
@admin.display(description="Tags", ordering="snapshot_first_tag")
|
||||
def tags_inline(self, result):
|
||||
widget = InlineTagEditorWidget(snapshot_id=str(result.snapshot_id), editable=False)
|
||||
tags_html = widget.render(
|
||||
name=f'tags_{result.snapshot_id}',
|
||||
name=f"tags_{result.snapshot_id}",
|
||||
value=result.snapshot.tags.all(),
|
||||
attrs={'id': f'tags_{result.snapshot_id}'},
|
||||
attrs={"id": f"tags_{result.snapshot_id}"},
|
||||
snapshot_id=str(result.snapshot_id),
|
||||
)
|
||||
return mark_safe(f'<span class="tags-inline-editor">{tags_html}</span>')
|
||||
|
||||
@admin.display(description='Status', ordering='status')
|
||||
@admin.display(description="Status", ordering="status")
|
||||
def status_badge(self, result):
|
||||
status = result.status or ArchiveResult.StatusChoices.QUEUED
|
||||
return format_html(
|
||||
@@ -482,7 +602,7 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
result.get_status_display() or status,
|
||||
)
|
||||
|
||||
@admin.display(description='Plugin', ordering='plugin')
|
||||
@admin.display(description="Plugin", ordering="plugin")
|
||||
def plugin_with_icon(self, result):
|
||||
icon = get_plugin_icon(result.plugin)
|
||||
return format_html(
|
||||
@@ -494,36 +614,36 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
result.plugin,
|
||||
)
|
||||
|
||||
@admin.display(description='Process', ordering='process__pid')
|
||||
@admin.display(description="Process", ordering="process__pid")
|
||||
def process_link(self, result):
|
||||
if not result.process_id:
|
||||
return '-'
|
||||
process_label = result.process.pid if result.process and result.process.pid else '-'
|
||||
return "-"
|
||||
process_label = result.process.pid if result.process and result.process.pid else "-"
|
||||
return format_html(
|
||||
'<a href="{}"><code>{}</code></a>',
|
||||
reverse('admin:machine_process_change', args=[result.process_id]),
|
||||
reverse("admin:machine_process_change", args=[result.process_id]),
|
||||
process_label,
|
||||
)
|
||||
|
||||
@admin.display(description='Machine', ordering='process__machine__hostname')
|
||||
@admin.display(description="Machine", ordering="process__machine__hostname")
|
||||
def machine_link(self, result):
|
||||
if not result.process_id or not result.process or not result.process.machine_id:
|
||||
return '-'
|
||||
return "-"
|
||||
machine = result.process.machine
|
||||
return format_html(
|
||||
'<a href="{}"><code>{}</code> {}</a>',
|
||||
reverse('admin:machine_machine_change', args=[machine.id]),
|
||||
reverse("admin:machine_machine_change", args=[machine.id]),
|
||||
str(machine.id)[:8],
|
||||
machine.hostname,
|
||||
)
|
||||
|
||||
@admin.display(description='Command')
|
||||
@admin.display(description="Command")
|
||||
def cmd_str(self, result):
|
||||
display_cmd = build_abx_dl_display_command(result)
|
||||
replay_cmd = build_abx_dl_replay_command(result)
|
||||
return format_html(
|
||||
'''
|
||||
<div style="position: relative; width: 300px; min-width: 300px; max-width: 300px; overflow: hidden; box-sizing: border-box;">
|
||||
"""
|
||||
<div style="position: relative; width: 100%; max-width: 100%; overflow: hidden; box-sizing: border-box;">
|
||||
<button type="button"
|
||||
data-command="{}"
|
||||
onclick="(function(btn){{var text=btn.dataset.command||''; if(navigator.clipboard&&navigator.clipboard.writeText){{navigator.clipboard.writeText(text);}} else {{var ta=document.createElement('textarea'); ta.value=text; document.body.appendChild(ta); ta.select(); document.execCommand('copy'); document.body.removeChild(ta);}}}})(this); return false;"
|
||||
@@ -534,7 +654,7 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
{}
|
||||
</code>
|
||||
</div>
|
||||
''',
|
||||
""",
|
||||
replay_cmd,
|
||||
replay_cmd,
|
||||
display_cmd,
|
||||
@@ -542,8 +662,8 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
|
||||
def output_display(self, result):
|
||||
# Determine output link path - use embed_path() which checks output_files
|
||||
embed_path = result.embed_path() if hasattr(result, 'embed_path') else None
|
||||
output_path = embed_path if (result.status == 'succeeded' and embed_path) else 'index.html'
|
||||
embed_path = result.embed_path() if hasattr(result, "embed_path") else None
|
||||
output_path = embed_path if (result.status == "succeeded" and embed_path) else "index.html"
|
||||
snapshot_id = str(result.snapshot_id)
|
||||
return format_html(
|
||||
'<a href="{}" class="output-link">↗️</a><pre>{}</pre>',
|
||||
@@ -551,13 +671,13 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
result.output_str,
|
||||
)
|
||||
|
||||
@admin.display(description='Output', ordering='output_str')
|
||||
@admin.display(description="Output", ordering="output_str")
|
||||
def output_str_display(self, result):
|
||||
output_text = str(result.output_str or '').strip()
|
||||
output_text = str(result.output_str or "").strip()
|
||||
if not output_text:
|
||||
return '-'
|
||||
return "-"
|
||||
|
||||
live_path = result.embed_path() if hasattr(result, 'embed_path') else None
|
||||
live_path = result.embed_path() if hasattr(result, "embed_path") else None
|
||||
if live_path:
|
||||
return format_html(
|
||||
'<a href="{}" title="{}"><code>{}</code></a>',
|
||||
@@ -572,8 +692,48 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
output_text,
|
||||
)
|
||||
|
||||
@admin.display(description="")
|
||||
def admin_actions(self, result):
|
||||
return format_html(
|
||||
"""
|
||||
<div style="display:flex; flex-wrap:wrap; gap:12px; align-items:center;">
|
||||
<a class="btn" style="display:inline-flex; align-items:center; gap:6px; padding:10px 16px; background:#f8fafc; border:1px solid #e2e8f0; border-radius:8px; color:#334155; text-decoration:none; font-size:14px; font-weight:500; transition:all 0.15s;"
|
||||
href="{}"
|
||||
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
|
||||
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
|
||||
📄 View Output
|
||||
</a>
|
||||
<a class="btn" style="display:inline-flex; align-items:center; gap:6px; padding:10px 16px; background:#f8fafc; border:1px solid #e2e8f0; border-radius:8px; color:#334155; text-decoration:none; font-size:14px; font-weight:500; transition:all 0.15s;"
|
||||
href="{}"
|
||||
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
|
||||
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
|
||||
📁 Output files
|
||||
</a>
|
||||
<a class="btn archivebox-zip-button" style="display:inline-flex; align-items:center; gap:6px; padding:10px 16px; background:#eff6ff; border:1px solid #bfdbfe; border-radius:8px; color:#1d4ed8; text-decoration:none; font-size:14px; font-weight:500; transition:all 0.15s;"
|
||||
href="{}"
|
||||
data-loading-label="Preparing..."
|
||||
onclick="return window.archiveboxHandleZipClick(this, event);"
|
||||
onmouseover="this.style.background='#dbeafe'; this.style.borderColor='#93c5fd';"
|
||||
onmouseout="this.style.background='#eff6ff'; this.style.borderColor='#bfdbfe';">
|
||||
<span class="archivebox-zip-spinner" aria-hidden="true"></span>
|
||||
<span class="archivebox-zip-label">⬇ Download Zip</span>
|
||||
</a>
|
||||
<a class="btn" style="display:inline-flex; align-items:center; gap:6px; padding:10px 16px; background:#f8fafc; border:1px solid #e2e8f0; border-radius:8px; color:#334155; text-decoration:none; font-size:14px; font-weight:500; transition:all 0.15s;"
|
||||
href="{}"
|
||||
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
|
||||
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
|
||||
🗂 Snapshot
|
||||
</a>
|
||||
</div>
|
||||
""",
|
||||
self.get_output_view_url(result),
|
||||
self.get_output_files_url(result),
|
||||
self.get_output_zip_url(result),
|
||||
self.get_snapshot_view_url(result),
|
||||
)
|
||||
|
||||
def output_summary(self, result):
|
||||
snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
|
||||
snapshot_dir = Path(DATA_DIR) / str(result.pwd).split("data/", 1)[-1]
|
||||
output_html = format_html(
|
||||
'<pre style="display: inline-block">{}</pre><br/>',
|
||||
result.output_str,
|
||||
@@ -583,9 +743,13 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
'<a href="{}#all">See result files ...</a><br/><pre><code>',
|
||||
build_snapshot_url(snapshot_id, "index.html"),
|
||||
)
|
||||
embed_path = result.embed_path() if hasattr(result, 'embed_path') else ''
|
||||
path_from_embed = (snapshot_dir / (embed_path or ''))
|
||||
output_html += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(embed_path))
|
||||
embed_path = result.embed_path() if hasattr(result, "embed_path") else ""
|
||||
path_from_embed = snapshot_dir / (embed_path or "")
|
||||
output_html += format_html(
|
||||
'<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>',
|
||||
str(snapshot_dir),
|
||||
str(embed_path),
|
||||
)
|
||||
if os.access(path_from_embed, os.R_OK):
|
||||
root_dir = str(path_from_embed)
|
||||
else:
|
||||
@@ -594,19 +758,22 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
# print(root_dir, str(list(os.walk(root_dir))))
|
||||
|
||||
for root, dirs, files in os.walk(root_dir):
|
||||
depth = root.replace(root_dir, '').count(os.sep) + 1
|
||||
depth = root.replace(root_dir, "").count(os.sep) + 1
|
||||
if depth > 2:
|
||||
continue
|
||||
indent = ' ' * 4 * (depth)
|
||||
indent = " " * 4 * (depth)
|
||||
output_html += format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root))
|
||||
indentation_str = ' ' * 4 * (depth + 1)
|
||||
indentation_str = " " * 4 * (depth + 1)
|
||||
for filename in sorted(files):
|
||||
is_hidden = filename.startswith('.')
|
||||
output_html += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
|
||||
|
||||
return output_html + mark_safe('</code></pre>')
|
||||
|
||||
is_hidden = filename.startswith(".")
|
||||
output_html += format_html(
|
||||
'<span style="opacity: {}.2">{}{}</span><br/>',
|
||||
int(not is_hidden),
|
||||
indentation_str,
|
||||
filename.strip(),
|
||||
)
|
||||
|
||||
return output_html + mark_safe("</code></pre>")
|
||||
|
||||
|
||||
def register_admin(admin_site):
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
@@ -18,23 +18,23 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
class ArchiveBoxAdmin(admin.AdminSite):
|
||||
site_header = 'ArchiveBox'
|
||||
index_title = 'Admin Views'
|
||||
site_title = 'Admin'
|
||||
namespace = 'admin'
|
||||
site_header = "ArchiveBox"
|
||||
index_title = "Admin Views"
|
||||
site_title = "Admin"
|
||||
namespace = "admin"
|
||||
|
||||
def get_app_list(self, request: 'HttpRequest', app_label: str | None = None) -> list['AppDict']:
|
||||
def get_app_list(self, request: "HttpRequest", app_label: str | None = None) -> list["AppDict"]:
|
||||
if app_label is None:
|
||||
return adv_get_app_list(self, request)
|
||||
return adv_get_app_list(self, request, app_label)
|
||||
|
||||
def admin_data_index_view(self, request: 'HttpRequest', **kwargs: Any) -> 'TemplateResponse':
|
||||
def admin_data_index_view(self, request: "HttpRequest", **kwargs: Any) -> "TemplateResponse":
|
||||
return adv_admin_data_index_view(self, request, **kwargs)
|
||||
|
||||
def get_admin_data_urls(self) -> list['URLResolver | URLPattern']:
|
||||
def get_admin_data_urls(self) -> list["URLResolver | URLPattern"]:
|
||||
return adv_get_admin_data_urls(self)
|
||||
|
||||
def get_urls(self) -> list['URLResolver | URLPattern']:
|
||||
def get_urls(self) -> list["URLResolver | URLPattern"]:
|
||||
return self.get_admin_data_urls() + super().get_urls()
|
||||
|
||||
|
||||
@@ -43,7 +43,6 @@ archivebox_admin = ArchiveBoxAdmin()
|
||||
# TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel
|
||||
|
||||
|
||||
|
||||
############### Admin Data View sections are defined in settings.ADMIN_DATA_VIEWS #########
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
|
||||
from urllib.parse import quote
|
||||
|
||||
@@ -28,92 +28,107 @@ from archivebox.core.host_utils import build_snapshot_url
|
||||
|
||||
class TagInline(admin.TabularInline):
|
||||
model = SnapshotTag
|
||||
fields = ('id', 'tag')
|
||||
fields = ("id", "tag")
|
||||
extra = 1
|
||||
max_num = 1000
|
||||
autocomplete_fields = (
|
||||
'tag',
|
||||
)
|
||||
autocomplete_fields = ("tag",)
|
||||
|
||||
|
||||
class TagAdminForm(forms.ModelForm):
|
||||
class Meta:
|
||||
model = Tag
|
||||
fields = '__all__'
|
||||
fields = "__all__"
|
||||
widgets = {
|
||||
'name': forms.TextInput(attrs={
|
||||
'placeholder': 'research, receipts, product-design...',
|
||||
'autocomplete': 'off',
|
||||
'spellcheck': 'false',
|
||||
'data-tag-name-input': '1',
|
||||
}),
|
||||
"name": forms.TextInput(
|
||||
attrs={
|
||||
"placeholder": "research, receipts, product-design...",
|
||||
"autocomplete": "off",
|
||||
"spellcheck": "false",
|
||||
"data-tag-name-input": "1",
|
||||
},
|
||||
),
|
||||
}
|
||||
|
||||
def clean_name(self):
|
||||
name = (self.cleaned_data.get('name') or '').strip()
|
||||
name = (self.cleaned_data.get("name") or "").strip()
|
||||
if not name:
|
||||
raise forms.ValidationError('Tag name is required.')
|
||||
raise forms.ValidationError("Tag name is required.")
|
||||
return name
|
||||
|
||||
|
||||
class TagAdmin(BaseModelAdmin):
|
||||
form = TagAdminForm
|
||||
change_list_template = 'admin/core/tag/change_list.html'
|
||||
change_form_template = 'admin/core/tag/change_form.html'
|
||||
list_display = ('name', 'num_snapshots', 'created_at', 'created_by')
|
||||
list_filter = ('created_at', 'created_by')
|
||||
search_fields = ('id', 'name', 'slug')
|
||||
readonly_fields = ('slug', 'id', 'created_at', 'modified_at', 'snapshots')
|
||||
actions = ['delete_selected']
|
||||
ordering = ['name', 'id']
|
||||
change_list_template = "admin/core/tag/change_list.html"
|
||||
change_form_template = "admin/core/tag/change_form.html"
|
||||
list_display = ("name", "num_snapshots", "created_at", "created_by")
|
||||
list_filter = ("created_at", "created_by")
|
||||
search_fields = ("id", "name", "slug")
|
||||
readonly_fields = ("slug", "id", "created_at", "modified_at", "snapshots")
|
||||
actions = ["delete_selected"]
|
||||
ordering = ["name", "id"]
|
||||
|
||||
fieldsets = (
|
||||
('Tag', {
|
||||
'fields': ('name', 'slug'),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
('Metadata', {
|
||||
'fields': ('id', 'created_by', 'created_at', 'modified_at'),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
('Recent Snapshots', {
|
||||
'fields': ('snapshots',),
|
||||
'classes': ('card', 'wide'),
|
||||
}),
|
||||
(
|
||||
"Tag",
|
||||
{
|
||||
"fields": ("name", "slug"),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Metadata",
|
||||
{
|
||||
"fields": ("id", "created_by", "created_at", "modified_at"),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Recent Snapshots",
|
||||
{
|
||||
"fields": ("snapshots",),
|
||||
"classes": ("card", "wide"),
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
add_fieldsets = (
|
||||
('Tag', {
|
||||
'fields': ('name',),
|
||||
'classes': ('card', 'wide'),
|
||||
}),
|
||||
('Metadata', {
|
||||
'fields': ('created_by',),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
(
|
||||
"Tag",
|
||||
{
|
||||
"fields": ("name",),
|
||||
"classes": ("card", "wide"),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Metadata",
|
||||
{
|
||||
"fields": ("created_by",),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
def get_fieldsets(self, request: HttpRequest, obj: Tag | None = None):
|
||||
return self.fieldsets if obj else self.add_fieldsets
|
||||
|
||||
def changelist_view(self, request: HttpRequest, extra_context=None):
|
||||
query = (request.GET.get('q') or '').strip()
|
||||
sort = normalize_tag_sort((request.GET.get('sort') or 'created_desc').strip())
|
||||
created_by = normalize_created_by_filter((request.GET.get('created_by') or '').strip())
|
||||
year = normalize_created_year_filter((request.GET.get('year') or '').strip())
|
||||
has_snapshots = normalize_has_snapshots_filter((request.GET.get('has_snapshots') or 'all').strip())
|
||||
query = (request.GET.get("q") or "").strip()
|
||||
sort = normalize_tag_sort((request.GET.get("sort") or "created_desc").strip())
|
||||
created_by = normalize_created_by_filter((request.GET.get("created_by") or "").strip())
|
||||
year = normalize_created_year_filter((request.GET.get("year") or "").strip())
|
||||
has_snapshots = normalize_has_snapshots_filter((request.GET.get("has_snapshots") or "all").strip())
|
||||
extra_context = {
|
||||
**(extra_context or {}),
|
||||
'initial_query': query,
|
||||
'initial_sort': sort,
|
||||
'initial_created_by': created_by,
|
||||
'initial_year': year,
|
||||
'initial_has_snapshots': has_snapshots,
|
||||
'tag_sort_choices': TAG_SORT_CHOICES,
|
||||
'tag_has_snapshots_choices': TAG_HAS_SNAPSHOTS_CHOICES,
|
||||
'tag_created_by_choices': get_tag_creator_choices(),
|
||||
'tag_year_choices': get_tag_year_choices(),
|
||||
'initial_tag_cards': build_tag_cards(
|
||||
"initial_query": query,
|
||||
"initial_sort": sort,
|
||||
"initial_created_by": created_by,
|
||||
"initial_year": year,
|
||||
"initial_has_snapshots": has_snapshots,
|
||||
"tag_sort_choices": TAG_SORT_CHOICES,
|
||||
"tag_has_snapshots_choices": TAG_HAS_SNAPSHOTS_CHOICES,
|
||||
"tag_created_by_choices": get_tag_creator_choices(),
|
||||
"tag_year_choices": get_tag_year_choices(),
|
||||
"initial_tag_cards": build_tag_cards(
|
||||
query=query,
|
||||
request=request,
|
||||
sort=sort,
|
||||
@@ -121,62 +136,67 @@ class TagAdmin(BaseModelAdmin):
|
||||
year=year,
|
||||
has_snapshots=has_snapshots,
|
||||
),
|
||||
'tag_search_api_url': reverse('api-1:search_tags'),
|
||||
'tag_create_api_url': reverse('api-1:tags_create'),
|
||||
"tag_search_api_url": reverse("api-1:search_tags"),
|
||||
"tag_create_api_url": reverse("api-1:tags_create"),
|
||||
}
|
||||
return super().changelist_view(request, extra_context=extra_context)
|
||||
|
||||
def render_change_form(self, request, context, add=False, change=False, form_url='', obj=None):
|
||||
current_name = (request.POST.get('name') or '').strip()
|
||||
def render_change_form(self, request, context, add=False, change=False, form_url="", obj=None):
|
||||
current_name = (request.POST.get("name") or "").strip()
|
||||
if not current_name and obj:
|
||||
current_name = obj.name
|
||||
|
||||
similar_tag_cards = build_tag_cards(query=current_name, request=request, limit=12) if current_name else build_tag_cards(request=request, limit=12)
|
||||
similar_tag_cards = (
|
||||
build_tag_cards(query=current_name, request=request, limit=12) if current_name else build_tag_cards(request=request, limit=12)
|
||||
)
|
||||
if obj:
|
||||
similar_tag_cards = [card for card in similar_tag_cards if card['id'] != obj.pk]
|
||||
similar_tag_cards = [card for card in similar_tag_cards if card["id"] != obj.pk]
|
||||
|
||||
context.update({
|
||||
'tag_search_api_url': reverse('api-1:search_tags'),
|
||||
'tag_similar_cards': similar_tag_cards,
|
||||
'tag_similar_query': current_name,
|
||||
})
|
||||
context.update(
|
||||
{
|
||||
"tag_search_api_url": reverse("api-1:search_tags"),
|
||||
"tag_similar_cards": similar_tag_cards,
|
||||
"tag_similar_query": current_name,
|
||||
},
|
||||
)
|
||||
return super().render_change_form(request, context, add=add, change=change, form_url=form_url, obj=obj)
|
||||
|
||||
def response_add(self, request: HttpRequest, obj: Tag, post_url_continue=None):
|
||||
if IS_POPUP_VAR in request.POST or '_continue' in request.POST or '_addanother' in request.POST:
|
||||
if IS_POPUP_VAR in request.POST or "_continue" in request.POST or "_addanother" in request.POST:
|
||||
return super().response_add(request, obj, post_url_continue=post_url_continue)
|
||||
|
||||
self.message_user(request, f'Tag "{obj.name}" saved.', level=messages.SUCCESS)
|
||||
return self._redirect_to_changelist(obj.name)
|
||||
|
||||
def response_change(self, request: HttpRequest, obj: Tag):
|
||||
if IS_POPUP_VAR in request.POST or '_continue' in request.POST or '_addanother' in request.POST or '_saveasnew' in request.POST:
|
||||
if IS_POPUP_VAR in request.POST or "_continue" in request.POST or "_addanother" in request.POST or "_saveasnew" in request.POST:
|
||||
return super().response_change(request, obj)
|
||||
|
||||
self.message_user(request, f'Tag "{obj.name}" updated.', level=messages.SUCCESS)
|
||||
return self._redirect_to_changelist(obj.name)
|
||||
|
||||
def _redirect_to_changelist(self, query: str = '') -> HttpResponseRedirect:
|
||||
changelist_url = reverse('admin:core_tag_changelist')
|
||||
def _redirect_to_changelist(self, query: str = "") -> HttpResponseRedirect:
|
||||
changelist_url = reverse("admin:core_tag_changelist")
|
||||
if query:
|
||||
changelist_url = f'{changelist_url}?q={quote(query)}'
|
||||
changelist_url = f"{changelist_url}?q={quote(query)}"
|
||||
return HttpResponseRedirect(changelist_url)
|
||||
|
||||
@admin.display(description='Snapshots')
|
||||
@admin.display(description="Snapshots")
|
||||
def snapshots(self, tag: Tag):
|
||||
snapshots = tag.snapshot_set.select_related('crawl__created_by').order_by('-downloaded_at', '-created_at', '-pk')[:10]
|
||||
snapshots = tag.snapshot_set.select_related("crawl__created_by").order_by("-downloaded_at", "-created_at", "-pk")[:10]
|
||||
total_count = tag.snapshot_set.count()
|
||||
if not snapshots:
|
||||
return mark_safe(
|
||||
f'<p style="margin:0;color:#64748b;">No snapshots use this tag yet. '
|
||||
f'<a href="/admin/core/snapshot/?tags__id__exact={tag.id}">Open filtered snapshot list</a>.</p>'
|
||||
f'<a href="/admin/core/snapshot/?tags__id__exact={tag.id}">Open filtered snapshot list</a>.</p>',
|
||||
)
|
||||
|
||||
cards = []
|
||||
for snapshot in snapshots:
|
||||
title = (snapshot.title or '').strip() or snapshot.url
|
||||
cards.append(format_html(
|
||||
'''
|
||||
title = (snapshot.title or "").strip() or snapshot.url
|
||||
cards.append(
|
||||
format_html(
|
||||
"""
|
||||
<a href="{}" style="display:flex;align-items:center;gap:10px;padding:10px 12px;border:1px solid #e2e8f0;border-radius:12px;background:#fff;text-decoration:none;color:#0f172a;">
|
||||
<img src="{}" alt="" style="width:18px;height:18px;border-radius:4px;flex:0 0 auto;" onerror="this.style.display='none'">
|
||||
<span style="min-width:0;">
|
||||
@@ -184,23 +204,26 @@ class TagAdmin(BaseModelAdmin):
|
||||
<code style="display:block;color:#64748b;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;">{}</code>
|
||||
</span>
|
||||
</a>
|
||||
''',
|
||||
reverse('admin:core_snapshot_change', args=[snapshot.pk]),
|
||||
build_snapshot_url(str(snapshot.pk), 'favicon.ico'),
|
||||
title[:120],
|
||||
snapshot.url[:120],
|
||||
))
|
||||
""",
|
||||
reverse("admin:core_snapshot_change", args=[snapshot.pk]),
|
||||
build_snapshot_url(str(snapshot.pk), "favicon.ico"),
|
||||
title[:120],
|
||||
snapshot.url[:120],
|
||||
),
|
||||
)
|
||||
|
||||
cards.append(format_html(
|
||||
'<a href="/admin/core/snapshot/?tags__id__exact={}" style="display:inline-flex;margin-top:10px;font-weight:600;">View all {} tagged snapshots</a>',
|
||||
tag.id,
|
||||
total_count,
|
||||
))
|
||||
return mark_safe('<div style="display:grid;gap:10px;">' + ''.join(cards) + '</div>')
|
||||
cards.append(
|
||||
format_html(
|
||||
'<a href="/admin/core/snapshot/?tags__id__exact={}" style="display:inline-flex;margin-top:10px;font-weight:600;">View all {} tagged snapshots</a>',
|
||||
tag.id,
|
||||
total_count,
|
||||
),
|
||||
)
|
||||
return mark_safe('<div style="display:grid;gap:10px;">' + "".join(cards) + "</div>")
|
||||
|
||||
@admin.display(description='Snapshots', ordering='num_snapshots')
|
||||
@admin.display(description="Snapshots", ordering="num_snapshots")
|
||||
def num_snapshots(self, tag: Tag):
|
||||
count = getattr(tag, 'num_snapshots', tag.snapshot_set.count())
|
||||
count = getattr(tag, "num_snapshots", tag.snapshot_set.count())
|
||||
return format_html(
|
||||
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
|
||||
tag.id,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
|
||||
from django.contrib import admin
|
||||
from django.contrib.auth.admin import UserAdmin
|
||||
@@ -8,87 +8,100 @@ from django.utils.safestring import mark_safe
|
||||
|
||||
|
||||
class CustomUserAdmin(UserAdmin):
|
||||
sort_fields = ['id', 'email', 'username', 'is_superuser', 'last_login', 'date_joined']
|
||||
list_display = ['username', 'id', 'email', 'is_superuser', 'last_login', 'date_joined']
|
||||
readonly_fields = ('snapshot_set', 'archiveresult_set', 'tag_set', 'apitoken_set', 'outboundwebhook_set')
|
||||
sort_fields = ["id", "email", "username", "is_superuser", "last_login", "date_joined"]
|
||||
list_display = ["username", "id", "email", "is_superuser", "last_login", "date_joined"]
|
||||
readonly_fields = ("snapshot_set", "archiveresult_set", "tag_set", "apitoken_set", "outboundwebhook_set")
|
||||
|
||||
# Preserve Django's default user creation form and fieldsets
|
||||
# This ensures passwords are properly hashed and permissions are set correctly
|
||||
add_fieldsets = UserAdmin.add_fieldsets
|
||||
|
||||
# Extend fieldsets for change form only (not user creation)
|
||||
fieldsets = [*(UserAdmin.fieldsets or ()), ('Data', {'fields': readonly_fields})]
|
||||
fieldsets = [*(UserAdmin.fieldsets or ()), ("Data", {"fields": readonly_fields})]
|
||||
|
||||
@admin.display(description='Snapshots')
|
||||
@admin.display(description="Snapshots")
|
||||
def snapshot_set(self, obj):
|
||||
total_count = obj.snapshot_set.count()
|
||||
return mark_safe('<br/>'.join(
|
||||
format_html(
|
||||
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
|
||||
snap.pk,
|
||||
str(snap.id)[:8],
|
||||
snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
|
||||
snap.url[:64],
|
||||
return mark_safe(
|
||||
"<br/>".join(
|
||||
format_html(
|
||||
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
|
||||
snap.pk,
|
||||
str(snap.id)[:8],
|
||||
snap.downloaded_at.strftime("%Y-%m-%d %H:%M") if snap.downloaded_at else "pending...",
|
||||
snap.url[:64],
|
||||
)
|
||||
for snap in obj.snapshot_set.order_by("-modified_at")[:10]
|
||||
)
|
||||
for snap in obj.snapshot_set.order_by('-modified_at')[:10]
|
||||
) + f'<br/><a href="/admin/core/snapshot/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
|
||||
+ f'<br/><a href="/admin/core/snapshot/?created_by__id__exact={obj.pk}">{total_count} total records...<a>',
|
||||
)
|
||||
|
||||
@admin.display(description='Archive Result Logs')
|
||||
@admin.display(description="Archive Result Logs")
|
||||
def archiveresult_set(self, obj):
|
||||
total_count = obj.archiveresult_set.count()
|
||||
return mark_safe('<br/>'.join(
|
||||
format_html(
|
||||
'<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
|
||||
result.pk,
|
||||
str(result.id)[:8],
|
||||
result.snapshot.downloaded_at.strftime('%Y-%m-%d %H:%M') if result.snapshot.downloaded_at else 'pending...',
|
||||
result.extractor,
|
||||
result.snapshot.url[:64],
|
||||
return mark_safe(
|
||||
"<br/>".join(
|
||||
format_html(
|
||||
'<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
|
||||
result.pk,
|
||||
str(result.id)[:8],
|
||||
result.snapshot.downloaded_at.strftime("%Y-%m-%d %H:%M") if result.snapshot.downloaded_at else "pending...",
|
||||
result.extractor,
|
||||
result.snapshot.url[:64],
|
||||
)
|
||||
for result in obj.archiveresult_set.order_by("-modified_at")[:10]
|
||||
)
|
||||
for result in obj.archiveresult_set.order_by('-modified_at')[:10]
|
||||
) + f'<br/><a href="/admin/core/archiveresult/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
|
||||
+ f'<br/><a href="/admin/core/archiveresult/?created_by__id__exact={obj.pk}">{total_count} total records...<a>',
|
||||
)
|
||||
|
||||
@admin.display(description='Tags')
|
||||
@admin.display(description="Tags")
|
||||
def tag_set(self, obj):
|
||||
total_count = obj.tag_set.count()
|
||||
return mark_safe(', '.join(
|
||||
format_html(
|
||||
'<code><a href="/admin/core/tag/{}/change"><b>{}</b></a></code>',
|
||||
tag.pk,
|
||||
tag.name,
|
||||
return mark_safe(
|
||||
", ".join(
|
||||
format_html(
|
||||
'<code><a href="/admin/core/tag/{}/change"><b>{}</b></a></code>',
|
||||
tag.pk,
|
||||
tag.name,
|
||||
)
|
||||
for tag in obj.tag_set.order_by("-modified_at")[:10]
|
||||
)
|
||||
for tag in obj.tag_set.order_by('-modified_at')[:10]
|
||||
) + f'<br/><a href="/admin/core/tag/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
|
||||
+ f'<br/><a href="/admin/core/tag/?created_by__id__exact={obj.pk}">{total_count} total records...<a>',
|
||||
)
|
||||
|
||||
@admin.display(description='API Tokens')
|
||||
@admin.display(description="API Tokens")
|
||||
def apitoken_set(self, obj):
|
||||
total_count = obj.apitoken_set.count()
|
||||
return mark_safe('<br/>'.join(
|
||||
format_html(
|
||||
'<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
|
||||
apitoken.pk,
|
||||
str(apitoken.id)[:8],
|
||||
apitoken.token_redacted[:64],
|
||||
apitoken.expires,
|
||||
return mark_safe(
|
||||
"<br/>".join(
|
||||
format_html(
|
||||
'<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
|
||||
apitoken.pk,
|
||||
str(apitoken.id)[:8],
|
||||
apitoken.token_redacted[:64],
|
||||
apitoken.expires,
|
||||
)
|
||||
for apitoken in obj.apitoken_set.order_by("-modified_at")[:10]
|
||||
)
|
||||
for apitoken in obj.apitoken_set.order_by('-modified_at')[:10]
|
||||
) + f'<br/><a href="/admin/api/apitoken/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
|
||||
+ f'<br/><a href="/admin/api/apitoken/?created_by__id__exact={obj.pk}">{total_count} total records...<a>',
|
||||
)
|
||||
|
||||
@admin.display(description='API Outbound Webhooks')
|
||||
@admin.display(description="API Outbound Webhooks")
|
||||
def outboundwebhook_set(self, obj):
|
||||
total_count = obj.outboundwebhook_set.count()
|
||||
return mark_safe('<br/>'.join(
|
||||
format_html(
|
||||
'<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
|
||||
outboundwebhook.pk,
|
||||
str(outboundwebhook.id)[:8],
|
||||
outboundwebhook.referenced_model,
|
||||
outboundwebhook.endpoint,
|
||||
return mark_safe(
|
||||
"<br/>".join(
|
||||
format_html(
|
||||
'<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
|
||||
outboundwebhook.pk,
|
||||
str(outboundwebhook.id)[:8],
|
||||
outboundwebhook.referenced_model,
|
||||
outboundwebhook.endpoint,
|
||||
)
|
||||
for outboundwebhook in obj.outboundwebhook_set.order_by("-modified_at")[:10]
|
||||
)
|
||||
for outboundwebhook in obj.outboundwebhook_set.order_by('-modified_at')[:10]
|
||||
) + f'<br/><a href="/admin/api/outboundwebhook/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
|
||||
|
||||
|
||||
+ f'<br/><a href="/admin/api/outboundwebhook/?created_by__id__exact={obj.pk}">{total_count} total records...<a>',
|
||||
)
|
||||
|
||||
|
||||
def register_admin(admin_site):
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
|
||||
from django.apps import AppConfig
|
||||
import os
|
||||
|
||||
|
||||
class CoreConfig(AppConfig):
|
||||
name = 'archivebox.core'
|
||||
label = 'core'
|
||||
name = "archivebox.core"
|
||||
label = "core"
|
||||
|
||||
def ready(self):
|
||||
"""Register the archivebox.core.admin_site as the main django admin site"""
|
||||
@@ -14,29 +14,30 @@ class CoreConfig(AppConfig):
|
||||
from django.utils.autoreload import DJANGO_AUTORELOAD_ENV
|
||||
|
||||
from archivebox.core.admin_site import register_admin_site
|
||||
|
||||
register_admin_site()
|
||||
|
||||
# Import models to register state machines with the registry
|
||||
# Skip during makemigrations to avoid premature state machine access
|
||||
if 'makemigrations' not in sys.argv:
|
||||
if "makemigrations" not in sys.argv:
|
||||
from archivebox.core import models # noqa: F401
|
||||
|
||||
pidfile = os.environ.get('ARCHIVEBOX_RUNSERVER_PIDFILE')
|
||||
pidfile = os.environ.get("ARCHIVEBOX_RUNSERVER_PIDFILE")
|
||||
if pidfile:
|
||||
should_write_pid = True
|
||||
if os.environ.get('ARCHIVEBOX_AUTORELOAD') == '1':
|
||||
should_write_pid = os.environ.get(DJANGO_AUTORELOAD_ENV) == 'true'
|
||||
if os.environ.get("ARCHIVEBOX_AUTORELOAD") == "1":
|
||||
should_write_pid = os.environ.get(DJANGO_AUTORELOAD_ENV) == "true"
|
||||
if should_write_pid:
|
||||
try:
|
||||
with open(pidfile, 'w') as handle:
|
||||
with open(pidfile, "w") as handle:
|
||||
handle.write(str(os.getpid()))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _should_prepare_runtime() -> bool:
|
||||
if os.environ.get('ARCHIVEBOX_RUNSERVER') == '1':
|
||||
if os.environ.get('ARCHIVEBOX_AUTORELOAD') == '1':
|
||||
return os.environ.get(DJANGO_AUTORELOAD_ENV) == 'true'
|
||||
if os.environ.get("ARCHIVEBOX_RUNSERVER") == "1":
|
||||
if os.environ.get("ARCHIVEBOX_AUTORELOAD") == "1":
|
||||
return os.environ.get(DJANGO_AUTORELOAD_ENV) == "true"
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -44,4 +45,5 @@ class CoreConfig(AppConfig):
|
||||
from archivebox.machine.models import Process, Machine
|
||||
|
||||
Process.cleanup_stale_running()
|
||||
Process.cleanup_orphaned_workers()
|
||||
Machine.current()
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
|
||||
from django import forms
|
||||
from django.utils.html import format_html
|
||||
|
||||
from archivebox.misc.util import URL_REGEX, find_all_urls
|
||||
from archivebox.misc.util import URL_REGEX, find_all_urls, parse_filesize_to_bytes
|
||||
from taggit.utils import edit_string_for_tags, parse_tags
|
||||
from archivebox.base_models.admin import KeyValueWidget
|
||||
from archivebox.crawls.schedule_utils import validate_schedule
|
||||
@@ -13,11 +13,11 @@ from archivebox.hooks import get_plugins, discover_plugin_configs, get_plugin_ic
|
||||
from archivebox.personas.models import Persona
|
||||
|
||||
DEPTH_CHOICES = (
|
||||
('0', 'depth = 0 (archive just these URLs)'),
|
||||
('1', 'depth = 1 (+ URLs one hop away)'),
|
||||
('2', 'depth = 2 (+ URLs two hops away)'),
|
||||
('3', 'depth = 3 (+ URLs three hops away)'),
|
||||
('4', 'depth = 4 (+ URLs four hops away)'),
|
||||
("0", "depth = 0 (archive just these URLs)"),
|
||||
("1", "depth = 1 (+ URLs one hop away)"),
|
||||
("2", "depth = 2 (+ URLs two hops away)"),
|
||||
("3", "depth = 3 (+ URLs three hops away)"),
|
||||
("4", "depth = 4 (+ URLs four hops away)"),
|
||||
)
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ def get_plugin_choices():
|
||||
|
||||
def get_plugin_choice_label(plugin_name: str, plugin_configs: dict[str, dict]) -> str:
|
||||
schema = plugin_configs.get(plugin_name, {})
|
||||
description = str(schema.get('description') or '').strip()
|
||||
description = str(schema.get("description") or "").strip()
|
||||
if not description:
|
||||
return plugin_name
|
||||
icon_html = get_plugin_icon(plugin_name)
|
||||
@@ -45,7 +45,7 @@ def get_plugin_choice_label(plugin_name: str, plugin_configs: dict[str, dict]) -
|
||||
def get_choice_field(form: forms.Form, name: str) -> forms.ChoiceField:
|
||||
field = form.fields[name]
|
||||
if not isinstance(field, forms.ChoiceField):
|
||||
raise TypeError(f'{name} must be a ChoiceField')
|
||||
raise TypeError(f"{name} must be a ChoiceField")
|
||||
return field
|
||||
|
||||
|
||||
@@ -54,10 +54,12 @@ class AddLinkForm(forms.Form):
|
||||
url = forms.CharField(
|
||||
label="URLs",
|
||||
strip=True,
|
||||
widget=forms.Textarea(attrs={
|
||||
'data-url-regex': URL_REGEX.pattern,
|
||||
}),
|
||||
required=True
|
||||
widget=forms.Textarea(
|
||||
attrs={
|
||||
"data-url-regex": URL_REGEX.pattern,
|
||||
},
|
||||
),
|
||||
required=True,
|
||||
)
|
||||
tag = forms.CharField(
|
||||
label="Tags",
|
||||
@@ -68,16 +70,41 @@ class AddLinkForm(forms.Form):
|
||||
depth = forms.ChoiceField(
|
||||
label="Archive depth",
|
||||
choices=DEPTH_CHOICES,
|
||||
initial='0',
|
||||
widget=forms.RadioSelect(attrs={"class": "depth-selection"})
|
||||
initial="0",
|
||||
widget=forms.RadioSelect(attrs={"class": "depth-selection"}),
|
||||
)
|
||||
max_urls = forms.IntegerField(
|
||||
label="Max URLs",
|
||||
required=False,
|
||||
min_value=0,
|
||||
initial=0,
|
||||
widget=forms.NumberInput(
|
||||
attrs={
|
||||
"min": 0,
|
||||
"step": 1,
|
||||
"placeholder": "0 = unlimited",
|
||||
},
|
||||
),
|
||||
)
|
||||
max_size = forms.CharField(
|
||||
label="Max size",
|
||||
required=False,
|
||||
initial="0",
|
||||
widget=forms.TextInput(
|
||||
attrs={
|
||||
"placeholder": "0 = unlimited, or e.g. 45mb / 1gb",
|
||||
},
|
||||
),
|
||||
)
|
||||
notes = forms.CharField(
|
||||
label="Notes",
|
||||
strip=True,
|
||||
required=False,
|
||||
widget=forms.TextInput(attrs={
|
||||
'placeholder': 'Optional notes about this crawl',
|
||||
})
|
||||
widget=forms.TextInput(
|
||||
attrs={
|
||||
"placeholder": "Optional notes about this crawl",
|
||||
},
|
||||
),
|
||||
)
|
||||
url_filters = forms.Field(
|
||||
label="URL allowlist / denylist",
|
||||
@@ -128,16 +155,18 @@ class AddLinkForm(forms.Form):
|
||||
label="Repeat schedule",
|
||||
max_length=64,
|
||||
required=False,
|
||||
widget=forms.TextInput(attrs={
|
||||
'placeholder': 'e.g., daily, weekly, 0 */6 * * * (every 6 hours)',
|
||||
})
|
||||
widget=forms.TextInput(
|
||||
attrs={
|
||||
"placeholder": "e.g., daily, weekly, 0 */6 * * * (every 6 hours)",
|
||||
},
|
||||
),
|
||||
)
|
||||
persona = forms.ModelChoiceField(
|
||||
label="Persona (authentication profile)",
|
||||
required=False,
|
||||
queryset=Persona.objects.none(),
|
||||
empty_label=None,
|
||||
to_field_name='name',
|
||||
to_field_name="name",
|
||||
)
|
||||
index_only = forms.BooleanField(
|
||||
label="Index only dry run (add crawl but don't archive yet)",
|
||||
@@ -155,8 +184,8 @@ class AddLinkForm(forms.Form):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
default_persona = Persona.get_or_create_default()
|
||||
self.fields['persona'].queryset = Persona.objects.order_by('name')
|
||||
self.fields['persona'].initial = default_persona.name
|
||||
self.fields["persona"].queryset = Persona.objects.order_by("name")
|
||||
self.fields["persona"].initial = default_persona.name
|
||||
|
||||
# Get all plugins
|
||||
all_plugins = get_plugins()
|
||||
@@ -164,86 +193,136 @@ class AddLinkForm(forms.Form):
|
||||
|
||||
# Define plugin groups
|
||||
chrome_dependent = {
|
||||
'accessibility', 'chrome', 'consolelog', 'dom', 'headers',
|
||||
'parse_dom_outlinks', 'pdf', 'redirects', 'responses',
|
||||
'screenshot', 'seo', 'singlefile', 'ssl', 'staticfile', 'title'
|
||||
"accessibility",
|
||||
"chrome",
|
||||
"consolelog",
|
||||
"dom",
|
||||
"headers",
|
||||
"parse_dom_outlinks",
|
||||
"pdf",
|
||||
"redirects",
|
||||
"responses",
|
||||
"screenshot",
|
||||
"seo",
|
||||
"singlefile",
|
||||
"ssl",
|
||||
"staticfile",
|
||||
"title",
|
||||
}
|
||||
archiving = {
|
||||
'archivedotorg', 'defuddle', 'favicon', 'forumdl', 'gallerydl', 'git',
|
||||
'htmltotext', 'mercury', 'papersdl', 'readability', 'trafilatura', 'wget', 'ytdlp'
|
||||
"archivedotorg",
|
||||
"defuddle",
|
||||
"favicon",
|
||||
"forumdl",
|
||||
"gallerydl",
|
||||
"git",
|
||||
"htmltotext",
|
||||
"mercury",
|
||||
"papersdl",
|
||||
"readability",
|
||||
"trafilatura",
|
||||
"wget",
|
||||
"ytdlp",
|
||||
}
|
||||
parsing = {
|
||||
'parse_html_urls', 'parse_jsonl_urls',
|
||||
'parse_netscape_urls', 'parse_rss_urls', 'parse_txt_urls'
|
||||
"parse_html_urls",
|
||||
"parse_jsonl_urls",
|
||||
"parse_netscape_urls",
|
||||
"parse_rss_urls",
|
||||
"parse_txt_urls",
|
||||
}
|
||||
search = {
|
||||
'search_backend_ripgrep', 'search_backend_sonic', 'search_backend_sqlite'
|
||||
"search_backend_ripgrep",
|
||||
"search_backend_sonic",
|
||||
"search_backend_sqlite",
|
||||
}
|
||||
binary = {'apt', 'brew', 'custom', 'env', 'npm', 'pip'}
|
||||
extensions = {'twocaptcha', 'istilldontcareaboutcookies', 'ublock'}
|
||||
binary = {"apt", "brew", "custom", "env", "npm", "pip"}
|
||||
extensions = {"twocaptcha", "istilldontcareaboutcookies", "ublock"}
|
||||
|
||||
# Populate plugin field choices
|
||||
get_choice_field(self, 'chrome_plugins').choices = [
|
||||
get_choice_field(self, "chrome_plugins").choices = [
|
||||
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in chrome_dependent
|
||||
]
|
||||
get_choice_field(self, 'archiving_plugins').choices = [
|
||||
get_choice_field(self, "archiving_plugins").choices = [
|
||||
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in archiving
|
||||
]
|
||||
get_choice_field(self, 'parsing_plugins').choices = [
|
||||
get_choice_field(self, "parsing_plugins").choices = [
|
||||
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in parsing
|
||||
]
|
||||
get_choice_field(self, 'search_plugins').choices = [
|
||||
get_choice_field(self, "search_plugins").choices = [
|
||||
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in search
|
||||
]
|
||||
get_choice_field(self, 'binary_plugins').choices = [
|
||||
get_choice_field(self, "binary_plugins").choices = [
|
||||
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in binary
|
||||
]
|
||||
get_choice_field(self, 'extension_plugins').choices = [
|
||||
get_choice_field(self, "extension_plugins").choices = [
|
||||
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in extensions
|
||||
]
|
||||
|
||||
required_search_plugin = f'search_backend_{SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}'.strip()
|
||||
search_choices = [choice[0] for choice in get_choice_field(self, 'search_plugins').choices]
|
||||
required_search_plugin = f"search_backend_{SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}".strip()
|
||||
search_choices = [choice[0] for choice in get_choice_field(self, "search_plugins").choices]
|
||||
if required_search_plugin in search_choices:
|
||||
get_choice_field(self, 'search_plugins').initial = [required_search_plugin]
|
||||
get_choice_field(self, "search_plugins").initial = [required_search_plugin]
|
||||
|
||||
def clean(self):
|
||||
cleaned_data = super().clean() or {}
|
||||
|
||||
# Combine all plugin groups into single list
|
||||
all_selected_plugins = []
|
||||
for field in ['chrome_plugins', 'archiving_plugins', 'parsing_plugins',
|
||||
'search_plugins', 'binary_plugins', 'extension_plugins']:
|
||||
for field in [
|
||||
"chrome_plugins",
|
||||
"archiving_plugins",
|
||||
"parsing_plugins",
|
||||
"search_plugins",
|
||||
"binary_plugins",
|
||||
"extension_plugins",
|
||||
]:
|
||||
selected = cleaned_data.get(field)
|
||||
if isinstance(selected, list):
|
||||
all_selected_plugins.extend(selected)
|
||||
|
||||
# Store combined list for easy access
|
||||
cleaned_data['plugins'] = all_selected_plugins
|
||||
cleaned_data["plugins"] = all_selected_plugins
|
||||
|
||||
return cleaned_data
|
||||
|
||||
def clean_url(self):
|
||||
value = self.cleaned_data.get('url') or ''
|
||||
urls = '\n'.join(find_all_urls(value))
|
||||
value = self.cleaned_data.get("url") or ""
|
||||
urls = "\n".join(find_all_urls(value))
|
||||
if not urls:
|
||||
raise forms.ValidationError('Enter at least one valid URL.')
|
||||
raise forms.ValidationError("Enter at least one valid URL.")
|
||||
return urls
|
||||
|
||||
def clean_url_filters(self):
|
||||
from archivebox.crawls.models import Crawl
|
||||
|
||||
value = self.cleaned_data.get('url_filters') or {}
|
||||
value = self.cleaned_data.get("url_filters") or {}
|
||||
return {
|
||||
'allowlist': '\n'.join(Crawl.split_filter_patterns(value.get('allowlist', ''))),
|
||||
'denylist': '\n'.join(Crawl.split_filter_patterns(value.get('denylist', ''))),
|
||||
'same_domain_only': bool(value.get('same_domain_only')),
|
||||
"allowlist": "\n".join(Crawl.split_filter_patterns(value.get("allowlist", ""))),
|
||||
"denylist": "\n".join(Crawl.split_filter_patterns(value.get("denylist", ""))),
|
||||
"same_domain_only": bool(value.get("same_domain_only")),
|
||||
}
|
||||
|
||||
def clean_max_urls(self):
|
||||
value = self.cleaned_data.get("max_urls")
|
||||
return int(value or 0)
|
||||
|
||||
def clean_max_size(self):
|
||||
raw_value = str(self.cleaned_data.get("max_size") or "").strip()
|
||||
if not raw_value:
|
||||
return 0
|
||||
try:
|
||||
value = parse_filesize_to_bytes(raw_value)
|
||||
except ValueError as err:
|
||||
raise forms.ValidationError(str(err))
|
||||
if value < 0:
|
||||
raise forms.ValidationError("Max size must be 0 or a positive number of bytes.")
|
||||
return value
|
||||
|
||||
def clean_schedule(self):
|
||||
schedule = (self.cleaned_data.get('schedule') or '').strip()
|
||||
schedule = (self.cleaned_data.get("schedule") or "").strip()
|
||||
if not schedule:
|
||||
return ''
|
||||
return ""
|
||||
|
||||
try:
|
||||
validate_schedule(schedule)
|
||||
@@ -269,7 +348,7 @@ class TagField(forms.CharField):
|
||||
return parse_tags(value)
|
||||
except ValueError:
|
||||
raise forms.ValidationError(
|
||||
"Please provide a comma-separated list of tags."
|
||||
"Please provide a comma-separated list of tags.",
|
||||
)
|
||||
|
||||
def has_changed(self, initial, data):
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
|
||||
@@ -9,6 +7,7 @@ from archivebox.config.common import SERVER_CONFIG
|
||||
|
||||
|
||||
_SNAPSHOT_ID_RE = re.compile(r"^[0-9a-fA-F-]{8,36}$")
|
||||
_SNAPSHOT_SUBDOMAIN_RE = re.compile(r"^snap-(?P<suffix>[0-9a-fA-F]{12})$")
|
||||
|
||||
|
||||
def split_host_port(host: str) -> tuple[str, str | None]:
|
||||
@@ -71,21 +70,29 @@ def get_web_host() -> str:
|
||||
return urlparse(override).netloc.lower()
|
||||
return _build_listen_host("web")
|
||||
|
||||
|
||||
def get_api_host() -> str:
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
return get_listen_host().lower()
|
||||
return _build_listen_host("api")
|
||||
|
||||
|
||||
def get_public_host() -> str:
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
return get_listen_host().lower()
|
||||
return _build_listen_host("public")
|
||||
|
||||
|
||||
def get_snapshot_subdomain(snapshot_id: str) -> str:
|
||||
normalized = re.sub(r"[^0-9a-fA-F]", "", snapshot_id or "")
|
||||
suffix = (normalized[-12:] if len(normalized) >= 12 else normalized).lower()
|
||||
return f"snap-{suffix}"
|
||||
|
||||
|
||||
def get_snapshot_host(snapshot_id: str) -> str:
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
return get_listen_host().lower()
|
||||
return _build_listen_host(snapshot_id)
|
||||
return _build_listen_host(get_snapshot_subdomain(snapshot_id))
|
||||
|
||||
|
||||
def get_original_host(domain: str) -> str:
|
||||
@@ -95,7 +102,16 @@ def get_original_host(domain: str) -> str:
|
||||
|
||||
|
||||
def is_snapshot_subdomain(subdomain: str) -> bool:
|
||||
return bool(_SNAPSHOT_ID_RE.match(subdomain or ""))
|
||||
value = (subdomain or "").strip()
|
||||
return bool(_SNAPSHOT_SUBDOMAIN_RE.match(value) or _SNAPSHOT_ID_RE.match(value))
|
||||
|
||||
|
||||
def get_snapshot_lookup_key(snapshot_ref: str) -> str:
|
||||
value = (snapshot_ref or "").strip().lower()
|
||||
match = _SNAPSHOT_SUBDOMAIN_RE.match(value)
|
||||
if match:
|
||||
return match.group("suffix")
|
||||
return value
|
||||
|
||||
|
||||
def get_listen_subdomain(request_host: str) -> str:
|
||||
@@ -141,22 +157,23 @@ def _build_base_url_for_host(host: str, request=None) -> str:
|
||||
|
||||
|
||||
def get_admin_base_url(request=None) -> str:
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
return _build_base_url_for_host(get_listen_host(), request=request)
|
||||
override = _normalize_base_url(SERVER_CONFIG.ADMIN_BASE_URL)
|
||||
if override:
|
||||
return override
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
return _build_base_url_for_host(get_listen_host(), request=request)
|
||||
return _build_base_url_for_host(get_admin_host(), request=request)
|
||||
|
||||
|
||||
def get_web_base_url(request=None) -> str:
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
return _build_base_url_for_host(get_listen_host(), request=request)
|
||||
override = _normalize_base_url(SERVER_CONFIG.ARCHIVE_BASE_URL)
|
||||
if override:
|
||||
return override
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
return _build_base_url_for_host(get_listen_host(), request=request)
|
||||
return _build_base_url_for_host(get_web_host(), request=request)
|
||||
|
||||
|
||||
def get_api_base_url(request=None) -> str:
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
return _build_base_url_for_host(get_listen_host(), request=request)
|
||||
@@ -191,6 +208,7 @@ def build_admin_url(path: str = "", request=None) -> str:
|
||||
def build_web_url(path: str = "", request=None) -> str:
|
||||
return _build_url(get_web_base_url(request), path)
|
||||
|
||||
|
||||
def build_api_url(path: str = "", request=None) -> str:
|
||||
return _build_url(get_api_base_url(request), path)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox'
|
||||
__package__ = "archivebox"
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
@@ -6,13 +6,12 @@ from archivebox.cli import main as run_cli
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = 'Run an ArchiveBox CLI subcommand (e.g. add, remove, list, etc)'
|
||||
help = "Run an ArchiveBox CLI subcommand (e.g. add, remove, list, etc)"
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument('subcommand', type=str, help='The subcommand you want to run')
|
||||
parser.add_argument('command_args', nargs='*', help='Arguments to pass to the subcommand')
|
||||
|
||||
parser.add_argument("subcommand", type=str, help="The subcommand you want to run")
|
||||
parser.add_argument("command_args", nargs="*", help="Arguments to pass to the subcommand")
|
||||
|
||||
def handle(self, *args, **kwargs):
|
||||
command_args = [kwargs['subcommand'], *kwargs['command_args']]
|
||||
command_args = [kwargs["subcommand"], *kwargs["command_args"]]
|
||||
run_cli(args=command_args)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
|
||||
import ipaddress
|
||||
import re
|
||||
@@ -16,6 +16,7 @@ from archivebox.config.common import SERVER_CONFIG
|
||||
from archivebox.config import VERSION
|
||||
from archivebox.config.version import get_COMMIT_HASH
|
||||
from archivebox.core.host_utils import (
|
||||
build_snapshot_url,
|
||||
build_admin_url,
|
||||
build_web_url,
|
||||
get_api_host,
|
||||
@@ -31,10 +32,10 @@ from archivebox.core.host_utils import (
|
||||
from archivebox.core.views import SnapshotHostView, OriginalDomainHostView
|
||||
|
||||
|
||||
def detect_timezone(request, activate: bool=True):
|
||||
gmt_offset = (request.COOKIES.get('GMT_OFFSET') or '').strip()
|
||||
def detect_timezone(request, activate: bool = True):
|
||||
gmt_offset = (request.COOKIES.get("GMT_OFFSET") or "").strip()
|
||||
tz = None
|
||||
if gmt_offset.replace('-', '').isdigit():
|
||||
if gmt_offset.replace("-", "").isdigit():
|
||||
tz = timezone.get_fixed_timezone(int(gmt_offset))
|
||||
if activate:
|
||||
timezone.activate(tz)
|
||||
@@ -53,11 +54,12 @@ def TimezoneMiddleware(get_response):
|
||||
def CacheControlMiddleware(get_response):
|
||||
snapshot_path_re = re.compile(r"^/[^/]+/\\d{8}/[^/]+/[0-9a-fA-F-]{8,36}/")
|
||||
static_cache_key = (get_COMMIT_HASH() or VERSION or "dev").strip()
|
||||
|
||||
def middleware(request):
|
||||
response = get_response(request)
|
||||
|
||||
if request.path.startswith('/static/'):
|
||||
rel_path = request.path[len('/static/'):]
|
||||
if request.path.startswith("/static/"):
|
||||
rel_path = request.path[len("/static/") :]
|
||||
static_path = finders.find(rel_path)
|
||||
if static_path:
|
||||
try:
|
||||
@@ -81,10 +83,10 @@ def CacheControlMiddleware(get_response):
|
||||
response.headers["Last-Modified"] = http_date(mtime)
|
||||
return response
|
||||
|
||||
if '/archive/' in request.path or '/static/' in request.path or snapshot_path_re.match(request.path):
|
||||
if not response.get('Cache-Control'):
|
||||
policy = 'public' if SERVER_CONFIG.PUBLIC_SNAPSHOTS else 'private'
|
||||
response['Cache-Control'] = f'{policy}, max-age=60, stale-while-revalidate=300'
|
||||
if "/archive/" in request.path or "/static/" in request.path or snapshot_path_re.match(request.path):
|
||||
if not response.get("Cache-Control"):
|
||||
policy = "public" if SERVER_CONFIG.PUBLIC_SNAPSHOTS else "private"
|
||||
response["Cache-Control"] = f"{policy}, max-age=60, stale-while-revalidate=300"
|
||||
# print('Set Cache-Control header to', response['Cache-Control'])
|
||||
return response
|
||||
|
||||
@@ -115,6 +117,10 @@ def ServerSecurityModeMiddleware(get_response):
|
||||
|
||||
|
||||
def HostRoutingMiddleware(get_response):
|
||||
snapshot_path_re = re.compile(
|
||||
r"^/(?P<username>[^/]+)/(?P<date>\d{4}(?:\d{2})?(?:\d{2})?)/(?P<domain>[^/]+)/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:/(?P<path>.*))?$",
|
||||
)
|
||||
|
||||
def middleware(request):
|
||||
request_host = (request.get_host() or "").lower()
|
||||
admin_host = get_admin_host()
|
||||
@@ -124,6 +130,23 @@ def HostRoutingMiddleware(get_response):
|
||||
listen_host = get_listen_host()
|
||||
subdomain = get_listen_subdomain(request_host)
|
||||
|
||||
# Framework-owned assets must bypass snapshot/original-domain replay routing.
|
||||
# Otherwise pages on snapshot subdomains can receive HTML for JS/CSS requests.
|
||||
if request.path.startswith("/static/") or request.path in {"/favicon.ico", "/robots.txt"}:
|
||||
return get_response(request)
|
||||
|
||||
if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING and not host_matches(request_host, admin_host):
|
||||
if (
|
||||
request.path == "/admin"
|
||||
or request.path.startswith("/admin/")
|
||||
or request.path == "/accounts"
|
||||
or request.path.startswith("/accounts/")
|
||||
):
|
||||
target = build_admin_url(request.path, request=request)
|
||||
if request.META.get("QUERY_STRING"):
|
||||
target = f"{target}?{request.META['QUERY_STRING']}"
|
||||
return redirect(target)
|
||||
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
if host_matches(request_host, listen_host):
|
||||
return get_response(request)
|
||||
@@ -140,6 +163,16 @@ def HostRoutingMiddleware(get_response):
|
||||
return get_response(request)
|
||||
|
||||
if host_matches(request_host, admin_host):
|
||||
snapshot_match = snapshot_path_re.match(request.path)
|
||||
if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING and snapshot_match:
|
||||
snapshot_id = snapshot_match.group("snapshot_id")
|
||||
replay_path = (snapshot_match.group("path") or "").strip("/")
|
||||
if replay_path == "index.html":
|
||||
replay_path = ""
|
||||
target = build_snapshot_url(snapshot_id, replay_path, request=request)
|
||||
if request.META.get("QUERY_STRING"):
|
||||
target = f"{target}?{request.META['QUERY_STRING']}"
|
||||
return redirect(target)
|
||||
return get_response(request)
|
||||
|
||||
if host_matches(request_host, api_host):
|
||||
@@ -160,16 +193,9 @@ def HostRoutingMiddleware(get_response):
|
||||
if host_matches(request_host, web_host):
|
||||
request.user = AnonymousUser()
|
||||
request._cached_user = request.user
|
||||
if request.path.startswith("/admin"):
|
||||
target = build_admin_url(request.path, request=request)
|
||||
if request.META.get("QUERY_STRING"):
|
||||
target = f"{target}?{request.META['QUERY_STRING']}"
|
||||
return redirect(target)
|
||||
return get_response(request)
|
||||
|
||||
if host_matches(request_host, public_host):
|
||||
request.user = AnonymousUser()
|
||||
request._cached_user = request.user
|
||||
return get_response(request)
|
||||
|
||||
if subdomain:
|
||||
@@ -196,24 +222,26 @@ def HostRoutingMiddleware(get_response):
|
||||
|
||||
return middleware
|
||||
|
||||
|
||||
class ReverseProxyAuthMiddleware(RemoteUserMiddleware):
|
||||
header = 'HTTP_{normalized}'.format(normalized=SERVER_CONFIG.REVERSE_PROXY_USER_HEADER.replace('-', '_').upper())
|
||||
header = "HTTP_{normalized}".format(normalized=SERVER_CONFIG.REVERSE_PROXY_USER_HEADER.replace("-", "_").upper())
|
||||
|
||||
def process_request(self, request):
|
||||
if SERVER_CONFIG.REVERSE_PROXY_WHITELIST == '':
|
||||
if SERVER_CONFIG.REVERSE_PROXY_WHITELIST == "":
|
||||
return
|
||||
|
||||
ip = request.META.get('REMOTE_ADDR')
|
||||
ip = request.META.get("REMOTE_ADDR")
|
||||
if not isinstance(ip, str):
|
||||
return
|
||||
|
||||
for cidr in SERVER_CONFIG.REVERSE_PROXY_WHITELIST.split(','):
|
||||
for cidr in SERVER_CONFIG.REVERSE_PROXY_WHITELIST.split(","):
|
||||
try:
|
||||
network = ipaddress.ip_network(cidr)
|
||||
except ValueError:
|
||||
raise ImproperlyConfigured(
|
||||
"The REVERSE_PROXY_WHITELIST config paramater is in invalid format, or "
|
||||
"contains invalid CIDR. Correct format is a coma-separated list of IPv4/IPv6 CIDRs.")
|
||||
"The REVERSE_PROXY_WHITELIST config parameter is in invalid format, or "
|
||||
"contains invalid CIDR. Correct format is a coma-separated list of IPv4/IPv6 CIDRs.",
|
||||
)
|
||||
|
||||
if ipaddress.ip_address(ip) in network:
|
||||
return super().process_request(request)
|
||||
|
||||
@@ -5,23 +5,21 @@ import uuid
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
initial = True
|
||||
|
||||
dependencies = [
|
||||
]
|
||||
dependencies = []
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='Snapshot',
|
||||
name="Snapshot",
|
||||
fields=[
|
||||
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
||||
('url', models.URLField(unique=True)),
|
||||
('timestamp', models.CharField(default=None, max_length=32, null=True, unique=True)),
|
||||
('title', models.CharField(default=None, max_length=128, null=True)),
|
||||
('tags', models.CharField(default=None, max_length=256, null=True)),
|
||||
('added', models.DateTimeField(auto_now_add=True)),
|
||||
('updated', models.DateTimeField(default=None, null=True)),
|
||||
("id", models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
||||
("url", models.URLField(unique=True)),
|
||||
("timestamp", models.CharField(default=None, max_length=32, null=True, unique=True)),
|
||||
("title", models.CharField(default=None, max_length=128, null=True)),
|
||||
("tags", models.CharField(default=None, max_length=256, null=True)),
|
||||
("added", models.DateTimeField(auto_now_add=True)),
|
||||
("updated", models.DateTimeField(default=None, null=True)),
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,14 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0001_initial'),
|
||||
("core", "0001_initial"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='timestamp',
|
||||
model_name="snapshot",
|
||||
name="timestamp",
|
||||
field=models.CharField(default=None, max_length=32, null=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,35 +4,34 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0002_auto_20200625_1521'),
|
||||
("core", "0002_auto_20200625_1521"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='added',
|
||||
model_name="snapshot",
|
||||
name="added",
|
||||
field=models.DateTimeField(auto_now_add=True, db_index=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='tags',
|
||||
model_name="snapshot",
|
||||
name="tags",
|
||||
field=models.CharField(db_index=True, default=None, max_length=256, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='timestamp',
|
||||
model_name="snapshot",
|
||||
name="timestamp",
|
||||
field=models.CharField(db_index=True, default=None, max_length=32, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='title',
|
||||
model_name="snapshot",
|
||||
name="title",
|
||||
field=models.CharField(db_index=True, default=None, max_length=128, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='updated',
|
||||
model_name="snapshot",
|
||||
name="updated",
|
||||
field=models.DateTimeField(db_index=True, default=None, null=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,14 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0003_auto_20200630_1034'),
|
||||
("core", "0003_auto_20200630_1034"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='timestamp',
|
||||
model_name="snapshot",
|
||||
name="timestamp",
|
||||
field=models.CharField(db_index=True, default=None, max_length=32, unique=True),
|
||||
preserve_default=False,
|
||||
),
|
||||
|
||||
@@ -4,25 +4,24 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0004_auto_20200713_1552'),
|
||||
("core", "0004_auto_20200713_1552"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='tags',
|
||||
model_name="snapshot",
|
||||
name="tags",
|
||||
field=models.CharField(blank=True, db_index=True, max_length=256, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='title',
|
||||
model_name="snapshot",
|
||||
name="title",
|
||||
field=models.CharField(blank=True, db_index=True, max_length=128, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='updated',
|
||||
model_name="snapshot",
|
||||
name="updated",
|
||||
field=models.DateTimeField(blank=True, db_index=True, null=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -3,19 +3,18 @@
|
||||
from django.db import migrations, models
|
||||
from django.utils.text import slugify
|
||||
|
||||
|
||||
def forwards_func(apps, schema_editor):
|
||||
SnapshotModel = apps.get_model("core", "Snapshot")
|
||||
TagModel = apps.get_model("core", "Tag")
|
||||
|
||||
snapshots = SnapshotModel.objects.all()
|
||||
for snapshot in snapshots:
|
||||
tag_set = (
|
||||
set(tag.strip() for tag in (snapshot.tags_old or '').split(','))
|
||||
)
|
||||
tag_set = {tag.strip() for tag in (snapshot.tags_old or "").split(",")}
|
||||
tag_set.discard("")
|
||||
|
||||
for tag in tag_set:
|
||||
to_add, _ = TagModel.objects.get_or_create(name=tag, defaults={'slug': slugify(tag)})
|
||||
to_add, _ = TagModel.objects.get_or_create(name=tag, defaults={"slug": slugify(tag)})
|
||||
snapshot.tags.add(to_add)
|
||||
|
||||
|
||||
@@ -30,37 +29,36 @@ def reverse_func(apps, schema_editor):
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0005_auto_20200728_0326'),
|
||||
("core", "0005_auto_20200728_0326"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RenameField(
|
||||
model_name='snapshot',
|
||||
old_name='tags',
|
||||
new_name='tags_old',
|
||||
model_name="snapshot",
|
||||
old_name="tags",
|
||||
new_name="tags_old",
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='Tag',
|
||||
name="Tag",
|
||||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('name', models.CharField(max_length=100, unique=True, verbose_name='name')),
|
||||
('slug', models.SlugField(max_length=100, unique=True, verbose_name='slug')),
|
||||
("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
|
||||
("name", models.CharField(max_length=100, unique=True, verbose_name="name")),
|
||||
("slug", models.SlugField(max_length=100, unique=True, verbose_name="slug")),
|
||||
],
|
||||
options={
|
||||
'verbose_name': 'Tag',
|
||||
'verbose_name_plural': 'Tags',
|
||||
"verbose_name": "Tag",
|
||||
"verbose_name_plural": "Tags",
|
||||
},
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='tags',
|
||||
field=models.ManyToManyField(to='core.Tag'),
|
||||
model_name="snapshot",
|
||||
name="tags",
|
||||
field=models.ManyToManyField(to="core.Tag"),
|
||||
),
|
||||
migrations.RunPython(forwards_func, reverse_func),
|
||||
migrations.RemoveField(
|
||||
model_name='snapshot',
|
||||
name='tags_old',
|
||||
model_name="snapshot",
|
||||
name="tags_old",
|
||||
),
|
||||
]
|
||||
|
||||
@@ -9,13 +9,15 @@ import django.db.models.deletion
|
||||
# Handle old vs new import paths
|
||||
try:
|
||||
from archivebox.config import CONSTANTS
|
||||
|
||||
ARCHIVE_DIR = CONSTANTS.ARCHIVE_DIR
|
||||
except ImportError:
|
||||
try:
|
||||
from archivebox.config import CONFIG
|
||||
ARCHIVE_DIR = Path(CONFIG.get('ARCHIVE_DIR', './archive'))
|
||||
|
||||
ARCHIVE_DIR = Path(CONFIG.get("ARCHIVE_DIR", "./archive"))
|
||||
except ImportError:
|
||||
ARCHIVE_DIR = Path('./archive')
|
||||
ARCHIVE_DIR = Path("./archive")
|
||||
|
||||
try:
|
||||
from archivebox.misc.util import to_json
|
||||
@@ -29,6 +31,7 @@ try:
|
||||
JSONField = models.JSONField
|
||||
except AttributeError:
|
||||
import jsonfield
|
||||
|
||||
JSONField = jsonfield.JSONField
|
||||
|
||||
|
||||
@@ -41,7 +44,7 @@ def forwards_func(apps, schema_editor):
|
||||
out_dir = Path(ARCHIVE_DIR) / snapshot.timestamp
|
||||
|
||||
try:
|
||||
with open(out_dir / "index.json", "r") as f:
|
||||
with open(out_dir / "index.json") as f:
|
||||
fs_index = json.load(f)
|
||||
except Exception:
|
||||
continue
|
||||
@@ -56,37 +59,46 @@ def forwards_func(apps, schema_editor):
|
||||
snapshot=snapshot,
|
||||
pwd=result["pwd"],
|
||||
cmd=result.get("cmd") or [],
|
||||
cmd_version=result.get("cmd_version") or 'unknown',
|
||||
cmd_version=result.get("cmd_version") or "unknown",
|
||||
start_ts=result["start_ts"],
|
||||
end_ts=result["end_ts"],
|
||||
status=result["status"],
|
||||
output=result.get("output") or 'null',
|
||||
output=result.get("output") or "null",
|
||||
)
|
||||
except Exception as e:
|
||||
print(
|
||||
' ! Skipping import due to missing/invalid index.json:',
|
||||
" ! Skipping import due to missing/invalid index.json:",
|
||||
out_dir,
|
||||
e,
|
||||
'(open an issue with this index.json for help)',
|
||||
"(open an issue with this index.json for help)",
|
||||
)
|
||||
|
||||
|
||||
def verify_json_index_integrity(snapshot):
|
||||
results = snapshot.archiveresult_set.all()
|
||||
out_dir = Path(ARCHIVE_DIR) / snapshot.timestamp
|
||||
with open(out_dir / "index.json", "r") as f:
|
||||
with open(out_dir / "index.json") as f:
|
||||
index = json.load(f)
|
||||
|
||||
history = index["history"]
|
||||
index_results = [result for extractor in history for result in history[extractor]]
|
||||
flattened_results = [result["start_ts"] for result in index_results]
|
||||
|
||||
|
||||
missing_results = [result for result in results if result.start_ts.isoformat() not in flattened_results]
|
||||
|
||||
for missing in missing_results:
|
||||
index["history"][missing.extractor].append({"cmd": missing.cmd, "cmd_version": missing.cmd_version, "end_ts": missing.end_ts.isoformat(),
|
||||
"start_ts": missing.start_ts.isoformat(), "pwd": missing.pwd, "output": missing.output,
|
||||
"schema": "ArchiveResult", "status": missing.status})
|
||||
index["history"][missing.extractor].append(
|
||||
{
|
||||
"cmd": missing.cmd,
|
||||
"cmd_version": missing.cmd_version,
|
||||
"end_ts": missing.end_ts.isoformat(),
|
||||
"start_ts": missing.start_ts.isoformat(),
|
||||
"pwd": missing.pwd,
|
||||
"output": missing.output,
|
||||
"schema": "ArchiveResult",
|
||||
"status": missing.status,
|
||||
},
|
||||
)
|
||||
|
||||
json_index = to_json(index)
|
||||
with open(out_dir / "index.json", "w") as f:
|
||||
@@ -103,25 +115,47 @@ def reverse_func(apps, schema_editor):
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0006_auto_20201012_1520'),
|
||||
("core", "0006_auto_20201012_1520"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='ArchiveResult',
|
||||
name="ArchiveResult",
|
||||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('cmd', JSONField()),
|
||||
('pwd', models.CharField(max_length=256)),
|
||||
('cmd_version', models.CharField(max_length=32)),
|
||||
('status', models.CharField(choices=[('succeeded', 'succeeded'), ('failed', 'failed'), ('skipped', 'skipped')], max_length=16)),
|
||||
('output', models.CharField(max_length=512)),
|
||||
('start_ts', models.DateTimeField()),
|
||||
('end_ts', models.DateTimeField()),
|
||||
('extractor', models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('wget', 'wget'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('headers', 'headers'), ('archivedotorg', 'archivedotorg')], max_length=32)),
|
||||
('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.Snapshot')),
|
||||
("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
|
||||
("cmd", JSONField()),
|
||||
("pwd", models.CharField(max_length=256)),
|
||||
("cmd_version", models.CharField(max_length=32)),
|
||||
(
|
||||
"status",
|
||||
models.CharField(choices=[("succeeded", "succeeded"), ("failed", "failed"), ("skipped", "skipped")], max_length=16),
|
||||
),
|
||||
("output", models.CharField(max_length=512)),
|
||||
("start_ts", models.DateTimeField()),
|
||||
("end_ts", models.DateTimeField()),
|
||||
(
|
||||
"extractor",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("title", "title"),
|
||||
("favicon", "favicon"),
|
||||
("wget", "wget"),
|
||||
("singlefile", "singlefile"),
|
||||
("pdf", "pdf"),
|
||||
("screenshot", "screenshot"),
|
||||
("dom", "dom"),
|
||||
("readability", "readability"),
|
||||
("mercury", "mercury"),
|
||||
("git", "git"),
|
||||
("media", "media"),
|
||||
("headers", "headers"),
|
||||
("archivedotorg", "archivedotorg"),
|
||||
],
|
||||
max_length=32,
|
||||
),
|
||||
),
|
||||
("snapshot", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="core.Snapshot")),
|
||||
],
|
||||
),
|
||||
migrations.RunPython(forwards_func, reverse_func),
|
||||
|
||||
@@ -4,15 +4,14 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0007_archiveresult'),
|
||||
("core", "0007_archiveresult"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='cmd_version',
|
||||
model_name="archiveresult",
|
||||
name="cmd_version",
|
||||
field=models.CharField(blank=True, default=None, max_length=32, null=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,14 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0008_auto_20210105_1421'),
|
||||
("core", "0008_auto_20210105_1421"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='updated',
|
||||
model_name="snapshot",
|
||||
name="updated",
|
||||
field=models.DateTimeField(auto_now=True, db_index=True, null=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,14 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0009_auto_20210216_1038'),
|
||||
("core", "0009_auto_20210216_1038"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='start_ts',
|
||||
model_name="archiveresult",
|
||||
name="start_ts",
|
||||
field=models.DateTimeField(db_index=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -5,20 +5,36 @@ import uuid
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0010_auto_20210216_1055'),
|
||||
("core", "0010_auto_20210216_1055"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='uuid',
|
||||
model_name="archiveresult",
|
||||
name="uuid",
|
||||
field=models.UUIDField(default=uuid.uuid4, editable=False),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='extractor',
|
||||
field=models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archivedotorg', 'archivedotorg')], max_length=32),
|
||||
model_name="archiveresult",
|
||||
name="extractor",
|
||||
field=models.CharField(
|
||||
choices=[
|
||||
("title", "title"),
|
||||
("favicon", "favicon"),
|
||||
("headers", "headers"),
|
||||
("singlefile", "singlefile"),
|
||||
("pdf", "pdf"),
|
||||
("screenshot", "screenshot"),
|
||||
("dom", "dom"),
|
||||
("wget", "wget"),
|
||||
("readability", "readability"),
|
||||
("mercury", "mercury"),
|
||||
("git", "git"),
|
||||
("media", "media"),
|
||||
("archivedotorg", "archivedotorg"),
|
||||
],
|
||||
max_length=32,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,20 +4,19 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0011_auto_20210216_1331'),
|
||||
("core", "0011_auto_20210216_1331"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='cmd_version',
|
||||
model_name="archiveresult",
|
||||
name="cmd_version",
|
||||
field=models.CharField(blank=True, default=None, max_length=128, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='output',
|
||||
model_name="archiveresult",
|
||||
name="output",
|
||||
field=models.CharField(max_length=1024),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,14 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0012_auto_20210216_1425'),
|
||||
("core", "0012_auto_20210216_1425"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='title',
|
||||
model_name="snapshot",
|
||||
name="title",
|
||||
field=models.CharField(blank=True, db_index=True, max_length=256, null=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,14 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0013_auto_20210218_0729'),
|
||||
("core", "0013_auto_20210218_0729"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='title',
|
||||
model_name="snapshot",
|
||||
name="title",
|
||||
field=models.CharField(blank=True, db_index=True, max_length=1024, null=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,14 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0014_auto_20210218_0729'),
|
||||
("core", "0014_auto_20210218_0729"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='title',
|
||||
model_name="snapshot",
|
||||
name="title",
|
||||
field=models.CharField(blank=True, db_index=True, max_length=512, null=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,14 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0015_auto_20210218_0730'),
|
||||
("core", "0015_auto_20210218_0730"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='tags',
|
||||
field=models.ManyToManyField(blank=True, to='core.Tag'),
|
||||
model_name="snapshot",
|
||||
name="tags",
|
||||
field=models.ManyToManyField(blank=True, to="core.Tag"),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,14 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0016_auto_20210218_1204'),
|
||||
("core", "0016_auto_20210218_1204"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='slug',
|
||||
field=models.SlugField(blank=True, max_length=100, unique=True, verbose_name='slug'),
|
||||
model_name="tag",
|
||||
name="slug",
|
||||
field=models.SlugField(blank=True, max_length=100, unique=True, verbose_name="slug"),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,20 +4,19 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0017_auto_20210219_0211'),
|
||||
("core", "0017_auto_20210219_0211"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='name',
|
||||
model_name="tag",
|
||||
name="name",
|
||||
field=models.CharField(max_length=100, unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='slug',
|
||||
model_name="tag",
|
||||
name="slug",
|
||||
field=models.SlugField(blank=True, max_length=100, unique=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,14 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0018_auto_20210327_0952'),
|
||||
("core", "0018_auto_20210327_0952"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='url',
|
||||
model_name="snapshot",
|
||||
name="url",
|
||||
field=models.URLField(db_index=True, unique=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,20 +4,19 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0019_auto_20210401_0654'),
|
||||
("core", "0019_auto_20210401_0654"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='id',
|
||||
field=models.AutoField(primary_key=True, serialize=False, verbose_name='ID'),
|
||||
model_name="archiveresult",
|
||||
name="id",
|
||||
field=models.AutoField(primary_key=True, serialize=False, verbose_name="ID"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='id',
|
||||
field=models.AutoField(primary_key=True, serialize=False, verbose_name='ID'),
|
||||
model_name="tag",
|
||||
name="id",
|
||||
field=models.AutoField(primary_key=True, serialize=False, verbose_name="ID"),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,31 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0020_auto_20210410_1031'),
|
||||
("core", "0020_auto_20210410_1031"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='extractor',
|
||||
field=models.CharField(choices=[('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('title', 'title'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archivedotorg', 'archivedotorg')], max_length=32),
|
||||
model_name="archiveresult",
|
||||
name="extractor",
|
||||
field=models.CharField(
|
||||
choices=[
|
||||
("favicon", "favicon"),
|
||||
("headers", "headers"),
|
||||
("singlefile", "singlefile"),
|
||||
("pdf", "pdf"),
|
||||
("screenshot", "screenshot"),
|
||||
("dom", "dom"),
|
||||
("wget", "wget"),
|
||||
("title", "title"),
|
||||
("readability", "readability"),
|
||||
("mercury", "mercury"),
|
||||
("git", "git"),
|
||||
("media", "media"),
|
||||
("archivedotorg", "archivedotorg"),
|
||||
],
|
||||
max_length=32,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,32 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0021_auto_20220914_0934'),
|
||||
("core", "0021_auto_20220914_0934"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='extractor',
|
||||
field=models.CharField(choices=[('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('title', 'title'), ('readability', 'readability'), ('mercury', 'mercury'), ('htmltotext', 'htmltotext'), ('git', 'git'), ('media', 'media'), ('archivedotorg', 'archivedotorg')], max_length=32),
|
||||
model_name="archiveresult",
|
||||
name="extractor",
|
||||
field=models.CharField(
|
||||
choices=[
|
||||
("favicon", "favicon"),
|
||||
("headers", "headers"),
|
||||
("singlefile", "singlefile"),
|
||||
("pdf", "pdf"),
|
||||
("screenshot", "screenshot"),
|
||||
("dom", "dom"),
|
||||
("wget", "wget"),
|
||||
("title", "title"),
|
||||
("readability", "readability"),
|
||||
("mercury", "mercury"),
|
||||
("htmltotext", "htmltotext"),
|
||||
("git", "git"),
|
||||
("media", "media"),
|
||||
("archivedotorg", "archivedotorg"),
|
||||
],
|
||||
max_length=32,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -16,6 +16,7 @@ def get_table_columns(table_name):
|
||||
def upgrade_core_tables(apps, schema_editor):
|
||||
"""Upgrade core tables from v0.7.2 or v0.8.6rc0 to v0.9.0."""
|
||||
from archivebox.uuid_compat import uuid7
|
||||
|
||||
cursor = connection.cursor()
|
||||
|
||||
# Check if core_archiveresult table exists
|
||||
@@ -30,11 +31,11 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
has_data = row_count > 0
|
||||
|
||||
# Detect which version we're migrating from
|
||||
archiveresult_cols = get_table_columns('core_archiveresult')
|
||||
has_uuid = 'uuid' in archiveresult_cols
|
||||
has_abid = 'abid' in archiveresult_cols
|
||||
archiveresult_cols = get_table_columns("core_archiveresult")
|
||||
has_uuid = "uuid" in archiveresult_cols
|
||||
has_abid = "abid" in archiveresult_cols
|
||||
|
||||
print(f'DEBUG: ArchiveResult row_count={row_count}, has_data={has_data}, has_uuid={has_uuid}, has_abid={has_abid}')
|
||||
print(f"DEBUG: ArchiveResult row_count={row_count}, has_data={has_data}, has_uuid={has_uuid}, has_abid={has_abid}")
|
||||
|
||||
# ============================================================================
|
||||
# PART 1: Upgrade core_archiveresult table
|
||||
@@ -62,7 +63,7 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
if has_data:
|
||||
if has_uuid and not has_abid:
|
||||
# Migrating from v0.7.2+ (has uuid column)
|
||||
print('Migrating ArchiveResult from v0.7.2+ schema (with uuid)...')
|
||||
print("Migrating ArchiveResult from v0.7.2+ schema (with uuid)...")
|
||||
cursor.execute("""
|
||||
INSERT OR IGNORE INTO core_archiveresult_new (
|
||||
id, uuid, snapshot_id, cmd, pwd, cmd_version,
|
||||
@@ -75,7 +76,7 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
""")
|
||||
elif has_abid and not has_uuid:
|
||||
# Migrating from v0.8.6rc0 (has abid instead of uuid)
|
||||
print('Migrating ArchiveResult from v0.8.6rc0 schema...')
|
||||
print("Migrating ArchiveResult from v0.8.6rc0 schema...")
|
||||
cursor.execute("""
|
||||
INSERT OR IGNORE INTO core_archiveresult_new (
|
||||
id, uuid, snapshot_id, cmd, pwd, cmd_version,
|
||||
@@ -88,17 +89,34 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
""")
|
||||
else:
|
||||
# Migrating from v0.7.2 (no uuid or abid column - generate fresh UUIDs)
|
||||
print('Migrating ArchiveResult from v0.7.2 schema (no uuid - generating UUIDs)...')
|
||||
cursor.execute("SELECT id, snapshot_id, cmd, pwd, cmd_version, start_ts, end_ts, status, extractor, output FROM core_archiveresult")
|
||||
print("Migrating ArchiveResult from v0.7.2 schema (no uuid - generating UUIDs)...")
|
||||
cursor.execute(
|
||||
"SELECT id, snapshot_id, cmd, pwd, cmd_version, start_ts, end_ts, status, extractor, output FROM core_archiveresult",
|
||||
)
|
||||
old_records = cursor.fetchall()
|
||||
for record in old_records:
|
||||
new_uuid = uuid7().hex
|
||||
cursor.execute("""
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT OR IGNORE INTO core_archiveresult_new (
|
||||
id, uuid, snapshot_id, cmd, pwd, cmd_version,
|
||||
start_ts, end_ts, status, extractor, output
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (record[0], new_uuid, record[1], record[2], record[3], record[4], record[5], record[6], record[7], record[8], record[9]))
|
||||
""",
|
||||
(
|
||||
record[0],
|
||||
new_uuid,
|
||||
record[1],
|
||||
record[2],
|
||||
record[3],
|
||||
record[4],
|
||||
record[5],
|
||||
record[6],
|
||||
record[7],
|
||||
record[8],
|
||||
record[9],
|
||||
),
|
||||
)
|
||||
|
||||
cursor.execute("DROP TABLE IF EXISTS core_archiveresult;")
|
||||
cursor.execute("ALTER TABLE core_archiveresult_new RENAME TO core_archiveresult;")
|
||||
@@ -149,13 +167,13 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
|
||||
if snapshot_has_data:
|
||||
# Detect which version we're migrating from
|
||||
snapshot_cols = get_table_columns('core_snapshot')
|
||||
has_added = 'added' in snapshot_cols
|
||||
has_bookmarked_at = 'bookmarked_at' in snapshot_cols
|
||||
snapshot_cols = get_table_columns("core_snapshot")
|
||||
has_added = "added" in snapshot_cols
|
||||
has_bookmarked_at = "bookmarked_at" in snapshot_cols
|
||||
|
||||
if has_added and not has_bookmarked_at:
|
||||
# Migrating from v0.7.2 (has added/updated fields)
|
||||
print('Migrating Snapshot from v0.7.2 schema...')
|
||||
print("Migrating Snapshot from v0.7.2 schema...")
|
||||
# Transform added→bookmarked_at/created_at and updated→modified_at
|
||||
cursor.execute("""
|
||||
INSERT OR IGNORE INTO core_snapshot_new (
|
||||
@@ -173,28 +191,28 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
""")
|
||||
elif has_bookmarked_at and not has_added:
|
||||
# Migrating from v0.8.6rc0 (already has bookmarked_at/created_at/modified_at)
|
||||
print('Migrating Snapshot from v0.8.6rc0 schema...')
|
||||
print("Migrating Snapshot from v0.8.6rc0 schema...")
|
||||
# Check what fields exist
|
||||
has_status = 'status' in snapshot_cols
|
||||
has_retry_at = 'retry_at' in snapshot_cols
|
||||
has_crawl_id = 'crawl_id' in snapshot_cols
|
||||
has_status = "status" in snapshot_cols
|
||||
has_retry_at = "retry_at" in snapshot_cols
|
||||
has_crawl_id = "crawl_id" in snapshot_cols
|
||||
|
||||
# Build column list based on what exists
|
||||
cols = ['id', 'url', 'timestamp', 'title', 'bookmarked_at', 'created_at', 'modified_at', 'downloaded_at']
|
||||
cols = ["id", "url", "timestamp", "title", "bookmarked_at", "created_at", "modified_at", "downloaded_at"]
|
||||
if has_crawl_id:
|
||||
cols.append('crawl_id')
|
||||
cols.append("crawl_id")
|
||||
if has_status:
|
||||
cols.append('status')
|
||||
cols.append("status")
|
||||
if has_retry_at:
|
||||
cols.append('retry_at')
|
||||
cols.append("retry_at")
|
||||
|
||||
cursor.execute(f"""
|
||||
INSERT OR IGNORE INTO core_snapshot_new ({', '.join(cols)})
|
||||
SELECT {', '.join(cols)}
|
||||
INSERT OR IGNORE INTO core_snapshot_new ({", ".join(cols)})
|
||||
SELECT {", ".join(cols)}
|
||||
FROM core_snapshot;
|
||||
""")
|
||||
else:
|
||||
print(f'Warning: Unexpected Snapshot schema - has_added={has_added}, has_bookmarked_at={has_bookmarked_at}')
|
||||
print(f"Warning: Unexpected Snapshot schema - has_added={has_added}, has_bookmarked_at={has_bookmarked_at}")
|
||||
|
||||
cursor.execute("DROP TABLE IF EXISTS core_snapshot;")
|
||||
cursor.execute("ALTER TABLE core_snapshot_new RENAME TO core_snapshot;")
|
||||
@@ -237,13 +255,13 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
cursor.execute("PRAGMA table_info(core_tag)")
|
||||
tag_id_type = None
|
||||
for row in cursor.fetchall():
|
||||
if row[1] == 'id': # row[1] is column name
|
||||
if row[1] == "id": # row[1] is column name
|
||||
tag_id_type = row[2] # row[2] is type
|
||||
break
|
||||
|
||||
if tag_id_type and 'char' in tag_id_type.lower():
|
||||
if tag_id_type and "char" in tag_id_type.lower():
|
||||
# v0.8.6rc0: Tag IDs are UUIDs, need to convert to INTEGER
|
||||
print('Converting Tag IDs from UUID to INTEGER...')
|
||||
print("Converting Tag IDs from UUID to INTEGER...")
|
||||
|
||||
# Get all tags with their UUIDs
|
||||
cursor.execute("SELECT id, name, slug, created_at, modified_at, created_by_id FROM core_tag ORDER BY name")
|
||||
@@ -255,10 +273,13 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
old_id, name, slug, created_at, modified_at, created_by_id = tag
|
||||
uuid_to_int_map[old_id] = i
|
||||
# Insert with new INTEGER ID
|
||||
cursor.execute("""
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT OR IGNORE INTO core_tag_new (id, name, slug, created_at, modified_at, created_by_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""", (i, name, slug, created_at, modified_at, created_by_id))
|
||||
""",
|
||||
(i, name, slug, created_at, modified_at, created_by_id),
|
||||
)
|
||||
|
||||
# Update snapshot_tags to use new INTEGER IDs
|
||||
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot_tags'")
|
||||
@@ -273,13 +294,16 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
for st_id, snapshot_id, old_tag_id in snapshot_tags:
|
||||
new_tag_id = uuid_to_int_map.get(old_tag_id)
|
||||
if new_tag_id:
|
||||
cursor.execute("""
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT OR IGNORE INTO core_snapshot_tags (id, snapshot_id, tag_id)
|
||||
VALUES (?, ?, ?)
|
||||
""", (st_id, snapshot_id, new_tag_id))
|
||||
""",
|
||||
(st_id, snapshot_id, new_tag_id),
|
||||
)
|
||||
else:
|
||||
# v0.7.2: Tag IDs are already INTEGER
|
||||
print('Migrating Tag from v0.7.2 schema...')
|
||||
print("Migrating Tag from v0.7.2 schema...")
|
||||
cursor.execute("""
|
||||
INSERT OR IGNORE INTO core_tag_new (id, name, slug)
|
||||
SELECT id, name, slug
|
||||
@@ -294,15 +318,14 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS core_tag_created_by_id_idx ON core_tag(created_by_id);")
|
||||
|
||||
if has_data:
|
||||
print('✓ Core tables upgraded to v0.9.0')
|
||||
print("✓ Core tables upgraded to v0.9.0")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0022_auto_20231023_2008'),
|
||||
('crawls', '0001_initial'),
|
||||
('auth', '0012_alter_user_first_name_max_length'),
|
||||
("core", "0022_auto_20231023_2008"),
|
||||
("crawls", "0001_initial"),
|
||||
("auth", "0012_alter_user_first_name_max_length"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
@@ -317,60 +340,58 @@ class Migration(migrations.Migration):
|
||||
# NOTE: We do NOT remove extractor/output for ArchiveResult!
|
||||
# They are still in the database and will be removed by migration 0025
|
||||
# after copying their data to plugin/output_str.
|
||||
|
||||
# However, for Snapshot, we DO remove added/updated and ADD the new timestamp fields
|
||||
# because the SQL above already transformed them.
|
||||
migrations.RemoveField(model_name='snapshot', name='added'),
|
||||
migrations.RemoveField(model_name='snapshot', name='updated'),
|
||||
migrations.RemoveField(model_name="snapshot", name="added"),
|
||||
migrations.RemoveField(model_name="snapshot", name="updated"),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='bookmarked_at',
|
||||
model_name="snapshot",
|
||||
name="bookmarked_at",
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='created_at',
|
||||
model_name="snapshot",
|
||||
name="created_at",
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='modified_at',
|
||||
model_name="snapshot",
|
||||
name="modified_at",
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
# Declare fs_version (already created in database with DEFAULT '0.8.0')
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='fs_version',
|
||||
model_name="snapshot",
|
||||
name="fs_version",
|
||||
field=models.CharField(
|
||||
max_length=10,
|
||||
default='0.8.0',
|
||||
help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().'
|
||||
default="0.8.0",
|
||||
help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().',
|
||||
),
|
||||
),
|
||||
|
||||
# SnapshotTag table already exists from v0.7.2, just declare it in state
|
||||
migrations.CreateModel(
|
||||
name='SnapshotTag',
|
||||
name="SnapshotTag",
|
||||
fields=[
|
||||
('id', models.AutoField(primary_key=True, serialize=False)),
|
||||
('snapshot', models.ForeignKey(to='core.Snapshot', db_column='snapshot_id', on_delete=models.CASCADE)),
|
||||
('tag', models.ForeignKey(to='core.Tag', db_column='tag_id', on_delete=models.CASCADE)),
|
||||
("id", models.AutoField(primary_key=True, serialize=False)),
|
||||
("snapshot", models.ForeignKey(to="core.Snapshot", db_column="snapshot_id", on_delete=models.CASCADE)),
|
||||
("tag", models.ForeignKey(to="core.Tag", db_column="tag_id", on_delete=models.CASCADE)),
|
||||
],
|
||||
options={
|
||||
'db_table': 'core_snapshot_tags',
|
||||
'unique_together': {('snapshot', 'tag')},
|
||||
"db_table": "core_snapshot_tags",
|
||||
"unique_together": {("snapshot", "tag")},
|
||||
},
|
||||
),
|
||||
# Declare that Snapshot.tags M2M already uses through=SnapshotTag (from v0.7.2)
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='tags',
|
||||
model_name="snapshot",
|
||||
name="tags",
|
||||
field=models.ManyToManyField(
|
||||
'Tag',
|
||||
"Tag",
|
||||
blank=True,
|
||||
related_name='snapshot_set',
|
||||
through='SnapshotTag',
|
||||
through_fields=('snapshot', 'tag'),
|
||||
related_name="snapshot_set",
|
||||
through="SnapshotTag",
|
||||
through_fields=("snapshot", "tag"),
|
||||
),
|
||||
),
|
||||
],
|
||||
|
||||
@@ -20,23 +20,27 @@ def create_default_crawl_and_assign_snapshots(apps, schema_editor):
|
||||
snapshots_without_crawl = cursor.fetchone()[0]
|
||||
|
||||
if snapshots_without_crawl == 0:
|
||||
print('✓ Fresh install or all snapshots already have crawls')
|
||||
print("✓ Fresh install or all snapshots already have crawls")
|
||||
return
|
||||
|
||||
# Get or create system user (pk=1)
|
||||
cursor.execute("SELECT id FROM auth_user WHERE id = 1")
|
||||
if not cursor.fetchone():
|
||||
cursor.execute("""
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO auth_user (id, password, is_superuser, username, first_name, last_name, email, is_staff, is_active, date_joined)
|
||||
VALUES (1, '!', 1, 'system', '', '', '', 1, 1, ?)
|
||||
""", [datetime.now().isoformat()])
|
||||
""",
|
||||
[datetime.now().isoformat()],
|
||||
)
|
||||
|
||||
# Create a default crawl for migrated snapshots
|
||||
# At this point crawls_crawl is guaranteed to have v0.9.0 schema (crawls/0002 ran first)
|
||||
crawl_id = str(uuid_lib.uuid4())
|
||||
now = datetime.now().isoformat()
|
||||
|
||||
cursor.execute("""
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO crawls_crawl (
|
||||
id, created_at, modified_at, num_uses_succeeded, num_uses_failed,
|
||||
urls, max_depth, tags_str, label, notes, output_dir,
|
||||
@@ -44,20 +48,21 @@ def create_default_crawl_and_assign_snapshots(apps, schema_editor):
|
||||
) VALUES (?, ?, ?, 0, 0, '', 0, '', 'Migrated from v0.7.2/v0.8.6',
|
||||
'Auto-created crawl for migrated snapshots', '',
|
||||
'sealed', ?, 1, NULL, '{}', NULL)
|
||||
""", [crawl_id, now, now, now])
|
||||
""",
|
||||
[crawl_id, now, now, now],
|
||||
)
|
||||
|
||||
# Assign all snapshots without a crawl to the default crawl
|
||||
cursor.execute("UPDATE core_snapshot SET crawl_id = ? WHERE crawl_id IS NULL", [crawl_id])
|
||||
|
||||
print(f'✓ Assigned {snapshots_without_crawl} snapshots to default crawl {crawl_id}')
|
||||
print(f"✓ Assigned {snapshots_without_crawl} snapshots to default crawl {crawl_id}")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0023_upgrade_to_0_9_0'),
|
||||
('crawls', '0002_upgrade_from_0_8_6'),
|
||||
('auth', '0012_alter_user_first_name_max_length'),
|
||||
("core", "0023_upgrade_to_0_9_0"),
|
||||
("crawls", "0002_upgrade_from_0_8_6"),
|
||||
("auth", "0012_alter_user_first_name_max_length"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
@@ -137,12 +142,12 @@ class Migration(migrations.Migration):
|
||||
],
|
||||
state_operations=[
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='crawl',
|
||||
model_name="snapshot",
|
||||
name="crawl",
|
||||
field=models.ForeignKey(
|
||||
on_delete=models.deletion.CASCADE,
|
||||
to='crawls.crawl',
|
||||
help_text='Crawl that created this snapshot'
|
||||
to="crawls.crawl",
|
||||
help_text="Crawl that created this snapshot",
|
||||
),
|
||||
),
|
||||
],
|
||||
|
||||
@@ -17,20 +17,24 @@ def copy_old_fields_to_new(apps, schema_editor):
|
||||
cursor.execute("PRAGMA table_info(core_archiveresult)")
|
||||
cols = {row[1] for row in cursor.fetchall()}
|
||||
|
||||
if 'extractor' in cols and 'plugin' in cols:
|
||||
if "extractor" in cols and "plugin" in cols:
|
||||
# Copy extractor -> plugin
|
||||
cursor.execute("UPDATE core_archiveresult SET plugin = COALESCE(extractor, '') WHERE plugin = '' OR plugin IS NULL")
|
||||
|
||||
if 'output' in cols and 'output_str' in cols:
|
||||
if "output" in cols and "output_str" in cols:
|
||||
# Copy output -> output_str
|
||||
cursor.execute("UPDATE core_archiveresult SET output_str = COALESCE(output, '') WHERE output_str = '' OR output_str IS NULL")
|
||||
|
||||
# Copy timestamps to new timestamp fields if they don't have values yet
|
||||
if 'start_ts' in cols and 'created_at' in cols:
|
||||
cursor.execute("UPDATE core_archiveresult SET created_at = COALESCE(start_ts, CURRENT_TIMESTAMP) WHERE created_at IS NULL OR created_at = ''")
|
||||
if "start_ts" in cols and "created_at" in cols:
|
||||
cursor.execute(
|
||||
"UPDATE core_archiveresult SET created_at = COALESCE(start_ts, CURRENT_TIMESTAMP) WHERE created_at IS NULL OR created_at = ''",
|
||||
)
|
||||
|
||||
if 'end_ts' in cols and 'modified_at' in cols:
|
||||
cursor.execute("UPDATE core_archiveresult SET modified_at = COALESCE(end_ts, start_ts, CURRENT_TIMESTAMP) WHERE modified_at IS NULL OR modified_at = ''")
|
||||
if "end_ts" in cols and "modified_at" in cols:
|
||||
cursor.execute(
|
||||
"UPDATE core_archiveresult SET modified_at = COALESCE(end_ts, start_ts, CURRENT_TIMESTAMP) WHERE modified_at IS NULL OR modified_at = ''",
|
||||
)
|
||||
|
||||
# NOTE: Snapshot timestamps (added→bookmarked_at, updated→modified_at) were already
|
||||
# transformed by migration 0023, so we don't need to copy them here.
|
||||
@@ -39,164 +43,191 @@ def copy_old_fields_to_new(apps, schema_editor):
|
||||
# Debug: Check Snapshot timestamps at end of RunPython
|
||||
cursor.execute("SELECT id, bookmarked_at, modified_at FROM core_snapshot LIMIT 2")
|
||||
snap_after = cursor.fetchall()
|
||||
print(f'DEBUG 0025: Snapshot timestamps at END of RunPython: {snap_after}')
|
||||
print(f"DEBUG 0025: Snapshot timestamps at END of RunPython: {snap_after}")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0024_assign_default_crawl'),
|
||||
('crawls', '0001_initial'),
|
||||
("core", "0024_assign_default_crawl"),
|
||||
("crawls", "0001_initial"),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='archiveresult',
|
||||
options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'},
|
||||
name="archiveresult",
|
||||
options={"verbose_name": "Archive Result", "verbose_name_plural": "Archive Results Log"},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='snapshot',
|
||||
options={'verbose_name': 'Snapshot', 'verbose_name_plural': 'Snapshots'},
|
||||
name="snapshot",
|
||||
options={"verbose_name": "Snapshot", "verbose_name_plural": "Snapshots"},
|
||||
),
|
||||
# NOTE: RemoveField for cmd, cmd_version, pwd moved to migration 0027
|
||||
# to allow data migration to Process records first
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='config',
|
||||
model_name="archiveresult",
|
||||
name="config",
|
||||
field=models.JSONField(blank=True, default=dict, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='created_at',
|
||||
model_name="archiveresult",
|
||||
name="created_at",
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='hook_name',
|
||||
field=models.CharField(blank=True, db_index=True, default='', help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)', max_length=255),
|
||||
model_name="archiveresult",
|
||||
name="hook_name",
|
||||
field=models.CharField(
|
||||
blank=True,
|
||||
db_index=True,
|
||||
default="",
|
||||
help_text="Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)",
|
||||
max_length=255,
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='modified_at',
|
||||
model_name="archiveresult",
|
||||
name="modified_at",
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='notes',
|
||||
field=models.TextField(blank=True, default=''),
|
||||
model_name="archiveresult",
|
||||
name="notes",
|
||||
field=models.TextField(blank=True, default=""),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='num_uses_failed',
|
||||
model_name="archiveresult",
|
||||
name="num_uses_failed",
|
||||
field=models.PositiveIntegerField(default=0),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='num_uses_succeeded',
|
||||
model_name="archiveresult",
|
||||
name="num_uses_succeeded",
|
||||
field=models.PositiveIntegerField(default=0),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_files',
|
||||
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
|
||||
model_name="archiveresult",
|
||||
name="output_files",
|
||||
field=models.JSONField(default=dict, help_text="Dict of {relative_path: {metadata}}"),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_json',
|
||||
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
|
||||
model_name="archiveresult",
|
||||
name="output_json",
|
||||
field=models.JSONField(blank=True, default=None, help_text="Structured metadata (headers, redirects, etc.)", null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_mimetypes',
|
||||
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
|
||||
model_name="archiveresult",
|
||||
name="output_mimetypes",
|
||||
field=models.CharField(blank=True, default="", help_text="CSV of mimetypes sorted by size", max_length=512),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_size',
|
||||
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
|
||||
model_name="archiveresult",
|
||||
name="output_size",
|
||||
field=models.BigIntegerField(default=0, help_text="Total bytes of all output files"),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_str',
|
||||
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
|
||||
model_name="archiveresult",
|
||||
name="output_str",
|
||||
field=models.TextField(blank=True, default="", help_text="Human-readable output summary"),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='plugin',
|
||||
field=models.CharField(db_index=True, default='', max_length=32),
|
||||
model_name="archiveresult",
|
||||
name="plugin",
|
||||
field=models.CharField(db_index=True, default="", max_length=32),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='retry_at',
|
||||
model_name="archiveresult",
|
||||
name="retry_at",
|
||||
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
|
||||
),
|
||||
# NOTE: bookmarked_at and created_at already added by migration 0023
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='config',
|
||||
model_name="snapshot",
|
||||
name="config",
|
||||
field=models.JSONField(default=dict),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='current_step',
|
||||
field=models.PositiveSmallIntegerField(db_index=True, default=0, help_text='Current hook step being executed (0-9). Used for sequential hook execution.'),
|
||||
model_name="snapshot",
|
||||
name="current_step",
|
||||
field=models.PositiveSmallIntegerField(
|
||||
db_index=True,
|
||||
default=0,
|
||||
help_text="Current hook step being executed (0-9). Used for sequential hook execution.",
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='depth',
|
||||
model_name="snapshot",
|
||||
name="depth",
|
||||
field=models.PositiveSmallIntegerField(db_index=True, default=0),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='downloaded_at',
|
||||
model_name="snapshot",
|
||||
name="downloaded_at",
|
||||
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
|
||||
),
|
||||
# NOTE: fs_version already added by migration 0023 with default='0.8.0'
|
||||
# NOTE: modified_at already added by migration 0023
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='notes',
|
||||
field=models.TextField(blank=True, default=''),
|
||||
model_name="snapshot",
|
||||
name="notes",
|
||||
field=models.TextField(blank=True, default=""),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='num_uses_failed',
|
||||
model_name="snapshot",
|
||||
name="num_uses_failed",
|
||||
field=models.PositiveIntegerField(default=0),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='num_uses_succeeded',
|
||||
model_name="snapshot",
|
||||
name="num_uses_succeeded",
|
||||
field=models.PositiveIntegerField(default=0),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='parent_snapshot',
|
||||
field=models.ForeignKey(blank=True, help_text='Parent snapshot that discovered this URL (for recursive crawling)', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='child_snapshots', to='core.snapshot'),
|
||||
model_name="snapshot",
|
||||
name="parent_snapshot",
|
||||
field=models.ForeignKey(
|
||||
blank=True,
|
||||
help_text="Parent snapshot that discovered this URL (for recursive crawling)",
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
related_name="child_snapshots",
|
||||
to="core.snapshot",
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='retry_at',
|
||||
model_name="snapshot",
|
||||
name="retry_at",
|
||||
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='status',
|
||||
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('sealed', 'Sealed')], db_index=True, default='queued', max_length=15),
|
||||
model_name="snapshot",
|
||||
name="status",
|
||||
field=models.CharField(
|
||||
choices=[("queued", "Queued"), ("started", "Started"), ("sealed", "Sealed")],
|
||||
db_index=True,
|
||||
default="queued",
|
||||
max_length=15,
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='created_at',
|
||||
model_name="tag",
|
||||
name="created_at",
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
|
||||
model_name="tag",
|
||||
name="created_by",
|
||||
field=models.ForeignKey(
|
||||
default=archivebox.base_models.models.get_or_create_system_user_pk,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
related_name="tag_set",
|
||||
to=settings.AUTH_USER_MODEL,
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='modified_at',
|
||||
model_name="tag",
|
||||
name="modified_at",
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
# Copy data from old field names to new field names after AddField operations
|
||||
@@ -206,75 +237,93 @@ class Migration(migrations.Migration):
|
||||
),
|
||||
# Now remove the old ArchiveResult fields after data has been copied
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='extractor',
|
||||
model_name="archiveresult",
|
||||
name="extractor",
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='output',
|
||||
model_name="archiveresult",
|
||||
name="output",
|
||||
),
|
||||
# NOTE: Snapshot's added/updated were already removed by migration 0023
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='end_ts',
|
||||
model_name="archiveresult",
|
||||
name="end_ts",
|
||||
field=models.DateTimeField(blank=True, default=None, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='id',
|
||||
model_name="archiveresult",
|
||||
name="id",
|
||||
field=models.AutoField(editable=False, primary_key=True, serialize=False),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='start_ts',
|
||||
model_name="archiveresult",
|
||||
name="start_ts",
|
||||
field=models.DateTimeField(blank=True, default=None, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='status',
|
||||
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
|
||||
model_name="archiveresult",
|
||||
name="status",
|
||||
field=models.CharField(
|
||||
choices=[
|
||||
("queued", "Queued"),
|
||||
("started", "Started"),
|
||||
("backoff", "Waiting to retry"),
|
||||
("succeeded", "Succeeded"),
|
||||
("failed", "Failed"),
|
||||
("skipped", "Skipped"),
|
||||
],
|
||||
db_index=True,
|
||||
default="queued",
|
||||
max_length=15,
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='uuid',
|
||||
model_name="archiveresult",
|
||||
name="uuid",
|
||||
field=models.UUIDField(blank=True, db_index=True, default=uuid7, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='crawl',
|
||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
|
||||
model_name="snapshot",
|
||||
name="crawl",
|
||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name="snapshot_set", to="crawls.crawl"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='id',
|
||||
model_name="snapshot",
|
||||
name="id",
|
||||
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='tags',
|
||||
field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
|
||||
model_name="snapshot",
|
||||
name="tags",
|
||||
field=models.ManyToManyField(
|
||||
blank=True,
|
||||
related_name="snapshot_set",
|
||||
through="core.SnapshotTag",
|
||||
through_fields=("snapshot", "tag"),
|
||||
to="core.tag",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='timestamp',
|
||||
model_name="snapshot",
|
||||
name="timestamp",
|
||||
field=models.CharField(db_index=True, editable=False, max_length=32, unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='url',
|
||||
model_name="snapshot",
|
||||
name="url",
|
||||
field=models.URLField(db_index=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='slug',
|
||||
model_name="tag",
|
||||
name="slug",
|
||||
field=models.SlugField(editable=False, max_length=100, unique=True),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='snapshot',
|
||||
constraint=models.UniqueConstraint(fields=('url', 'crawl'), name='unique_url_per_crawl'),
|
||||
model_name="snapshot",
|
||||
constraint=models.UniqueConstraint(fields=("url", "crawl"), name="unique_url_per_crawl"),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='snapshot',
|
||||
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
|
||||
model_name="snapshot",
|
||||
constraint=models.UniqueConstraint(fields=("timestamp",), name="unique_timestamp"),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -5,24 +5,30 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0025_alter_archiveresult_options_alter_snapshot_options_and_more'),
|
||||
('machine', '0007_add_process_type_and_parent'),
|
||||
("core", "0025_alter_archiveresult_options_alter_snapshot_options_and_more"),
|
||||
("machine", "0007_add_process_type_and_parent"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='num_uses_failed',
|
||||
model_name="archiveresult",
|
||||
name="num_uses_failed",
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='num_uses_succeeded',
|
||||
model_name="archiveresult",
|
||||
name="num_uses_succeeded",
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='process',
|
||||
field=models.OneToOneField(blank=True, help_text='Process execution details for this archive result', null=True, on_delete=django.db.models.deletion.PROTECT, related_name='archiveresult', to='machine.process'),
|
||||
model_name="archiveresult",
|
||||
name="process",
|
||||
field=models.OneToOneField(
|
||||
blank=True,
|
||||
help_text="Process execution details for this archive result",
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.PROTECT,
|
||||
related_name="archiveresult",
|
||||
to="machine.process",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -25,7 +25,7 @@ def parse_cmd_field(cmd_raw):
|
||||
return []
|
||||
|
||||
# Try to parse as JSON first
|
||||
if cmd_raw.startswith('['):
|
||||
if cmd_raw.startswith("["):
|
||||
try:
|
||||
parsed = json.loads(cmd_raw)
|
||||
if isinstance(parsed, list):
|
||||
@@ -45,7 +45,7 @@ def get_or_create_current_machine(cursor):
|
||||
|
||||
# Simple machine detection - get hostname as guid
|
||||
hostname = socket.gethostname()
|
||||
guid = f'host_{hostname}' # Simple but stable identifier
|
||||
guid = f"host_{hostname}" # Simple but stable identifier
|
||||
|
||||
# Check if machine exists
|
||||
cursor.execute("SELECT id FROM machine_machine WHERE guid = ?", [guid])
|
||||
@@ -64,9 +64,10 @@ def get_or_create_current_machine(cursor):
|
||||
machine_cols = {row[1] for row in cursor.fetchall()}
|
||||
|
||||
# Build INSERT statement based on available columns
|
||||
if 'config' in machine_cols:
|
||||
if "config" in machine_cols:
|
||||
# 0.9.x schema with config column
|
||||
cursor.execute("""
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO machine_machine (
|
||||
id, created_at, modified_at, guid, hostname,
|
||||
hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
|
||||
@@ -74,10 +75,13 @@ def get_or_create_current_machine(cursor):
|
||||
stats, config, num_uses_failed, num_uses_succeeded
|
||||
) VALUES (?, ?, ?, ?, ?, 0, 0, '', '', '',
|
||||
'', '', '', '', '', '{}', '{}', 0, 0)
|
||||
""", [machine_id, now, now, guid, hostname])
|
||||
""",
|
||||
[machine_id, now, now, guid, hostname],
|
||||
)
|
||||
else:
|
||||
# 0.8.x schema without config column
|
||||
cursor.execute("""
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO machine_machine (
|
||||
id, created_at, modified_at, guid, hostname,
|
||||
hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
|
||||
@@ -85,7 +89,9 @@ def get_or_create_current_machine(cursor):
|
||||
stats, num_uses_failed, num_uses_succeeded
|
||||
) VALUES (?, ?, ?, ?, ?, 0, 0, '', '', '',
|
||||
'', '', '', '', '', '{}', 0, 0)
|
||||
""", [machine_id, now, now, guid, hostname])
|
||||
""",
|
||||
[machine_id, now, now, guid, hostname],
|
||||
)
|
||||
|
||||
return machine_id
|
||||
|
||||
@@ -108,15 +114,18 @@ def get_or_create_binary(cursor, machine_id, name, abspath, version):
|
||||
|
||||
# If abspath is just a name without slashes, it's not a full path
|
||||
# Store it in both fields for simplicity
|
||||
if '/' not in abspath:
|
||||
if "/" not in abspath:
|
||||
# Not a full path - store as-is
|
||||
pass
|
||||
|
||||
# Check if binary exists with same machine, name, abspath, version
|
||||
cursor.execute("""
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT id FROM machine_binary
|
||||
WHERE machine_id = ? AND name = ? AND abspath = ? AND version = ?
|
||||
""", [machine_id, name, abspath, version])
|
||||
""",
|
||||
[machine_id, name, abspath, version],
|
||||
)
|
||||
|
||||
row = cursor.fetchone()
|
||||
if row:
|
||||
@@ -134,9 +143,10 @@ def get_or_create_binary(cursor, machine_id, name, abspath, version):
|
||||
# Use only columns that exist in current schema
|
||||
# 0.8.x schema: id, created_at, modified_at, machine_id, name, binprovider, abspath, version, sha256, num_uses_failed, num_uses_succeeded
|
||||
# 0.9.x schema adds: binproviders, overrides, status, retry_at, output_dir
|
||||
if 'binproviders' in binary_cols:
|
||||
if "binproviders" in binary_cols:
|
||||
# 0.9.x schema
|
||||
cursor.execute("""
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO machine_binary (
|
||||
id, created_at, modified_at, machine_id,
|
||||
name, binproviders, overrides, binprovider, abspath, version, sha256,
|
||||
@@ -144,16 +154,21 @@ def get_or_create_binary(cursor, machine_id, name, abspath, version):
|
||||
num_uses_failed, num_uses_succeeded
|
||||
) VALUES (?, ?, ?, ?, ?, 'env', '{}', 'env', ?, ?, '',
|
||||
'succeeded', NULL, '', 0, 0)
|
||||
""", [binary_id, now, now, machine_id, name, abspath, version])
|
||||
""",
|
||||
[binary_id, now, now, machine_id, name, abspath, version],
|
||||
)
|
||||
else:
|
||||
# 0.8.x schema (simpler)
|
||||
cursor.execute("""
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO machine_binary (
|
||||
id, created_at, modified_at, machine_id,
|
||||
name, binprovider, abspath, version, sha256,
|
||||
num_uses_failed, num_uses_succeeded
|
||||
) VALUES (?, ?, ?, ?, ?, 'env', ?, ?, '', 0, 0)
|
||||
""", [binary_id, now, now, machine_id, name, abspath, version])
|
||||
""",
|
||||
[binary_id, now, now, machine_id, name, abspath, version],
|
||||
)
|
||||
|
||||
return binary_id
|
||||
|
||||
@@ -169,15 +184,15 @@ def map_status(old_status):
|
||||
(process_status, exit_code) tuple
|
||||
"""
|
||||
status_map = {
|
||||
'queued': ('queued', None),
|
||||
'started': ('running', None),
|
||||
'backoff': ('queued', None),
|
||||
'succeeded': ('exited', 0),
|
||||
'failed': ('exited', 1),
|
||||
'skipped': ('exited', None), # Skipped = exited without error
|
||||
"queued": ("queued", None),
|
||||
"started": ("running", None),
|
||||
"backoff": ("queued", None),
|
||||
"succeeded": ("exited", 0),
|
||||
"failed": ("exited", 1),
|
||||
"skipped": ("exited", None), # Skipped = exited without error
|
||||
}
|
||||
|
||||
return status_map.get(old_status, ('queued', None))
|
||||
return status_map.get(old_status, ("queued", None))
|
||||
|
||||
|
||||
def create_process(cursor, machine_id, pwd, cmd, status, exit_code, started_at, ended_at, binary_id):
|
||||
@@ -197,9 +212,10 @@ def create_process(cursor, machine_id, pwd, cmd, status, exit_code, started_at,
|
||||
cmd_json = json.dumps(cmd)
|
||||
|
||||
# Set retry_at to now for queued processes, NULL otherwise
|
||||
retry_at = now if status == 'queued' else None
|
||||
retry_at = now if status == "queued" else None
|
||||
|
||||
cursor.execute("""
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO machine_process (
|
||||
id, created_at, modified_at, machine_id, parent_id, process_type,
|
||||
pwd, cmd, env, timeout,
|
||||
@@ -213,14 +229,22 @@ def create_process(cursor, machine_id, pwd, cmd, status, exit_code, started_at,
|
||||
?, ?,
|
||||
?, NULL, NULL,
|
||||
?, ?)
|
||||
""", [
|
||||
process_id, now, now, machine_id,
|
||||
pwd, cmd_json,
|
||||
exit_code,
|
||||
started_at, ended_at,
|
||||
binary_id,
|
||||
status, retry_at
|
||||
])
|
||||
""",
|
||||
[
|
||||
process_id,
|
||||
now,
|
||||
now,
|
||||
machine_id,
|
||||
pwd,
|
||||
cmd_json,
|
||||
exit_code,
|
||||
started_at,
|
||||
ended_at,
|
||||
binary_id,
|
||||
status,
|
||||
retry_at,
|
||||
],
|
||||
)
|
||||
|
||||
return process_id
|
||||
|
||||
@@ -250,16 +274,18 @@ def copy_archiveresult_data_to_process(apps, schema_editor):
|
||||
cursor.execute("PRAGMA table_info(core_archiveresult)")
|
||||
cols = {row[1] for row in cursor.fetchall()}
|
||||
|
||||
print(f'DEBUG 0027: Columns found: {sorted(cols)}')
|
||||
print(f'DEBUG 0027: Has cmd={("cmd" in cols)}, pwd={("pwd" in cols)}, cmd_version={("cmd_version" in cols)}, process_id={("process_id" in cols)}')
|
||||
print(f"DEBUG 0027: Columns found: {sorted(cols)}")
|
||||
print(
|
||||
f"DEBUG 0027: Has cmd={('cmd' in cols)}, pwd={('pwd' in cols)}, cmd_version={('cmd_version' in cols)}, process_id={('process_id' in cols)}",
|
||||
)
|
||||
|
||||
if 'cmd' not in cols or 'pwd' not in cols or 'cmd_version' not in cols:
|
||||
print('✓ Fresh install or fields already removed - skipping data copy')
|
||||
if "cmd" not in cols or "pwd" not in cols or "cmd_version" not in cols:
|
||||
print("✓ Fresh install or fields already removed - skipping data copy")
|
||||
return
|
||||
|
||||
# Check if process_id field exists (should exist from 0026)
|
||||
if 'process_id' not in cols:
|
||||
print('✗ ERROR: process_id field not found. Migration 0026 must run first.')
|
||||
if "process_id" not in cols:
|
||||
print("✗ ERROR: process_id field not found. Migration 0026 must run first.")
|
||||
return
|
||||
|
||||
# Get or create Machine.current()
|
||||
@@ -278,10 +304,10 @@ def copy_archiveresult_data_to_process(apps, schema_editor):
|
||||
results = cursor.fetchall()
|
||||
|
||||
if not results:
|
||||
print('✓ No ArchiveResults need Process migration')
|
||||
print("✓ No ArchiveResults need Process migration")
|
||||
return
|
||||
|
||||
print(f'Migrating {len(results)} ArchiveResults to Process records...')
|
||||
print(f"Migrating {len(results)} ArchiveResults to Process records...")
|
||||
|
||||
migrated_count = 0
|
||||
skipped_count = 0
|
||||
@@ -291,42 +317,46 @@ def copy_archiveresult_data_to_process(apps, schema_editor):
|
||||
ar_id, snapshot_id, plugin, cmd_raw, pwd, cmd_version, status, start_ts, end_ts, created_at = row
|
||||
|
||||
if i == 0:
|
||||
print(f'DEBUG 0027: First row: ar_id={ar_id}, plugin={plugin}, cmd={cmd_raw[:50] if cmd_raw else None}, status={status}')
|
||||
print(f"DEBUG 0027: First row: ar_id={ar_id}, plugin={plugin}, cmd={cmd_raw[:50] if cmd_raw else None}, status={status}")
|
||||
|
||||
try:
|
||||
# Parse cmd field
|
||||
cmd_array = parse_cmd_field(cmd_raw)
|
||||
|
||||
if i == 0:
|
||||
print(f'DEBUG 0027: Parsed cmd: {cmd_array}')
|
||||
print(f"DEBUG 0027: Parsed cmd: {cmd_array}")
|
||||
|
||||
# Extract binary info from cmd[0] if available
|
||||
binary_id = None
|
||||
if cmd_array and cmd_array[0]:
|
||||
binary_name = Path(cmd_array[0]).name or plugin # Fallback to plugin name
|
||||
binary_abspath = cmd_array[0]
|
||||
binary_version = cmd_version or ''
|
||||
binary_version = cmd_version or ""
|
||||
|
||||
# Get or create Binary record
|
||||
binary_id = get_or_create_binary(
|
||||
cursor, machine_id, binary_name, binary_abspath, binary_version
|
||||
cursor,
|
||||
machine_id,
|
||||
binary_name,
|
||||
binary_abspath,
|
||||
binary_version,
|
||||
)
|
||||
|
||||
if i == 0:
|
||||
print(f'DEBUG 0027: Created Binary: id={binary_id}, name={binary_name}')
|
||||
print(f"DEBUG 0027: Created Binary: id={binary_id}, name={binary_name}")
|
||||
|
||||
# Map status
|
||||
process_status, exit_code = map_status(status)
|
||||
|
||||
# Set timestamps
|
||||
started_at = start_ts or created_at
|
||||
ended_at = end_ts if process_status == 'exited' else None
|
||||
ended_at = end_ts if process_status == "exited" else None
|
||||
|
||||
# Create Process record
|
||||
process_id = create_process(
|
||||
cursor=cursor,
|
||||
machine_id=machine_id,
|
||||
pwd=pwd or '',
|
||||
pwd=pwd or "",
|
||||
cmd=cmd_array,
|
||||
status=process_status,
|
||||
exit_code=exit_code,
|
||||
@@ -336,34 +366,34 @@ def copy_archiveresult_data_to_process(apps, schema_editor):
|
||||
)
|
||||
|
||||
if i == 0:
|
||||
print(f'DEBUG 0027: Created Process: id={process_id}')
|
||||
print(f"DEBUG 0027: Created Process: id={process_id}")
|
||||
|
||||
# Link ArchiveResult to Process
|
||||
cursor.execute(
|
||||
"UPDATE core_archiveresult SET process_id = ? WHERE id = ?",
|
||||
[process_id, ar_id]
|
||||
[process_id, ar_id],
|
||||
)
|
||||
|
||||
migrated_count += 1
|
||||
|
||||
if i == 0:
|
||||
print('DEBUG 0027: Linked ArchiveResult to Process')
|
||||
print("DEBUG 0027: Linked ArchiveResult to Process")
|
||||
|
||||
except Exception as e:
|
||||
print(f'✗ Error migrating ArchiveResult {ar_id}: {e}')
|
||||
print(f"✗ Error migrating ArchiveResult {ar_id}: {e}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
error_count += 1
|
||||
continue
|
||||
|
||||
print(f'✓ Migration complete: {migrated_count} migrated, {skipped_count} skipped, {error_count} errors')
|
||||
print(f"✓ Migration complete: {migrated_count} migrated, {skipped_count} skipped, {error_count} errors")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0026_add_process_to_archiveresult'),
|
||||
('machine', '0007_add_process_type_and_parent'),
|
||||
("core", "0026_add_process_to_archiveresult"),
|
||||
("machine", "0007_add_process_type_and_parent"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
@@ -372,18 +402,17 @@ class Migration(migrations.Migration):
|
||||
copy_archiveresult_data_to_process,
|
||||
reverse_code=migrations.RunPython.noop,
|
||||
),
|
||||
|
||||
# Now safe to remove old fields (moved from 0025)
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='cmd',
|
||||
model_name="archiveresult",
|
||||
name="cmd",
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='cmd_version',
|
||||
model_name="archiveresult",
|
||||
name="cmd_version",
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='pwd',
|
||||
model_name="archiveresult",
|
||||
name="pwd",
|
||||
),
|
||||
]
|
||||
|
||||
@@ -4,15 +4,18 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Alter ``Snapshot.fs_version`` to a 10-char CharField defaulting to "0.9.0"."""

    # Must run after the ArchiveResult -> Process data copy migration.
    dependencies = [("core", "0027_copy_archiveresult_to_process")]

    operations = [
        migrations.AlterField(
            model_name="snapshot",
            name="fs_version",
            field=models.CharField(
                max_length=10,
                default="0.9.0",
                help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().',
            ),
        ),
    ]
|
||||
|
||||
@@ -28,7 +28,7 @@ def migrate_archiveresult_id_to_uuid(apps, schema_editor):
|
||||
# Check if table exists and has data
|
||||
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'")
|
||||
if not cursor.fetchone():
|
||||
print('ArchiveResult table does not exist, skipping migration')
|
||||
print("ArchiveResult table does not exist, skipping migration")
|
||||
return
|
||||
|
||||
cursor.execute("SELECT COUNT(*) FROM core_archiveresult")
|
||||
@@ -38,16 +38,16 @@ def migrate_archiveresult_id_to_uuid(apps, schema_editor):
|
||||
# (fresh installs create table with uuid from 0025, but model expects no uuid after 0029)
|
||||
|
||||
if row_count == 0:
|
||||
print('[0029] Recreating ArchiveResult table schema (integer→UUID PK, removing uuid column)...')
|
||||
print("[0029] Recreating ArchiveResult table schema (integer→UUID PK, removing uuid column)...")
|
||||
else:
|
||||
print(f'[0029] Migrating {row_count} ArchiveResult records from integer PK to UUID PK...')
|
||||
print(f"[0029] Migrating {row_count} ArchiveResult records from integer PK to UUID PK...")
|
||||
|
||||
# Step 0: Check if machine_process table exists, if not NULL out process_id values
|
||||
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='machine_process'")
|
||||
machine_process_exists = cursor.fetchone() is not None
|
||||
|
||||
if not machine_process_exists:
|
||||
print('machine_process table does not exist yet, setting process_id to NULL')
|
||||
print("machine_process table does not exist yet, setting process_id to NULL")
|
||||
cursor.execute("UPDATE core_archiveresult SET process_id = NULL WHERE process_id IS NOT NULL")
|
||||
|
||||
# Step 1: Create new table with UUID as primary key (clean - no old_id or uuid columns)
|
||||
@@ -90,7 +90,7 @@ def migrate_archiveresult_id_to_uuid(apps, schema_editor):
|
||||
cursor.execute("PRAGMA table_info(core_archiveresult)")
|
||||
columns = cursor.fetchall()
|
||||
col_names = [col[1] for col in columns]
|
||||
has_uuid_column = 'uuid' in col_names
|
||||
has_uuid_column = "uuid" in col_names
|
||||
|
||||
if has_uuid_column:
|
||||
cursor.execute("SELECT id, uuid FROM core_archiveresult")
|
||||
@@ -117,7 +117,7 @@ def migrate_archiveresult_id_to_uuid(apps, schema_editor):
|
||||
# col_names already fetched in Step 2
|
||||
inserted_count = 0
|
||||
for i, record in enumerate(old_records):
|
||||
old_id = record[col_names.index('id')]
|
||||
old_id = record[col_names.index("id")]
|
||||
new_uuid = id_to_uuid[old_id]
|
||||
|
||||
# Build insert with new structure
|
||||
@@ -125,37 +125,52 @@ def migrate_archiveresult_id_to_uuid(apps, schema_editor):
|
||||
|
||||
# List of fields to copy (all fields from new schema except id, old_id, uuid)
|
||||
fields_to_copy = [
|
||||
'created_at', 'modified_at', 'snapshot_id', 'plugin', 'hook_name',
|
||||
'status', 'retry_at', 'start_ts', 'end_ts',
|
||||
'output_str', 'output_json', 'output_files', 'output_size', 'output_mimetypes',
|
||||
'config', 'notes', 'num_uses_succeeded', 'num_uses_failed', 'process_id'
|
||||
"created_at",
|
||||
"modified_at",
|
||||
"snapshot_id",
|
||||
"plugin",
|
||||
"hook_name",
|
||||
"status",
|
||||
"retry_at",
|
||||
"start_ts",
|
||||
"end_ts",
|
||||
"output_str",
|
||||
"output_json",
|
||||
"output_files",
|
||||
"output_size",
|
||||
"output_mimetypes",
|
||||
"config",
|
||||
"notes",
|
||||
"num_uses_succeeded",
|
||||
"num_uses_failed",
|
||||
"process_id",
|
||||
]
|
||||
|
||||
# Build INSERT statement (only copy fields that exist in source)
|
||||
existing_fields = [f for f in fields_to_copy if f in values]
|
||||
|
||||
if i == 0:
|
||||
print(f'[0029] Source columns: {col_names}')
|
||||
print(f'[0029] Copying fields: {existing_fields}')
|
||||
print(f"[0029] Source columns: {col_names}")
|
||||
print(f"[0029] Copying fields: {existing_fields}")
|
||||
|
||||
placeholders = ', '.join(['?'] * (len(existing_fields) + 1)) # +1 for id
|
||||
field_list = 'id, ' + ', '.join(existing_fields)
|
||||
placeholders = ", ".join(["?"] * (len(existing_fields) + 1)) # +1 for id
|
||||
field_list = "id, " + ", ".join(existing_fields)
|
||||
|
||||
insert_values = [new_uuid] + [values.get(f) for f in existing_fields]
|
||||
|
||||
try:
|
||||
cursor.execute(
|
||||
f"INSERT INTO core_archiveresult_new ({field_list}) VALUES ({placeholders})",
|
||||
insert_values
|
||||
insert_values,
|
||||
)
|
||||
inserted_count += 1
|
||||
except Exception as e:
|
||||
print(f'[0029] ERROR inserting record {old_id}: {e}')
|
||||
print(f"[0029] ERROR inserting record {old_id}: {e}")
|
||||
if i == 0:
|
||||
print(f'[0029] First record values: {insert_values[:5]}...')
|
||||
print(f"[0029] First record values: {insert_values[:5]}...")
|
||||
raise
|
||||
|
||||
print(f'[0029] Inserted {inserted_count}/{len(old_records)} records')
|
||||
print(f"[0029] Inserted {inserted_count}/{len(old_records)} records")
|
||||
|
||||
# Step 4: Replace old table with new table
|
||||
cursor.execute("DROP TABLE core_archiveresult")
|
||||
@@ -170,13 +185,12 @@ def migrate_archiveresult_id_to_uuid(apps, schema_editor):
|
||||
cursor.execute("CREATE INDEX core_archiveresult_hook_name_idx ON core_archiveresult(hook_name)")
|
||||
cursor.execute("CREATE INDEX core_archiveresult_process_id_idx ON core_archiveresult(process_id)")
|
||||
|
||||
print(f'✓ Migrated {row_count} ArchiveResult records to UUID primary key')
|
||||
print(f"✓ Migrated {row_count} ArchiveResult records to UUID primary key")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0028_alter_snapshot_fs_version'),
|
||||
("core", "0028_alter_snapshot_fs_version"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
@@ -190,13 +204,13 @@ class Migration(migrations.Migration):
|
||||
state_operations=[
|
||||
# Remove uuid field (was added in 0025, we're merging it into id)
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='uuid',
|
||||
model_name="archiveresult",
|
||||
name="uuid",
|
||||
),
|
||||
# Change id from AutoField to UUIDField (absorbing the uuid field)
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='id',
|
||||
model_name="archiveresult",
|
||||
name="id",
|
||||
field=models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True),
|
||||
),
|
||||
],
|
||||
|
||||
@@ -6,15 +6,14 @@ from archivebox.uuid_compat import uuid7
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Re-declare ``ArchiveResult.id`` as a uuid7-defaulted UUID primary key."""

    # Runs after the raw-SQL integer -> UUID primary key migration.
    dependencies = [("core", "0029_migrate_archiveresult_to_uuid_pk")]

    operations = [
        migrations.AlterField(
            model_name="archiveresult",
            name="id",
            field=models.UUIDField(
                default=uuid7,
                editable=False,
                primary_key=True,
                serialize=False,
                unique=True,
            ),
        ),
    ]
|
||||
|
||||
@@ -4,14 +4,13 @@ from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add a composite (snapshot, status) index on ArchiveResult."""

    dependencies = [("core", "0030_alter_archiveresult_id")]

    operations = [
        migrations.AddIndex(
            model_name="archiveresult",
            index=models.Index(
                fields=["snapshot", "status"],
                name="archiveresult_snap_status_idx",
            ),
        ),
    ]
|
||||
|
||||
@@ -2,7 +2,6 @@ from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
("core", "0031_add_archiveresult_snapshot_status_index"),
|
||||
]
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Alter ``ArchiveResult.status`` to an indexed CharField with the choices listed below."""

    dependencies = [("core", "0032_remove_archiveresult_retry_at")]

    operations = [
        migrations.AlterField(
            model_name="archiveresult",
            name="status",
            field=models.CharField(
                max_length=16,
                default="queued",
                db_index=True,
                # (stored value, human-readable label) pairs
                choices=[
                    ("queued", "Queued"),
                    ("started", "Started"),
                    ("backoff", "Waiting to retry"),
                    ("succeeded", "Succeeded"),
                    ("failed", "Failed"),
                    ("skipped", "Skipped"),
                    ("noresults", "No Results"),
                ],
            ),
        ),
    ]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -117,6 +117,7 @@ try:
|
||||
is_valid, error_msg = LDAP_CONFIG.validate_ldap_config()
|
||||
if not is_valid:
|
||||
from rich import print
|
||||
|
||||
print(f"[red][X] Error: {error_msg}[/red]")
|
||||
raise ValueError(error_msg)
|
||||
|
||||
@@ -154,6 +155,7 @@ try:
|
||||
|
||||
except ImportError as e:
|
||||
from rich import print
|
||||
|
||||
print("[red][X] Error: LDAP_ENABLED=True but required LDAP libraries are not installed![/red]")
|
||||
print(f"[red] {e}[/red]")
|
||||
print("[yellow] To install LDAP support, run:[/yellow]")
|
||||
@@ -271,7 +273,6 @@ MIGRATION_MODULES = {"signal_webhooks": None}
|
||||
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
|
||||
|
||||
|
||||
|
||||
# class FilestoreDBRouter:
|
||||
# """
|
||||
# A router to store all the File models in the filestore.sqlite3 database.
|
||||
@@ -560,7 +561,7 @@ if DEBUG:
|
||||
AUTOTYPING = {
|
||||
"STUBS_GENERATION": {
|
||||
"LOCAL_STUBS_DIR": PACKAGE_DIR / "typings",
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
# https://github.com/bensi94/Django-Requests-Tracker (improved version of django-debug-toolbar)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
|
||||
import re
|
||||
import os
|
||||
@@ -19,6 +19,7 @@ IGNORABLE_URL_PATTERNS = [
|
||||
re.compile(r"/admin/jsi18n/"),
|
||||
]
|
||||
|
||||
|
||||
class NoisyRequestsFilter(logging.Filter):
|
||||
def filter(self, record) -> bool:
|
||||
logline = record.getMessage()
|
||||
@@ -34,7 +35,7 @@ class NoisyRequestsFilter(logging.Filter):
|
||||
if ignorable_GET_request.match(logline):
|
||||
return False
|
||||
|
||||
ignorable_404_pattern = re.compile(f'Not Found: {pattern.pattern}', re.I | re.M)
|
||||
ignorable_404_pattern = re.compile(f"Not Found: {pattern.pattern}", re.I | re.M)
|
||||
if ignorable_404_pattern.match(logline):
|
||||
return False
|
||||
|
||||
@@ -44,17 +45,18 @@ class NoisyRequestsFilter(logging.Filter):
|
||||
class CustomOutboundWebhookLogFormatter(logging.Formatter):
    """Formatter that relabels ``HTTP Request: `` in formatted lines as ``OutboundWebhook: ``."""

    def format(self, record):
        """Format *record* with the base formatter, then swap the prefix text."""
        return super().format(record).replace("HTTP Request: ", "OutboundWebhook: ")
|
||||
|
||||
|
||||
class StripANSIColorCodesFilter(logging.Filter):
    """Logging filter that removes ANSI color sequences from a record's message.

    The filter never rejects a record; it only rewrites ``record.msg`` (and
    clears ``record.args``) when color codes are detected.
    """

    # Full escape sequence: ESC [ <params> m
    _ansi_re = re.compile(r"\x1b\[[0-9;]*m")
    # Same sequence with the ESC byte missing: [ <params> m
    _bare_re = re.compile(r"\[[0-9;]*m")

    def filter(self, record) -> bool:
        """Strip color codes from *record* in place and always return True."""
        rendered = record.getMessage()

        # NOTE(review): the cheap pre-check only looks for ESC-[ or the literal
        # "[m"; a bare "[0m" without any ESC sequence in the message is not
        # detected here even though _bare_re would match it - confirm intent.
        needs_cleaning = isinstance(rendered, str) and ("\x1b[" in rendered or "[m" in rendered)
        if not needs_cleaning:
            return True

        cleaned = self._ansi_re.sub("", rendered)
        cleaned = self._bare_re.sub("", cleaned)
        # Store the already-interpolated text, so args must be emptied to
        # avoid a second %-formatting pass.
        record.msg = cleaned
        record.args = ()
        return True
|
||||
@@ -65,18 +67,18 @@ ERROR_LOG = tempfile.NamedTemporaryFile().name
|
||||
LOGS_DIR = CONSTANTS.LOGS_DIR
|
||||
|
||||
if os.access(LOGS_DIR, os.W_OK) and LOGS_DIR.is_dir():
|
||||
ERROR_LOG = (LOGS_DIR / 'errors.log')
|
||||
ERROR_LOG = LOGS_DIR / "errors.log"
|
||||
else:
|
||||
# historically too many edge cases here around creating log dir w/ correct permissions early on
|
||||
# if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr
|
||||
# print(f'[!] WARNING: data/logs dir does not exist. Logging to temp file: {ERROR_LOG}')
|
||||
pass
|
||||
|
||||
LOG_LEVEL_DATABASE = 'WARNING' # change to DEBUG to log all SQL queries
|
||||
LOG_LEVEL_REQUEST = 'WARNING' # if DEBUG else 'WARNING'
|
||||
LOG_LEVEL_DATABASE = "WARNING" # change to DEBUG to log all SQL queries
|
||||
LOG_LEVEL_REQUEST = "WARNING" # if DEBUG else 'WARNING'
|
||||
|
||||
if LOG_LEVEL_DATABASE == 'DEBUG':
|
||||
db_logger = logging.getLogger('django.db.backends')
|
||||
if LOG_LEVEL_DATABASE == "DEBUG":
|
||||
db_logger = logging.getLogger("django.db.backends")
|
||||
db_logger.setLevel(logging.DEBUG)
|
||||
db_logger.addHandler(logging.StreamHandler())
|
||||
|
||||
|
||||
@@ -16,52 +16,52 @@ from archivebox.core.models import Snapshot, SnapshotTag, Tag
|
||||
|
||||
TAG_SNAPSHOT_PREVIEW_LIMIT = 10
|
||||
TAG_SORT_CHOICES = (
|
||||
('name_asc', 'Name A-Z'),
|
||||
('name_desc', 'Name Z-A'),
|
||||
('created_desc', 'Created newest'),
|
||||
('created_asc', 'Created oldest'),
|
||||
('snapshots_desc', 'Most snapshots'),
|
||||
('snapshots_asc', 'Fewest snapshots'),
|
||||
("name_asc", "Name A-Z"),
|
||||
("name_desc", "Name Z-A"),
|
||||
("created_desc", "Created newest"),
|
||||
("created_asc", "Created oldest"),
|
||||
("snapshots_desc", "Most snapshots"),
|
||||
("snapshots_asc", "Fewest snapshots"),
|
||||
)
|
||||
TAG_HAS_SNAPSHOTS_CHOICES = (
|
||||
('all', 'All'),
|
||||
('yes', 'Has snapshots'),
|
||||
('no', 'No snapshots'),
|
||||
("all", "All"),
|
||||
("yes", "Has snapshots"),
|
||||
("no", "No snapshots"),
|
||||
)
|
||||
|
||||
|
||||
def normalize_tag_name(name: str) -> str:
    """Return *name* without surrounding whitespace; falsy input becomes ""."""
    if not name:
        return ""
    return name.strip()
|
||||
|
||||
|
||||
def normalize_tag_sort(sort: str = "created_desc") -> str:
    """Return *sort* if it is a key of TAG_SORT_CHOICES, else "created_desc"."""
    allowed_keys = {choice[0] for choice in TAG_SORT_CHOICES}
    if sort not in allowed_keys:
        return "created_desc"
    return sort
|
||||
|
||||
|
||||
def normalize_has_snapshots_filter(value: str = "all") -> str:
    """Return *value* if it is a key of TAG_HAS_SNAPSHOTS_CHOICES, else "all"."""
    allowed_keys = {choice[0] for choice in TAG_HAS_SNAPSHOTS_CHOICES}
    if value not in allowed_keys:
        return "all"
    return value
|
||||
|
||||
|
||||
def normalize_created_by_filter(created_by: str = "") -> str:
    """Return *created_by* if it looks like a numeric user id, else "".

    Args:
        created_by: Raw filter value (typically a query-string parameter).

    Returns:
        The value unchanged when its string form consists solely of decimal
        digits, otherwise the empty string (meaning "no filter").
    """
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits ("\u00b2") that int() cannot parse, so they
    # would pass validation here and fail later when converted to an id.
    return created_by if str(created_by).isdecimal() else ""
|
||||
|
||||
|
||||
def normalize_created_year_filter(year: str = "") -> str:
    """Return *year* stripped if it is exactly four decimal digits, else "".

    Args:
        year: Raw filter value (typically a query-string parameter); falsy
            values are treated as "".

    Returns:
        The whitespace-stripped four-digit year string, or "" when the input
        is not a valid year (meaning "no filter").
    """
    year = (year or "").strip()
    # isdecimal() rather than isdigit(): the result is later passed to int(),
    # which rejects digit-like characters such as superscripts
    # ("\u00b2\u2070\u00b2\u2074") that isdigit() lets through.
    return year if len(year) == 4 and year.isdecimal() else ""
|
||||
|
||||
|
||||
def get_matching_tags(
|
||||
query: str = '',
|
||||
sort: str = 'created_desc',
|
||||
created_by: str = '',
|
||||
year: str = '',
|
||||
has_snapshots: str = 'all',
|
||||
query: str = "",
|
||||
sort: str = "created_desc",
|
||||
created_by: str = "",
|
||||
year: str = "",
|
||||
has_snapshots: str = "all",
|
||||
) -> QuerySet[Tag]:
|
||||
queryset = Tag.objects.select_related('created_by').annotate(
|
||||
num_snapshots=Count('snapshot_set', distinct=True),
|
||||
queryset = Tag.objects.select_related("created_by").annotate(
|
||||
num_snapshots=Count("snapshot_set", distinct=True),
|
||||
)
|
||||
|
||||
query = normalize_tag_name(query)
|
||||
@@ -79,41 +79,40 @@ def get_matching_tags(
|
||||
queryset = queryset.filter(created_at__year=int(year))
|
||||
|
||||
has_snapshots = normalize_has_snapshots_filter(has_snapshots)
|
||||
if has_snapshots == 'yes':
|
||||
if has_snapshots == "yes":
|
||||
queryset = queryset.filter(num_snapshots__gt=0)
|
||||
elif has_snapshots == 'no':
|
||||
elif has_snapshots == "no":
|
||||
queryset = queryset.filter(num_snapshots=0)
|
||||
|
||||
sort = normalize_tag_sort(sort)
|
||||
if sort == 'name_asc':
|
||||
queryset = queryset.order_by(Lower('name'), 'id')
|
||||
elif sort == 'name_desc':
|
||||
queryset = queryset.order_by(Lower('name').desc(), '-id')
|
||||
elif sort == 'created_asc':
|
||||
queryset = queryset.order_by(F('created_at').asc(nulls_first=True), 'id', Lower('name'))
|
||||
elif sort == 'snapshots_desc':
|
||||
queryset = queryset.order_by(F('num_snapshots').desc(nulls_last=True), F('created_at').desc(nulls_last=True), '-id', Lower('name'))
|
||||
elif sort == 'snapshots_asc':
|
||||
queryset = queryset.order_by(F('num_snapshots').asc(nulls_first=True), Lower('name'), 'id')
|
||||
if sort == "name_asc":
|
||||
queryset = queryset.order_by(Lower("name"), "id")
|
||||
elif sort == "name_desc":
|
||||
queryset = queryset.order_by(Lower("name").desc(), "-id")
|
||||
elif sort == "created_asc":
|
||||
queryset = queryset.order_by(F("created_at").asc(nulls_first=True), "id", Lower("name"))
|
||||
elif sort == "snapshots_desc":
|
||||
queryset = queryset.order_by(F("num_snapshots").desc(nulls_last=True), F("created_at").desc(nulls_last=True), "-id", Lower("name"))
|
||||
elif sort == "snapshots_asc":
|
||||
queryset = queryset.order_by(F("num_snapshots").asc(nulls_first=True), Lower("name"), "id")
|
||||
else:
|
||||
queryset = queryset.order_by(F('created_at').desc(nulls_last=True), '-id', Lower('name'))
|
||||
queryset = queryset.order_by(F("created_at").desc(nulls_last=True), "-id", Lower("name"))
|
||||
|
||||
return queryset
|
||||
|
||||
|
||||
def get_tag_creator_choices() -> list[tuple[str, str]]:
    """Return distinct ``(user_id, label)`` pairs for users who created a tag.

    Rows are ordered by lower-cased username, then user id. The label is the
    username, or ``"User <id>"`` when the username is empty.
    """
    tags_with_creator = Tag.objects.filter(created_by__isnull=False)
    creator_rows = (
        tags_with_creator.values_list("created_by_id", "created_by__username")
        .order_by(Lower("created_by__username"), "created_by_id")
        .distinct()
    )

    choices = []
    for user_id, username in creator_rows:
        label = username or f"User {user_id}"
        choices.append((str(user_id), label))
    return choices
|
||||
|
||||
|
||||
def get_tag_year_choices() -> list[str]:
    """Return the distinct creation years of tags as strings, newest first."""
    dated_tags = Tag.objects.exclude(created_at__isnull=True)
    year_starts = dated_tags.dates("created_at", "year", order="DESC")
    return [str(year_start.year) for year_start in year_starts]
|
||||
|
||||
|
||||
@@ -134,7 +133,7 @@ def get_tag_by_ref(tag_ref: str | int) -> Tag:
|
||||
def get_or_create_tag(name: str, created_by: User | None = None) -> tuple[Tag, bool]:
|
||||
normalized_name = normalize_tag_name(name)
|
||||
if not normalized_name:
|
||||
raise ValueError('Tag name is required')
|
||||
raise ValueError("Tag name is required")
|
||||
|
||||
existing = Tag.objects.filter(name__iexact=normalized_name).first()
|
||||
if existing:
|
||||
@@ -150,7 +149,7 @@ def get_or_create_tag(name: str, created_by: User | None = None) -> tuple[Tag, b
|
||||
def rename_tag(tag: Tag, name: str) -> Tag:
|
||||
normalized_name = normalize_tag_name(name)
|
||||
if not normalized_name:
|
||||
raise ValueError('Tag name is required')
|
||||
raise ValueError("Tag name is required")
|
||||
|
||||
existing = Tag.objects.filter(name__iexact=normalized_name).exclude(pk=tag.pk).first()
|
||||
if existing:
|
||||
@@ -167,53 +166,56 @@ def delete_tag(tag: Tag) -> tuple[int, dict[str, int]]:
|
||||
|
||||
|
||||
def export_tag_urls(tag: Tag) -> str:
    """Return the URLs of *tag*'s snapshots, one per line.

    Snapshots are ordered by downloaded_at, created_at and pk, all descending.
    """
    ordered_snapshots = tag.snapshot_set.order_by("-downloaded_at", "-created_at", "-pk")
    url_values = ordered_snapshots.values_list("url", flat=True)
    return "\n".join(url_values)
|
||||
|
||||
|
||||
def export_tag_snapshots_jsonl(tag: Tag) -> str:
    """Return *tag*'s snapshots serialized as JSON Lines (one object per line).

    Snapshots are ordered by downloaded_at, created_at and pk, all descending;
    each line is ``json.dumps(snapshot.to_json())``.
    """
    ordered_snapshots = tag.snapshot_set.order_by("-downloaded_at", "-created_at", "-pk")
    # Prefetch tags up front for all snapshots in the export.
    ordered_snapshots = ordered_snapshots.prefetch_related("tags")
    json_lines = [json.dumps(snapshot.to_json()) for snapshot in ordered_snapshots]
    return "\n".join(json_lines)
|
||||
|
||||
|
||||
def _display_snapshot_title(snapshot: Snapshot) -> str:
    """Return a human-friendly title for *snapshot*, falling back to its URL.

    The URL is used when the title is empty, is the placeholder "pending..."
    (case-insensitive), or merely repeats the URL (case-insensitive).
    """
    title = (snapshot.title or "").strip()
    url = (snapshot.url or "").strip()

    uninformative_titles = ("pending...", url.lower())
    if title and title.lower() not in uninformative_titles:
        return title
    return url
|
||||
|
||||
|
||||
def _build_snapshot_preview(snapshot: Snapshot, request: HttpRequest | None = None) -> dict[str, Any]:
    """Build the preview dict describing *snapshot*.

    Returns a dict with the snapshot's id, display title, URL, favicon URL,
    admin change-page URL, archive index URL, and ISO-formatted download time
    (``None`` if ``downloaded_at`` is unset). *request* is forwarded to the
    URL builders.
    """
    snapshot_id = str(snapshot.pk)
    display_title = _display_snapshot_title(snapshot)
    original_url = snapshot.url
    favicon_url = build_snapshot_url(str(snapshot.pk), "favicon.ico", request=request)
    admin_url = reverse("admin:core_snapshot_change", args=[snapshot.pk])
    archive_url = build_web_url(f"/{snapshot.archive_path_from_db}/index.html", request=request)

    if snapshot.downloaded_at:
        downloaded_at = snapshot.downloaded_at.isoformat()
    else:
        downloaded_at = None

    return {
        "id": snapshot_id,
        "title": display_title,
        "url": original_url,
        "favicon_url": favicon_url,
        "admin_url": admin_url,
        "archive_url": archive_url,
        "downloaded_at": downloaded_at,
    }
|
||||
|
||||
|
||||
def _build_snapshot_preview_map(tags: list[Tag], request: HttpRequest | None = None, preview_limit: int = TAG_SNAPSHOT_PREVIEW_LIMIT) -> dict[int, list[dict[str, Any]]]:
|
||||
def _build_snapshot_preview_map(
|
||||
tags: list[Tag],
|
||||
request: HttpRequest | None = None,
|
||||
preview_limit: int = TAG_SNAPSHOT_PREVIEW_LIMIT,
|
||||
) -> dict[int, list[dict[str, Any]]]:
|
||||
tag_ids = [tag.pk for tag in tags]
|
||||
if not tag_ids:
|
||||
return {}
|
||||
|
||||
snapshot_tags = (
|
||||
SnapshotTag.objects
|
||||
.filter(tag_id__in=tag_ids)
|
||||
.select_related('snapshot__crawl__created_by')
|
||||
SnapshotTag.objects.filter(tag_id__in=tag_ids)
|
||||
.select_related("snapshot__crawl__created_by")
|
||||
.order_by(
|
||||
'tag_id',
|
||||
F('snapshot__downloaded_at').desc(nulls_last=True),
|
||||
F('snapshot__created_at').desc(nulls_last=True),
|
||||
F('snapshot_id').desc(),
|
||||
"tag_id",
|
||||
F("snapshot__downloaded_at").desc(nulls_last=True),
|
||||
F("snapshot__created_at").desc(nulls_last=True),
|
||||
F("snapshot_id").desc(),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -227,31 +229,31 @@ def _build_snapshot_preview_map(tags: list[Tag], request: HttpRequest | None = N
|
||||
|
||||
|
||||
def build_tag_card(tag: Tag, snapshot_previews: list[dict[str, Any]] | None = None) -> dict[str, Any]:
    """Build the dict describing one tag card.

    Args:
        tag: The tag to describe. If it carries a ``num_snapshots`` attribute
            that value is used; otherwise the snapshots are counted with a
            query.
        snapshot_previews: Preview dicts for the tag's snapshots; ``None`` or
            empty yields an empty list.

    Returns:
        A dict with the tag's id, name, slug, snapshot count, the admin
        filter/edit URLs, the API export/rename/delete URLs, and the previews.
    """
    # EAFP instead of getattr(tag, "num_snapshots", tag.snapshot_set.count()):
    # a getattr() default is evaluated eagerly, so the COUNT query used to run
    # for every tag even when the attribute was already present.
    try:
        count = tag.num_snapshots
    except AttributeError:
        count = tag.snapshot_set.count()

    return {
        "id": tag.pk,
        "name": tag.name,
        "slug": tag.slug,
        "num_snapshots": count,
        "filter_url": f"{reverse('admin:core_snapshot_changelist')}?tags__id__exact={tag.pk}",
        "edit_url": reverse("admin:core_tag_change", args=[tag.pk]),
        "export_urls_url": reverse("api-1:tag_urls_export", args=[tag.pk]),
        "export_jsonl_url": reverse("api-1:tag_snapshots_export", args=[tag.pk]),
        "rename_url": reverse("api-1:rename_tag", args=[tag.pk]),
        "delete_url": reverse("api-1:delete_tag", args=[tag.pk]),
        "snapshots": snapshot_previews or [],
    }
|
||||
|
||||
|
||||
def build_tag_cards(
|
||||
query: str = '',
|
||||
query: str = "",
|
||||
request: HttpRequest | None = None,
|
||||
limit: int | None = None,
|
||||
preview_limit: int = TAG_SNAPSHOT_PREVIEW_LIMIT,
|
||||
sort: str = 'created_desc',
|
||||
created_by: str = '',
|
||||
year: str = '',
|
||||
has_snapshots: str = 'all',
|
||||
sort: str = "created_desc",
|
||||
created_by: str = "",
|
||||
year: str = "",
|
||||
has_snapshots: str = "all",
|
||||
) -> list[dict[str, Any]]:
|
||||
queryset = get_matching_tags(
|
||||
query=query,
|
||||
@@ -265,7 +267,4 @@ def build_tag_cards(
|
||||
|
||||
tags = list(queryset)
|
||||
preview_map = _build_snapshot_preview_map(tags, request=request, preview_limit=preview_limit)
|
||||
return [
|
||||
build_tag_card(tag, snapshot_previews=preview_map.get(tag.pk, []))
|
||||
for tag in tags
|
||||
]
|
||||
return [build_tag_card(tag, snapshot_previews=preview_map.get(tag.pk, [])) for tag in tags]
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
from typing import Any
|
||||
|
||||
from django import template
|
||||
from django.contrib.admin.templatetags.base import InclusionAdminNode
|
||||
from django.utils.safestring import mark_safe
|
||||
from django.utils.html import escape
|
||||
|
||||
from typing import Union
|
||||
from pathlib import Path
|
||||
|
||||
from archivebox.hooks import (
|
||||
get_plugin_icon, get_plugin_template, get_plugin_name,
|
||||
get_plugin_icon,
|
||||
get_plugin_template,
|
||||
get_plugin_name,
|
||||
)
|
||||
from archivebox.core.host_utils import (
|
||||
get_admin_base_url,
|
||||
@@ -20,28 +23,70 @@ from archivebox.core.host_utils import (
|
||||
|
||||
register = template.Library()
|
||||
|
||||
_TEXT_PREVIEW_EXTS = (".json", ".jsonl", ".txt", ".csv", ".tsv", ".xml", ".yml", ".yaml", ".md", ".log")
|
||||
_IMAGE_PREVIEW_EXTS = (".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".ico", ".avif")
|
||||
|
||||
_MEDIA_FILE_EXTS = {
|
||||
'.mp4', '.webm', '.mkv', '.avi', '.mov', '.flv', '.wmv', '.m4v', '.mpg', '.mpeg', '.ts', '.m2ts', '.mts',
|
||||
'.3gp', '.3g2', '.ogv',
|
||||
'.mp3', '.m4a', '.aac', '.ogg', '.oga', '.opus', '.wav', '.flac', '.alac', '.aiff', '.wma', '.mka', '.ac3', '.eac3', '.dts',
|
||||
".mp4",
|
||||
".webm",
|
||||
".mkv",
|
||||
".avi",
|
||||
".mov",
|
||||
".flv",
|
||||
".wmv",
|
||||
".m4v",
|
||||
".mpg",
|
||||
".mpeg",
|
||||
".ts",
|
||||
".m2ts",
|
||||
".mts",
|
||||
".3gp",
|
||||
".3g2",
|
||||
".ogv",
|
||||
".mp3",
|
||||
".m4a",
|
||||
".aac",
|
||||
".ogg",
|
||||
".oga",
|
||||
".opus",
|
||||
".wav",
|
||||
".flac",
|
||||
".alac",
|
||||
".aiff",
|
||||
".wma",
|
||||
".mka",
|
||||
".ac3",
|
||||
".eac3",
|
||||
".dts",
|
||||
}
|
||||
|
||||
|
||||
def _normalize_output_files(output_files: Any) -> dict[str, dict[str, Any]]:
|
||||
if isinstance(output_files, dict):
|
||||
normalized: dict[str, dict[str, Any]] = {}
|
||||
for path, metadata in output_files.items():
|
||||
if not path:
|
||||
continue
|
||||
normalized[str(path)] = dict(metadata) if isinstance(metadata, dict) else {}
|
||||
return normalized
|
||||
return {}
|
||||
|
||||
|
||||
def _coerce_output_file_size(value: Any) -> int | None:
|
||||
try:
|
||||
return max(int(value or 0), 0)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _count_media_files(result) -> int:
|
||||
try:
|
||||
output_files = getattr(result, 'output_files', None) or {}
|
||||
output_files = _normalize_output_files(getattr(result, "output_files", None) or {})
|
||||
except Exception:
|
||||
output_files = {}
|
||||
|
||||
count_from_output = 0
|
||||
if output_files:
|
||||
count_from_output = sum(
|
||||
1
|
||||
for path in output_files.keys()
|
||||
if Path(path).suffix.lower() in _MEDIA_FILE_EXTS
|
||||
)
|
||||
if count_from_output >= 2:
|
||||
return count_from_output
|
||||
return sum(1 for path in output_files.keys() if Path(path).suffix.lower() in _MEDIA_FILE_EXTS)
|
||||
|
||||
try:
|
||||
plugin_dir = Path(result.snapshot_dir) / result.plugin
|
||||
@@ -54,7 +99,7 @@ def _count_media_files(result) -> int:
|
||||
count = 0
|
||||
scanned = 0
|
||||
max_scan = 500
|
||||
for file_path in plugin_dir.rglob('*'):
|
||||
for file_path in plugin_dir.rglob("*"):
|
||||
if scanned >= max_scan:
|
||||
break
|
||||
scanned += 1
|
||||
@@ -62,29 +107,28 @@ def _count_media_files(result) -> int:
|
||||
continue
|
||||
if file_path.suffix.lower() in _MEDIA_FILE_EXTS:
|
||||
count += 1
|
||||
return max(count_from_output, count)
|
||||
return count
|
||||
|
||||
|
||||
def _list_media_files(result) -> list[dict]:
|
||||
media_files: list[dict] = []
|
||||
try:
|
||||
plugin_dir = Path(result.snapshot_dir) / result.plugin
|
||||
snapshot_dir = Path(result.snapshot_dir)
|
||||
except Exception:
|
||||
return media_files
|
||||
|
||||
output_files = getattr(result, 'output_files', None) or {}
|
||||
candidates: list[Path] = []
|
||||
output_files = _normalize_output_files(getattr(result, "output_files", None) or {})
|
||||
candidates: list[tuple[Path, int | None]] = []
|
||||
if output_files:
|
||||
for path in output_files.keys():
|
||||
for path, metadata in output_files.items():
|
||||
rel_path = Path(path)
|
||||
if rel_path.suffix.lower() in _MEDIA_FILE_EXTS:
|
||||
candidates.append(rel_path)
|
||||
candidates.append((rel_path, _coerce_output_file_size(metadata.get("size"))))
|
||||
|
||||
if not candidates and plugin_dir.exists():
|
||||
scanned = 0
|
||||
max_scan = 2000
|
||||
for file_path in plugin_dir.rglob('*'):
|
||||
for file_path in plugin_dir.rglob("*"):
|
||||
if scanned >= max_scan:
|
||||
break
|
||||
scanned += 1
|
||||
@@ -95,40 +139,143 @@ def _list_media_files(result) -> list[dict]:
|
||||
rel_path = file_path.relative_to(plugin_dir)
|
||||
except ValueError:
|
||||
continue
|
||||
candidates.append(rel_path)
|
||||
try:
|
||||
size = file_path.stat().st_size
|
||||
except OSError:
|
||||
size = None
|
||||
candidates.append((rel_path, size))
|
||||
|
||||
for rel_path in candidates:
|
||||
file_path = plugin_dir / rel_path
|
||||
if not file_path.exists() or not file_path.is_file():
|
||||
continue
|
||||
try:
|
||||
size = file_path.stat().st_size
|
||||
except OSError:
|
||||
size = None
|
||||
try:
|
||||
href = str(file_path.relative_to(snapshot_dir))
|
||||
except ValueError:
|
||||
href = str(Path(result.plugin) / rel_path)
|
||||
media_files.append({
|
||||
'name': file_path.name,
|
||||
'path': href,
|
||||
'size': size,
|
||||
})
|
||||
for rel_path, size in candidates:
|
||||
href = str(Path(result.plugin) / rel_path)
|
||||
media_files.append(
|
||||
{
|
||||
"name": rel_path.name,
|
||||
"path": href,
|
||||
"size": size,
|
||||
},
|
||||
)
|
||||
|
||||
media_files.sort(key=lambda item: item['name'].lower())
|
||||
media_files.sort(key=lambda item: item["name"].lower())
|
||||
return media_files
|
||||
|
||||
@register.filter(name='split')
|
||||
def split(value, separator: str=','):
|
||||
return (value or '').split(separator)
|
||||
|
||||
def _resolve_snapshot_output_file(snapshot_dir: str | Path | None, raw_output_path: str | None) -> Path | None:
|
||||
if not snapshot_dir or not raw_output_path or str(raw_output_path).strip() in (".", "/", "./"):
|
||||
return None
|
||||
|
||||
output_file = Path(raw_output_path)
|
||||
if not output_file.is_absolute():
|
||||
output_file = Path(snapshot_dir) / raw_output_path
|
||||
|
||||
try:
|
||||
output_file = output_file.resolve()
|
||||
snap_dir = Path(snapshot_dir).resolve()
|
||||
if snap_dir not in output_file.parents and output_file != snap_dir:
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
if output_file.exists() and output_file.is_file():
|
||||
return output_file
|
||||
return None
|
||||
|
||||
|
||||
def _is_text_preview_path(raw_output_path: str | None) -> bool:
    """Return True when the lowercased path ends with one of ``_TEXT_PREVIEW_EXTS``.

    ``None`` and empty paths are treated as the empty string.
    """
    lowered = (raw_output_path or "").lower()
    return lowered.endswith(_TEXT_PREVIEW_EXTS)
|
||||
|
||||
|
||||
def _is_image_preview_path(raw_output_path: str | None) -> bool:
    """Return True when the lowercased path ends with one of ``_IMAGE_PREVIEW_EXTS``.

    ``None`` and empty paths are treated as the empty string.
    """
    lowered = (raw_output_path or "").lower()
    return lowered.endswith(_IMAGE_PREVIEW_EXTS)
|
||||
|
||||
|
||||
def _is_root_snapshot_output_path(raw_output_path: str | None) -> bool:
|
||||
normalized = str(raw_output_path or "").strip().lower()
|
||||
return normalized in ("", ".", "./", "/", "index.html", "index.json")
|
||||
|
||||
|
||||
def _build_snapshot_files_url(snapshot_id: str, request=None) -> str:
    """Build the URL of a snapshot's root path with the ``files=1`` query flag set."""
    files_path = "/?files=1"
    return build_snapshot_url(str(snapshot_id), files_path, request=request)
|
||||
|
||||
|
||||
def _build_snapshot_preview_url(snapshot_id: str, path: str = "", request=None) -> str:
    """Build the URL used to preview one output path of a snapshot.

    - ``"about:blank"`` is returned unchanged.
    - Root-like paths (see ``_is_root_snapshot_output_path``) map to the
      snapshot's files listing URL.
    - Text and image paths get a ``preview=1`` query parameter appended.
    - Anything else gets the plain snapshot URL.
    """
    if path == "about:blank":
        return path
    if _is_root_snapshot_output_path(path):
        return _build_snapshot_files_url(snapshot_id, request=request)

    target = build_snapshot_url(str(snapshot_id), path, request=request)
    wants_preview = _is_text_preview_path(path) or _is_image_preview_path(path)
    if wants_preview:
        # Extend an existing query string instead of starting a second one.
        joiner = "?" if "?" not in target else "&"
        target = f"{target}{joiner}preview=1"
    return target
|
||||
|
||||
|
||||
def _render_text_preview(plugin: str, icon_html: str, snippet: str) -> str:
    """Render the compact text-preview card markup for a plugin output.

    ``plugin`` and ``snippet`` are passed through ``escape`` before being
    interpolated. ``icon_html`` is inserted verbatim, so it must already be
    safe markup.
    """
    safe_plugin = escape(plugin or "")
    safe_snippet = escape(snippet)
    parts = [
        f'<div class="thumbnail-text" data-plugin="{safe_plugin}" data-compact="1">',
        '<div class="thumbnail-text-header">',
        f'<span class="thumbnail-compact-icon">{icon_html}</span>',
        f'<span class="thumbnail-text-title">{safe_plugin}</span>',
        "</div>",
        f'<pre class="thumbnail-text-pre">{safe_snippet}</pre>',
        "</div>",
    ]
    return "".join(parts)
|
||||
|
||||
|
||||
def _render_fallback_card(plugin: str, icon_html: str, fallback_label: str) -> str:
    """Render the compact fallback card markup shown when no richer preview exists.

    ``plugin`` and ``fallback_label`` are passed through ``escape`` before
    being interpolated. ``icon_html`` is inserted verbatim, so it must already
    be safe markup.
    """
    safe_plugin = escape(plugin or "")
    safe_label = escape(fallback_label)
    parts = [
        f'<div class="thumbnail-compact" data-plugin="{safe_plugin}" data-compact="1">',
        f'<span class="thumbnail-compact-icon">{icon_html}</span>',
        f'<span class="thumbnail-compact-label">{safe_plugin}</span>',
        f'<span class="thumbnail-compact-meta">{safe_label}</span>',
        "</div>",
    ]
    return "".join(parts)
|
||||
|
||||
|
||||
def _render_text_file_preview(snapshot_dir: str | Path | None, raw_output_path: str | None, plugin: str, icon_html: str) -> str | None:
    """Render a short text preview card for an output file, if one can be read.

    The path is first validated with ``_resolve_snapshot_output_file``. Only
    the first 4096 bytes are read, decoded as UTF-8 with undecodable bytes
    replaced, and the first 6 lines of the stripped text are shown.

    Args:
        snapshot_dir: Root directory of the snapshot.
        raw_output_path: Output path, relative to ``snapshot_dir`` or absolute.
        plugin: Plugin name shown in the card header.
        icon_html: Icon markup inserted into the card header.

    Returns:
        The card markup, or ``None`` when the path does not resolve to a file,
        the file cannot be read, or it contains no non-whitespace text.
    """
    output_file = _resolve_snapshot_output_file(snapshot_dir, raw_output_path)
    if not output_file:
        return None

    # Only the file read is guarded: it is the only step here expected to fail.
    try:
        with output_file.open("rb") as f:
            raw = f.read(4096)
    except OSError:
        # Unreadable or vanished since it was resolved: no preview available.
        return None

    text = raw.decode("utf-8", errors="replace").strip()
    if not text:
        return None

    snippet = "\n".join(text.splitlines()[:6])
    return _render_text_preview(plugin, icon_html, snippet)
|
||||
|
||||
|
||||
@register.filter(name="split")
def split(value, separator: str = ","):
    """Template filter: split ``value`` on ``separator``.

    Falsy values are treated as the empty string, so they yield ``[""]``.
    """
    text = value or ""
    return text.split(separator)
|
||||
|
||||
|
||||
@register.filter(name="index")
def index(value, position):
    """Template filter: return ``value[int(position)]``, or ``None`` if the lookup fails."""
    try:
        offset = int(position)
        return value[offset]
    except Exception:
        # Any failure (non-numeric position, out of range, unsubscriptable value) yields None.
        return None
|
||||
|
||||
|
||||
@register.filter
|
||||
def file_size(num_bytes: Union[int, float]) -> str:
|
||||
for count in ['Bytes','KB','MB','GB']:
|
||||
def file_size(num_bytes: int | float) -> str:
|
||||
for count in ["Bytes", "KB", "MB", "GB"]:
|
||||
if num_bytes > -1024.0 and num_bytes < 1024.0:
|
||||
return '%3.1f %s' % (num_bytes, count)
|
||||
return f"{num_bytes:3.1f} {count}"
|
||||
num_bytes /= 1024.0
|
||||
return '%3.1f %s' % (num_bytes, 'TB')
|
||||
return "{:3.1f} {}".format(num_bytes, "TB")
|
||||
|
||||
|
||||
def result_list(cl):
|
||||
"""
|
||||
@@ -136,52 +283,61 @@ def result_list(cl):
|
||||
"""
|
||||
num_sorted_fields = 0
|
||||
return {
|
||||
'cl': cl,
|
||||
'num_sorted_fields': num_sorted_fields,
|
||||
'results': cl.result_list,
|
||||
"cl": cl,
|
||||
"num_sorted_fields": num_sorted_fields,
|
||||
"results": cl.result_list,
|
||||
}
|
||||
|
||||
@register.tag(name='snapshots_grid')
|
||||
|
||||
@register.tag(name="snapshots_grid")
|
||||
def result_list_tag(parser, token):
|
||||
return InclusionAdminNode(
|
||||
parser, token,
|
||||
parser,
|
||||
token,
|
||||
func=result_list,
|
||||
template_name='snapshots_grid.html',
|
||||
template_name="snapshots_grid.html",
|
||||
takes_context=False,
|
||||
)
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
|
||||
def url_replace(context, **kwargs):
|
||||
dict_ = context['request'].GET.copy()
|
||||
dict_ = context["request"].GET.copy()
|
||||
dict_.update(**kwargs)
|
||||
return dict_.urlencode()
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
|
||||
def admin_base_url(context) -> str:
|
||||
return get_admin_base_url(request=context.get('request'))
|
||||
return get_admin_base_url(request=context.get("request"))
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
|
||||
def web_base_url(context) -> str:
|
||||
return get_web_base_url(request=context.get('request'))
|
||||
return get_web_base_url(request=context.get("request"))
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
|
||||
def public_base_url(context) -> str:
|
||||
return get_public_base_url(request=context.get('request'))
|
||||
return get_public_base_url(request=context.get("request"))
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
|
||||
def snapshot_base_url(context, snapshot) -> str:
|
||||
snapshot_id = getattr(snapshot, 'id', snapshot)
|
||||
return get_snapshot_base_url(str(snapshot_id), request=context.get('request'))
|
||||
snapshot_id = getattr(snapshot, "id", snapshot)
|
||||
return get_snapshot_base_url(str(snapshot_id), request=context.get("request"))
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
|
||||
def snapshot_url(context, snapshot, path: str = "") -> str:
|
||||
snapshot_id = getattr(snapshot, 'id', snapshot)
|
||||
return build_snapshot_url(str(snapshot_id), path, request=context.get('request'))
|
||||
snapshot_id = getattr(snapshot, "id", snapshot)
|
||||
return build_snapshot_url(str(snapshot_id), path, request=context.get("request"))
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
def snapshot_preview_url(context, snapshot, path: str = "") -> str:
    """Template tag: return the preview URL for ``path`` within a snapshot.

    ``snapshot`` may be an object with an ``id`` attribute or the id itself.
    The request is taken from the template context when present.
    """
    snapshot_id = getattr(snapshot, "id", snapshot)
    request = context.get("request")
    return _build_snapshot_preview_url(str(snapshot_id), path, request=request)
|
||||
|
||||
|
||||
@register.simple_tag
|
||||
@@ -193,7 +349,7 @@ def plugin_icon(plugin: str) -> str:
|
||||
"""
|
||||
icon_html = get_plugin_icon(plugin)
|
||||
return mark_safe(
|
||||
f'<span class="abx-plugin-icon" style="display:inline-flex; width:20px; height:20px; align-items:center; justify-content:center;">{icon_html}</span>'
|
||||
f'<span class="abx-plugin-icon" style="display:inline-flex; width:20px; height:20px; align-items:center; justify-content:center;">{icon_html}</span>',
|
||||
)
|
||||
|
||||
|
||||
@@ -210,46 +366,50 @@ def plugin_card(context, result) -> str:
|
||||
- output_path: Path to output relative to snapshot dir (from embed_path())
|
||||
- plugin: Plugin base name
|
||||
"""
|
||||
if result is None or not hasattr(result, "plugin"):
|
||||
return ""
|
||||
|
||||
plugin = get_plugin_name(result.plugin)
|
||||
template_str = get_plugin_template(plugin, 'card')
|
||||
template_str = get_plugin_template(plugin, "card")
|
||||
|
||||
# Use embed_path() for the display path
|
||||
raw_output_path = result.embed_path() if hasattr(result, 'embed_path') else ''
|
||||
raw_output_path = result.embed_path() if hasattr(result, "embed_path") else ""
|
||||
output_url = build_snapshot_url(
|
||||
str(getattr(result, 'snapshot_id', '')),
|
||||
raw_output_path or '',
|
||||
request=context.get('request'),
|
||||
str(getattr(result, "snapshot_id", "")),
|
||||
raw_output_path or "",
|
||||
request=context.get("request"),
|
||||
)
|
||||
|
||||
icon_html = get_plugin_icon(plugin)
|
||||
plugin_lower = (plugin or '').lower()
|
||||
media_file_count = _count_media_files(result) if plugin_lower in ('ytdlp', 'yt-dlp', 'youtube-dl') else 0
|
||||
media_files = _list_media_files(result) if plugin_lower in ('ytdlp', 'yt-dlp', 'youtube-dl') else []
|
||||
plugin_lower = (plugin or "").lower()
|
||||
media_file_count = _count_media_files(result) if plugin_lower in ("ytdlp", "yt-dlp", "youtube-dl") else 0
|
||||
media_files = _list_media_files(result) if plugin_lower in ("ytdlp", "yt-dlp", "youtube-dl") else []
|
||||
if media_files:
|
||||
snapshot_id = str(getattr(result, 'snapshot_id', ''))
|
||||
request = context.get('request')
|
||||
snapshot_id = str(getattr(result, "snapshot_id", ""))
|
||||
request = context.get("request")
|
||||
for item in media_files:
|
||||
path = item.get('path') or ''
|
||||
item['url'] = build_snapshot_url(snapshot_id, path, request=request) if path else ''
|
||||
path = item.get("path") or ""
|
||||
item["url"] = build_snapshot_url(snapshot_id, path, request=request) if path else ""
|
||||
|
||||
output_lower = (raw_output_path or '').lower()
|
||||
text_preview_exts = ('.json', '.jsonl', '.txt', '.csv', '.tsv', '.xml', '.yml', '.yaml', '.md', '.log')
|
||||
force_text_preview = output_lower.endswith(text_preview_exts)
|
||||
output_lower = (raw_output_path or "").lower()
|
||||
force_text_preview = output_lower.endswith(_TEXT_PREVIEW_EXTS)
|
||||
|
||||
# Create a mini template and render it with context
|
||||
try:
|
||||
if template_str and raw_output_path and str(raw_output_path).strip() not in ('.', '/', './') and not force_text_preview:
|
||||
if template_str and raw_output_path and str(raw_output_path).strip() not in (".", "/", "./") and not force_text_preview:
|
||||
tpl = template.Template(template_str)
|
||||
ctx = template.Context({
|
||||
'result': result,
|
||||
'snapshot': result.snapshot,
|
||||
'output_path': output_url,
|
||||
'output_path_raw': raw_output_path,
|
||||
'plugin': plugin,
|
||||
'plugin_icon': icon_html,
|
||||
'media_file_count': media_file_count,
|
||||
'media_files': media_files,
|
||||
})
|
||||
ctx = template.Context(
|
||||
{
|
||||
"result": result,
|
||||
"snapshot": result.snapshot,
|
||||
"output_path": output_url,
|
||||
"output_path_raw": raw_output_path,
|
||||
"plugin": plugin,
|
||||
"plugin_icon": icon_html,
|
||||
"media_file_count": media_file_count,
|
||||
"media_files": media_files,
|
||||
},
|
||||
)
|
||||
rendered = tpl.render(ctx)
|
||||
# Only return non-empty content (strip whitespace to check)
|
||||
if rendered.strip():
|
||||
@@ -257,52 +417,30 @@ def plugin_card(context, result) -> str:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if force_text_preview and raw_output_path and str(raw_output_path).strip() not in ('.', '/', './'):
|
||||
output_file = Path(raw_output_path)
|
||||
if not output_file.is_absolute():
|
||||
output_file = Path(result.snapshot_dir) / raw_output_path
|
||||
try:
|
||||
output_file = output_file.resolve()
|
||||
snap_dir = Path(result.snapshot_dir).resolve()
|
||||
if snap_dir not in output_file.parents and output_file != snap_dir:
|
||||
output_file = None
|
||||
except Exception:
|
||||
output_file = None
|
||||
if output_file and output_file.exists() and output_file.is_file():
|
||||
try:
|
||||
with output_file.open('rb') as f:
|
||||
raw = f.read(4096)
|
||||
text = raw.decode('utf-8', errors='replace').strip()
|
||||
if text:
|
||||
lines = text.splitlines()[:6]
|
||||
snippet = '\n'.join(lines)
|
||||
escaped = escape(snippet)
|
||||
preview = (
|
||||
f'<div class="thumbnail-text" data-plugin="{plugin}" data-compact="1">'
|
||||
f'<div class="thumbnail-text-header">'
|
||||
f'<span class="thumbnail-compact-icon">{icon_html}</span>'
|
||||
f'<span class="thumbnail-text-title">{plugin}</span>'
|
||||
f'</div>'
|
||||
f'<pre class="thumbnail-text-pre">{escaped}</pre>'
|
||||
f'</div>'
|
||||
)
|
||||
return mark_safe(preview)
|
||||
except Exception:
|
||||
pass
|
||||
if force_text_preview:
|
||||
preview = _render_text_file_preview(getattr(result, "snapshot_dir", None), raw_output_path, plugin, icon_html)
|
||||
if preview:
|
||||
return mark_safe(preview)
|
||||
|
||||
if output_lower.endswith(text_preview_exts):
|
||||
fallback_label = 'text'
|
||||
if output_lower.endswith(_TEXT_PREVIEW_EXTS):
|
||||
fallback_label = "text"
|
||||
else:
|
||||
fallback_label = 'output'
|
||||
fallback_label = "output"
|
||||
|
||||
fallback = (
|
||||
f'<div class="thumbnail-compact" data-plugin="{plugin}" data-compact="1">'
|
||||
f'<span class="thumbnail-compact-icon">{icon_html}</span>'
|
||||
f'<span class="thumbnail-compact-label">{plugin}</span>'
|
||||
f'<span class="thumbnail-compact-meta">{fallback_label}</span>'
|
||||
f'</div>'
|
||||
)
|
||||
return mark_safe(fallback)
|
||||
return mark_safe(_render_fallback_card(plugin, icon_html, fallback_label))
|
||||
|
||||
|
||||
@register.simple_tag
def output_card(snapshot, output_path: str, plugin: str) -> str:
    """Template tag: render a card for one snapshot output file.

    A text preview of the file under ``snapshot.output_dir`` is tried first.
    If none can be produced, a compact fallback card is returned, labelled
    "text" when the path ends with one of ``_TEXT_PREVIEW_EXTS`` and "output"
    otherwise.
    """
    plugin_name = get_plugin_name(plugin)
    icon_html = get_plugin_icon(plugin_name)
    snapshot_dir = getattr(snapshot, "output_dir", None)

    # The text preview is attempted for every path, not only text extensions.
    preview = _render_text_file_preview(snapshot_dir, output_path, plugin_name, icon_html)
    if preview:
        return mark_safe(preview)

    if (output_path or "").lower().endswith(_TEXT_PREVIEW_EXTS):
        fallback_label = "text"
    else:
        fallback_label = "output"
    return mark_safe(_render_fallback_card(plugin_name, icon_html, fallback_label))
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
|
||||
@@ -312,37 +450,46 @@ def plugin_full(context, result) -> str:
|
||||
|
||||
Usage: {% plugin_full result %}
|
||||
"""
|
||||
if result is None or not hasattr(result, "plugin"):
|
||||
return ""
|
||||
|
||||
plugin = get_plugin_name(result.plugin)
|
||||
template_str = get_plugin_template(plugin, 'full')
|
||||
template_str = get_plugin_template(plugin, "full")
|
||||
|
||||
if not template_str:
|
||||
return ''
|
||||
return ""
|
||||
|
||||
raw_output_path = result.embed_path() if hasattr(result, 'embed_path') else ''
|
||||
raw_output_path = ""
|
||||
if hasattr(result, "embed_path_db"):
|
||||
raw_output_path = result.embed_path_db() or ""
|
||||
if not raw_output_path and hasattr(result, "embed_path"):
|
||||
raw_output_path = result.embed_path() or ""
|
||||
if _is_root_snapshot_output_path(raw_output_path):
|
||||
return ""
|
||||
output_url = build_snapshot_url(
|
||||
str(getattr(result, 'snapshot_id', '')),
|
||||
raw_output_path or '',
|
||||
request=context.get('request'),
|
||||
str(getattr(result, "snapshot_id", "")),
|
||||
raw_output_path,
|
||||
request=context.get("request"),
|
||||
)
|
||||
|
||||
try:
|
||||
tpl = template.Template(template_str)
|
||||
ctx = template.Context({
|
||||
'result': result,
|
||||
'snapshot': result.snapshot,
|
||||
'output_path': output_url,
|
||||
'output_path_raw': raw_output_path,
|
||||
'plugin': plugin,
|
||||
})
|
||||
ctx = template.Context(
|
||||
{
|
||||
"result": result,
|
||||
"snapshot": result.snapshot,
|
||||
"output_path": output_url,
|
||||
"output_path_raw": raw_output_path,
|
||||
"plugin": plugin,
|
||||
},
|
||||
)
|
||||
rendered = tpl.render(ctx)
|
||||
# Only return non-empty content (strip whitespace to check)
|
||||
if rendered.strip():
|
||||
return mark_safe(rendered)
|
||||
return ''
|
||||
return ""
|
||||
except Exception:
|
||||
return ''
|
||||
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
@register.filter
|
||||
@@ -355,8 +502,6 @@ def plugin_name(value: str) -> str:
|
||||
return get_plugin_name(value)
|
||||
|
||||
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
|
||||
def api_token(context) -> str:
|
||||
"""
|
||||
@@ -364,10 +509,10 @@ def api_token(context) -> str:
|
||||
"""
|
||||
from archivebox.api.auth import get_or_create_api_token
|
||||
|
||||
request = context.get('request')
|
||||
user = getattr(request, 'user', None)
|
||||
request = context.get("request")
|
||||
user = getattr(request, "user", None)
|
||||
if not user or not user.is_authenticated:
|
||||
return ''
|
||||
return ""
|
||||
|
||||
token = get_or_create_api_token(user)
|
||||
return token.token if token else ''
|
||||
return token.token if token else ""
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
|
||||
from django.urls import path, re_path, include
|
||||
from django.views import static
|
||||
@@ -9,7 +9,18 @@ from django.http import HttpRequest
|
||||
from archivebox.misc.serve_static import serve_static
|
||||
|
||||
from archivebox.core.admin_site import archivebox_admin
|
||||
from archivebox.core.views import HomepageView, SnapshotView, SnapshotPathView, SnapshotReplayView, OriginalDomainReplayView, PublicIndexView, AddView, WebAddView, HealthCheckView, live_progress_view
|
||||
from archivebox.core.views import (
|
||||
HomepageView,
|
||||
SnapshotView,
|
||||
SnapshotPathView,
|
||||
SnapshotReplayView,
|
||||
OriginalDomainReplayView,
|
||||
PublicIndexView,
|
||||
AddView,
|
||||
WebAddView,
|
||||
HealthCheckView,
|
||||
live_progress_view,
|
||||
)
|
||||
|
||||
|
||||
# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
|
||||
@@ -22,54 +33,54 @@ from archivebox.core.views import HomepageView, SnapshotView, SnapshotPathView,
|
||||
urlpatterns = [
|
||||
re_path(r"^static/(?P<path>.*)$", serve_static),
|
||||
# re_path(r"^media/(?P<path>.*)$", static.serve, {"document_root": settings.MEDIA_ROOT}),
|
||||
|
||||
path('robots.txt', static.serve, {'document_root': settings.STATICFILES_DIRS[0], 'path': 'robots.txt'}),
|
||||
path('favicon.ico', static.serve, {'document_root': settings.STATICFILES_DIRS[0], 'path': 'favicon.ico'}),
|
||||
|
||||
path('docs/', RedirectView.as_view(url='https://github.com/ArchiveBox/ArchiveBox/wiki'), name='Docs'),
|
||||
|
||||
path('public/', PublicIndexView.as_view(), name='public-index'),
|
||||
path('public.html', RedirectView.as_view(url='/public/'), name='public-index-html'),
|
||||
|
||||
path('archive/', RedirectView.as_view(url='/')),
|
||||
path('archive/<path:path>', SnapshotView.as_view(), name='Snapshot'),
|
||||
re_path(r'^snapshot\/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:\/(?P<path>.*))?$', SnapshotReplayView.as_view(), name='snapshot-replay'),
|
||||
re_path(r'^original\/(?P<domain>[^/]+)(?:\/(?P<path>.*))?$', OriginalDomainReplayView.as_view(), name='original-replay'),
|
||||
re_path(r'^web/(?P<url>(?!\d{4}(?:\d{2})?(?:\d{2})?(?:/|$)).+)$', WebAddView.as_view(), name='web-add'),
|
||||
re_path(r'^(?P<username>[^/]+)/(?P<date>\d{4}(?:\d{2})?(?:\d{2})?)/(?P<url>https?://.*)$', SnapshotPathView.as_view(), name='snapshot-path-url'),
|
||||
re_path(r'^(?P<username>[^/]+)/(?P<date>\d{4}(?:\d{2})?(?:\d{2})?)/(?P<domain>[^/]+)(?:/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:/(?P<path>.*))?)?$', SnapshotPathView.as_view(), name='snapshot-path'),
|
||||
re_path(r'^(?P<username>[^/]+)/(?P<url>https?://.*)$', SnapshotPathView.as_view(), name='snapshot-path-url-nodate'),
|
||||
re_path(r'^(?P<username>[^/]+)/(?P<domain>[^/]+)(?:/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:/(?P<path>.*))?)?$', SnapshotPathView.as_view(), name='snapshot-path-nodate'),
|
||||
|
||||
path('admin/core/snapshot/add/', RedirectView.as_view(url='/add/')),
|
||||
path('add/', AddView.as_view(), name='add'),
|
||||
|
||||
path('accounts/login/', RedirectView.as_view(url='/admin/login/')),
|
||||
path('accounts/logout/', RedirectView.as_view(url='/admin/logout/')),
|
||||
|
||||
|
||||
path('accounts/', include('django.contrib.auth.urls')),
|
||||
|
||||
path('admin/live-progress/', live_progress_view, name='live_progress'),
|
||||
path('admin/', archivebox_admin.urls),
|
||||
|
||||
path("api/", include('archivebox.api.urls'), name='api'),
|
||||
|
||||
path('health/', HealthCheckView.as_view(), name='healthcheck'),
|
||||
path('error/', lambda request: _raise_test_error(request)),
|
||||
|
||||
path("robots.txt", static.serve, {"document_root": settings.STATICFILES_DIRS[0], "path": "robots.txt"}),
|
||||
path("favicon.ico", static.serve, {"document_root": settings.STATICFILES_DIRS[0], "path": "favicon.ico"}),
|
||||
path("docs/", RedirectView.as_view(url="https://github.com/ArchiveBox/ArchiveBox/wiki"), name="Docs"),
|
||||
path("public/", PublicIndexView.as_view(), name="public-index"),
|
||||
path("public.html", RedirectView.as_view(url="/public/"), name="public-index-html"),
|
||||
path("archive/", RedirectView.as_view(url="/")),
|
||||
path("archive/<path:path>", SnapshotView.as_view(), name="Snapshot"),
|
||||
re_path(r"^snapshot\/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:\/(?P<path>.*))?$", SnapshotReplayView.as_view(), name="snapshot-replay"),
|
||||
re_path(r"^original\/(?P<domain>[^/]+)(?:\/(?P<path>.*))?$", OriginalDomainReplayView.as_view(), name="original-replay"),
|
||||
re_path(r"^web/(?P<url>(?!\d{4}(?:\d{2})?(?:\d{2})?(?:/|$)).+)$", WebAddView.as_view(), name="web-add"),
|
||||
re_path(
|
||||
r"^(?P<username>[^/]+)/(?P<date>\d{4}(?:\d{2})?(?:\d{2})?)/(?P<url>https?://.*)$",
|
||||
SnapshotPathView.as_view(),
|
||||
name="snapshot-path-url",
|
||||
),
|
||||
re_path(
|
||||
r"^(?P<username>[^/]+)/(?P<date>\d{4}(?:\d{2})?(?:\d{2})?)/(?P<domain>[^/]+)(?:/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:/(?P<path>.*))?)?$",
|
||||
SnapshotPathView.as_view(),
|
||||
name="snapshot-path",
|
||||
),
|
||||
re_path(r"^(?P<username>[^/]+)/(?P<url>https?://.*)$", SnapshotPathView.as_view(), name="snapshot-path-url-nodate"),
|
||||
re_path(
|
||||
r"^(?P<username>[^/]+)/(?P<domain>[^/]+)(?:/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:/(?P<path>.*))?)?$",
|
||||
SnapshotPathView.as_view(),
|
||||
name="snapshot-path-nodate",
|
||||
),
|
||||
path("admin/core/snapshot/add/", RedirectView.as_view(url="/add/")),
|
||||
path("add/", AddView.as_view(), name="add"),
|
||||
path("accounts/login/", RedirectView.as_view(url="/admin/login/")),
|
||||
path("accounts/logout/", RedirectView.as_view(url="/admin/logout/")),
|
||||
path("accounts/", include("django.contrib.auth.urls")),
|
||||
path("admin/live-progress/", live_progress_view, name="live_progress"),
|
||||
path("admin/", archivebox_admin.urls),
|
||||
path("api/", include("archivebox.api.urls"), name="api"),
|
||||
path("health/", HealthCheckView.as_view(), name="healthcheck"),
|
||||
path("error/", lambda request: _raise_test_error(request)),
|
||||
# path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django
|
||||
|
||||
path('index.html', RedirectView.as_view(url='/')),
|
||||
path('', HomepageView.as_view(), name='Home'),
|
||||
path("index.html", RedirectView.as_view(url="/")),
|
||||
path("", HomepageView.as_view(), name="Home"),
|
||||
]
|
||||
|
||||
|
||||
def _raise_test_error(_request: HttpRequest):
|
||||
raise ZeroDivisionError('Intentional test error route')
|
||||
raise ZeroDivisionError("Intentional test error route")
|
||||
|
||||
|
||||
if settings.DEBUG_TOOLBAR:
|
||||
urlpatterns += [path('__debug__/', include("debug_toolbar.urls"))]
|
||||
urlpatterns += [path("__debug__/", include("debug_toolbar.urls"))]
|
||||
|
||||
if settings.DEBUG_REQUESTS_TRACKER:
|
||||
urlpatterns += [path("__requests_tracker__/", include("requests_tracker.urls"))]
|
||||
@@ -84,7 +95,7 @@ if settings.DEBUG_REQUESTS_TRACKER:
|
||||
# path('/admin', admin.site.urls)
|
||||
# path('/accounts', django.contrib.auth.urls)
|
||||
|
||||
# # Prposed REST API spec
|
||||
# # Proposed REST API spec
|
||||
# # :slugs can be uuid, short_uuid, or any of the unique index_fields
|
||||
# path('api/v1/'),
|
||||
# path('api/v1/core/' [GET])
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.core'
|
||||
__package__ = "archivebox.core"
|
||||
|
||||
import json
|
||||
import re
|
||||
@@ -16,10 +16,11 @@ class TagEditorWidget(forms.Widget):
|
||||
- Press Enter or Space to create new tags (auto-creates if doesn't exist)
|
||||
- Uses AJAX for autocomplete and tag creation
|
||||
"""
|
||||
|
||||
template_name = "" # We render manually
|
||||
|
||||
class Media:
|
||||
css = {'all': []}
|
||||
css = {"all": []}
|
||||
js = []
|
||||
|
||||
def __init__(self, attrs=None, snapshot_id=None):
|
||||
@@ -28,24 +29,24 @@ class TagEditorWidget(forms.Widget):
|
||||
|
||||
def _escape(self, value):
|
||||
"""Escape HTML entities in value."""
|
||||
return escape(str(value)) if value else ''
|
||||
return escape(str(value)) if value else ""
|
||||
|
||||
def _normalize_id(self, value):
|
||||
"""Normalize IDs for HTML + JS usage (letters, digits, underscore; JS-safe start)."""
|
||||
normalized = re.sub(r'[^A-Za-z0-9_]', '_', str(value))
|
||||
if not normalized or not re.match(r'[A-Za-z_]', normalized):
|
||||
normalized = f't_{normalized}'
|
||||
normalized = re.sub(r"[^A-Za-z0-9_]", "_", str(value))
|
||||
if not normalized or not re.match(r"[A-Za-z_]", normalized):
|
||||
normalized = f"t_{normalized}"
|
||||
return normalized
|
||||
|
||||
def _tag_style(self, value):
|
||||
"""Compute a stable pastel color style for a tag value."""
|
||||
tag = (value or '').strip().lower()
|
||||
digest = hashlib.md5(tag.encode('utf-8')).hexdigest()
|
||||
tag = (value or "").strip().lower()
|
||||
digest = hashlib.md5(tag.encode("utf-8")).hexdigest()
|
||||
hue = int(digest[:4], 16) % 360
|
||||
bg = f'hsl({hue}, 70%, 92%)'
|
||||
border = f'hsl({hue}, 60%, 82%)'
|
||||
fg = f'hsl({hue}, 35%, 28%)'
|
||||
return f'--tag-bg: {bg}; --tag-border: {border}; --tag-fg: {fg};'
|
||||
bg = f"hsl({hue}, 70%, 92%)"
|
||||
border = f"hsl({hue}, 60%, 82%)"
|
||||
fg = f"hsl({hue}, 35%, 28%)"
|
||||
return f"--tag-bg: {bg}; --tag-border: {border}; --tag-fg: {fg};"
|
||||
|
||||
def render(self, name, value, attrs=None, renderer=None):
|
||||
"""
|
||||
@@ -64,14 +65,15 @@ class TagEditorWidget(forms.Widget):
|
||||
# Parse value to get list of tag names
|
||||
tags = []
|
||||
if value:
|
||||
if hasattr(value, 'all'): # QuerySet
|
||||
if hasattr(value, "all"): # QuerySet
|
||||
tags = sorted([tag.name for tag in value.all()])
|
||||
elif isinstance(value, (list, tuple)):
|
||||
if value and hasattr(value[0], 'name'): # List of Tag objects
|
||||
if value and hasattr(value[0], "name"): # List of Tag objects
|
||||
tags = sorted([tag.name for tag in value])
|
||||
else: # List of strings or IDs
|
||||
# Could be tag IDs from form submission
|
||||
from archivebox.core.models import Tag
|
||||
|
||||
tag_names = []
|
||||
for v in value:
|
||||
if isinstance(v, str) and not v.isdigit():
|
||||
@@ -85,13 +87,13 @@ class TagEditorWidget(forms.Widget):
|
||||
tag_names.append(v)
|
||||
tags = sorted(tag_names)
|
||||
elif isinstance(value, str):
|
||||
tags = sorted([t.strip() for t in value.split(',') if t.strip()])
|
||||
tags = sorted([t.strip() for t in value.split(",") if t.strip()])
|
||||
|
||||
widget_id_raw = attrs.get('id', name) if attrs else name
|
||||
widget_id_raw = attrs.get("id", name) if attrs else name
|
||||
widget_id = self._normalize_id(widget_id_raw)
|
||||
|
||||
# Build pills HTML
|
||||
pills_html = ''
|
||||
pills_html = ""
|
||||
for tag in tags:
|
||||
pills_html += f'''
|
||||
<span class="tag-pill" data-tag="{self._escape(tag)}" style="{self._tag_style(tag)}">
|
||||
@@ -113,11 +115,11 @@ class TagEditorWidget(forms.Widget):
|
||||
placeholder="Add tag..."
|
||||
autocomplete="off"
|
||||
onkeydown="handleTagKeydown_{widget_id}(event)"
|
||||
onkeypress="if(event.key==='Enter' || event.keyCode===13){{event.preventDefault(); event.stopPropagation();}}"
|
||||
onkeypress="if(event.key==='Enter' || event.keyCode===13 || event.key===' ' || event.code==='Space' || event.key==='Spacebar'){{event.preventDefault(); event.stopPropagation();}}"
|
||||
oninput="fetchTagAutocomplete_{widget_id}(this.value)"
|
||||
>
|
||||
<datalist id="{widget_id}_datalist"></datalist>
|
||||
<input type="hidden" name="{name}" id="{widget_id}" value="{self._escape(','.join(tags))}">
|
||||
<input type="hidden" name="{name}" id="{widget_id}" value="{self._escape(",".join(tags))}">
|
||||
</div>
|
||||
|
||||
<script>
|
||||
@@ -300,13 +302,16 @@ class TagEditorWidget(forms.Widget):
|
||||
window.handleTagKeydown_{widget_id} = function(event) {{
|
||||
var input = event.target;
|
||||
var value = input.value.trim();
|
||||
var isSpace = event.key === ' ' || event.code === 'Space' || event.key === 'Spacebar';
|
||||
var isEnter = event.key === 'Enter' || event.keyCode === 13;
|
||||
var isComma = event.key === ',';
|
||||
|
||||
if (event.key === 'Enter' || event.keyCode === 13 || event.key === ' ' || event.key === ',') {{
|
||||
if (isEnter || isSpace || isComma) {{
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
if (value) {{
|
||||
// Handle comma-separated values
|
||||
value.split(',').forEach(function(tag) {{
|
||||
// Treat commas and whitespace as tag boundaries.
|
||||
value.split(/[\s,]+/).forEach(function(tag) {{
|
||||
addTag_{widget_id}(tag.trim());
|
||||
}});
|
||||
}}
|
||||
@@ -385,10 +390,10 @@ class URLFiltersWidget(forms.Widget):
|
||||
|
||||
def render(self, name, value, attrs=None, renderer=None):
|
||||
value = value if isinstance(value, dict) else {}
|
||||
widget_id_raw = attrs.get('id', name) if attrs else name
|
||||
widget_id = re.sub(r'[^A-Za-z0-9_]', '_', str(widget_id_raw)) or name
|
||||
allowlist = escape(value.get('allowlist', '') or '')
|
||||
denylist = escape(value.get('denylist', '') or '')
|
||||
widget_id_raw = attrs.get("id", name) if attrs else name
|
||||
widget_id = re.sub(r"[^A-Za-z0-9_]", "_", str(widget_id_raw)) or name
|
||||
allowlist = escape(value.get("allowlist", "") or "")
|
||||
denylist = escape(value.get("denylist", "") or "")
|
||||
|
||||
return mark_safe(f'''
|
||||
<div id="{widget_id}_container" class="url-filters-widget">
|
||||
@@ -584,9 +589,9 @@ class URLFiltersWidget(forms.Widget):
|
||||
|
||||
def value_from_datadict(self, data, files, name):
|
||||
return {
|
||||
'allowlist': data.get(f'{name}_allowlist', ''),
|
||||
'denylist': data.get(f'{name}_denylist', ''),
|
||||
'same_domain_only': data.get(f'{name}_same_domain_only') in ('1', 'on', 'true'),
|
||||
"allowlist": data.get(f"{name}_allowlist", ""),
|
||||
"denylist": data.get(f"{name}_denylist", ""),
|
||||
"same_domain_only": data.get(f"{name}_same_domain_only") in ("1", "on", "true"),
|
||||
}
|
||||
|
||||
|
||||
@@ -609,38 +614,38 @@ class InlineTagEditorWidget(TagEditorWidget):
|
||||
# Parse value to get list of tag dicts with id and name
|
||||
tag_data = []
|
||||
if value:
|
||||
if hasattr(value, 'all'): # QuerySet
|
||||
if hasattr(value, "all"): # QuerySet
|
||||
for tag in value.all():
|
||||
tag_data.append({'id': tag.pk, 'name': tag.name})
|
||||
tag_data.sort(key=lambda x: x['name'].lower())
|
||||
tag_data.append({"id": tag.pk, "name": tag.name})
|
||||
tag_data.sort(key=lambda x: x["name"].lower())
|
||||
elif isinstance(value, (list, tuple)):
|
||||
if value and hasattr(value[0], 'name'):
|
||||
if value and hasattr(value[0], "name"):
|
||||
for tag in value:
|
||||
tag_data.append({'id': tag.pk, 'name': tag.name})
|
||||
tag_data.sort(key=lambda x: x['name'].lower())
|
||||
tag_data.append({"id": tag.pk, "name": tag.name})
|
||||
tag_data.sort(key=lambda x: x["name"].lower())
|
||||
|
||||
widget_id_raw = f"inline_tags_{snapshot_id}" if snapshot_id else (attrs.get('id', name) if attrs else name)
|
||||
widget_id_raw = f"inline_tags_{snapshot_id}" if snapshot_id else (attrs.get("id", name) if attrs else name)
|
||||
widget_id = self._normalize_id(widget_id_raw)
|
||||
|
||||
# Build pills HTML with filter links
|
||||
pills_html = ''
|
||||
pills_html = ""
|
||||
for td in tag_data:
|
||||
remove_button = ''
|
||||
remove_button = ""
|
||||
if self.editable:
|
||||
remove_button = (
|
||||
f'<button type="button" class="tag-remove-btn" '
|
||||
f'data-tag-id="{td["id"]}" data-tag-name="{self._escape(td["name"])}">×</button>'
|
||||
)
|
||||
pills_html += f'''
|
||||
<span class="tag-pill" data-tag="{self._escape(td['name'])}" data-tag-id="{td['id']}" style="{self._tag_style(td['name'])}">
|
||||
<a href="/admin/core/snapshot/?tags__id__exact={td['id']}" class="tag-link">{self._escape(td['name'])}</a>
|
||||
<span class="tag-pill" data-tag="{self._escape(td["name"])}" data-tag-id="{td["id"]}" style="{self._tag_style(td["name"])}">
|
||||
<a href="/admin/core/snapshot/?tags__id__exact={td["id"]}" class="tag-link">{self._escape(td["name"])}</a>
|
||||
{remove_button}
|
||||
</span>
|
||||
'''
|
||||
|
||||
tags_json = escape(json.dumps(tag_data))
|
||||
input_html = ''
|
||||
readonly_class = ' readonly' if not self.editable else ''
|
||||
input_html = ""
|
||||
readonly_class = " readonly" if not self.editable else ""
|
||||
if self.editable:
|
||||
input_html = f'''
|
||||
<input type="text"
|
||||
|
||||
@@ -7,7 +7,7 @@ For more information on this file, see
|
||||
https://docs.djangoproject.com/en/2.1/howto/deployment/wsgi/
|
||||
"""
|
||||
|
||||
import archivebox # noqa
|
||||
import archivebox # noqa
|
||||
from archivebox.config.django import setup_django
|
||||
from django.core.wsgi import get_wsgi_application
|
||||
|
||||
|
||||
Reference in New Issue
Block a user