mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 15:27:53 +10:00
remove huey
This commit is contained in:
@@ -9,25 +9,17 @@ from django.core.exceptions import ValidationError
|
||||
from django.urls import reverse, resolve
|
||||
from django.utils import timezone
|
||||
|
||||
from huey_monitor.admin import TaskModel
|
||||
|
||||
from archivebox.config import DATA_DIR
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
from archivebox.misc.paginators import AccelleratedPaginator
|
||||
from archivebox.base_models.admin import BaseModelAdmin
|
||||
from archivebox.hooks import get_extractor_icon
|
||||
|
||||
|
||||
from core.models import ArchiveResult, Snapshot
|
||||
|
||||
|
||||
|
||||
|
||||
def result_url(result: TaskModel) -> str:
|
||||
url = reverse("admin:huey_monitor_taskmodel_change", args=[str(result.id)])
|
||||
return format_html('<a href="{url}" class="fade-in-progress-url">See progress...</a>'.format(url=url))
|
||||
|
||||
|
||||
|
||||
class ArchiveResultInline(admin.TabularInline):
|
||||
name = 'Archive Results Log'
|
||||
model = ArchiveResult
|
||||
@@ -101,9 +93,9 @@ class ArchiveResultInline(admin.TabularInline):
|
||||
|
||||
|
||||
class ArchiveResultAdmin(BaseModelAdmin):
|
||||
list_display = ('id', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
|
||||
list_display = ('id', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor_with_icon', 'cmd_str', 'output_str')
|
||||
sort_fields = ('id', 'created_by', 'created_at', 'extractor', 'status')
|
||||
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary')
|
||||
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'extractor_with_icon')
|
||||
search_fields = ('id', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
||||
fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
|
||||
autocomplete_fields = ['snapshot']
|
||||
@@ -144,17 +136,29 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
def tags_str(self, result):
|
||||
return result.snapshot.tags_str()
|
||||
|
||||
@admin.display(ordering='extractor', description='Extractor')
def extractor_with_icon(self, result):
    """Render the extractor column as the extractor's icon followed by its name.

    The icon is wrapped in a ``<span>`` whose tooltip is the extractor name.
    All three values are passed to ``format_html`` as arguments, so the icon
    is emitted verbatim only if ``get_extractor_icon`` returns a string that
    is already marked safe; otherwise it is HTML-escaped.
    """
    extractor = result.extractor
    return format_html(
        '<span title="{}">{}</span> {}',
        extractor,
        get_extractor_icon(extractor),
        extractor,
    )
|
||||
|
||||
def cmd_str(self, result):
    """Show the command stored on an ArchiveResult inside a ``<pre>`` element.

    A list-valued ``cmd`` is joined with single spaces; any other value is
    displayed via ``str()``.
    """
    cmd = result.cmd
    if isinstance(cmd, list):
        command_line = ' '.join(cmd)
    else:
        command_line = str(cmd)
    return format_html('<pre>{}</pre>', command_line)
|
||||
|
||||
|
||||
def output_str(self, result):
    """Render a link to the result's output followed by the raw ``output`` value.

    The link targets ``/archive/<snapshot timestamp>/<path>``, where ``<path>``
    is ``result.output`` for a succeeded result with a non-empty output and
    ``index.html`` in every other case.
    """
    # Only the status and a non-empty output field are checked here; nothing
    # is looked up on disk.
    has_output = result.status == 'succeeded' and result.output
    output_path = result.output if has_output else 'index.html'
    return format_html(
        '<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
        result.snapshot.timestamp,
        output_path,
        result.output,
    )
|
||||
|
||||
@@ -185,7 +189,7 @@ class ArchiveResultAdmin(BaseModelAdmin):
|
||||
is_hidden = filename.startswith('.')
|
||||
output_str += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
|
||||
|
||||
return output_str + format_html('</code></pre>')
|
||||
return output_str + mark_safe('</code></pre>')
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -35,8 +35,19 @@ def register_admin_site():
|
||||
|
||||
admin.site = archivebox_admin
|
||||
sites.site = archivebox_admin
|
||||
|
||||
# Plugin admin registration is now handled by individual app admins
|
||||
# No longer using archivebox.pm.hook.register_admin()
|
||||
|
||||
|
||||
# Register admin views for each app
|
||||
# (Previously handled by ABX plugin system, now called directly)
|
||||
from core.admin import register_admin as register_core_admin
|
||||
from crawls.admin import register_admin as register_crawls_admin
|
||||
from api.admin import register_admin as register_api_admin
|
||||
from machine.admin import register_admin as register_machine_admin
|
||||
from workers.admin import register_admin as register_workers_admin
|
||||
|
||||
register_core_admin(archivebox_admin)
|
||||
register_crawls_admin(archivebox_admin)
|
||||
register_api_admin(archivebox_admin)
|
||||
register_machine_admin(archivebox_admin)
|
||||
register_workers_admin(archivebox_admin)
|
||||
|
||||
return archivebox_admin
|
||||
|
||||
@@ -25,7 +25,7 @@ from archivebox.workers.tasks import bg_archive_snapshots, bg_add
|
||||
|
||||
from core.models import Tag
|
||||
from core.admin_tags import TagInline
|
||||
from core.admin_archiveresults import ArchiveResultInline, result_url
|
||||
from core.admin_archiveresults import ArchiveResultInline
|
||||
|
||||
|
||||
# GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}
|
||||
@@ -54,10 +54,10 @@ class SnapshotActionForm(ActionForm):
|
||||
class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
list_display = ('created_at', 'title_str', 'status', 'files', 'size', 'url_str')
|
||||
sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
|
||||
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'link_dir', 'available_config_options')
|
||||
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'output_dir')
|
||||
search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
|
||||
list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
|
||||
fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', 'config', 'available_config_options', *readonly_fields[:-1])
|
||||
fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', 'config', *readonly_fields)
|
||||
ordering = ['-created_at']
|
||||
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
|
||||
inlines = [TagInline, ArchiveResultInline]
|
||||
@@ -93,12 +93,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
# self.request = request
|
||||
# return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult'))
|
||||
|
||||
@admin.action(
|
||||
description="Imported Timestamp"
|
||||
)
|
||||
@admin.display(description="Imported Timestamp")
|
||||
def imported_timestamp(self, obj):
|
||||
context = RequestContext(self.request, {
|
||||
'bookmarked_date': obj.bookmarked,
|
||||
'bookmarked_date': obj.bookmarked_at,
|
||||
'timestamp': obj.timestamp,
|
||||
})
|
||||
|
||||
@@ -145,22 +143,15 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
|
||||
def status_info(self, obj):
|
||||
return format_html(
|
||||
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||
'''
|
||||
Archived: {} ({} files {})
|
||||
Favicon: <img src="{}" style="height: 20px"/>
|
||||
Status code: {} <br/>
|
||||
Server: {}
|
||||
Content type: {}
|
||||
Extension: {}
|
||||
''',
|
||||
'✅' if obj.is_archived else '❌',
|
||||
obj.num_outputs,
|
||||
self.size(obj) or '0kb',
|
||||
f'/archive/{obj.timestamp}/favicon.ico',
|
||||
obj.status_code or '-',
|
||||
obj.headers and obj.headers.get('Server') or '-',
|
||||
obj.headers and obj.headers.get('Content-Type') or '-',
|
||||
obj.extension or '-',
|
||||
)
|
||||
|
||||
@@ -184,8 +175,8 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
obj.archive_path,
|
||||
obj.archive_path,
|
||||
obj.archive_path,
|
||||
'fetched' if obj.latest_title or obj.title else 'pending',
|
||||
urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
|
||||
'fetched' if obj.title else 'pending',
|
||||
urldecode(htmldecode(obj.title or ''))[:128] or 'Pending...'
|
||||
) + mark_safe(f' <span class="tags">{tags}</span>')
|
||||
|
||||
@admin.display(
|
||||
@@ -259,14 +250,13 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
description="ℹ️ Get Title"
|
||||
)
|
||||
def update_titles(self, request, queryset):
    """Admin action: queue the selected snapshots for a title and favicon refresh.

    The queryset is handed to ``bg_archive_snapshots`` with ``overwrite=True``
    and the methods restricted to ``title`` and ``favicon``. The value it
    returns is reported back to the user in a success message.
    """
    # Queue snapshots for archiving via the state machine system
    queued = bg_archive_snapshots(
        queryset,
        kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR},
    )
    # NOTE(review): the message presents `queued` as a number of snapshots --
    # confirm that is what bg_archive_snapshots returns.
    messages.success(
        request,
        f"Queued {queued} snapshots for title/favicon update. The orchestrator will process them in the background.",
    )
|
||||
|
||||
@admin.action(
|
||||
@@ -275,11 +265,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
def update_snapshots(self, request, queryset):
    """Admin action: queue the selected snapshots for re-archiving without overwriting.

    The queryset is handed to ``bg_archive_snapshots`` with ``overwrite=False``.
    The value it returns is reported back to the user in a success message.
    """
    queued = bg_archive_snapshots(queryset, kwargs={"overwrite": False, "out_dir": DATA_DIR})

    # NOTE(review): the message presents `queued` as a number of snapshots --
    # confirm that is what bg_archive_snapshots returns.
    messages.success(
        request,
        f"Queued {queued} snapshots for re-archiving. The orchestrator will process them in the background.",
    )
|
||||
|
||||
|
||||
@@ -291,11 +281,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
timestamp = timezone.now().isoformat('T', 'seconds')
|
||||
new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
|
||||
|
||||
result = bg_add({'urls': new_url, 'tag': snapshot.tags_str()})
|
||||
bg_add({'urls': new_url, 'tag': snapshot.tags_str()})
|
||||
|
||||
messages.success(
|
||||
request,
|
||||
mark_safe(f"Creating new fresh snapshots for {queryset.count()} URLs in the background. {result_url(result)}"),
|
||||
f"Creating {queryset.count()} new fresh snapshots. The orchestrator will process them in the background.",
|
||||
)
|
||||
|
||||
@admin.action(
|
||||
@@ -304,11 +294,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
def overwrite_snapshots(self, request, queryset):
    """Admin action: queue the selected snapshots for a full re-archive.

    The queryset is handed to ``bg_archive_snapshots`` with ``overwrite=True``.
    The value it returns is reported back to the user in a success message.
    """
    queued = bg_archive_snapshots(queryset, kwargs={"overwrite": True, "out_dir": DATA_DIR})

    # NOTE(review): the message presents `queued` as a number of snapshots --
    # confirm that is what bg_archive_snapshots returns.
    messages.success(
        request,
        f"Queued {queued} snapshots for full re-archive (overwriting existing). The orchestrator will process them in the background.",
    )
|
||||
|
||||
@admin.action(
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
__package__ = 'archivebox.core'
|
||||
|
||||
import sys
|
||||
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
@@ -10,6 +12,41 @@ class CoreConfig(AppConfig):
|
||||
"""Register the archivebox.core.admin_site as the main django admin site"""
|
||||
from core.admin_site import register_admin_site
|
||||
register_admin_site()
|
||||
|
||||
|
||||
# Auto-start the orchestrator when running the web server
|
||||
self._maybe_start_orchestrator()
|
||||
|
||||
def _maybe_start_orchestrator(self):
    """Start the background orchestrator when this process is a web server.

    Nothing is started when:
    - ``self._is_web_server()`` is false (migrations, shell, tests, ...),
    - ``RUN_ORCHESTRATOR`` is set to ``false`` / ``0`` / ``no`` (any case),
    - this is the ``runserver`` autoreload watcher process, or
    - ``Orchestrator.is_running()`` already reports a running orchestrator.

    Any failure while importing or starting the orchestrator is logged as a
    warning instead of being raised, so it cannot crash the server.
    """
    import logging
    import os

    # Don't start orchestrator during migrations, shell, tests, etc.
    if not self._is_web_server():
        return

    # Explicit opt-out via environment variable
    if os.environ.get('RUN_ORCHESTRATOR', '').lower() in ('false', '0', 'no'):
        return

    # With autoreload, `runserver` runs as two processes: a watcher and a
    # child that actually serves, the child being marked with RUN_MAIN=true.
    # Skip the watcher so the orchestrator is started only once. Under
    # --noreload there is a single process and RUN_MAIN is never set, so that
    # case must fall through and start the orchestrator.
    reloader_active = 'runserver' in sys.argv and '--noreload' not in sys.argv
    if reloader_active and os.environ.get('RUN_MAIN') != 'true':
        return

    try:
        from workers.orchestrator import Orchestrator

        if not Orchestrator.is_running():
            # exit_on_idle=False: do not exit on idle when started by the server
            orchestrator = Orchestrator(exit_on_idle=False)
            orchestrator.start()
    except Exception as e:
        # Deliberately best-effort: the web server must keep booting
        logging.getLogger('archivebox').warning('Failed to auto-start orchestrator: %s', e)
|
||||
|
||||
def _is_web_server(self) -> bool:
    """Report whether the command line looks like a web-server invocation.

    The arguments in ``sys.argv`` are joined with spaces, lower-cased, and
    searched for any of the known server keywords.
    """
    # NOTE(review): this is a substring search over the whole command line, so
    # any argument that merely contains one of these words (e.g. a path or URL
    # with "server" in it) also counts -- confirm that is acceptable.
    command_line = ' '.join(sys.argv).lower()
    for keyword in ('runserver', 'daphne', 'gunicorn', 'uwsgi', 'server'):
        if keyword in command_line:
            return True
    return False
|
||||
|
||||
@@ -23,7 +23,11 @@ from archivebox.config import CONSTANTS
|
||||
from archivebox.misc.system import get_dir_size, atomic_write
|
||||
from archivebox.misc.util import parse_date, base_url, domain as url_domain, to_json, ts_to_date_str, urlencode, htmlencode, urldecode
|
||||
from archivebox.misc.hashing import get_dir_info
|
||||
from archivebox.hooks import ARCHIVE_METHODS_INDEXING_PRECEDENCE
|
||||
from archivebox.hooks import (
|
||||
ARCHIVE_METHODS_INDEXING_PRECEDENCE,
|
||||
get_extractors, get_extractor_name, get_extractor_icon,
|
||||
DEFAULT_EXTRACTOR_ICONS,
|
||||
)
|
||||
from archivebox.base_models.models import (
|
||||
ModelWithUUID, ModelWithSerializers, ModelWithOutputDir,
|
||||
ModelWithConfig, ModelWithNotes, ModelWithHealthStats,
|
||||
@@ -343,45 +347,37 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
def icons(self) -> str:
|
||||
"""Generate HTML icons showing which extractors have succeeded for this snapshot"""
|
||||
from django.utils.html import format_html, mark_safe
|
||||
from collections import defaultdict
|
||||
|
||||
cache_key = f'result_icons:{self.pk}:{(self.downloaded_at or self.modified_at or self.created_at or self.bookmarked_at).timestamp()}'
|
||||
|
||||
def calc_icons():
|
||||
if hasattr(self, '_prefetched_objects_cache') and 'archiveresult_set' in self._prefetched_objects_cache:
|
||||
archive_results = [r for r in self.archiveresult_set.all() if r.status == "succeeded" and r.output]
|
||||
archive_results = {r.extractor: r for r in self.archiveresult_set.all() if r.status == "succeeded" and r.output}
|
||||
else:
|
||||
archive_results = self.archiveresult_set.filter(status="succeeded", output__isnull=False)
|
||||
archive_results = {r.extractor: r for r in self.archiveresult_set.filter(status="succeeded", output__isnull=False)}
|
||||
|
||||
path = self.archive_path
|
||||
canon = self.canonical_outputs()
|
||||
output = ""
|
||||
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a> '
|
||||
icons = {
|
||||
"singlefile": "❶", "wget": "🆆", "dom": "🅷", "pdf": "📄",
|
||||
"screenshot": "💻", "media": "📼", "git": "🅶", "archive_org": "🏛",
|
||||
"readability": "🆁", "mercury": "🅼", "warc": "📦"
|
||||
}
|
||||
exclude = ["favicon", "title", "headers", "htmltotext", "archive_org"]
|
||||
|
||||
extractor_outputs = defaultdict(lambda: None)
|
||||
for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
|
||||
for result in archive_results:
|
||||
if result.extractor == extractor:
|
||||
extractor_outputs[extractor] = result
|
||||
# Get all extractors from hooks system (sorted by numeric prefix)
|
||||
all_extractors = [get_extractor_name(e) for e in get_extractors()]
|
||||
|
||||
for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
|
||||
if extractor not in exclude:
|
||||
existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
||||
output += format_html(output_template, path, canon.get(extractor, ''), str(bool(existing)), extractor, icons.get(extractor, "?"))
|
||||
if extractor == "wget":
|
||||
exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
||||
output += format_html(output_template, path, canon.get("warc", "warc/"), str(bool(exists)), "warc", icons.get("warc", "?"))
|
||||
if extractor == "archive_org":
|
||||
exists = extractor in extractor_outputs and extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
||||
output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon.get("archive_org", ""), str(exists), "archive_org", icons.get("archive_org", "?"))
|
||||
for extractor in all_extractors:
|
||||
result = archive_results.get(extractor)
|
||||
existing = result and result.status == 'succeeded' and result.output
|
||||
icon = get_extractor_icon(extractor)
|
||||
output += format_html(
|
||||
output_template,
|
||||
path,
|
||||
canon.get(extractor, extractor + '/'),
|
||||
str(bool(existing)),
|
||||
extractor,
|
||||
icon
|
||||
)
|
||||
|
||||
return format_html('<span class="files-icons" style="font-size: 1.1em; opacity: 0.8; min-width: 240px; display: inline-block">{}<span>', mark_safe(output))
|
||||
return format_html('<span class="files-icons" style="font-size: 1.1em; opacity: 0.8; min-width: 240px; display: inline-block">{}</span>', mark_safe(output))
|
||||
|
||||
cache_result = cache.get(cache_key)
|
||||
if cache_result:
|
||||
@@ -767,12 +763,11 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
FAILED = 'failed', 'Failed'
|
||||
SKIPPED = 'skipped', 'Skipped'
|
||||
|
||||
EXTRACTOR_CHOICES = (
|
||||
('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'),
|
||||
('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'),
|
||||
('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'),
|
||||
('dom', 'dom'), ('title', 'title'), ('wget', 'wget'),
|
||||
)
|
||||
@classmethod
def get_extractor_choices(cls):
    """Build ``(value, label)`` choice pairs from the discovered hooks (for forms/admin).

    Each entry returned by ``get_extractors()`` is passed through
    ``get_extractor_name()``; the resulting name is used as both the value
    and the label. The pairs are returned as a tuple, in discovery order.
    """
    choices = []
    for hook in get_extractors():
        name = get_extractor_name(hook)
        choices.append((name, name))
    return tuple(choices)
|
||||
|
||||
# Keep AutoField for backward compatibility with 0.7.x databases
|
||||
# UUID field is added separately by migration for new records
|
||||
@@ -783,7 +778,8 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
modified_at = models.DateTimeField(auto_now=True)
|
||||
|
||||
snapshot: Snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE) # type: ignore
|
||||
extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32, blank=False, null=False, db_index=True)
|
||||
# No choices= constraint - extractor names come from plugin system and can be any string
|
||||
extractor = models.CharField(max_length=32, blank=False, null=False, db_index=True)
|
||||
pwd = models.CharField(max_length=256, default=None, null=True, blank=True)
|
||||
cmd = models.JSONField(default=None, null=True, blank=True)
|
||||
cmd_version = models.CharField(max_length=128, default=None, null=True, blank=True)
|
||||
@@ -835,6 +831,25 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
def output_exists(self) -> bool:
    """Return True when ``<snapshot_dir>/<extractor>`` exists on disk."""
    extractor_path = Path(self.snapshot_dir) / self.extractor
    return os.path.exists(extractor_path)
|
||||
|
||||
def embed_path(self) -> Optional[str]:
    """
    Get the relative path used to embed this result's output.

    Resolution order:
      1. the ``output`` field, when it is non-empty (returned as-is; no
         filesystem check is made here),
      2. the snapshot's canonical output stored under ``'<extractor>_path'``,
      3. the extractor's own directory, ``'<extractor>/'``.
    """
    if not self.output:
        canonical = self.snapshot.canonical_outputs()
        extractor_key = f'{self.extractor}_path'
        # Fall back to the extractor directory when no canonical path is listed
        return canonical[extractor_key] if extractor_key in canonical else f'{self.extractor}/'
    return self.output
|
||||
|
||||
def create_output_dir(self):
|
||||
output_dir = Path(self.snapshot_dir) / self.extractor
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -891,6 +906,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
output_dir=extractor_dir,
|
||||
config_objects=config_objects,
|
||||
url=self.snapshot.url,
|
||||
snapshot_id=str(self.snapshot.id),
|
||||
)
|
||||
end_ts = timezone.now()
|
||||
|
||||
@@ -1000,6 +1016,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
hook,
|
||||
output_dir=self.output_dir,
|
||||
config_objects=config_objects,
|
||||
url=self.snapshot.url,
|
||||
snapshot_id=str(self.snapshot.id),
|
||||
extractor=self.extractor,
|
||||
)
|
||||
|
||||
@@ -68,9 +68,6 @@ INSTALLED_APPS = [
|
||||
# 3rd-party apps from PyPI that need to be loaded last
|
||||
"admin_data_views", # handles rendering some convenient automatic read-only views of data in Django admin
|
||||
"django_extensions", # provides Django Debug Toolbar (and other non-debug helpers)
|
||||
"django_huey", # provides multi-queue support for django huey https://github.com/gaiacoop/django-huey
|
||||
"bx_django_utils", # needed for huey_monitor https://github.com/boxine/bx_django_utils
|
||||
"huey_monitor", # adds an admin UI for monitoring background huey tasks https://github.com/boxine/django-huey-monitor
|
||||
]
|
||||
|
||||
|
||||
@@ -215,70 +212,6 @@ MIGRATION_MODULES = {"signal_webhooks": None}
|
||||
# as much as I'd love this to be a UUID or ULID field, it's not supported yet as of Django 5.0
|
||||
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
|
||||
|
||||
HUEY = {
|
||||
"huey_class": "huey.SqliteHuey",
|
||||
"filename": CONSTANTS.QUEUE_DATABASE_FILENAME,
|
||||
"name": "commands",
|
||||
"results": True,
|
||||
"store_none": True,
|
||||
"immediate": False,
|
||||
"utc": True,
|
||||
"consumer": {
|
||||
"workers": 1,
|
||||
"worker_type": "thread",
|
||||
"initial_delay": 0.1, # Smallest polling interval, same as -d.
|
||||
"backoff": 1.15, # Exponential backoff using this rate, -b.
|
||||
"max_delay": 10.0, # Max possible polling interval, -m.
|
||||
"scheduler_interval": 1, # Check schedule every second, -s.
|
||||
"periodic": True, # Enable crontab feature.
|
||||
"check_worker_health": True, # Enable worker health checks.
|
||||
"health_check_interval": 1, # Check worker health every second.
|
||||
},
|
||||
}
|
||||
|
||||
# https://huey.readthedocs.io/en/latest/contrib.html#setting-things-up
|
||||
# https://github.com/gaiacoop/django-huey
|
||||
DJANGO_HUEY = {
|
||||
"default": "commands",
|
||||
"queues": {
|
||||
HUEY["name"]: HUEY.copy(),
|
||||
# more registered here at plugin import-time by BaseQueue.register()
|
||||
# Additional huey queues configured via settings
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class HueyDBRouter:
|
||||
"""
|
||||
A router to store all the Huey result k:v / Huey Monitor models in the queue.sqlite3 database.
|
||||
We keep the databases separate because the queue database receives many more reads/writes per second
|
||||
and we want to avoid single-write lock contention with the main database. Also all the in-progress task
|
||||
data is ephemeral/not-important-long-term. This makes it easier to for the user to clear non-critical
|
||||
temp data by just deleting queue.sqlite3 and leaving index.sqlite3.
|
||||
"""
|
||||
|
||||
route_app_labels = {"huey_monitor", "django_huey", "djhuey"}
|
||||
db_name = "queue"
|
||||
|
||||
def db_for_read(self, model, **hints):
|
||||
if model._meta.app_label in self.route_app_labels:
|
||||
return self.db_name
|
||||
return "default"
|
||||
|
||||
def db_for_write(self, model, **hints):
|
||||
if model._meta.app_label in self.route_app_labels:
|
||||
return self.db_name
|
||||
return "default"
|
||||
|
||||
def allow_relation(self, obj1, obj2, **hints):
|
||||
if obj1._meta.app_label in self.route_app_labels or obj2._meta.app_label in self.route_app_labels:
|
||||
return obj1._meta.app_label == obj2._meta.app_label
|
||||
return None
|
||||
|
||||
def allow_migrate(self, db, app_label, model_name=None, **hints):
|
||||
if app_label in self.route_app_labels:
|
||||
return db == self.db_name
|
||||
return db == "default"
|
||||
|
||||
|
||||
# class FilestoreDBRouter:
|
||||
@@ -311,7 +244,7 @@ class HueyDBRouter:
|
||||
# return db == self.db_name
|
||||
# return db == "default"
|
||||
|
||||
DATABASE_ROUTERS = ["core.settings.HueyDBRouter"]
|
||||
DATABASE_ROUTERS = []
|
||||
|
||||
CACHES = {
|
||||
"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"},
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
from django import template
|
||||
from django.contrib.admin.templatetags.base import InclusionAdminNode
|
||||
|
||||
from django.utils.safestring import mark_safe
|
||||
|
||||
from typing import Union
|
||||
|
||||
from archivebox.hooks import (
|
||||
get_extractor_icon, get_extractor_template, get_extractor_name,
|
||||
)
|
||||
|
||||
|
||||
register = template.Library()
|
||||
|
||||
@@ -44,3 +48,115 @@ def url_replace(context, **kwargs):
|
||||
dict_ = context['request'].GET.copy()
|
||||
dict_.update(**kwargs)
|
||||
return dict_.urlencode()
|
||||
|
||||
|
||||
@register.simple_tag
def extractor_icon(extractor: str) -> str:
    """
    Render the icon for an extractor.

    The value returned by ``get_extractor_icon`` is marked safe, so it is
    inserted into the page without HTML escaping.

    Usage: {% extractor_icon "screenshot" %}
    """
    icon_markup = get_extractor_icon(extractor)
    return mark_safe(icon_markup)
|
||||
|
||||
|
||||
def _render_extractor_template(result, template_type: str) -> str:
    """Render one of an extractor's templates for a single archive result.

    ``template_type`` is forwarded to ``get_extractor_template`` and is one of
    ``'thumbnail'``, ``'embed'`` or ``'fullscreen'``. Returns an empty string
    when the extractor provides no such template or when rendering fails.

    Context variables passed to the template:
        - result: ArchiveResult object
        - snapshot: Parent Snapshot object
        - output_path: Path to output relative to snapshot dir (from embed_path())
        - extractor: Extractor base name
    """
    extractor = get_extractor_name(result.extractor)
    template_str = get_extractor_template(extractor, template_type)
    if not template_str:
        return ''

    # Use embed_path() for the display path (includes canonical paths)
    output_path = result.embed_path() if hasattr(result, 'embed_path') else (result.output or '')

    # Create a mini template and render it with context
    try:
        rendered = template.Template(template_str).render(template.Context({
            'result': result,
            'snapshot': result.snapshot,
            'output_path': output_path,
            'extractor': extractor,
        }))
    except Exception:
        # Best-effort: a broken extractor template must not break the whole
        # page, but leave a trace so the failure can be diagnosed.
        import logging
        logging.getLogger(__name__).warning(
            'Failed to render %s template for extractor %s',
            template_type,
            extractor,
            exc_info=True,
        )
        return ''
    return mark_safe(rendered)


@register.simple_tag(takes_context=True)
def extractor_thumbnail(context, result) -> str:
    """
    Render the thumbnail template for an archive result.

    Usage: {% extractor_thumbnail result %}

    Returns '' when the extractor has no thumbnail template or it fails to render.
    """
    return _render_extractor_template(result, 'thumbnail')


@register.simple_tag(takes_context=True)
def extractor_embed(context, result) -> str:
    """
    Render the embed iframe template for an archive result.

    Usage: {% extractor_embed result %}

    Returns '' when the extractor has no embed template or it fails to render.
    """
    return _render_extractor_template(result, 'embed')


@register.simple_tag(takes_context=True)
def extractor_fullscreen(context, result) -> str:
    """
    Render the fullscreen template for an archive result.

    Usage: {% extractor_fullscreen result %}

    Returns '' when the extractor has no fullscreen template or it fails to render.
    """
    return _render_extractor_template(result, 'fullscreen')
|
||||
|
||||
|
||||
@register.filter
def extractor_name(value: str) -> str:
    """
    Template filter returning ``get_extractor_name(value)``: the extractor's
    base name (presumably the name without its numeric ordering prefix;
    see the hooks module for the exact rule).

    Usage: {{ result.extractor|extractor_name }}
    """
    base_name = get_extractor_name(value)
    return base_name
|
||||
|
||||
@@ -8,7 +8,7 @@ from django.views.generic.base import RedirectView
|
||||
from archivebox.misc.serve_static import serve_static
|
||||
|
||||
from core.admin_site import archivebox_admin
|
||||
from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
|
||||
from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView, live_progress_view
|
||||
|
||||
from workers.views import JobsDashboardView
|
||||
|
||||
@@ -43,8 +43,10 @@ urlpatterns = [
|
||||
|
||||
|
||||
path('accounts/', include('django.contrib.auth.urls')),
|
||||
|
||||
path('admin/live-progress/', live_progress_view, name='live_progress'),
|
||||
path('admin/', archivebox_admin.urls),
|
||||
|
||||
|
||||
path("api/", include('api.urls'), name='api'),
|
||||
|
||||
path('health/', HealthCheckView.as_view(), name='healthcheck'),
|
||||
|
||||
@@ -34,6 +34,7 @@ from archivebox.search import query_search_index
|
||||
from core.models import Snapshot
|
||||
from core.forms import AddLinkForm
|
||||
from crawls.models import Seed, Crawl
|
||||
from archivebox.hooks import get_extractors, get_extractor_name
|
||||
|
||||
|
||||
|
||||
@@ -54,8 +55,10 @@ class SnapshotView(View):
|
||||
@staticmethod
|
||||
def render_live_index(request, snapshot):
|
||||
TITLE_LOADING_MSG = 'Not yet archived...'
|
||||
HIDDEN_RESULTS = ('favicon', 'headers', 'title', 'htmltotext', 'warc', 'archive_org')
|
||||
|
||||
# Dict of extractor -> ArchiveResult object
|
||||
archiveresult_objects = {}
|
||||
# Dict of extractor -> result info dict (for template compatibility)
|
||||
archiveresults = {}
|
||||
|
||||
results = snapshot.archiveresult_set.all()
|
||||
@@ -65,18 +68,21 @@ class SnapshotView(View):
|
||||
abs_path = result.snapshot_dir / (embed_path or 'None')
|
||||
|
||||
if (result.status == 'succeeded'
|
||||
and (result.extractor not in HIDDEN_RESULTS)
|
||||
and embed_path
|
||||
and os.access(abs_path, os.R_OK)
|
||||
and abs_path.exists()):
|
||||
if os.path.isdir(abs_path) and not any(abs_path.glob('*.*')):
|
||||
continue
|
||||
|
||||
# Store the full ArchiveResult object for template tags
|
||||
archiveresult_objects[result.extractor] = result
|
||||
|
||||
result_info = {
|
||||
'name': result.extractor,
|
||||
'path': embed_path,
|
||||
'ts': ts_to_date_str(result.end_ts),
|
||||
'size': abs_path.stat().st_size or '?',
|
||||
'result': result, # Include the full object for template tags
|
||||
}
|
||||
archiveresults[result.extractor] = result_info
|
||||
|
||||
@@ -101,11 +107,11 @@ class SnapshotView(View):
|
||||
}
|
||||
|
||||
|
||||
# iterate through all the files in the snapshot dir and add the biggest ones to1 the result list
|
||||
# iterate through all the files in the snapshot dir and add the biggest ones to the result list
|
||||
snap_dir = Path(snapshot.output_dir)
|
||||
if not os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK):
|
||||
return {}
|
||||
|
||||
|
||||
for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
|
||||
extension = result_file.suffix.lstrip('.').lower()
|
||||
if result_file.is_dir() or result_file.name.startswith('.') or extension not in allowed_extensions:
|
||||
@@ -121,12 +127,16 @@ class SnapshotView(View):
|
||||
'path': result_file.relative_to(snap_dir),
|
||||
'ts': ts_to_date_str(result_file.stat().st_mtime or 0),
|
||||
'size': file_size,
|
||||
'result': None, # No ArchiveResult object for filesystem-discovered files
|
||||
}
|
||||
|
||||
preferred_types = ('singlefile', 'screenshot', 'wget', 'dom', 'media', 'pdf', 'readability', 'mercury')
|
||||
# Get available extractors from hooks (sorted by numeric prefix for ordering)
|
||||
# Convert to base names for display ordering
|
||||
all_extractors = [get_extractor_name(e) for e in get_extractors()]
|
||||
preferred_types = tuple(all_extractors)
|
||||
all_types = preferred_types + tuple(result_type for result_type in archiveresults.keys() if result_type not in preferred_types)
|
||||
|
||||
best_result = {'path': 'None'}
|
||||
best_result = {'path': 'None', 'result': None}
|
||||
for result_type in preferred_types:
|
||||
if result_type in archiveresults:
|
||||
best_result = archiveresults[result_type]
|
||||
@@ -157,6 +167,7 @@ class SnapshotView(View):
|
||||
'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
|
||||
'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
|
||||
'best_result': best_result,
|
||||
'snapshot': snapshot, # Pass the snapshot object for template tags
|
||||
}
|
||||
return render(template_name='core/snapshot_live.html', request=request, context=context)
|
||||
|
||||
@@ -436,7 +447,7 @@ class AddView(UserPassesTestMixin, FormView):
|
||||
def form_valid(self, form):
|
||||
urls = form.cleaned_data["url"]
|
||||
print(f'[+] Adding URL: {urls}')
|
||||
parser = form.cleaned_data["parser"]
|
||||
parser = form.cleaned_data.get("parser", "auto") # default to auto-detect parser
|
||||
tag = form.cleaned_data["tag"]
|
||||
depth = 0 if form.cleaned_data["depth"] == "0" else 1
|
||||
extractors = ','.join(form.cleaned_data["archive_methods"])
|
||||
@@ -452,18 +463,19 @@ class AddView(UserPassesTestMixin, FormView):
|
||||
if extractors:
|
||||
input_kwargs.update({"extractors": extractors})
|
||||
|
||||
|
||||
|
||||
from archivebox.config.permissions import HOSTNAME
|
||||
|
||||
|
||||
|
||||
|
||||
# 1. save the provided urls to sources/2024-11-05__23-59-59__web_ui_add_by_user_<user_pk>.txt
|
||||
sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__web_ui_add_by_user_{self.request.user.pk}.txt'
|
||||
sources_file.write_text(urls if isinstance(urls, str) else '\n'.join(urls))
|
||||
|
||||
|
||||
# 2. create a new Seed pointing to the sources/2024-11-05__23-59-59__web_ui_add_by_user_<user_pk>.txt
|
||||
timestamp = timezone.now().strftime("%Y-%m-%d__%H-%M-%S")
|
||||
seed = Seed.from_file(
|
||||
sources_file,
|
||||
label=f'{self.request.user.username}@{HOSTNAME}{self.request.path}',
|
||||
label=f'{self.request.user.username}@{HOSTNAME}{self.request.path} {timestamp}',
|
||||
parser=parser,
|
||||
tag=tag,
|
||||
created_by=self.request.user.pk,
|
||||
@@ -472,7 +484,7 @@ class AddView(UserPassesTestMixin, FormView):
|
||||
# 'INDEX_ONLY': index_only,
|
||||
# 'OVERWRITE': False,
|
||||
'DEPTH': depth,
|
||||
'EXTRACTORS': parser,
|
||||
'EXTRACTORS': extractors or '',
|
||||
# 'DEFAULT_PERSONA': persona or 'Default',
|
||||
})
|
||||
# 3. create a new Crawl pointing to the Seed
|
||||
@@ -490,10 +502,15 @@ class AddView(UserPassesTestMixin, FormView):
|
||||
self.request,
|
||||
mark_safe(f"Adding {rough_url_count} URLs in the background. (refresh in a minute start seeing results) {crawl.admin_change_url}"),
|
||||
)
|
||||
# if not bg:
|
||||
# from workers.orchestrator import Orchestrator
|
||||
# orchestrator = Orchestrator(exit_on_idle=True, max_concurrent_actors=4)
|
||||
# orchestrator.start()
|
||||
|
||||
# Start orchestrator in background to process the queued crawl
|
||||
try:
|
||||
from archivebox.workers.tasks import ensure_orchestrator_running
|
||||
ensure_orchestrator_running()
|
||||
except Exception as e:
|
||||
# Orchestrator may already be running via supervisord, or fail to start
|
||||
# This is not fatal - the crawl will be processed when orchestrator runs
|
||||
print(f'[!] Failed to start orchestrator: {e}')
|
||||
|
||||
return redirect(crawl.admin_change_url)
|
||||
|
||||
@@ -513,6 +530,141 @@ class HealthCheckView(View):
|
||||
)
|
||||
|
||||
|
||||
import json
|
||||
from django.http import JsonResponse
|
||||
|
||||
def live_progress_view(request):
    """Return a JSON snapshot of archiving progress for the admin progress monitor.

    The payload reports whether the orchestrator is running and its worker
    count, per-status counts for crawls, snapshots and archive results, and a
    nested ``active_crawls`` list: up to 10 queued/started crawls, each with up
    to 5 queued/started snapshots, each with up to 5 started extractors.

    Any exception raised while building the payload is caught and reported as
    an HTTP 500 JSON response carrying the error message and traceback next to
    zeroed counters, so the response always has the same set of keys.
    """
    try:
        from datetime import timedelta

        from workers.orchestrator import Orchestrator
        from crawls.models import Crawl
        from core.models import Snapshot, ArchiveResult

        def count_with_status(queryset, status):
            # Number of rows in `queryset` whose status equals `status`.
            return queryset.filter(status=status).count()

        def percent(done, total):
            # Whole-number percentage, truncated; 0 when there is nothing to do.
            return int((done / total) * 100) if total > 0 else 0

        # Orchestrator status: only ask for the worker count when it is running
        is_running = Orchestrator.is_running()
        worker_count = 0
        if is_running:
            worker_count = Orchestrator().get_total_worker_count()

        crawl_status = Crawl.StatusChoices
        snapshot_status = Snapshot.StatusChoices
        result_status = ArchiveResult.StatusChoices

        # Top-level counters (dict order is the order of the emitted JSON keys)
        payload = {
            'orchestrator_running': is_running,
            'total_workers': worker_count,
            'crawls_pending': count_with_status(Crawl.objects, crawl_status.QUEUED),
            'crawls_started': count_with_status(Crawl.objects, crawl_status.STARTED),
            # crawls created within the last 24 hours
            'crawls_recent': Crawl.objects.filter(created_at__gte=timezone.now() - timedelta(days=1)).count(),
            'snapshots_pending': count_with_status(Snapshot.objects, snapshot_status.QUEUED),
            'snapshots_started': count_with_status(Snapshot.objects, snapshot_status.STARTED),
            'archiveresults_pending': count_with_status(ArchiveResult.objects, result_status.QUEUED),
            'archiveresults_started': count_with_status(ArchiveResult.objects, result_status.STARTED),
            'archiveresults_succeeded': count_with_status(ArchiveResult.objects, result_status.SUCCEEDED),
            'archiveresults_failed': count_with_status(ArchiveResult.objects, result_status.FAILED),
        }

        # Hierarchy: active crawls -> their active snapshots -> running extractors
        crawl_summaries = []
        busy_crawls = Crawl.objects.filter(
            status__in=[crawl_status.QUEUED, crawl_status.STARTED]
        ).order_by('-modified_at')[:10]
        for crawl in busy_crawls:
            snapshots = Snapshot.objects.filter(crawl=crawl)
            n_snapshots = snapshots.count()
            n_sealed = count_with_status(snapshots, snapshot_status.SEALED)
            n_queued = count_with_status(snapshots, snapshot_status.QUEUED)

            snapshot_summaries = []
            busy_snapshots = snapshots.filter(
                status__in=[snapshot_status.QUEUED, snapshot_status.STARTED]
            ).order_by('-modified_at')[:5]
            for snapshot in busy_snapshots:
                results = ArchiveResult.objects.filter(snapshot=snapshot)
                n_results = results.count()
                n_succeeded = count_with_status(results, result_status.SUCCEEDED)
                n_failed = count_with_status(results, result_status.FAILED)
                n_waiting = count_with_status(results, result_status.QUEUED)

                # Most recently started extractors that are still running
                running = []
                for ar in results.filter(status=result_status.STARTED).order_by('-start_ts')[:5]:
                    running.append({
                        'id': str(ar.id),
                        'extractor': ar.extractor,
                        'status': ar.status,
                        'started': ar.start_ts.isoformat() if ar.start_ts else None,
                        'progress': 50,  # fixed placeholder value; not derived from the extractor
                    })

                snapshot_summaries.append({
                    'id': str(snapshot.id),
                    'url': snapshot.url[:80],
                    'status': snapshot.status,
                    'started': snapshot.modified_at.isoformat() if snapshot.modified_at else None,
                    # a snapshot counts finished extractors whether they succeeded or failed
                    'progress': percent(n_succeeded + n_failed, n_results),
                    'total_extractors': n_results,
                    'completed_extractors': n_succeeded,
                    'failed_extractors': n_failed,
                    'pending_extractors': n_waiting,
                    'active_extractors': running,
                })

            crawl_summaries.append({
                'id': str(crawl.id),
                'label': str(crawl)[:60],
                'status': crawl.status,
                'started': crawl.modified_at.isoformat() if crawl.modified_at else None,
                # crawl progress is the share of its snapshots that are sealed
                'progress': percent(n_sealed, n_snapshots),
                'max_depth': crawl.max_depth,
                'total_snapshots': n_snapshots,
                'completed_snapshots': n_sealed,
                'failed_snapshots': 0,  # always reported as 0 here
                'pending_snapshots': n_queued,
                'active_snapshots': snapshot_summaries,
            })

        payload['active_crawls'] = crawl_summaries
        payload['server_time'] = timezone.now().isoformat()
        return JsonResponse(payload)
    except Exception as exc:
        import traceback

        # Same keys as the success payload, zeroed, plus the error details
        failure = {
            'error': str(exc),
            'traceback': traceback.format_exc(),
            'orchestrator_running': False,
        }
        failure.update(dict.fromkeys((
            'total_workers',
            'crawls_pending',
            'crawls_started',
            'crawls_recent',
            'snapshots_pending',
            'snapshots_started',
            'archiveresults_pending',
            'archiveresults_started',
            'archiveresults_succeeded',
            'archiveresults_failed',
        ), 0))
        failure['active_crawls'] = []
        failure['server_time'] = timezone.now().isoformat()
        return JsonResponse(failure, status=500)
|
||||
|
||||
|
||||
def find_config_section(key: str) -> str:
|
||||
CONFIGS = get_all_configs()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user