mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
fix lint
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
__package__ = 'archivebox.core'
|
||||
|
||||
from django.contrib import admin
|
||||
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
|
||||
|
||||
import archivebox
|
||||
|
||||
class ArchiveBoxAdmin(admin.AdminSite):
|
||||
site_header = 'ArchiveBox'
|
||||
@@ -20,7 +20,6 @@ archivebox_admin = ArchiveBoxAdmin()
|
||||
# patch admin with methods to add data views (implemented by admin_data_views package)
|
||||
# https://github.com/MrThearMan/django-admin-data-views
|
||||
# https://mrthearman.github.io/django-admin-data-views/setup/
|
||||
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
|
||||
archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
|
||||
archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
|
||||
archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
|
||||
|
||||
@@ -26,7 +26,7 @@ from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
|
||||
from archivebox.workers.tasks import bg_archive_snapshots, bg_add
|
||||
|
||||
from archivebox.core.models import Tag, Snapshot, ArchiveResult
|
||||
from archivebox.core.admin_archiveresults import ArchiveResultInline, render_archiveresults_list
|
||||
from archivebox.core.admin_archiveresults import render_archiveresults_list
|
||||
from archivebox.core.widgets import TagEditorWidget, InlineTagEditorWidget
|
||||
|
||||
|
||||
@@ -712,8 +712,6 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
description="🔁 Redo Failed"
|
||||
)
|
||||
def update_snapshots(self, request, queryset):
|
||||
count = queryset.count()
|
||||
|
||||
queued = bg_archive_snapshots(queryset, kwargs={"overwrite": False, "out_dir": DATA_DIR})
|
||||
|
||||
messages.success(
|
||||
@@ -741,8 +739,6 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
description="🔄 Redo"
|
||||
)
|
||||
def overwrite_snapshots(self, request, queryset):
|
||||
count = queryset.count()
|
||||
|
||||
queued = bg_archive_snapshots(queryset, kwargs={"overwrite": True, "out_dir": DATA_DIR})
|
||||
|
||||
messages.success(
|
||||
|
||||
@@ -60,7 +60,7 @@ class CoreConfig(AppConfig):
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
Process.cleanup_stale_running()
|
||||
machine = Machine.current()
|
||||
Machine.current()
|
||||
|
||||
if not Orchestrator.is_running():
|
||||
Orchestrator(exit_on_idle=False).start()
|
||||
|
||||
@@ -8,11 +8,10 @@ https://docs.djangoproject.com/en/stable/howto/deployment/asgi/
|
||||
"""
|
||||
|
||||
from archivebox.config.django import setup_django
|
||||
from django.core.asgi import get_asgi_application
|
||||
|
||||
setup_django(in_memory_db=False, check_db=True)
|
||||
|
||||
from django.core.asgi import get_asgi_application
|
||||
|
||||
# Standard Django ASGI application (no websockets/channels needed)
|
||||
application = get_asgi_application()
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ from archivebox.misc.util import URL_REGEX
|
||||
from taggit.utils import edit_string_for_tags, parse_tags
|
||||
from archivebox.base_models.admin import KeyValueWidget
|
||||
from archivebox.crawls.schedule_utils import validate_schedule
|
||||
from archivebox.hooks import get_plugins
|
||||
|
||||
DEPTH_CHOICES = (
|
||||
('0', 'depth = 0 (archive just these URLs)'),
|
||||
@@ -15,7 +16,6 @@ DEPTH_CHOICES = (
|
||||
('4', 'depth = 4 (+ URLs four hops away)'),
|
||||
)
|
||||
|
||||
from archivebox.hooks import get_plugins
|
||||
|
||||
def get_plugin_choices():
|
||||
"""Get available extractor plugins from discovered hooks."""
|
||||
@@ -210,15 +210,18 @@ class AddLinkForm(forms.Form):
|
||||
|
||||
return schedule
|
||||
|
||||
|
||||
class TagWidgetMixin:
|
||||
def format_value(self, value):
|
||||
if value is not None and not isinstance(value, str):
|
||||
value = edit_string_for_tags(value)
|
||||
return super().format_value(value)
|
||||
|
||||
|
||||
class TagWidget(TagWidgetMixin, forms.TextInput):
|
||||
pass
|
||||
|
||||
|
||||
class TagField(forms.CharField):
|
||||
widget = TagWidget
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ from archivebox.config import VERSION
|
||||
from archivebox.config.version import get_COMMIT_HASH
|
||||
from archivebox.core.host_utils import (
|
||||
build_admin_url,
|
||||
build_api_url,
|
||||
build_web_url,
|
||||
get_api_host,
|
||||
get_admin_host,
|
||||
|
||||
@@ -7,10 +7,8 @@ def forwards_func(apps, schema_editor):
|
||||
SnapshotModel = apps.get_model("core", "Snapshot")
|
||||
TagModel = apps.get_model("core", "Tag")
|
||||
|
||||
db_alias = schema_editor.connection.alias
|
||||
snapshots = SnapshotModel.objects.all()
|
||||
for snapshot in snapshots:
|
||||
tags = snapshot.tags
|
||||
tag_set = (
|
||||
set(tag.strip() for tag in (snapshot.tags_old or '').split(','))
|
||||
)
|
||||
@@ -23,9 +21,7 @@ def forwards_func(apps, schema_editor):
|
||||
|
||||
def reverse_func(apps, schema_editor):
|
||||
SnapshotModel = apps.get_model("core", "Snapshot")
|
||||
TagModel = apps.get_model("core", "Tag")
|
||||
|
||||
db_alias = schema_editor.connection.alias
|
||||
snapshots = SnapshotModel.objects.all()
|
||||
for snapshot in snapshots:
|
||||
tags = snapshot.tags.values_list("name", flat=True)
|
||||
|
||||
@@ -43,7 +43,7 @@ def forwards_func(apps, schema_editor):
|
||||
try:
|
||||
with open(out_dir / "index.json", "r") as f:
|
||||
fs_index = json.load(f)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
history = fs_index["history"]
|
||||
|
||||
@@ -234,7 +234,6 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
tag_has_data = cursor.fetchone()[0] > 0
|
||||
|
||||
if tag_has_data:
|
||||
tag_cols = get_table_columns('core_tag')
|
||||
cursor.execute("PRAGMA table_info(core_tag)")
|
||||
tag_id_type = None
|
||||
for row in cursor.fetchall():
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# Creates a default crawl for v0.7.2 migrated snapshots and makes crawl_id NOT NULL
|
||||
|
||||
from django.db import migrations, models
|
||||
import uuid
|
||||
|
||||
|
||||
def create_default_crawl_and_assign_snapshots(apps, schema_editor):
|
||||
|
||||
@@ -347,7 +347,7 @@ def copy_archiveresult_data_to_process(apps, schema_editor):
|
||||
migrated_count += 1
|
||||
|
||||
if i == 0:
|
||||
print(f'DEBUG 0027: Linked ArchiveResult to Process')
|
||||
print('DEBUG 0027: Linked ArchiveResult to Process')
|
||||
|
||||
except Exception as e:
|
||||
print(f'✗ Error migrating ArchiveResult {ar_id}: {e}')
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
__package__ = 'archivebox.core'
|
||||
|
||||
from typing import Optional, Dict, Iterable, Any, List, TYPE_CHECKING
|
||||
from typing import Optional, Dict, Iterable, Any, List
|
||||
from archivebox.uuid_compat import uuid7
|
||||
from datetime import datetime, timedelta
|
||||
from django_stubs_ext.db.models import TypedModelMeta
|
||||
@@ -12,19 +12,18 @@ from pathlib import Path
|
||||
from statemachine import State, registry
|
||||
|
||||
from django.db import models
|
||||
from django.db.models import QuerySet, Value, Case, When, IntegerField
|
||||
from django.db.models import QuerySet
|
||||
from django.utils.functional import cached_property
|
||||
from django.utils.text import slugify
|
||||
from django.utils import timezone
|
||||
from django.core.cache import cache
|
||||
from django.urls import reverse, reverse_lazy
|
||||
from django.urls import reverse_lazy
|
||||
from django.contrib import admin
|
||||
from django.conf import settings
|
||||
|
||||
from archivebox.config import CONSTANTS
|
||||
from archivebox.misc.system import get_dir_size, atomic_write
|
||||
from archivebox.misc.util import parse_date, base_url, domain as url_domain, to_json, ts_to_date_str, urlencode, htmlencode, urldecode
|
||||
from archivebox.misc.hashing import get_dir_info
|
||||
from archivebox.misc.util import parse_date, domain as url_domain, to_json, ts_to_date_str, urlencode, htmlencode, urldecode
|
||||
from archivebox.hooks import (
|
||||
get_plugins, get_plugin_name, get_plugin_icon,
|
||||
)
|
||||
@@ -186,7 +185,7 @@ class SnapshotQuerySet(models.QuerySet):
|
||||
for pattern in patterns:
|
||||
try:
|
||||
qsearch |= query_search_index(pattern)
|
||||
except:
|
||||
except BaseException:
|
||||
raise SystemExit(2)
|
||||
return self.all() & qsearch
|
||||
|
||||
@@ -344,8 +343,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
@property
|
||||
def process_set(self):
|
||||
"""Get all Process objects related to this snapshot's ArchiveResults."""
|
||||
import json
|
||||
import json
|
||||
from archivebox.machine.models import Process
|
||||
return Process.objects.filter(archiveresult__snapshot_id=self.id)
|
||||
|
||||
@@ -458,13 +455,13 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
|
||||
if not old_dir.exists() or old_dir == new_dir:
|
||||
# No migration needed
|
||||
print(f"[DEBUG _fs_migrate] Returning None (early return)")
|
||||
print("[DEBUG _fs_migrate] Returning None (early return)")
|
||||
return None
|
||||
|
||||
if new_dir.exists():
|
||||
# New directory already exists (files already copied), but we still need cleanup
|
||||
# Return cleanup info so old directory can be cleaned up
|
||||
print(f"[DEBUG _fs_migrate] Returning cleanup info (new_dir exists)")
|
||||
print("[DEBUG _fs_migrate] Returning cleanup info (new_dir exists)")
|
||||
return (old_dir, new_dir)
|
||||
|
||||
new_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -499,7 +496,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
|
||||
# Schedule cleanup AFTER transaction commits successfully
|
||||
# This ensures DB changes are committed before we delete old files
|
||||
from django.db import transaction
|
||||
transaction.on_commit(lambda: self._cleanup_old_migration_dir(old_dir, new_dir))
|
||||
|
||||
# Return cleanup info for manual cleanup if needed (when called directly)
|
||||
@@ -594,8 +590,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
domain = self.extract_domain_from_url(self.url)
|
||||
|
||||
return (
|
||||
CONSTANTS.DATA_DIR / 'users' / username / 'snapshots' /
|
||||
date_str / domain / str(self.id)
|
||||
CONSTANTS.DATA_DIR / 'users' / username / 'snapshots'
|
||||
/ date_str / domain / str(self.id)
|
||||
)
|
||||
else:
|
||||
# Unknown version - use current
|
||||
@@ -670,7 +666,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
print(f"[DEBUG load_from_directory] Found via fuzzy match: {snapshot.timestamp}")
|
||||
return snapshot
|
||||
elif candidates.count() > 1:
|
||||
print(f"[DEBUG load_from_directory] Multiple fuzzy matches, using first")
|
||||
print("[DEBUG load_from_directory] Multiple fuzzy matches, using first")
|
||||
return candidates.first()
|
||||
print(f"[DEBUG load_from_directory] NOT FOUND (fuzzy): {url} @ {timestamp}")
|
||||
return None
|
||||
@@ -767,7 +763,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
ts_int = int(float(ts))
|
||||
# 1995-01-01 to 2035-12-31
|
||||
return 788918400 <= ts_int <= 2082758400
|
||||
except:
|
||||
except (TypeError, ValueError, OverflowError):
|
||||
return False
|
||||
|
||||
index_valid = is_valid_timestamp(index_timestamp) if index_timestamp else False
|
||||
@@ -850,7 +846,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
try:
|
||||
with open(json_path) as f:
|
||||
index_data = json.load(f)
|
||||
except:
|
||||
except (OSError, TypeError, ValueError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
# Merge title
|
||||
@@ -929,7 +925,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
if result_data.get('start_ts'):
|
||||
try:
|
||||
start_ts = parser.parse(result_data['start_ts'])
|
||||
except:
|
||||
except (TypeError, ValueError, OverflowError):
|
||||
pass
|
||||
|
||||
if (plugin, start_ts) in existing:
|
||||
@@ -940,7 +936,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
if result_data.get('end_ts'):
|
||||
try:
|
||||
end_ts = parser.parse(result_data['end_ts'])
|
||||
except:
|
||||
except (TypeError, ValueError, OverflowError):
|
||||
pass
|
||||
|
||||
# Support both 'output' (legacy) and 'output_str' (new JSONL) field names
|
||||
@@ -957,7 +953,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
start_ts=start_ts,
|
||||
end_ts=end_ts,
|
||||
)
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def write_index_json(self):
|
||||
@@ -1176,7 +1172,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
|
||||
try:
|
||||
shutil.move(str(snapshot_dir), str(dest))
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
@@ -1208,7 +1204,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
try:
|
||||
cls._merge_snapshots(snapshots)
|
||||
merged += 1
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return merged
|
||||
@@ -1244,7 +1240,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
|
||||
try:
|
||||
shutil.rmtree(dup_dir)
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Merge tags
|
||||
@@ -1615,7 +1611,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
"""
|
||||
import re
|
||||
from django.utils import timezone
|
||||
from archivebox.misc.util import parse_date
|
||||
from archivebox.base_models.models import get_or_create_system_user_pk
|
||||
from archivebox.config.common import GENERAL_CONFIG
|
||||
|
||||
@@ -2125,7 +2120,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
|
||||
def to_dict(self, extended: bool = False) -> Dict[str, Any]:
|
||||
"""Convert Snapshot to a dictionary (replacement for Link._asdict())"""
|
||||
from archivebox.misc.util import ts_to_date_str
|
||||
from archivebox.core.host_utils import build_snapshot_url
|
||||
|
||||
result = {
|
||||
@@ -2283,9 +2277,9 @@ class SnapshotMachine(BaseStateMachine):
|
||||
|
||||
# Tick Event (polled by workers)
|
||||
tick = (
|
||||
queued.to.itself(unless='can_start') |
|
||||
queued.to(started, cond='can_start') |
|
||||
started.to(sealed, cond='is_finished')
|
||||
queued.to.itself(unless='can_start')
|
||||
| queued.to(started, cond='can_start')
|
||||
| started.to(sealed, cond='is_finished')
|
||||
)
|
||||
|
||||
# Manual event (can also be triggered by last ArchiveResult finishing)
|
||||
@@ -2783,7 +2777,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
Updates status/output fields, queues discovered URLs, and triggers indexing.
|
||||
"""
|
||||
from django.utils import timezone
|
||||
from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, run_hook, is_background_hook
|
||||
from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, run_hook
|
||||
from archivebox.config.configset import get_config
|
||||
|
||||
# Get merged config with proper context
|
||||
@@ -3190,16 +3184,16 @@ class ArchiveResultMachine(BaseStateMachine):
|
||||
# queued → skipped (if exceeded max attempts)
|
||||
# started → backoff → started (retry)
|
||||
tick = (
|
||||
queued.to(skipped, cond='is_exceeded_max_attempts') | # Check skip first
|
||||
queued.to.itself(unless='can_start') |
|
||||
queued.to(started, cond='can_start') |
|
||||
started.to(succeeded, cond='is_succeeded') |
|
||||
started.to(failed, cond='is_failed') |
|
||||
started.to(skipped, cond='is_skipped') |
|
||||
started.to(backoff, cond='is_backoff') |
|
||||
backoff.to(skipped, cond='is_exceeded_max_attempts') | # Check skip from backoff too
|
||||
backoff.to.itself(unless='can_start') |
|
||||
backoff.to(started, cond='can_start')
|
||||
queued.to(skipped, cond='is_exceeded_max_attempts') # Check skip first
|
||||
| queued.to.itself(unless='can_start')
|
||||
| queued.to(started, cond='can_start')
|
||||
| started.to(succeeded, cond='is_succeeded')
|
||||
| started.to(failed, cond='is_failed')
|
||||
| started.to(skipped, cond='is_skipped')
|
||||
| started.to(backoff, cond='is_backoff')
|
||||
| backoff.to(skipped, cond='is_exceeded_max_attempts') # Check skip from backoff too
|
||||
| backoff.to.itself(unless='can_start')
|
||||
| backoff.to(started, cond='can_start')
|
||||
# Removed redundant transitions: backoff.to(succeeded/failed/skipped)
|
||||
# Reason: backoff should always retry→started, then started→final states
|
||||
)
|
||||
@@ -3241,8 +3235,8 @@ class ArchiveResultMachine(BaseStateMachine):
|
||||
"""Check if we should backoff and retry later."""
|
||||
# Backoff if status is still started (plugin didn't complete) and output_str is empty
|
||||
return (
|
||||
self.archiveresult.status == ArchiveResult.StatusChoices.STARTED and
|
||||
not self.archiveresult.output_str
|
||||
self.archiveresult.status == ArchiveResult.StatusChoices.STARTED
|
||||
and not self.archiveresult.output_str
|
||||
)
|
||||
|
||||
def is_finished(self) -> bool:
|
||||
@@ -3286,7 +3280,6 @@ class ArchiveResultMachine(BaseStateMachine):
|
||||
|
||||
@started.enter
|
||||
def enter_started(self):
|
||||
from archivebox.machine.models import NetworkInterface
|
||||
|
||||
# Update Process with network interface
|
||||
if self.archiveresult.process_id:
|
||||
|
||||
@@ -6,6 +6,7 @@ import inspect
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf.locale.en import formats as en_formats # type: ignore
|
||||
from django.utils.crypto import get_random_string
|
||||
|
||||
import archivebox
|
||||
@@ -13,6 +14,7 @@ import archivebox
|
||||
from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS # noqa
|
||||
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG, STORAGE_CONFIG # noqa
|
||||
from archivebox.core.host_utils import normalize_base_url, get_admin_base_url, get_api_base_url
|
||||
from .settings_logging import SETTINGS_LOGGING
|
||||
|
||||
|
||||
IS_MIGRATING = "makemigrations" in sys.argv[:3] or "migrate" in sys.argv[:3]
|
||||
@@ -54,8 +56,8 @@ INSTALLED_APPS = [
|
||||
"django.contrib.staticfiles",
|
||||
"django.contrib.admin",
|
||||
# 3rd-party apps from PyPI
|
||||
"signal_webhooks", # handles REST API outbound webhooks https://github.com/MrThearMan/django-signal-webhooks
|
||||
"django_object_actions", # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions
|
||||
"signal_webhooks", # handles REST API outbound webhooks
|
||||
"django_object_actions", # provides easy Django Admin action buttons on change views
|
||||
# Our ArchiveBox-provided apps (use fully qualified names)
|
||||
# NOTE: Order matters! Apps with migrations that depend on other apps must come AFTER their dependencies
|
||||
# "archivebox.config", # ArchiveBox config settings (no models, not a real Django app)
|
||||
@@ -117,7 +119,6 @@ try:
|
||||
|
||||
try:
|
||||
# Try to import django-auth-ldap (will fail if not installed)
|
||||
import django_auth_ldap
|
||||
from django_auth_ldap.config import LDAPSearch
|
||||
import ldap
|
||||
|
||||
@@ -414,9 +415,6 @@ DATETIME_FORMAT = "Y-m-d h:i:s A"
|
||||
SHORT_DATETIME_FORMAT = "Y-m-d h:i:s A"
|
||||
TIME_ZONE = CONSTANTS.TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent
|
||||
|
||||
|
||||
from django.conf.locale.en import formats as en_formats # type: ignore
|
||||
|
||||
en_formats.DATETIME_FORMAT = DATETIME_FORMAT # monkey patch en_format default with our preferred format
|
||||
en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT
|
||||
|
||||
@@ -425,9 +423,6 @@ en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT
|
||||
### Logging Settings
|
||||
################################################################################
|
||||
|
||||
|
||||
from .settings_logging import SETTINGS_LOGGING, LOGS_DIR, ERROR_LOG
|
||||
|
||||
LOGGING = SETTINGS_LOGGING
|
||||
|
||||
|
||||
|
||||
@@ -5,8 +5,6 @@ import os
|
||||
import tempfile
|
||||
import logging
|
||||
|
||||
import pydantic
|
||||
import django.template
|
||||
|
||||
from archivebox.config import CONSTANTS
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Tests for the core views, especially AddView."""
|
||||
|
||||
import importlib
|
||||
import os
|
||||
import django
|
||||
from unittest.mock import patch
|
||||
@@ -8,13 +9,14 @@ from unittest.mock import patch
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
|
||||
django.setup()
|
||||
|
||||
from django.test import TestCase, Client
|
||||
from django.contrib.auth.models import User
|
||||
from django.urls import reverse
|
||||
|
||||
from archivebox.crawls.models import Crawl, CrawlSchedule
|
||||
from archivebox.core.models import Tag
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
TestCase = importlib.import_module('django.test').TestCase
|
||||
Client = importlib.import_module('django.test').Client
|
||||
User = importlib.import_module('django.contrib.auth.models').User
|
||||
reverse = importlib.import_module('django.urls').reverse
|
||||
Crawl = importlib.import_module('archivebox.crawls.models').Crawl
|
||||
CrawlSchedule = importlib.import_module('archivebox.crawls.models').CrawlSchedule
|
||||
Tag = importlib.import_module('archivebox.core.models').Tag
|
||||
SERVER_CONFIG = importlib.import_module('archivebox.config.common').SERVER_CONFIG
|
||||
|
||||
|
||||
class AddViewTests(TestCase):
|
||||
@@ -252,7 +254,7 @@ class AddViewTests(TestCase):
|
||||
def test_add_staff_admin_custom_config_is_allowed(self):
|
||||
"""Admin users can override crawl config."""
|
||||
self.client.logout()
|
||||
admin_user = User.objects.create_user(
|
||||
User.objects.create_user(
|
||||
username='adminuser',
|
||||
password='adminpass123',
|
||||
email='admin@example.com',
|
||||
|
||||
@@ -10,7 +10,7 @@ from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from django.shortcuts import render, redirect
|
||||
from django.http import HttpRequest, HttpResponse, Http404, HttpResponseForbidden
|
||||
from django.http import JsonResponse, HttpRequest, HttpResponse, Http404, HttpResponseForbidden
|
||||
from django.utils.html import format_html, mark_safe
|
||||
from django.views import View
|
||||
from django.views.generic.list import ListView
|
||||
@@ -24,9 +24,8 @@ from django.utils.decorators import method_decorator
|
||||
from admin_data_views.typing import TableContext, ItemContext
|
||||
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
|
||||
|
||||
import archivebox
|
||||
from archivebox.config import CONSTANTS, CONSTANTS_CONFIG, DATA_DIR, VERSION
|
||||
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
|
||||
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG
|
||||
from archivebox.config.configset import get_flat_config, get_config, get_all_configs
|
||||
from archivebox.misc.util import base_url, htmlencode, ts_to_date_str, urldecode
|
||||
from archivebox.misc.serve_static import serve_static_with_byterange_support
|
||||
@@ -35,6 +34,9 @@ from archivebox.search import query_search_index
|
||||
|
||||
from archivebox.core.models import Snapshot
|
||||
from archivebox.core.host_utils import build_snapshot_url
|
||||
from archivebox.core.forms import AddLinkForm
|
||||
from archivebox.crawls.models import Crawl
|
||||
from archivebox.hooks import get_enabled_plugins, get_plugin_name
|
||||
|
||||
|
||||
def _files_index_target(snapshot: Snapshot, archivefile: str | None) -> str:
|
||||
@@ -49,12 +51,6 @@ def _files_index_target(snapshot: Snapshot, archivefile: str | None) -> str:
|
||||
return target
|
||||
|
||||
|
||||
from archivebox.core.forms import AddLinkForm
|
||||
from archivebox.crawls.models import Crawl
|
||||
from archivebox.hooks import get_enabled_plugins, get_plugin_name
|
||||
|
||||
|
||||
|
||||
class HomepageView(View):
|
||||
def get(self, request):
|
||||
if request.user.is_authenticated:
|
||||
@@ -1066,10 +1062,6 @@ class HealthCheckView(View):
|
||||
status=200
|
||||
)
|
||||
|
||||
|
||||
import json
|
||||
from django.http import JsonResponse
|
||||
|
||||
def live_progress_view(request):
|
||||
"""Simple JSON endpoint for live progress status - used by admin progress monitor."""
|
||||
try:
|
||||
@@ -1077,7 +1069,6 @@ def live_progress_view(request):
|
||||
from archivebox.crawls.models import Crawl
|
||||
from archivebox.core.models import Snapshot, ArchiveResult
|
||||
from archivebox.machine.models import Process, Machine
|
||||
from django.db.models import Case, When, Value, IntegerField
|
||||
|
||||
# Get orchestrator status
|
||||
orchestrator_running = Orchestrator.is_running()
|
||||
@@ -1133,7 +1124,6 @@ def live_progress_view(request):
|
||||
})
|
||||
|
||||
# Build hierarchical active crawls with nested snapshots and archive results
|
||||
from django.db.models import Prefetch
|
||||
|
||||
running_workers = Process.objects.filter(
|
||||
machine=machine,
|
||||
@@ -1387,7 +1377,7 @@ def find_config_default(key: str) -> str:
|
||||
return default_val
|
||||
|
||||
def find_config_type(key: str) -> str:
|
||||
from typing import get_type_hints, ClassVar
|
||||
from typing import ClassVar
|
||||
CONFIGS = get_all_configs()
|
||||
|
||||
for config in CONFIGS.values():
|
||||
@@ -1430,7 +1420,6 @@ def key_is_safe(key: str) -> bool:
|
||||
|
||||
def find_config_source(key: str, merged_config: dict) -> str:
|
||||
"""Determine where a config value comes from."""
|
||||
import os
|
||||
from archivebox.machine.models import Machine
|
||||
|
||||
# Check if it's from archivebox.machine.config
|
||||
@@ -1464,12 +1453,11 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
||||
# Get merged config that includes Machine.config overrides
|
||||
try:
|
||||
from archivebox.machine.models import Machine
|
||||
machine = Machine.current()
|
||||
Machine.current()
|
||||
merged_config = get_config()
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
# Fallback if Machine model not available
|
||||
merged_config = get_config()
|
||||
machine = None
|
||||
|
||||
rows = {
|
||||
"Section": [],
|
||||
@@ -1525,7 +1513,6 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
||||
|
||||
@render_with_item_view
|
||||
def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
|
||||
import os
|
||||
from archivebox.machine.models import Machine
|
||||
from archivebox.config.configset import BaseConfigSet
|
||||
|
||||
|
||||
@@ -343,20 +343,17 @@ class InlineTagEditorWidget(TagEditorWidget):
|
||||
snapshot_id = snapshot_id or self.snapshot_id
|
||||
|
||||
# Parse value to get list of tag dicts with id and name
|
||||
tags = []
|
||||
tag_data = []
|
||||
if value:
|
||||
if hasattr(value, 'all'): # QuerySet
|
||||
for tag in value.all():
|
||||
tag_data.append({'id': tag.pk, 'name': tag.name})
|
||||
tag_data.sort(key=lambda x: x['name'].lower())
|
||||
tags = [t['name'] for t in tag_data]
|
||||
elif isinstance(value, (list, tuple)):
|
||||
if value and hasattr(value[0], 'name'):
|
||||
for tag in value:
|
||||
tag_data.append({'id': tag.pk, 'name': tag.name})
|
||||
tag_data.sort(key=lambda x: x['name'].lower())
|
||||
tags = [t['name'] for t in tag_data]
|
||||
|
||||
widget_id_raw = f"inline_tags_{snapshot_id}" if snapshot_id else (attrs.get('id', name) if attrs else name)
|
||||
widget_id = self._normalize_id(widget_id_raw)
|
||||
|
||||
@@ -9,9 +9,8 @@ https://docs.djangoproject.com/en/2.1/howto/deployment/wsgi/
|
||||
|
||||
import archivebox # noqa
|
||||
from archivebox.config.django import setup_django
|
||||
from django.core.wsgi import get_wsgi_application
|
||||
|
||||
setup_django(in_memory_db=False, check_db=True)
|
||||
|
||||
from django.core.wsgi import get_wsgi_application
|
||||
|
||||
application = get_wsgi_application()
|
||||
|
||||
Reference in New Issue
Block a user