This commit is contained in:
Nick Sweeting
2026-03-23 03:58:32 -07:00
parent 268856bcfb
commit b749b26c5d
286 changed files with 21704 additions and 13480 deletions

View File

@@ -9,7 +9,7 @@
# in a universe that seems indifferent to us."
# --Norbert Wiener
__package__ = 'archivebox'
__package__ = "archivebox"
import os
import sys
@@ -22,11 +22,12 @@ from abx_plugins import get_plugins_dir
class _ReconfigurableStream(Protocol):
def reconfigure(self, *, line_buffering: bool) -> object: ...
# Force unbuffered output for real-time logs
if hasattr(sys.stdout, 'reconfigure'):
if hasattr(sys.stdout, "reconfigure"):
cast(_ReconfigurableStream, sys.stdout).reconfigure(line_buffering=True)
cast(_ReconfigurableStream, sys.stderr).reconfigure(line_buffering=True)
os.environ['PYTHONUNBUFFERED'] = '1'
os.environ["PYTHONUNBUFFERED"] = "1"
ASCII_LOGO = """
█████╗ ██████╗ ██████╗██╗ ██╗██╗██╗ ██╗███████╗ ██████╗ ██████╗ ██╗ ██╗
@@ -44,48 +45,51 @@ PACKAGE_DIR = Path(__file__).resolve().parent
# if str(PACKAGE_DIR) not in sys.path:
# sys.path.append(str(PACKAGE_DIR))
os.environ['DJANGO_SETTINGS_MODULE'] = 'archivebox.core.settings'
os.environ['TZ'] = 'UTC'
os.environ["DJANGO_SETTINGS_MODULE"] = "archivebox.core.settings"
os.environ["TZ"] = "UTC"
# detect ArchiveBox user's UID/GID based on data dir ownership
from .config.permissions import drop_privileges # noqa
from .config.permissions import drop_privileges # noqa
drop_privileges()
from .misc.checks import check_not_root, check_not_inside_source_dir, check_io_encoding # noqa
from .misc.checks import check_not_root, check_not_inside_source_dir, check_io_encoding # noqa
check_not_root()
check_not_inside_source_dir()
check_io_encoding()
# Install monkey patches for third-party libraries
from .misc.monkey_patches import * # noqa
from .misc.monkey_patches import * # noqa
# Plugin directories
BUILTIN_PLUGINS_DIR = Path(get_plugins_dir()).resolve()
USER_PLUGINS_DIR = Path(
os.environ.get('ARCHIVEBOX_USER_PLUGINS_DIR')
or os.environ.get('USER_PLUGINS_DIR')
or os.environ.get('DATA_DIR', os.getcwd())
) / 'custom_plugins'
USER_PLUGINS_DIR = (
Path(
os.environ.get("ARCHIVEBOX_USER_PLUGINS_DIR") or os.environ.get("USER_PLUGINS_DIR") or os.environ.get("DATA_DIR", os.getcwd()),
)
/ "custom_plugins"
)
# These are kept for backwards compatibility with existing code
# that checks for plugins. The new hook system uses discover_hooks()
ALL_PLUGINS = {
'builtin': BUILTIN_PLUGINS_DIR,
'user': USER_PLUGINS_DIR,
"builtin": BUILTIN_PLUGINS_DIR,
"user": USER_PLUGINS_DIR,
}
LOADED_PLUGINS = ALL_PLUGINS
# Setup basic config, constants, paths, and version
from .config.constants import CONSTANTS # noqa
from .config.paths import PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
from .config.version import VERSION # noqa
from .config.constants import CONSTANTS # noqa
from .config.paths import PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
from .config.version import VERSION # noqa
# Set MACHINE_ID env var so hook scripts can use it
os.environ.setdefault('MACHINE_ID', CONSTANTS.MACHINE_ID)
os.environ.setdefault("MACHINE_ID", CONSTANTS.MACHINE_ID)
__version__ = VERSION
__author__ = 'ArchiveBox'
__license__ = 'MIT'
__author__ = "ArchiveBox"
__license__ = "MIT"
ASCII_ICON = """
██████████████████████████████████████████████████████████████████████████████████████████████████

View File

@@ -1,8 +1,9 @@
#!/usr/bin/env python3
"""This is the entrypoint for python -m archivebox ..."""
__package__ = 'archivebox'
import archivebox # noqa # make sure monkey patches are applied before anything else
__package__ = "archivebox"
import archivebox # noqa # make sure monkey patches are applied before anything else
import sys
from .cli import main
@@ -15,5 +16,5 @@ ASCII_LOGO_MINI = r"""
/_/ \_\_| \___|_| |_|_| \_/ \___|____/ \___/_/\_\
"""
if __name__ == '__main__':
if __name__ == "__main__":
main(args=sys.argv[1:], stdin=sys.stdin)

View File

@@ -1 +1 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
from django.contrib import admin
from django.http import HttpRequest
@@ -11,57 +11,81 @@ from archivebox.api.models import APIToken
class APITokenAdmin(BaseModelAdmin):
list_display = ('created_at', 'id', 'created_by', 'token_redacted', 'expires')
sort_fields = ('id', 'created_at', 'created_by', 'expires')
readonly_fields = ('created_at', 'modified_at')
search_fields = ('id', 'created_by__username', 'token')
list_display = ("created_at", "id", "created_by", "token_redacted", "expires")
sort_fields = ("id", "created_at", "created_by", "expires")
readonly_fields = ("created_at", "modified_at")
search_fields = ("id", "created_by__username", "token")
fieldsets = (
('Token', {
'fields': ('token', 'expires'),
'classes': ('card',),
}),
('Owner', {
'fields': ('created_by',),
'classes': ('card',),
}),
('Timestamps', {
'fields': ('created_at', 'modified_at'),
'classes': ('card',),
}),
(
"Token",
{
"fields": ("token", "expires"),
"classes": ("card",),
},
),
(
"Owner",
{
"fields": ("created_by",),
"classes": ("card",),
},
),
(
"Timestamps",
{
"fields": ("created_at", "modified_at"),
"classes": ("card",),
},
),
)
list_filter = ('created_by',)
ordering = ['-created_at']
list_filter = ("created_by",)
ordering = ["-created_at"]
list_per_page = 100
class CustomWebhookAdmin(WebhookAdmin, BaseModelAdmin):
list_display = ('created_at', 'created_by', 'id', *WebhookAdmin.list_display)
sort_fields = ('created_at', 'created_by', 'id', 'referenced_model', 'endpoint', 'last_success', 'last_error')
readonly_fields = ('created_at', 'modified_at', *WebhookAdmin.readonly_fields)
list_display = ("created_at", "created_by", "id", *WebhookAdmin.list_display)
sort_fields = ("created_at", "created_by", "id", "referenced_model", "endpoint", "last_success", "last_error")
readonly_fields = ("created_at", "modified_at", *WebhookAdmin.readonly_fields)
fieldsets = (
('Webhook', {
'fields': ('name', 'signal', 'referenced_model', 'endpoint'),
'classes': ('card', 'wide'),
}),
('Authentication', {
'fields': ('auth_token',),
'classes': ('card',),
}),
('Status', {
'fields': ('enabled', 'last_success', 'last_error'),
'classes': ('card',),
}),
('Owner', {
'fields': ('created_by',),
'classes': ('card',),
}),
('Timestamps', {
'fields': ('created_at', 'modified_at'),
'classes': ('card',),
}),
(
"Webhook",
{
"fields": ("name", "signal", "referenced_model", "endpoint"),
"classes": ("card", "wide"),
},
),
(
"Authentication",
{
"fields": ("auth_token",),
"classes": ("card",),
},
),
(
"Status",
{
"fields": ("enabled", "last_success", "last_error"),
"classes": ("card",),
},
),
(
"Owner",
{
"fields": ("created_by",),
"classes": ("card",),
},
),
(
"Timestamps",
{
"fields": ("created_at", "modified_at"),
"classes": ("card",),
},
),
)
def lookup_allowed(self, lookup: str, value: str, request: HttpRequest | None = None) -> bool:

View File

@@ -1,13 +1,14 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
from django.apps import AppConfig
class APIConfig(AppConfig):
name = 'archivebox.api'
label = 'api'
name = "archivebox.api"
label = "api"
def register_admin(admin_site):
from archivebox.api.admin import register_admin
register_admin(admin_site)

View File

@@ -1,6 +1,5 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
from typing import Optional
from datetime import timedelta
from django.utils import timezone
@@ -14,7 +13,7 @@ from ninja.errors import HttpError
def get_or_create_api_token(user: User | None):
from archivebox.api.models import APIToken
if user and user.is_superuser:
api_tokens = APIToken.objects.filter(created_by_id=user.pk, expires__gt=timezone.now())
if api_tokens.exists():
@@ -34,18 +33,18 @@ def get_or_create_api_token(user: User | None):
def auth_using_token(token: str | None, request: HttpRequest | None = None) -> User | None:
"""Given an API token string, check if a corresponding non-expired APIToken exists, and return its user"""
from archivebox.api.models import APIToken # lazy import model to avoid loading it at urls.py import time
from archivebox.api.models import APIToken # lazy import model to avoid loading it at urls.py import time
user: User | None = None
submitted_empty_form = str(token).strip() in ('string', '', 'None', 'null')
submitted_empty_form = str(token).strip() in ("string", "", "None", "null")
if not submitted_empty_form:
try:
api_token = APIToken.objects.get(token=token)
if api_token.is_valid() and isinstance(api_token.created_by, User):
user = api_token.created_by
if request is not None:
setattr(request, '_api_token', api_token)
setattr(request, "_api_token", api_token)
except APIToken.DoesNotExist:
pass
@@ -55,8 +54,8 @@ def auth_using_token(token: str | None, request: HttpRequest | None = None) -> U
def auth_using_password(username: str | None, password: str | None, request: HttpRequest | None = None) -> User | None:
"""Given a username and password, check if they are valid and return the corresponding user"""
user: User | None = None
submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None))
submitted_empty_form = (username, password) in (("string", "string"), ("", ""), (None, None))
if not submitted_empty_form:
authenticated_user = authenticate(
username=username,
@@ -73,34 +72,40 @@ def auth_using_password(username: str | None, password: str | None, request: Htt
def _require_superuser(user: User | None, request: HttpRequest, auth_method: str) -> User | None:
if user and user.pk:
request.user = user
setattr(request, '_api_auth_method', auth_method)
setattr(request, "_api_auth_method", auth_method)
if not user.is_superuser:
raise HttpError(403, 'Valid credentials but User does not have permission (make sure user.is_superuser=True)')
raise HttpError(403, "Valid credentials but User does not have permission (make sure user.is_superuser=True)")
return user
### Django-Ninja-Provided Auth Methods
class HeaderTokenAuth(APIKeyHeader):
"""Allow authenticating by passing X-API-Key=xyz as a request header"""
param_name = "X-ArchiveBox-API-Key"
def authenticate(self, request: HttpRequest, key: Optional[str]) -> User | None:
def authenticate(self, request: HttpRequest, key: str | None) -> User | None:
return _require_superuser(auth_using_token(token=key, request=request), request, self.__class__.__name__)
class BearerTokenAuth(HttpBearer):
"""Allow authenticating by passing Bearer=xyz as a request header"""
def authenticate(self, request: HttpRequest, token: str) -> User | None:
return _require_superuser(auth_using_token(token=token, request=request), request, self.__class__.__name__)
class QueryParamTokenAuth(APIKeyQuery):
"""Allow authenticating by passing api_key=xyz as a GET/POST query parameter"""
param_name = "api_key"
def authenticate(self, request: HttpRequest, key: Optional[str]) -> User | None:
def authenticate(self, request: HttpRequest, key: str | None) -> User | None:
return _require_superuser(auth_using_token(token=key, request=request), request, self.__class__.__name__)
class UsernameAndPasswordAuth(HttpBasicAuth):
"""Allow authenticating by passing username & password via HTTP Basic Authentication (not recommended)"""
@@ -111,25 +116,28 @@ class UsernameAndPasswordAuth(HttpBasicAuth):
self.__class__.__name__,
)
class DjangoSessionAuth:
"""Allow authenticating with existing Django session cookies (same-origin only)."""
def __call__(self, request: HttpRequest) -> User | None:
return self.authenticate(request)
def authenticate(self, request: HttpRequest, **kwargs) -> User | None:
user = getattr(request, 'user', None)
user = getattr(request, "user", None)
if isinstance(user, User) and user.is_authenticated:
setattr(request, '_api_auth_method', self.__class__.__name__)
setattr(request, "_api_auth_method", self.__class__.__name__)
if not user.is_superuser:
raise HttpError(403, 'Valid session but User does not have permission (make sure user.is_superuser=True)')
raise HttpError(403, "Valid session but User does not have permission (make sure user.is_superuser=True)")
return user
return None
### Enabled Auth Methods
API_AUTH_METHODS = [
HeaderTokenAuth(),
BearerTokenAuth(),
QueryParamTokenAuth(),
QueryParamTokenAuth(),
# django_auth_superuser, # django admin cookie auth, not secure to use with csrf=False
]

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
from django.http import HttpResponse
@@ -10,8 +10,8 @@ class ApiCorsMiddleware:
self.get_response = get_response
def __call__(self, request):
if request.path.startswith('/api/'):
if request.method == 'OPTIONS' and request.META.get('HTTP_ACCESS_CONTROL_REQUEST_METHOD'):
if request.path.startswith("/api/"):
if request.method == "OPTIONS" and request.META.get("HTTP_ACCESS_CONTROL_REQUEST_METHOD"):
response = HttpResponse(status=204)
return self._add_cors_headers(request, response)
@@ -21,14 +21,12 @@ class ApiCorsMiddleware:
return self.get_response(request)
def _add_cors_headers(self, request, response):
origin = request.META.get('HTTP_ORIGIN')
origin = request.META.get("HTTP_ORIGIN")
if not origin:
return response
response['Access-Control-Allow-Origin'] = '*'
response['Access-Control-Allow-Methods'] = 'GET, POST, PUT, PATCH, DELETE, OPTIONS'
response['Access-Control-Allow-Headers'] = (
'Authorization, X-ArchiveBox-API-Key, Content-Type, X-CSRFToken'
)
response['Access-Control-Max-Age'] = '600'
response["Access-Control-Allow-Origin"] = "*"
response["Access-Control-Allow-Methods"] = "GET, POST, PUT, PATCH, DELETE, OPTIONS"
response["Access-Control-Allow-Headers"] = "Authorization, X-ArchiveBox-API-Key, Content-Type, X-CSRFToken"
response["Access-Control-Max-Age"] = "600"
return response

View File

@@ -13,11 +13,10 @@ import signal_webhooks.utils
class Migration(migrations.Migration):
initial = True
dependencies = [
('auth', '0012_alter_user_first_name_max_length'),
("auth", "0012_alter_user_first_name_max_length"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
@@ -75,55 +74,165 @@ class Migration(migrations.Migration):
reverse_sql="""
DROP TABLE IF EXISTS api_outboundwebhook;
DROP TABLE IF EXISTS api_apitoken;
"""
""",
),
],
state_operations=[
migrations.CreateModel(
name='APIToken',
name="APIToken",
fields=[
('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('modified_at', models.DateTimeField(auto_now=True)),
('token', models.CharField(default=archivebox.api.models.generate_secret_token, max_length=32, unique=True)),
('expires', models.DateTimeField(blank=True, null=True)),
('created_by', models.ForeignKey(default=get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
("id", models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
("created_at", models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
("modified_at", models.DateTimeField(auto_now=True)),
("token", models.CharField(default=archivebox.api.models.generate_secret_token, max_length=32, unique=True)),
("expires", models.DateTimeField(blank=True, null=True)),
(
"created_by",
models.ForeignKey(
default=get_or_create_system_user_pk,
on_delete=django.db.models.deletion.CASCADE,
to=settings.AUTH_USER_MODEL,
),
),
],
options={
'verbose_name': 'API Key',
'verbose_name_plural': 'API Keys',
'app_label': 'api',
"verbose_name": "API Key",
"verbose_name_plural": "API Keys",
"app_label": "api",
},
),
migrations.CreateModel(
name='OutboundWebhook',
name="OutboundWebhook",
fields=[
('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('modified_at', models.DateTimeField(auto_now=True)),
('name', models.CharField(db_index=True, help_text='Webhook name.', max_length=255, unique=True, verbose_name='name')),
('signal', models.CharField(choices=[('CREATE', 'Create'), ('UPDATE', 'Update'), ('DELETE', 'Delete'), ('M2M', 'M2M changed'), ('CREATE_OR_UPDATE', 'Create or Update'), ('CREATE_OR_DELETE', 'Create or Delete'), ('CREATE_OR_M2M', 'Create or M2M changed'), ('UPDATE_OR_DELETE', 'Update or Delete'), ('UPDATE_OR_M2M', 'Update or M2M changed'), ('DELETE_OR_M2M', 'Delete or M2M changed'), ('CREATE_UPDATE_OR_DELETE', 'Create, Update or Delete'), ('CREATE_UPDATE_OR_M2M', 'Create, Update or M2M changed'), ('CREATE_DELETE_OR_M2M', 'Create, Delete or M2M changed'), ('UPDATE_DELETE_OR_M2M', 'Update, Delete or M2M changed'), ('CREATE_UPDATE_DELETE_OR_M2M', 'Create, Update or Delete, or M2M changed')], help_text='Signal the webhook fires to.', max_length=255, verbose_name='signal')),
('ref', models.CharField(db_index=True, help_text='Dot import notation to the model the webhook is for.', max_length=1023, validators=[signal_webhooks.utils.model_from_reference], verbose_name='referenced model')),
('endpoint', models.URLField(help_text='Target endpoint for this webhook.', max_length=2047, verbose_name='endpoint')),
('headers', models.JSONField(blank=True, default=dict, help_text='Headers to send with the webhook request.', validators=[signal_webhooks.utils.is_dict], verbose_name='headers')),
('auth_token', signal_webhooks.fields.TokenField(blank=True, default='', help_text='Authentication token to use in an Authorization header.', max_length=8000, validators=[signal_webhooks.utils.decode_cipher_key], verbose_name='authentication token')),
('enabled', models.BooleanField(default=True, help_text='Is this webhook enabled?', verbose_name='enabled')),
('keep_last_response', models.BooleanField(default=False, help_text='Should the webhook keep a log of the latest response it got?', verbose_name='keep last response')),
('created', models.DateTimeField(auto_now_add=True, help_text='When the webhook was created.', verbose_name='created')),
('updated', models.DateTimeField(auto_now=True, help_text='When the webhook was last updated.', verbose_name='updated')),
('last_response', models.CharField(blank=True, default='', help_text='Latest response to this webhook.', max_length=8000, verbose_name='last response')),
('last_success', models.DateTimeField(default=None, help_text='When the webhook last succeeded.', null=True, verbose_name='last success')),
('last_failure', models.DateTimeField(default=None, help_text='When the webhook last failed.', null=True, verbose_name='last failure')),
('created_by', models.ForeignKey(default=get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
("id", models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
("created_at", models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
("modified_at", models.DateTimeField(auto_now=True)),
(
"name",
models.CharField(db_index=True, help_text="Webhook name.", max_length=255, unique=True, verbose_name="name"),
),
(
"signal",
models.CharField(
choices=[
("CREATE", "Create"),
("UPDATE", "Update"),
("DELETE", "Delete"),
("M2M", "M2M changed"),
("CREATE_OR_UPDATE", "Create or Update"),
("CREATE_OR_DELETE", "Create or Delete"),
("CREATE_OR_M2M", "Create or M2M changed"),
("UPDATE_OR_DELETE", "Update or Delete"),
("UPDATE_OR_M2M", "Update or M2M changed"),
("DELETE_OR_M2M", "Delete or M2M changed"),
("CREATE_UPDATE_OR_DELETE", "Create, Update or Delete"),
("CREATE_UPDATE_OR_M2M", "Create, Update or M2M changed"),
("CREATE_DELETE_OR_M2M", "Create, Delete or M2M changed"),
("UPDATE_DELETE_OR_M2M", "Update, Delete or M2M changed"),
("CREATE_UPDATE_DELETE_OR_M2M", "Create, Update or Delete, or M2M changed"),
],
help_text="Signal the webhook fires to.",
max_length=255,
verbose_name="signal",
),
),
(
"ref",
models.CharField(
db_index=True,
help_text="Dot import notation to the model the webhook is for.",
max_length=1023,
validators=[signal_webhooks.utils.model_from_reference],
verbose_name="referenced model",
),
),
(
"endpoint",
models.URLField(help_text="Target endpoint for this webhook.", max_length=2047, verbose_name="endpoint"),
),
(
"headers",
models.JSONField(
blank=True,
default=dict,
help_text="Headers to send with the webhook request.",
validators=[signal_webhooks.utils.is_dict],
verbose_name="headers",
),
),
(
"auth_token",
signal_webhooks.fields.TokenField(
blank=True,
default="",
help_text="Authentication token to use in an Authorization header.",
max_length=8000,
validators=[signal_webhooks.utils.decode_cipher_key],
verbose_name="authentication token",
),
),
("enabled", models.BooleanField(default=True, help_text="Is this webhook enabled?", verbose_name="enabled")),
(
"keep_last_response",
models.BooleanField(
default=False,
help_text="Should the webhook keep a log of the latest response it got?",
verbose_name="keep last response",
),
),
(
"created",
models.DateTimeField(auto_now_add=True, help_text="When the webhook was created.", verbose_name="created"),
),
(
"updated",
models.DateTimeField(auto_now=True, help_text="When the webhook was last updated.", verbose_name="updated"),
),
(
"last_response",
models.CharField(
blank=True,
default="",
help_text="Latest response to this webhook.",
max_length=8000,
verbose_name="last response",
),
),
(
"last_success",
models.DateTimeField(
default=None,
help_text="When the webhook last succeeded.",
null=True,
verbose_name="last success",
),
),
(
"last_failure",
models.DateTimeField(
default=None,
help_text="When the webhook last failed.",
null=True,
verbose_name="last failure",
),
),
(
"created_by",
models.ForeignKey(
default=get_or_create_system_user_pk,
on_delete=django.db.models.deletion.CASCADE,
to=settings.AUTH_USER_MODEL,
),
),
],
options={
'verbose_name': 'API Outbound Webhook',
'app_label': 'api',
"verbose_name": "API Outbound Webhook",
"app_label": "api",
},
),
migrations.AddConstraint(
model_name='outboundwebhook',
constraint=models.UniqueConstraint(fields=['ref', 'endpoint'], name='prevent_duplicate_hooks_api_outboundwebhook'),
model_name="outboundwebhook",
constraint=models.UniqueConstraint(fields=["ref", "endpoint"], name="prevent_duplicate_hooks_api_outboundwebhook"),
),
],
),

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
import secrets
from archivebox.uuid_compat import uuid7
@@ -25,7 +25,7 @@ class APIToken(models.Model):
expires = models.DateTimeField(null=True, blank=True)
class Meta(TypedModelMeta):
app_label = 'api'
app_label = "api"
verbose_name = "API Key"
verbose_name_plural = "API Keys"
@@ -34,7 +34,7 @@ class APIToken(models.Model):
@property
def token_redacted(self):
return f'************{self.token[-4:]}'
return f"************{self.token[-4:]}"
def is_valid(self, for_date=None):
return not self.expires or self.expires >= (for_date or timezone.now())
@@ -47,8 +47,8 @@ class OutboundWebhook(WebhookBase):
modified_at = models.DateTimeField(auto_now=True)
class Meta(WebhookBase.Meta):
app_label = 'api'
verbose_name = 'API Outbound Webhook'
app_label = "api"
verbose_name = "API Outbound Webhook"
def __str__(self) -> str:
return f'[{self.id}] {self.ref} -> {self.endpoint}'
return f"[{self.id}] {self.ref} -> {self.endpoint}"

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
from django.urls import path
from django.views.generic.base import RedirectView
@@ -6,12 +6,10 @@ from django.views.generic.base import RedirectView
from .v1_api import urls as v1_api_urls
urlpatterns = [
path("", RedirectView.as_view(url='/api/v1/docs')),
path("v1/", RedirectView.as_view(url='/api/v1/docs')),
path("v1/", v1_api_urls),
path("v1", RedirectView.as_view(url='/api/v1/docs')),
path("", RedirectView.as_view(url="/api/v1/docs")),
path("v1/", RedirectView.as_view(url="/api/v1/docs")),
path("v1/", v1_api_urls),
path("v1", RedirectView.as_view(url="/api/v1/docs")),
# ... v2 can be added here ...
# path("v2/", v2_api_urls),
# path("v2", RedirectView.as_view(url='/api/v2/docs')),

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
from io import StringIO
@@ -20,9 +20,9 @@ from archivebox.api.auth import API_AUTH_METHODS
from archivebox.api.models import APIToken
COMMIT_HASH = get_COMMIT_HASH() or 'unknown'
COMMIT_HASH = get_COMMIT_HASH() or "unknown"
html_description=f'''
html_description = f"""
<h3>Welcome to your ArchiveBox server's REST API <code>[v1 ALPHA]</code> homepage!</h3>
<br/>
<i><b>WARNING: This API is still in an early development stage and may change!</b></i>
@@ -35,47 +35,47 @@ html_description=f'''
<li>📜 See the API source code: <a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/api"><code>archivebox/api/</code></a></li>
</ul>
<small>Served by ArchiveBox v{VERSION} (<a href="https://github.com/ArchiveBox/ArchiveBox/commit/{COMMIT_HASH}"><code>{COMMIT_HASH[:8]}</code></a>), API powered by <a href="https://django-ninja.dev/"><code>django-ninja</code></a>.</small>
'''
"""
def register_urls(api: NinjaAPI) -> NinjaAPI:
api.add_router('/auth/', 'archivebox.api.v1_auth.router')
api.add_router('/core/', 'archivebox.api.v1_core.router')
api.add_router('/crawls/', 'archivebox.api.v1_crawls.router')
api.add_router('/cli/', 'archivebox.api.v1_cli.router')
api.add_router('/machine/', 'archivebox.api.v1_machine.router')
api.add_router("/auth/", "archivebox.api.v1_auth.router")
api.add_router("/core/", "archivebox.api.v1_core.router")
api.add_router("/crawls/", "archivebox.api.v1_crawls.router")
api.add_router("/cli/", "archivebox.api.v1_cli.router")
api.add_router("/machine/", "archivebox.api.v1_machine.router")
return api
class NinjaAPIWithIOCapture(NinjaAPI):
class NinjaAPIWithIOCapture(NinjaAPI):
def create_temporal_response(self, request: HttpRequest) -> HttpResponse:
stdout, stderr = StringIO(), StringIO()
with redirect_stderr(stderr):
with redirect_stdout(stdout):
setattr(request, 'stdout', stdout)
setattr(request, 'stderr', stderr)
setattr(request, "stdout", stdout)
setattr(request, "stderr", stderr)
response = super().create_temporal_response(request)
# Diable caching of API responses entirely
response['Cache-Control'] = 'no-store'
# Disable caching of API responses entirely
response["Cache-Control"] = "no-store"
# Add debug stdout and stderr headers to response
response['X-ArchiveBox-Stdout'] = stdout.getvalue().replace('\n', '\\n')[:200]
response['X-ArchiveBox-Stderr'] = stderr.getvalue().replace('\n', '\\n')[:200]
response["X-ArchiveBox-Stdout"] = stdout.getvalue().replace("\n", "\\n")[:200]
response["X-ArchiveBox-Stderr"] = stderr.getvalue().replace("\n", "\\n")[:200]
# response['X-ArchiveBox-View'] = self.get_openapi_operation_id(request) or 'Unknown'
# Add Auth Headers to response
api_token_attr = getattr(request, '_api_token', None)
api_token_attr = getattr(request, "_api_token", None)
api_token = api_token_attr if isinstance(api_token_attr, APIToken) else None
token_expiry = api_token.expires.isoformat() if api_token and api_token.expires else 'Never'
token_expiry = api_token.expires.isoformat() if api_token and api_token.expires else "Never"
response['X-ArchiveBox-Auth-Method'] = str(getattr(request, '_api_auth_method', 'None'))
response['X-ArchiveBox-Auth-Expires'] = token_expiry
response['X-ArchiveBox-Auth-Token-Id'] = str(api_token.id) if api_token else 'None'
response['X-ArchiveBox-Auth-User-Id'] = str(request.user.pk) if getattr(request.user, 'pk', None) else 'None'
response['X-ArchiveBox-Auth-User-Username'] = request.user.username if isinstance(request.user, User) else 'None'
response["X-ArchiveBox-Auth-Method"] = str(getattr(request, "_api_auth_method", "None"))
response["X-ArchiveBox-Auth-Expires"] = token_expiry
response["X-ArchiveBox-Auth-Token-Id"] = str(api_token.id) if api_token else "None"
response["X-ArchiveBox-Auth-User-Id"] = str(request.user.pk) if getattr(request.user, "pk", None) else "None"
response["X-ArchiveBox-Auth-User-Username"] = request.user.username if isinstance(request.user, User) else "None"
# import ipdb; ipdb.set_trace()
# print('RESPONDING NOW', response)
@@ -84,7 +84,7 @@ class NinjaAPIWithIOCapture(NinjaAPI):
api = NinjaAPIWithIOCapture(
title='ArchiveBox API',
title="ArchiveBox API",
description=html_description,
version=VERSION,
auth=API_AUTH_METHODS,
@@ -103,15 +103,15 @@ def generic_exception_handler(request, err):
if isinstance(err, (ObjectDoesNotExist, EmptyResultSet, PermissionDenied)):
status = 404
print(''.join(format_exception(err)))
print("".join(format_exception(err)))
return api.create_response(
request,
{
"succeeded": False,
"message": f'{err.__class__.__name__}: {err}',
"message": f"{err.__class__.__name__}: {err}",
"errors": [
''.join(format_exception(err)),
"".join(format_exception(err)),
# or send simpler parent-only traceback:
# *([str(err.__context__)] if getattr(err, '__context__', None) else []),
],
@@ -120,7 +120,6 @@ def generic_exception_handler(request, err):
)
# import orjson
# from ninja.renderers import BaseRenderer
# class ORJSONRenderer(BaseRenderer):

View File

@@ -1,6 +1,5 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
from typing import Optional
from django.http import HttpRequest
from ninja import Router, Schema
@@ -8,16 +7,21 @@ from ninja import Router, Schema
from archivebox.api.auth import auth_using_token, auth_using_password, get_or_create_api_token
router = Router(tags=['Authentication'], auth=None)
router = Router(tags=["Authentication"], auth=None)
class PasswordAuthSchema(Schema):
"""Schema for a /get_api_token request"""
username: Optional[str] = None
password: Optional[str] = None
username: str | None = None
password: str | None = None
@router.post("/get_api_token", auth=None, summary='Generate an API token for a given username & password (or currently logged-in user)') # auth=None because they are not authed yet
@router.post(
"/get_api_token",
auth=None,
summary="Generate an API token for a given username & password (or currently logged-in user)",
) # auth=None because they are not authed yet
def get_api_token(request: HttpRequest, auth_data: PasswordAuthSchema):
user = auth_using_password(
username=auth_data.username,
@@ -35,17 +39,21 @@ def get_api_token(request: HttpRequest, auth_data: PasswordAuthSchema):
"token": api_token.token,
"expires": api_token.expires.isoformat() if api_token.expires else None,
}
return {"success": False, "errors": ["Invalid credentials"]}
return {"success": False, "errors": ["Invalid credentials"]}
class TokenAuthSchema(Schema):
"""Schema for a /check_api_token request"""
token: str
@router.post("/check_api_token", auth=None, summary='Validate an API token to make sure its valid and non-expired') # auth=None because they are not authed yet
@router.post(
"/check_api_token",
auth=None,
summary="Validate an API token to make sure its valid and non-expired",
) # auth=None because they are not authed yet
def check_api_token(request: HttpRequest, token_data: TokenAuthSchema):
user = auth_using_token(
token=token_data.token,
@@ -53,5 +61,5 @@ def check_api_token(request: HttpRequest, token_data: TokenAuthSchema):
)
if user:
return {"success": True, "user_id": str(user.pk)}
return {"success": False, "user_id": None}

View File

@@ -1,8 +1,8 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
import json
from io import StringIO
from typing import List, Dict, Any, Optional
from typing import Any
from enum import Enum
from django.http import HttpRequest
@@ -16,44 +16,47 @@ from archivebox.config.common import ARCHIVING_CONFIG
# from .auth import API_AUTH_METHODS
# router for API that exposes archivebox cli subcommands as REST endpoints
router = Router(tags=['ArchiveBox CLI Sub-Commands'])
router = Router(tags=["ArchiveBox CLI Sub-Commands"])
# Schemas
JSONType = List[Any] | Dict[str, Any] | bool | int | str | None
JSONType = list[Any] | dict[str, Any] | bool | int | str | None
class CLICommandResponseSchema(Schema):
success: bool
errors: List[str]
errors: list[str]
result: JSONType
result_format: str = 'str'
result_format: str = "str"
stdout: str
stderr: str
class FilterTypeChoices(str, Enum):
exact = 'exact'
substring = 'substring'
regex = 'regex'
domain = 'domain'
tag = 'tag'
timestamp = 'timestamp'
exact = "exact"
substring = "substring"
regex = "regex"
domain = "domain"
tag = "tag"
timestamp = "timestamp"
class StatusChoices(str, Enum):
indexed = 'indexed'
archived = 'archived'
unarchived = 'unarchived'
present = 'present'
valid = 'valid'
invalid = 'invalid'
duplicate = 'duplicate'
orphaned = 'orphaned'
corrupted = 'corrupted'
unrecognized = 'unrecognized'
indexed = "indexed"
archived = "archived"
unarchived = "unarchived"
present = "present"
valid = "valid"
invalid = "invalid"
duplicate = "duplicate"
orphaned = "orphaned"
corrupted = "corrupted"
unrecognized = "unrecognized"
class AddCommandSchema(Schema):
urls: List[str]
urls: list[str]
tag: str = ""
depth: int = 0
parser: str = "auto"
@@ -62,53 +65,54 @@ class AddCommandSchema(Schema):
overwrite: bool = False
index_only: bool = False
class UpdateCommandSchema(Schema):
resume: Optional[str] = None
after: Optional[float] = 0
before: Optional[float] = 999999999999999
filter_type: Optional[str] = FilterTypeChoices.substring
filter_patterns: Optional[List[str]] = ['https://example.com']
resume: str | None = None
after: float | None = 0
before: float | None = 999999999999999
filter_type: str | None = FilterTypeChoices.substring
filter_patterns: list[str] | None = ["https://example.com"]
batch_size: int = 100
continuous: bool = False
class ScheduleCommandSchema(Schema):
import_path: Optional[str] = None
import_path: str | None = None
add: bool = False
show: bool = False
foreground: bool = False
run_all: bool = False
quiet: bool = False
every: Optional[str] = None
tag: str = ''
every: str | None = None
tag: str = ""
depth: int = 0
overwrite: bool = False
update: bool = not ARCHIVING_CONFIG.ONLY_NEW
clear: bool = False
class ListCommandSchema(Schema):
filter_patterns: Optional[List[str]] = ['https://example.com']
filter_patterns: list[str] | None = ["https://example.com"]
filter_type: str = FilterTypeChoices.substring
status: StatusChoices = StatusChoices.indexed
after: Optional[float] = 0
before: Optional[float] = 999999999999999
sort: str = 'bookmarked_at'
after: float | None = 0
before: float | None = 999999999999999
sort: str = "bookmarked_at"
as_json: bool = True
as_html: bool = False
as_csv: str | None = 'timestamp,url'
as_csv: str | None = "timestamp,url"
with_headers: bool = False
class RemoveCommandSchema(Schema):
delete: bool = True
after: Optional[float] = 0
before: Optional[float] = 999999999999999
after: float | None = 0
before: float | None = 999999999999999
filter_type: str = FilterTypeChoices.exact
filter_patterns: Optional[List[str]] = ['https://example.com']
filter_patterns: list[str] | None = ["https://example.com"]
@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
@router.post("/add", response=CLICommandResponseSchema, summary="archivebox add [args] [urls]")
def cli_add(request: HttpRequest, args: AddCommandSchema):
from archivebox.cli.archivebox_add import add
@@ -125,30 +129,30 @@ def cli_add(request: HttpRequest, args: AddCommandSchema):
created_by_id=request.user.pk,
)
snapshot_ids = [str(snapshot_id) for snapshot_id in snapshots.values_list('id', flat=True)]
snapshot_ids = [str(snapshot_id) for snapshot_id in snapshots.values_list("id", flat=True)]
result_payload = {
"crawl_id": str(crawl.id),
"num_snapshots": len(snapshot_ids),
"snapshot_ids": snapshot_ids,
"queued_urls": args.urls,
}
stdout = getattr(request, 'stdout', None)
stderr = getattr(request, 'stderr', None)
stdout = getattr(request, "stdout", None)
stderr = getattr(request, "stderr", None)
return {
"success": True,
"errors": [],
"result": result_payload,
"result_format": "json",
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else '',
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else '',
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else "",
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else "",
}
@router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]')
@router.post("/update", response=CLICommandResponseSchema, summary="archivebox update [args] [filter_patterns]")
def cli_update(request: HttpRequest, args: UpdateCommandSchema):
from archivebox.cli.archivebox_update import update
result = update(
filter_patterns=args.filter_patterns or [],
filter_type=args.filter_type or FilterTypeChoices.substring,
@@ -158,21 +162,21 @@ def cli_update(request: HttpRequest, args: UpdateCommandSchema):
batch_size=args.batch_size,
continuous=args.continuous,
)
stdout = getattr(request, 'stdout', None)
stderr = getattr(request, 'stderr', None)
stdout = getattr(request, "stdout", None)
stderr = getattr(request, "stderr", None)
return {
"success": True,
"errors": [],
"result": result,
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else '',
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else '',
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else "",
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else "",
}
@router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
@router.post("/schedule", response=CLICommandResponseSchema, summary="archivebox schedule [args] [import_path]")
def cli_schedule(request: HttpRequest, args: ScheduleCommandSchema):
from archivebox.cli.archivebox_schedule import schedule
result = schedule(
import_path=args.import_path,
add=args.add,
@@ -188,23 +192,22 @@ def cli_schedule(request: HttpRequest, args: ScheduleCommandSchema):
update=args.update,
)
stdout = getattr(request, 'stdout', None)
stderr = getattr(request, 'stderr', None)
stdout = getattr(request, "stdout", None)
stderr = getattr(request, "stderr", None)
return {
"success": True,
"errors": [],
"result": result,
"result_format": "json",
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else '',
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else '',
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else "",
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else "",
}
@router.post("/search", response=CLICommandResponseSchema, summary='archivebox search [args] [filter_patterns]')
@router.post("/search", response=CLICommandResponseSchema, summary="archivebox search [args] [filter_patterns]")
def cli_search(request: HttpRequest, args: ListCommandSchema):
from archivebox.cli.archivebox_search import search
result = search(
filter_patterns=args.filter_patterns,
filter_type=args.filter_type,
@@ -218,7 +221,7 @@ def cli_search(request: HttpRequest, args: ListCommandSchema):
with_headers=args.with_headers,
)
result_format = 'txt'
result_format = "txt"
if args.as_json:
result_format = "json"
result = json.loads(result)
@@ -227,20 +230,19 @@ def cli_search(request: HttpRequest, args: ListCommandSchema):
elif args.as_csv:
result_format = "csv"
stdout = getattr(request, 'stdout', None)
stderr = getattr(request, 'stderr', None)
stdout = getattr(request, "stdout", None)
stderr = getattr(request, "stderr", None)
return {
"success": True,
"errors": [],
"result": result,
"result_format": result_format,
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else '',
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else '',
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else "",
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else "",
}
@router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]')
@router.post("/remove", response=CLICommandResponseSchema, summary="archivebox remove [args] [filter_patterns]")
def cli_remove(request: HttpRequest, args: RemoveCommandSchema):
from archivebox.cli.archivebox_remove import remove
from archivebox.cli.archivebox_search import get_snapshots
@@ -253,10 +255,10 @@ def cli_remove(request: HttpRequest, args: RemoveCommandSchema):
after=args.after,
before=args.before,
)
removed_snapshot_ids = [str(snapshot_id) for snapshot_id in snapshots_to_remove.values_list('id', flat=True)]
removed_snapshot_ids = [str(snapshot_id) for snapshot_id in snapshots_to_remove.values_list("id", flat=True)]
remove(
yes=True, # no way to interactively ask for confirmation via API, so we force yes
yes=True, # no way to interactively ask for confirmation via API, so we force yes
delete=args.delete,
snapshots=snapshots_to_remove,
before=args.before,
@@ -270,14 +272,13 @@ def cli_remove(request: HttpRequest, args: RemoveCommandSchema):
"removed_snapshot_ids": removed_snapshot_ids,
"remaining_snapshots": Snapshot.objects.count(),
}
stdout = getattr(request, 'stdout', None)
stderr = getattr(request, 'stderr', None)
stdout = getattr(request, "stdout", None)
stderr = getattr(request, "stderr", None)
return {
"success": True,
"errors": [],
"result": result,
"result_format": "json",
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else '',
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else '',
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else "",
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else "",
}

View File

@@ -1,11 +1,13 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
import math
from collections import defaultdict
from uuid import UUID
from typing import List, Optional, Union, Any, Annotated
from typing import Union, Any, Annotated
from datetime import datetime
from django.db.models import Model, Q
from django.db.models import Model, Q, Sum
from django.db.models.functions import Coalesce
from django.conf import settings
from django.http import HttpRequest, HttpResponse
from django.core.exceptions import ValidationError
@@ -39,7 +41,7 @@ from archivebox.crawls.models import Crawl
from archivebox.api.v1_crawls import CrawlSchema
router = Router(tags=['Core Models'])
router = Router(tags=["Core Models"])
class CustomPagination(PaginationBase):
@@ -49,13 +51,14 @@ class CustomPagination(PaginationBase):
page: int = 0
class Output(PaginationBase.Output):
count: int
total_items: int
total_pages: int
page: int
limit: int
offset: int
num_items: int
items: List[Any]
items: list[Any]
def paginate_queryset(self, queryset, pagination: Input, request: HttpRequest, **params):
limit = min(pagination.limit, 500)
@@ -65,27 +68,29 @@ class CustomPagination(PaginationBase):
current_page = math.ceil(offset / (limit + 1))
items = queryset[offset : offset + limit]
return {
'total_items': total,
'total_pages': total_pages,
'page': current_page,
'limit': limit,
'offset': offset,
'num_items': len(items),
'items': items,
"count": total,
"total_items": total,
"total_pages": total_pages,
"page": current_page,
"limit": limit,
"offset": offset,
"num_items": len(items),
"items": items,
}
### ArchiveResult #########################################################################
class MinimalArchiveResultSchema(Schema):
TYPE: str = 'core.models.ArchiveResult'
TYPE: str = "core.models.ArchiveResult"
id: UUID
created_at: datetime | None
modified_at: datetime | None
created_by_id: str
created_by_username: str
status: str
retry_at: datetime | None
retry_at: datetime | None = None
plugin: str
hook_name: str
process_id: UUID | None
@@ -93,8 +98,8 @@ class MinimalArchiveResultSchema(Schema):
cmd: list[str] | None
pwd: str | None
output_str: str
output_json: dict | None
output_files: dict | None
output_json: dict[str, Any] | None
output_files: dict[str, dict[str, Any]] | None
output_size: int
output_mimetypes: str
start_ts: datetime | None
@@ -108,13 +113,34 @@ class MinimalArchiveResultSchema(Schema):
def resolve_created_by_username(obj) -> str:
return obj.created_by.username
@staticmethod
def resolve_output_files(obj):
return obj.output_file_map()
@staticmethod
def resolve_output_mimetypes(obj) -> str:
mime_sizes: dict[str, int] = defaultdict(int)
for metadata in obj.output_file_map().values():
if not isinstance(metadata, dict):
continue
mimetype = str(metadata.get("mimetype") or "").strip()
try:
size = max(int(metadata.get("size") or 0), 0)
except (TypeError, ValueError):
size = 0
if mimetype and size:
mime_sizes[mimetype] += size
if mime_sizes:
return ",".join(mime for mime, _size in sorted(mime_sizes.items(), key=lambda item: item[1], reverse=True))
return obj.output_mimetypes or ""
class ArchiveResultSchema(MinimalArchiveResultSchema):
TYPE: str = 'core.models.ArchiveResult'
TYPE: str = "core.models.ArchiveResult"
snapshot_id: UUID
snapshot_timestamp: str
snapshot_url: str
snapshot_tags: List[str]
snapshot_tags: list[str]
@staticmethod
def resolve_snapshot_timestamp(obj):
@@ -134,25 +160,39 @@ class ArchiveResultSchema(MinimalArchiveResultSchema):
class ArchiveResultFilterSchema(FilterSchema):
id: Annotated[Optional[str], FilterLookup(['id__startswith', 'snapshot__id__startswith', 'snapshot__timestamp__startswith'])] = None
search: Annotated[Optional[str], FilterLookup(['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'plugin', 'output_str__icontains', 'id__startswith', 'snapshot__id__startswith', 'snapshot__timestamp__startswith'])] = None
snapshot_id: Annotated[Optional[str], FilterLookup(['snapshot__id__startswith', 'snapshot__timestamp__startswith'])] = None
snapshot_url: Annotated[Optional[str], FilterLookup('snapshot__url__icontains')] = None
snapshot_tag: Annotated[Optional[str], FilterLookup('snapshot__tags__name__icontains')] = None
status: Annotated[Optional[str], FilterLookup('status')] = None
output_str: Annotated[Optional[str], FilterLookup('output_str__icontains')] = None
plugin: Annotated[Optional[str], FilterLookup('plugin__icontains')] = None
hook_name: Annotated[Optional[str], FilterLookup('hook_name__icontains')] = None
process_id: Annotated[Optional[str], FilterLookup('process__id__startswith')] = None
cmd: Annotated[Optional[str], FilterLookup('cmd__0__icontains')] = None
pwd: Annotated[Optional[str], FilterLookup('pwd__icontains')] = None
cmd_version: Annotated[Optional[str], FilterLookup('cmd_version')] = None
created_at: Annotated[Optional[datetime], FilterLookup('created_at')] = None
created_at__gte: Annotated[Optional[datetime], FilterLookup('created_at__gte')] = None
created_at__lt: Annotated[Optional[datetime], FilterLookup('created_at__lt')] = None
id: Annotated[str | None, FilterLookup(["id__startswith", "snapshot__id__startswith", "snapshot__timestamp__startswith"])] = None
search: Annotated[
str | None,
FilterLookup(
[
"snapshot__url__icontains",
"snapshot__title__icontains",
"snapshot__tags__name__icontains",
"plugin",
"output_str__icontains",
"id__startswith",
"snapshot__id__startswith",
"snapshot__timestamp__startswith",
],
),
] = None
snapshot_id: Annotated[str | None, FilterLookup(["snapshot__id__startswith", "snapshot__timestamp__startswith"])] = None
snapshot_url: Annotated[str | None, FilterLookup("snapshot__url__icontains")] = None
snapshot_tag: Annotated[str | None, FilterLookup("snapshot__tags__name__icontains")] = None
status: Annotated[str | None, FilterLookup("status")] = None
output_str: Annotated[str | None, FilterLookup("output_str__icontains")] = None
plugin: Annotated[str | None, FilterLookup("plugin__icontains")] = None
hook_name: Annotated[str | None, FilterLookup("hook_name__icontains")] = None
process_id: Annotated[str | None, FilterLookup("process__id__startswith")] = None
cmd: Annotated[str | None, FilterLookup("cmd__0__icontains")] = None
pwd: Annotated[str | None, FilterLookup("pwd__icontains")] = None
cmd_version: Annotated[str | None, FilterLookup("cmd_version")] = None
created_at: Annotated[datetime | None, FilterLookup("created_at")] = None
created_at__gte: Annotated[datetime | None, FilterLookup("created_at__gte")] = None
created_at__lt: Annotated[datetime | None, FilterLookup("created_at__lt")] = None
@router.get("/archiveresults", response=List[ArchiveResultSchema], url_name="get_archiveresult")
@router.get("/archiveresults", response=list[ArchiveResultSchema], url_name="get_archiveresult")
@paginate(CustomPagination)
def get_archiveresults(request: HttpRequest, filters: Query[ArchiveResultFilterSchema]):
"""List all ArchiveResult entries matching these filters."""
@@ -167,8 +207,9 @@ def get_archiveresult(request: HttpRequest, archiveresult_id: str):
### Snapshot #########################################################################
class SnapshotSchema(Schema):
TYPE: str = 'core.models.Snapshot'
TYPE: str = "core.models.Snapshot"
id: UUID
created_by_id: str
created_by_username: str
@@ -177,14 +218,16 @@ class SnapshotSchema(Schema):
status: str
retry_at: datetime | None
bookmarked_at: datetime
downloaded_at: Optional[datetime]
downloaded_at: datetime | None
url: str
tags: List[str]
title: Optional[str]
tags: list[str]
title: str | None
timestamp: str
archive_path: str
archive_size: int
output_size: int
num_archiveresults: int
archiveresults: List[MinimalArchiveResultSchema]
archiveresults: list[MinimalArchiveResultSchema]
@staticmethod
def resolve_created_by_id(obj):
@@ -198,13 +241,21 @@ class SnapshotSchema(Schema):
def resolve_tags(obj):
return sorted(tag.name for tag in obj.tags.all())
@staticmethod
def resolve_archive_size(obj):
return int(getattr(obj, "output_size_sum", obj.archive_size) or 0)
@staticmethod
def resolve_output_size(obj):
return SnapshotSchema.resolve_archive_size(obj)
@staticmethod
def resolve_num_archiveresults(obj, context):
return obj.archiveresult_set.all().distinct().count()
@staticmethod
def resolve_archiveresults(obj, context):
if bool(getattr(context['request'], 'with_archiveresults', False)):
if bool(getattr(context["request"], "with_archiveresults", False)):
return obj.archiveresult_set.all().distinct()
return ArchiveResult.objects.none()
@@ -212,16 +263,16 @@ class SnapshotSchema(Schema):
class SnapshotUpdateSchema(Schema):
status: str | None = None
retry_at: datetime | None = None
tags: Optional[List[str]] = None
tags: list[str] | None = None
class SnapshotCreateSchema(Schema):
url: str
crawl_id: Optional[str] = None
crawl_id: str | None = None
depth: int = 0
title: Optional[str] = None
tags: Optional[List[str]] = None
status: Optional[str] = None
title: str | None = None
tags: list[str] | None = None
status: str | None = None
class SnapshotDeleteResponseSchema(Schema):
@@ -231,77 +282,82 @@ class SnapshotDeleteResponseSchema(Schema):
deleted_count: int
def normalize_tag_list(tags: Optional[List[str]] = None) -> List[str]:
def normalize_tag_list(tags: list[str] | None = None) -> list[str]:
return [tag.strip() for tag in (tags or []) if tag and tag.strip()]
class SnapshotFilterSchema(FilterSchema):
id: Annotated[Optional[str], FilterLookup(['id__icontains', 'timestamp__startswith'])] = None
created_by_id: Annotated[Optional[str], FilterLookup('crawl__created_by_id')] = None
created_by_username: Annotated[Optional[str], FilterLookup('crawl__created_by__username__icontains')] = None
created_at__gte: Annotated[Optional[datetime], FilterLookup('created_at__gte')] = None
created_at__lt: Annotated[Optional[datetime], FilterLookup('created_at__lt')] = None
created_at: Annotated[Optional[datetime], FilterLookup('created_at')] = None
modified_at: Annotated[Optional[datetime], FilterLookup('modified_at')] = None
modified_at__gte: Annotated[Optional[datetime], FilterLookup('modified_at__gte')] = None
modified_at__lt: Annotated[Optional[datetime], FilterLookup('modified_at__lt')] = None
search: Annotated[Optional[str], FilterLookup(['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'timestamp__startswith'])] = None
url: Annotated[Optional[str], FilterLookup('url')] = None
tag: Annotated[Optional[str], FilterLookup('tags__name')] = None
title: Annotated[Optional[str], FilterLookup('title__icontains')] = None
timestamp: Annotated[Optional[str], FilterLookup('timestamp__startswith')] = None
bookmarked_at__gte: Annotated[Optional[datetime], FilterLookup('bookmarked_at__gte')] = None
bookmarked_at__lt: Annotated[Optional[datetime], FilterLookup('bookmarked_at__lt')] = None
id: Annotated[str | None, FilterLookup(["id__icontains", "timestamp__startswith"])] = None
created_by_id: Annotated[str | None, FilterLookup("crawl__created_by_id")] = None
created_by_username: Annotated[str | None, FilterLookup("crawl__created_by__username__icontains")] = None
created_at__gte: Annotated[datetime | None, FilterLookup("created_at__gte")] = None
created_at__lt: Annotated[datetime | None, FilterLookup("created_at__lt")] = None
created_at: Annotated[datetime | None, FilterLookup("created_at")] = None
modified_at: Annotated[datetime | None, FilterLookup("modified_at")] = None
modified_at__gte: Annotated[datetime | None, FilterLookup("modified_at__gte")] = None
modified_at__lt: Annotated[datetime | None, FilterLookup("modified_at__lt")] = None
search: Annotated[
str | None,
FilterLookup(["url__icontains", "title__icontains", "tags__name__icontains", "id__icontains", "timestamp__startswith"]),
] = None
url: Annotated[str | None, FilterLookup("url")] = None
tag: Annotated[str | None, FilterLookup("tags__name")] = None
title: Annotated[str | None, FilterLookup("title__icontains")] = None
timestamp: Annotated[str | None, FilterLookup("timestamp__startswith")] = None
bookmarked_at__gte: Annotated[datetime | None, FilterLookup("bookmarked_at__gte")] = None
bookmarked_at__lt: Annotated[datetime | None, FilterLookup("bookmarked_at__lt")] = None
@router.get("/snapshots", response=List[SnapshotSchema], url_name="get_snapshots")
@router.get("/snapshots", response=list[SnapshotSchema], url_name="get_snapshots")
@paginate(CustomPagination)
def get_snapshots(request: HttpRequest, filters: Query[SnapshotFilterSchema], with_archiveresults: bool = False):
"""List all Snapshot entries matching these filters."""
setattr(request, 'with_archiveresults', with_archiveresults)
return filters.filter(Snapshot.objects.all()).distinct()
setattr(request, "with_archiveresults", with_archiveresults)
queryset = Snapshot.objects.annotate(output_size_sum=Coalesce(Sum("archiveresult__output_size"), 0))
return filters.filter(queryset).distinct()
@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot")
def get_snapshot(request: HttpRequest, snapshot_id: str, with_archiveresults: bool = True):
"""Get a specific Snapshot by id."""
setattr(request, 'with_archiveresults', with_archiveresults)
setattr(request, "with_archiveresults", with_archiveresults)
queryset = Snapshot.objects.annotate(output_size_sum=Coalesce(Sum("archiveresult__output_size"), 0))
try:
return Snapshot.objects.get(Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
return queryset.get(Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
except Snapshot.DoesNotExist:
return Snapshot.objects.get(Q(id__icontains=snapshot_id))
return queryset.get(Q(id__icontains=snapshot_id))
@router.post("/snapshots", response=SnapshotSchema, url_name="create_snapshot")
def create_snapshot(request: HttpRequest, data: SnapshotCreateSchema):
tags = normalize_tag_list(data.tags)
if data.status is not None and data.status not in Snapshot.StatusChoices.values:
raise HttpError(400, f'Invalid status: {data.status}')
raise HttpError(400, f"Invalid status: {data.status}")
if not data.url.strip():
raise HttpError(400, 'URL is required')
raise HttpError(400, "URL is required")
if data.depth not in (0, 1, 2, 3, 4):
raise HttpError(400, 'depth must be between 0 and 4')
raise HttpError(400, "depth must be between 0 and 4")
if data.crawl_id:
crawl = Crawl.objects.get(id__icontains=data.crawl_id)
crawl_tags = normalize_tag_list(crawl.tags_str.split(','))
crawl_tags = normalize_tag_list(crawl.tags_str.split(","))
tags = tags or crawl_tags
else:
crawl = Crawl.objects.create(
urls=data.url,
max_depth=max(data.depth, 0),
tags_str=','.join(tags),
tags_str=",".join(tags),
status=Crawl.StatusChoices.QUEUED,
retry_at=timezone.now(),
created_by=request.user if isinstance(request.user, User) else None,
)
snapshot_defaults = {
'depth': data.depth,
'title': data.title,
'timestamp': str(timezone.now().timestamp()),
'status': data.status or Snapshot.StatusChoices.QUEUED,
'retry_at': timezone.now(),
"depth": data.depth,
"title": data.title,
"timestamp": str(timezone.now().timestamp()),
"status": data.status or Snapshot.StatusChoices.QUEUED,
"retry_at": timezone.now(),
}
snapshot, _ = Snapshot.objects.get_or_create(
url=data.url,
@@ -309,17 +365,17 @@ def create_snapshot(request: HttpRequest, data: SnapshotCreateSchema):
defaults=snapshot_defaults,
)
update_fields: List[str] = []
update_fields: list[str] = []
if data.title is not None and snapshot.title != data.title:
snapshot.title = data.title
update_fields.append('title')
update_fields.append("title")
if data.status is not None and snapshot.status != data.status:
if data.status not in Snapshot.StatusChoices.values:
raise HttpError(400, f'Invalid status: {data.status}')
raise HttpError(400, f"Invalid status: {data.status}")
snapshot.status = data.status
update_fields.append('status')
update_fields.append("status")
if update_fields:
update_fields.append('modified_at')
update_fields.append("modified_at")
snapshot.save(update_fields=update_fields)
if tags:
@@ -330,7 +386,7 @@ def create_snapshot(request: HttpRequest, data: SnapshotCreateSchema):
except Exception:
pass
setattr(request, 'with_archiveresults', False)
setattr(request, "with_archiveresults", False)
return snapshot
@@ -343,26 +399,26 @@ def patch_snapshot(request: HttpRequest, snapshot_id: str, data: SnapshotUpdateS
snapshot = Snapshot.objects.get(Q(id__icontains=snapshot_id))
payload = data.dict(exclude_unset=True)
update_fields = ['modified_at']
tags = payload.pop('tags', None)
update_fields = ["modified_at"]
tags = payload.pop("tags", None)
if 'status' in payload:
if payload['status'] not in Snapshot.StatusChoices.values:
raise HttpError(400, f'Invalid status: {payload["status"]}')
snapshot.status = payload['status']
if snapshot.status == Snapshot.StatusChoices.SEALED and 'retry_at' not in payload:
if "status" in payload:
if payload["status"] not in Snapshot.StatusChoices.values:
raise HttpError(400, f"Invalid status: {payload['status']}")
snapshot.status = payload["status"]
if snapshot.status == Snapshot.StatusChoices.SEALED and "retry_at" not in payload:
snapshot.retry_at = None
update_fields.append('status')
update_fields.append("status")
if 'retry_at' in payload:
snapshot.retry_at = payload['retry_at']
update_fields.append('retry_at')
if "retry_at" in payload:
snapshot.retry_at = payload["retry_at"]
update_fields.append("retry_at")
if tags is not None:
snapshot.save_tags(normalize_tag_list(tags))
snapshot.save(update_fields=update_fields)
setattr(request, 'with_archiveresults', False)
setattr(request, "with_archiveresults", False)
return snapshot
@@ -373,17 +429,18 @@ def delete_snapshot(request: HttpRequest, snapshot_id: str):
crawl_id_str = str(snapshot.crawl.pk)
deleted_count, _ = snapshot.delete()
return {
'success': True,
'snapshot_id': snapshot_id_str,
'crawl_id': crawl_id_str,
'deleted_count': deleted_count,
"success": True,
"snapshot_id": snapshot_id_str,
"crawl_id": crawl_id_str,
"deleted_count": deleted_count,
}
### Tag #########################################################################
class TagSchema(Schema):
TYPE: str = 'core.models.Tag'
TYPE: str = "core.models.Tag"
id: int
modified_at: datetime
created_at: datetime
@@ -392,7 +449,7 @@ class TagSchema(Schema):
name: str
slug: str
num_snapshots: int
snapshots: List[SnapshotSchema]
snapshots: list[SnapshotSchema]
@staticmethod
def resolve_created_by_id(obj):
@@ -402,7 +459,7 @@ class TagSchema(Schema):
def resolve_created_by_username(obj):
user_model = get_user_model()
user = user_model.objects.get(id=obj.created_by_id)
username = getattr(user, 'username', None)
username = getattr(user, "username", None)
return username if isinstance(username, str) else str(user)
@staticmethod
@@ -411,58 +468,67 @@ class TagSchema(Schema):
@staticmethod
def resolve_snapshots(obj, context):
if bool(getattr(context['request'], 'with_snapshots', False)):
if bool(getattr(context["request"], "with_snapshots", False)):
return obj.snapshot_set.all().distinct()
return Snapshot.objects.none()
@router.get("/tags", response=List[TagSchema], url_name="get_tags")
@router.get("/tags", response=list[TagSchema], url_name="get_tags")
@paginate(CustomPagination)
def get_tags(request: HttpRequest):
setattr(request, 'with_snapshots', False)
setattr(request, 'with_archiveresults', False)
setattr(request, "with_snapshots", False)
setattr(request, "with_archiveresults", False)
return get_matching_tags()
@router.get("/tag/{tag_id}", response=TagSchema, url_name="get_tag")
def get_tag(request: HttpRequest, tag_id: str, with_snapshots: bool = True):
setattr(request, 'with_snapshots', with_snapshots)
setattr(request, 'with_archiveresults', False)
setattr(request, "with_snapshots", with_snapshots)
setattr(request, "with_archiveresults", False)
try:
return get_tag_by_ref(tag_id)
except (Tag.DoesNotExist, ValidationError):
raise HttpError(404, 'Tag not found')
raise HttpError(404, "Tag not found")
@router.get("/any/{id}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ID")
@router.get(
"/any/{id}",
response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, CrawlSchema],
url_name="get_any",
summary="Get any object by its ID",
)
def get_any(request: HttpRequest, id: str):
"""Get any object by its ID (e.g. snapshot, archiveresult, tag, crawl, etc.)."""
setattr(request, 'with_snapshots', False)
setattr(request, 'with_archiveresults', False)
setattr(request, "with_snapshots", False)
setattr(request, "with_archiveresults", False)
for getter in [get_snapshot, get_archiveresult, get_tag]:
try:
response = getter(request, id)
if isinstance(response, Model):
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.pk}?{request.META['QUERY_STRING']}")
return redirect(
f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.pk}?{request.META['QUERY_STRING']}",
)
except Exception:
pass
try:
from archivebox.api.v1_crawls import get_crawl
response = get_crawl(request, id)
if isinstance(response, Model):
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.pk}?{request.META['QUERY_STRING']}")
except Exception:
pass
raise HttpError(404, 'Object with given ID not found')
raise HttpError(404, "Object with given ID not found")
### Tag Editor API Endpoints #########################################################################
class TagAutocompleteSchema(Schema):
tags: List[dict]
tags: list[dict]
class TagCreateSchema(Schema):
@@ -483,7 +549,7 @@ class TagSearchSnapshotSchema(Schema):
favicon_url: str
admin_url: str
archive_url: str
downloaded_at: Optional[str] = None
downloaded_at: str | None = None
class TagSearchCardSchema(Schema):
@@ -497,11 +563,11 @@ class TagSearchCardSchema(Schema):
export_jsonl_url: str
rename_url: str
delete_url: str
snapshots: List[TagSearchSnapshotSchema]
snapshots: list[TagSearchSnapshotSchema]
class TagSearchResponseSchema(Schema):
tags: List[TagSearchCardSchema]
tags: list[TagSearchCardSchema]
sort: str
created_by: str
year: str
@@ -527,8 +593,8 @@ class TagDeleteResponseSchema(Schema):
class TagSnapshotRequestSchema(Schema):
snapshot_id: str
tag_name: Optional[str] = None
tag_id: Optional[int] = None
tag_name: str | None = None
tag_id: int | None = None
class TagSnapshotResponseSchema(Schema):
@@ -541,10 +607,10 @@ class TagSnapshotResponseSchema(Schema):
def search_tags(
request: HttpRequest,
q: str = "",
sort: str = 'created_desc',
created_by: str = '',
year: str = '',
has_snapshots: str = 'all',
sort: str = "created_desc",
created_by: str = "",
year: str = "",
has_snapshots: str = "all",
):
"""Return detailed tag cards for admin/live-search UIs."""
normalized_sort = normalize_tag_sort(sort)
@@ -552,7 +618,7 @@ def search_tags(
normalized_year = normalize_created_year_filter(year)
normalized_has_snapshots = normalize_has_snapshots_filter(has_snapshots)
return {
'tags': build_tag_cards(
"tags": build_tag_cards(
query=q,
request=request,
sort=normalized_sort,
@@ -560,28 +626,28 @@ def search_tags(
year=normalized_year,
has_snapshots=normalized_has_snapshots,
),
'sort': normalized_sort,
'created_by': normalized_created_by,
'year': normalized_year,
'has_snapshots': normalized_has_snapshots,
"sort": normalized_sort,
"created_by": normalized_created_by,
"year": normalized_year,
"has_snapshots": normalized_has_snapshots,
}
def _public_tag_listing_enabled() -> bool:
    """Return True if unauthenticated users may browse the public tag listing.

    An explicit PUBLIC_SNAPSHOTS_LIST setting wins when set; otherwise fall
    back to PUBLIC_INDEX (defaulting to the server config's value).
    """
    explicit = getattr(settings, "PUBLIC_SNAPSHOTS_LIST", None)
    if explicit is not None:
        return bool(explicit)
    return bool(getattr(settings, "PUBLIC_INDEX", SERVER_CONFIG.PUBLIC_INDEX))
def _request_has_tag_autocomplete_access(request: HttpRequest) -> bool:
user = getattr(request, 'user', None)
if getattr(user, 'is_authenticated', False):
user = getattr(request, "user", None)
if getattr(user, "is_authenticated", False):
return True
token = request.GET.get('api_key') or request.headers.get('X-ArchiveBox-API-Key')
auth_header = request.headers.get('Authorization', '')
if not token and auth_header.lower().startswith('bearer '):
token = request.GET.get("api_key") or request.headers.get("X-ArchiveBox-API-Key")
auth_header = request.headers.get("Authorization", "")
if not token and auth_header.lower().startswith("bearer "):
token = auth_header.split(None, 1)[1].strip()
if token and auth_using_token(token=token, request=request):
@@ -594,12 +660,12 @@ def _request_has_tag_autocomplete_access(request: HttpRequest) -> bool:
def tags_autocomplete(request: HttpRequest, q: str = ""):
    """Return tags matching the query for autocomplete.

    Requires an authenticated session or a valid API token.
    Returns up to 50 tags for an empty query, otherwise up to 20.
    """
    if not _request_has_tag_autocomplete_access(request):
        raise HttpError(401, "Authentication required")
    limit = 50 if not q else 20
    matches = get_matching_tags(q)[:limit]
    return {
        "tags": [
            {
                "id": tag.pk,
                "name": tag.name,
                "slug": tag.slug,
                "num_snapshots": getattr(tag, "num_snapshots", 0),
            }
            for tag in matches
        ],
    }
@@ -615,10 +681,10 @@ def tags_create(request: HttpRequest, data: TagCreateSchema):
raise HttpError(400, str(err)) from err
return {
'success': True,
'tag_id': tag.pk,
'tag_name': tag.name,
'created': created,
"success": True,
"tag_id": tag.pk,
"tag_name": tag.name,
"created": created,
}
@@ -627,15 +693,15 @@ def rename_tag(request: HttpRequest, tag_id: int, data: TagUpdateSchema):
try:
tag = rename_tag_record(get_tag_by_ref(tag_id), data.name)
except Tag.DoesNotExist as err:
raise HttpError(404, 'Tag not found') from err
raise HttpError(404, "Tag not found") from err
except ValueError as err:
raise HttpError(400, str(err)) from err
return {
'success': True,
'tag_id': tag.pk,
'tag_name': tag.name,
'slug': tag.slug,
"success": True,
"tag_id": tag.pk,
"tag_name": tag.name,
"slug": tag.slug,
}
@@ -644,13 +710,13 @@ def delete_tag(request: HttpRequest, tag_id: int):
try:
tag = get_tag_by_ref(tag_id)
except Tag.DoesNotExist as err:
raise HttpError(404, 'Tag not found') from err
raise HttpError(404, "Tag not found") from err
deleted_count, _ = delete_tag_record(tag)
return {
'success': True,
'tag_id': int(tag_id),
'deleted_count': deleted_count,
"success": True,
"tag_id": int(tag_id),
"deleted_count": deleted_count,
}
@@ -659,10 +725,10 @@ def tag_urls_export(request: HttpRequest, tag_id: int):
try:
tag = get_tag_by_ref(tag_id)
except Tag.DoesNotExist as err:
raise HttpError(404, 'Tag not found') from err
raise HttpError(404, "Tag not found") from err
response = HttpResponse(export_tag_urls(tag), content_type='text/plain; charset=utf-8')
response['Content-Disposition'] = f'attachment; filename="tag-{tag.slug}-urls.txt"'
response = HttpResponse(export_tag_urls(tag), content_type="text/plain; charset=utf-8")
response["Content-Disposition"] = f'attachment; filename="tag-{tag.slug}-urls.txt"'
return response
@@ -671,10 +737,10 @@ def tag_snapshots_export(request: HttpRequest, tag_id: int):
try:
tag = get_tag_by_ref(tag_id)
except Tag.DoesNotExist as err:
raise HttpError(404, 'Tag not found') from err
raise HttpError(404, "Tag not found") from err
response = HttpResponse(export_tag_snapshots_jsonl(tag), content_type='application/x-ndjson; charset=utf-8')
response['Content-Disposition'] = f'attachment; filename="tag-{tag.slug}-snapshots.jsonl"'
response = HttpResponse(export_tag_snapshots_jsonl(tag), content_type="application/x-ndjson; charset=utf-8")
response["Content-Disposition"] = f'attachment; filename="tag-{tag.slug}-snapshots.jsonl"'
return response
@@ -684,16 +750,16 @@ def tags_add_to_snapshot(request: HttpRequest, data: TagSnapshotRequestSchema):
# Get the snapshot
try:
snapshot = Snapshot.objects.get(
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id)
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id),
)
except Snapshot.DoesNotExist:
raise HttpError(404, 'Snapshot not found')
raise HttpError(404, "Snapshot not found")
except Snapshot.MultipleObjectsReturned:
snapshot = Snapshot.objects.filter(
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id)
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id),
).first()
if snapshot is None:
raise HttpError(404, 'Snapshot not found')
raise HttpError(404, "Snapshot not found")
# Get or create the tag
if data.tag_name:
@@ -708,17 +774,17 @@ def tags_add_to_snapshot(request: HttpRequest, data: TagSnapshotRequestSchema):
try:
tag = get_tag_by_ref(data.tag_id)
except Tag.DoesNotExist:
raise HttpError(404, 'Tag not found')
raise HttpError(404, "Tag not found")
else:
raise HttpError(400, 'Either tag_name or tag_id is required')
raise HttpError(400, "Either tag_name or tag_id is required")
# Add the tag to the snapshot
snapshot.tags.add(tag.pk)
return {
'success': True,
'tag_id': tag.pk,
'tag_name': tag.name,
"success": True,
"tag_id": tag.pk,
"tag_name": tag.name,
}
@@ -728,36 +794,36 @@ def tags_remove_from_snapshot(request: HttpRequest, data: TagSnapshotRequestSche
# Get the snapshot
try:
snapshot = Snapshot.objects.get(
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id)
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id),
)
except Snapshot.DoesNotExist:
raise HttpError(404, 'Snapshot not found')
raise HttpError(404, "Snapshot not found")
except Snapshot.MultipleObjectsReturned:
snapshot = Snapshot.objects.filter(
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id)
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id),
).first()
if snapshot is None:
raise HttpError(404, 'Snapshot not found')
raise HttpError(404, "Snapshot not found")
# Get the tag
if data.tag_id:
try:
tag = Tag.objects.get(pk=data.tag_id)
except Tag.DoesNotExist:
raise HttpError(404, 'Tag not found')
raise HttpError(404, "Tag not found")
elif data.tag_name:
try:
tag = Tag.objects.get(name__iexact=data.tag_name.strip())
except Tag.DoesNotExist:
raise HttpError(404, 'Tag not found')
raise HttpError(404, "Tag not found")
else:
raise HttpError(400, 'Either tag_name or tag_id is required')
raise HttpError(400, "Either tag_name or tag_id is required")
# Remove the tag from the snapshot
snapshot.tags.remove(tag.pk)
return {
'success': True,
'tag_id': tag.pk,
'tag_name': tag.name,
"success": True,
"tag_id": tag.pk,
"tag_name": tag.name,
}

View File

@@ -1,7 +1,6 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
from uuid import UUID
from typing import List, Optional
from datetime import datetime
from django.http import HttpRequest
from django.utils import timezone
@@ -17,11 +16,11 @@ from archivebox.crawls.models import Crawl
from .auth import API_AUTH_METHODS
router = Router(tags=['Crawl Models'], auth=API_AUTH_METHODS)
router = Router(tags=["Crawl Models"], auth=API_AUTH_METHODS)
class CrawlSchema(Schema):
TYPE: str = 'crawls.models.Crawl'
TYPE: str = "crawls.models.Crawl"
id: UUID
@@ -35,6 +34,8 @@ class CrawlSchema(Schema):
urls: str
max_depth: int
max_urls: int
max_size: int
tags_str: str
config: dict
@@ -48,12 +49,12 @@ class CrawlSchema(Schema):
def resolve_created_by_username(obj):
user_model = get_user_model()
user = user_model.objects.get(id=obj.created_by_id)
username = getattr(user, 'username', None)
username = getattr(user, "username", None)
return username if isinstance(username, str) else str(user)
@staticmethod
def resolve_snapshots(obj, context):
if bool(getattr(context['request'], 'with_snapshots', False)):
if bool(getattr(context["request"], "with_snapshots", False)):
return obj.snapshot_set.all().distinct()
return Snapshot.objects.none()
@@ -61,17 +62,19 @@ class CrawlSchema(Schema):
class CrawlUpdateSchema(Schema):
    """Fields a PATCH /crawl/{id} request may update (all optional)."""

    status: str | None = None
    retry_at: datetime | None = None
    tags: list[str] | None = None
    tags_str: str | None = None
class CrawlCreateSchema(Schema):
    """Request body for creating a new Crawl."""

    urls: list[str]
    max_depth: int = 0
    max_urls: int = 0
    max_size: int = 0
    tags: list[str] | None = None
    tags_str: str = ""
    label: str = ""
    notes: str = ""
    # NOTE: mutable class-level default is safe here — pydantic/ninja Schema
    # deep-copies field defaults per instance.
    config: dict = {}
@@ -82,13 +85,13 @@ class CrawlDeleteResponseSchema(Schema):
deleted_snapshots: int
def normalize_tag_list(tags: Optional[List[str]] = None, tags_str: str = '') -> List[str]:
def normalize_tag_list(tags: list[str] | None = None, tags_str: str = "") -> list[str]:
if tags is not None:
return [tag.strip() for tag in tags if tag and tag.strip()]
return [tag.strip() for tag in tags_str.split(',') if tag.strip()]
return [tag.strip() for tag in tags_str.split(",") if tag.strip()]
@router.get("/crawls", response=List[CrawlSchema], url_name="get_crawls")
@router.get("/crawls", response=list[CrawlSchema], url_name="get_crawls")
def get_crawls(request: HttpRequest):
return Crawl.objects.all().distinct()
@@ -97,15 +100,21 @@ def get_crawls(request: HttpRequest):
def create_crawl(request: HttpRequest, data: CrawlCreateSchema):
urls = [url.strip() for url in data.urls if url and url.strip()]
if not urls:
raise HttpError(400, 'At least one URL is required')
raise HttpError(400, "At least one URL is required")
if data.max_depth not in (0, 1, 2, 3, 4):
raise HttpError(400, 'max_depth must be between 0 and 4')
raise HttpError(400, "max_depth must be between 0 and 4")
if data.max_urls < 0:
raise HttpError(400, "max_urls must be >= 0")
if data.max_size < 0:
raise HttpError(400, "max_size must be >= 0")
tags = normalize_tag_list(data.tags, data.tags_str)
crawl = Crawl.objects.create(
urls='\n'.join(urls),
urls="\n".join(urls),
max_depth=data.max_depth,
tags_str=','.join(tags),
max_urls=data.max_urls,
max_size=data.max_size,
tags_str=",".join(tags),
label=data.label,
notes=data.notes,
config=data.config,
@@ -116,25 +125,26 @@ def create_crawl(request: HttpRequest, data: CrawlCreateSchema):
crawl.create_snapshots_from_urls()
return crawl
@router.get("/crawl/{crawl_id}", response=CrawlSchema | str, url_name="get_crawl")
def get_crawl(request: HttpRequest, crawl_id: str, as_rss: bool=False, with_snapshots: bool=False, with_archiveresults: bool=False):
def get_crawl(request: HttpRequest, crawl_id: str, as_rss: bool = False, with_snapshots: bool = False, with_archiveresults: bool = False):
"""Get a specific Crawl by id."""
setattr(request, 'with_snapshots', with_snapshots)
setattr(request, 'with_archiveresults', with_archiveresults)
setattr(request, "with_snapshots", with_snapshots)
setattr(request, "with_archiveresults", with_archiveresults)
crawl = Crawl.objects.get(id__icontains=crawl_id)
if crawl and as_rss:
# return snapshots as XML rss feed
urls = [
{'url': snapshot.url, 'title': snapshot.title, 'bookmarked_at': snapshot.bookmarked_at, 'tags': snapshot.tags_str}
{"url": snapshot.url, "title": snapshot.title, "bookmarked_at": snapshot.bookmarked_at, "tags": snapshot.tags_str}
for snapshot in crawl.snapshot_set.all()
]
xml = '<rss version="2.0"><channel>'
for url in urls:
xml += f'<item><url>{url["url"]}</url><title>{url["title"]}</title><bookmarked_at>{url["bookmarked_at"]}</bookmarked_at><tags>{url["tags"]}</tags></item>'
xml += '</channel></rss>'
xml += f"<item><url>{url['url']}</url><title>{url['title']}</title><bookmarked_at>{url['bookmarked_at']}</bookmarked_at><tags>{url['tags']}</tags></item>"
xml += "</channel></rss>"
return xml
return crawl
@@ -143,29 +153,29 @@ def patch_crawl(request: HttpRequest, crawl_id: str, data: CrawlUpdateSchema):
"""Update a crawl (e.g., set status=sealed to cancel queued work)."""
crawl = Crawl.objects.get(id__icontains=crawl_id)
payload = data.dict(exclude_unset=True)
update_fields = ['modified_at']
update_fields = ["modified_at"]
tags = payload.pop('tags', None)
tags_str = payload.pop('tags_str', None)
tags = payload.pop("tags", None)
tags_str = payload.pop("tags_str", None)
if tags is not None or tags_str is not None:
crawl.tags_str = ','.join(normalize_tag_list(tags, tags_str or ''))
update_fields.append('tags_str')
crawl.tags_str = ",".join(normalize_tag_list(tags, tags_str or ""))
update_fields.append("tags_str")
if 'status' in payload:
if payload['status'] not in Crawl.StatusChoices.values:
raise HttpError(400, f'Invalid status: {payload["status"]}')
crawl.status = payload['status']
if crawl.status == Crawl.StatusChoices.SEALED and 'retry_at' not in payload:
if "status" in payload:
if payload["status"] not in Crawl.StatusChoices.values:
raise HttpError(400, f"Invalid status: {payload['status']}")
crawl.status = payload["status"]
if crawl.status == Crawl.StatusChoices.SEALED and "retry_at" not in payload:
crawl.retry_at = None
update_fields.append('status')
update_fields.append("status")
if 'retry_at' in payload:
crawl.retry_at = payload['retry_at']
update_fields.append('retry_at')
if "retry_at" in payload:
crawl.retry_at = payload["retry_at"]
update_fields.append("retry_at")
crawl.save(update_fields=update_fields)
if payload.get('status') == Crawl.StatusChoices.SEALED:
if payload.get("status") == Crawl.StatusChoices.SEALED:
Snapshot.objects.filter(
crawl=crawl,
status__in=[Snapshot.StatusChoices.QUEUED, Snapshot.StatusChoices.STARTED],
@@ -184,8 +194,8 @@ def delete_crawl(request: HttpRequest, crawl_id: str):
snapshot_count = crawl.snapshot_set.count()
deleted_count, _ = crawl.delete()
return {
'success': True,
'crawl_id': crawl_id_str,
'deleted_count': deleted_count,
'deleted_snapshots': snapshot_count,
"success": True,
"crawl_id": crawl_id_str,
"deleted_count": deleted_count,
"deleted_snapshots": snapshot_count,
}

View File

@@ -1,7 +1,7 @@
__package__ = 'archivebox.api'
__package__ = "archivebox.api"
from uuid import UUID
from typing import Annotated, List, Optional
from typing import Annotated
from datetime import datetime
from django.http import HttpRequest
@@ -12,16 +12,18 @@ from ninja.pagination import paginate
from archivebox.api.v1_core import CustomPagination
router = Router(tags=['Machine and Dependencies'])
router = Router(tags=["Machine and Dependencies"])
# ============================================================================
# Machine Schemas
# ============================================================================
class MachineSchema(Schema):
"""Schema for Machine model."""
TYPE: str = 'machine.Machine'
TYPE: str = "machine.Machine"
id: UUID
created_at: datetime
modified_at: datetime
@@ -43,22 +45,24 @@ class MachineSchema(Schema):
class MachineFilterSchema(FilterSchema):
id: Annotated[Optional[str], FilterLookup('id__startswith')] = None
hostname: Annotated[Optional[str], FilterLookup('hostname__icontains')] = None
os_platform: Annotated[Optional[str], FilterLookup('os_platform__icontains')] = None
os_arch: Annotated[Optional[str], FilterLookup('os_arch')] = None
hw_in_docker: Annotated[Optional[bool], FilterLookup('hw_in_docker')] = None
hw_in_vm: Annotated[Optional[bool], FilterLookup('hw_in_vm')] = None
bin_providers: Annotated[Optional[str], FilterLookup('bin_providers__icontains')] = None
id: Annotated[str | None, FilterLookup("id__startswith")] = None
hostname: Annotated[str | None, FilterLookup("hostname__icontains")] = None
os_platform: Annotated[str | None, FilterLookup("os_platform__icontains")] = None
os_arch: Annotated[str | None, FilterLookup("os_arch")] = None
hw_in_docker: Annotated[bool | None, FilterLookup("hw_in_docker")] = None
hw_in_vm: Annotated[bool | None, FilterLookup("hw_in_vm")] = None
bin_providers: Annotated[str | None, FilterLookup("bin_providers__icontains")] = None
# ============================================================================
# Binary Schemas
# ============================================================================
class BinarySchema(Schema):
"""Schema for Binary model."""
TYPE: str = 'machine.Binary'
TYPE: str = "machine.Binary"
id: UUID
created_at: datetime
modified_at: datetime
@@ -85,23 +89,25 @@ class BinarySchema(Schema):
class BinaryFilterSchema(FilterSchema):
id: Annotated[Optional[str], FilterLookup('id__startswith')] = None
name: Annotated[Optional[str], FilterLookup('name__icontains')] = None
binprovider: Annotated[Optional[str], FilterLookup('binprovider')] = None
status: Annotated[Optional[str], FilterLookup('status')] = None
machine_id: Annotated[Optional[str], FilterLookup('machine_id__startswith')] = None
version: Annotated[Optional[str], FilterLookup('version__icontains')] = None
id: Annotated[str | None, FilterLookup("id__startswith")] = None
name: Annotated[str | None, FilterLookup("name__icontains")] = None
binprovider: Annotated[str | None, FilterLookup("binprovider")] = None
status: Annotated[str | None, FilterLookup("status")] = None
machine_id: Annotated[str | None, FilterLookup("machine_id__startswith")] = None
version: Annotated[str | None, FilterLookup("version__icontains")] = None
# ============================================================================
# Machine Endpoints
# ============================================================================
@router.get("/machines", response=List[MachineSchema], url_name="get_machines")
@router.get("/machines", response=list[MachineSchema], url_name="get_machines")
@paginate(CustomPagination)
def get_machines(request: HttpRequest, filters: Query[MachineFilterSchema]):
"""List all machines."""
from archivebox.machine.models import Machine
return filters.filter(Machine.objects.all()).distinct()
@@ -109,6 +115,7 @@ def get_machines(request: HttpRequest, filters: Query[MachineFilterSchema]):
def get_current_machine(request: HttpRequest):
"""Get the current machine."""
from archivebox.machine.models import Machine
return Machine.current()
@@ -117,6 +124,7 @@ def get_machine(request: HttpRequest, machine_id: str):
"""Get a specific machine by ID."""
from archivebox.machine.models import Machine
from django.db.models import Q
return Machine.objects.get(Q(id__startswith=machine_id) | Q(hostname__iexact=machine_id))
@@ -127,23 +135,27 @@ def get_machine(request: HttpRequest, machine_id: str):
# Binary Endpoints
# ============================================================================
@router.get("/binaries", response=List[BinarySchema], url_name="get_binaries")
@router.get("/binaries", response=list[BinarySchema], url_name="get_binaries")
@paginate(CustomPagination)
def get_binaries(request: HttpRequest, filters: Query[BinaryFilterSchema]):
"""List all binaries."""
from archivebox.machine.models import Binary
return filters.filter(Binary.objects.all().select_related('machine')).distinct()
return filters.filter(Binary.objects.all().select_related("machine")).distinct()
@router.get("/binary/{binary_id}", response=BinarySchema, url_name="get_binary")
def get_binary(request: HttpRequest, binary_id: str):
"""Get a specific binary by ID."""
from archivebox.machine.models import Binary
return Binary.objects.select_related('machine').get(id__startswith=binary_id)
return Binary.objects.select_related("machine").get(id__startswith=binary_id)
@router.get("/binary/by-name/{name}", response=List[BinarySchema], url_name="get_binaries_by_name")
@router.get("/binary/by-name/{name}", response=list[BinarySchema], url_name="get_binaries_by_name")
def get_binaries_by_name(request: HttpRequest, name: str):
"""Get all binaries with the given name."""
from archivebox.machine.models import Binary
return list(Binary.objects.filter(name__iexact=name).select_related('machine'))
return list(Binary.objects.filter(name__iexact=name).select_related("machine"))

View File

@@ -1 +1 @@
__package__ = 'archivebox.base_models'
__package__ = "archivebox.base_models"

View File

@@ -1,6 +1,6 @@
"""Base admin classes for models using UUIDv7."""
__package__ = 'archivebox.base_models'
__package__ = "archivebox.base_models"
import json
from collections.abc import Mapping
@@ -32,11 +32,12 @@ class KeyValueWidget(forms.Widget):
with + and - buttons to add/remove rows.
Includes autocomplete for available config keys from the plugin system.
"""
template_name = "" # We render manually
class Media:
css = {
'all': []
"all": [],
}
js = []
@@ -44,17 +45,18 @@ class KeyValueWidget(forms.Widget):
"""Get available config options from plugins."""
try:
from archivebox.hooks import discover_plugin_configs
plugin_configs = discover_plugin_configs()
options: dict[str, ConfigOption] = {}
for plugin_name, schema in plugin_configs.items():
for key, prop in schema.get('properties', {}).items():
for key, prop in schema.get("properties", {}).items():
option: ConfigOption = {
'plugin': plugin_name,
'type': prop.get('type', 'string'),
'default': prop.get('default', ''),
'description': prop.get('description', ''),
"plugin": plugin_name,
"type": prop.get("type", "string"),
"default": prop.get("default", ""),
"description": prop.get("description", ""),
}
for schema_key in ('enum', 'pattern', 'minimum', 'maximum'):
for schema_key in ("enum", "pattern", "minimum", "maximum"):
if schema_key in prop:
option[schema_key] = prop[schema_key]
options[key] = option
@@ -85,11 +87,11 @@ class KeyValueWidget(forms.Widget):
) -> SafeString:
data = self._parse_value(value)
widget_id = attrs.get('id', name) if attrs else name
widget_id = attrs.get("id", name) if attrs else name
config_options = self._get_config_options()
# Build datalist options
datalist_options = '\n'.join(
datalist_options = "\n".join(
f'<option value="{self._escape(key)}">{self._escape(opt["description"][:60] or opt["type"])}</option>'
for key, opt in sorted(config_options.items())
)
@@ -111,7 +113,7 @@ class KeyValueWidget(forms.Widget):
html += self._render_row(widget_id, key, val_str)
# Always add one empty row for new entries
html += self._render_row(widget_id, '', '')
html += self._render_row(widget_id, "", "")
html += f'''
</div>
@@ -669,8 +671,8 @@ class KeyValueWidget(forms.Widget):
def _escape(self, s: object) -> str:
"""Escape HTML special chars in attribute values."""
if not s:
return ''
return str(s).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
return ""
return str(s).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;")
def value_from_datadict(
self,
@@ -678,8 +680,8 @@ class KeyValueWidget(forms.Widget):
files: object,
name: str,
) -> str:
value = data.get(name, '{}')
return value if isinstance(value, str) else '{}'
value = data.get(name, "{}")
return value if isinstance(value, str) else "{}"
class ConfigEditorMixin(admin.ModelAdmin):
@@ -696,14 +698,20 @@ class ConfigEditorMixin(admin.ModelAdmin):
**kwargs: object,
) -> forms.Field | None:
"""Use KeyValueWidget for the config JSON field."""
if db_field.name == 'config':
kwargs['widget'] = KeyValueWidget()
if db_field.name == "config":
kwargs["widget"] = KeyValueWidget()
return super().formfield_for_dbfield(db_field, request, **kwargs)
class BaseModelAdmin(DjangoObjectActions, admin.ModelAdmin):
list_display = ('id', 'created_at', 'created_by')
readonly_fields = ('id', 'created_at', 'modified_at')
list_display = ("id", "created_at", "created_by")
readonly_fields = ("id", "created_at", "modified_at")
show_search_mode_selector = False
def get_default_search_mode(self) -> str:
# The shared changelist template always asks every admin for a default
# search mode, even when the search-mode toggle is hidden.
return "meta"
def get_form(
self,
@@ -713,6 +721,6 @@ class BaseModelAdmin(DjangoObjectActions, admin.ModelAdmin):
**kwargs: object,
):
form = super().get_form(request, obj, change=change, **kwargs)
if 'created_by' in form.base_fields:
form.base_fields['created_by'].initial = request.user
if "created_by" in form.base_fields:
form.base_fields["created_by"].initial = request.user
return form

View File

@@ -1,6 +1,6 @@
"""Base models using UUIDv7 for all id fields."""
__package__ = 'archivebox.base_models'
__package__ = "archivebox.base_models"
from archivebox.uuid_compat import uuid7
from pathlib import Path
@@ -15,22 +15,22 @@ from django.conf import settings
from django_stubs_ext.db.models import TypedModelMeta
def get_or_create_system_user_pk(username="system"):
    """Return the pk of the user to attribute system-created records to.

    If there is exactly one superuser, reuse it for all system operations;
    otherwise get or create a dedicated system superuser with an unusable
    password ("!").
    """
    User = get_user_model()

    superusers = User.objects.filter(is_superuser=True)
    if superusers.count() == 1:
        return superusers.values_list("pk", flat=True)[0]

    user, _ = User.objects.get_or_create(
        username=username,
        defaults={"is_staff": True, "is_superuser": True, "email": "", "password": "!"},
    )
    return user.pk
class AutoDateTimeField(models.DateTimeField):
"""DateTimeField that automatically updates on save (legacy compatibility)."""
def pre_save(self, model_instance, add):
if add or not getattr(model_instance, self.attname):
value = timezone.now()
@@ -43,13 +43,19 @@ class ModelWithUUID(models.Model):
id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
created_at = models.DateTimeField(default=timezone.now, db_index=True)
modified_at = models.DateTimeField(auto_now=True)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, null=False, db_index=True)
created_by = models.ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.CASCADE,
default=get_or_create_system_user_pk,
null=False,
db_index=True,
)
class Meta(TypedModelMeta):
abstract = True
def __str__(self) -> str:
return f'[{self.id}] {self.__class__.__name__}'
return f"[{self.id}] {self.__class__.__name__}"
@property
def admin_change_url(self) -> str:
@@ -57,17 +63,17 @@ class ModelWithUUID(models.Model):
@property
def api_url(self) -> str:
return str(reverse_lazy('api-1:get_any', args=[self.id]))
return str(reverse_lazy("api-1:get_any", args=[self.id]))
@property
def api_docs_url(self) -> str:
return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}'
return f"/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}"
class ModelWithNotes(models.Model):
"""Mixin for models with a notes field."""
notes = models.TextField(blank=True, null=False, default='')
notes = models.TextField(blank=True, null=False, default="")
class Meta(TypedModelMeta):
abstract = True
@@ -75,6 +81,7 @@ class ModelWithNotes(models.Model):
class ModelWithHealthStats(models.Model):
"""Mixin for models with health tracking fields."""
num_uses_failed = models.PositiveIntegerField(default=0)
num_uses_succeeded = models.PositiveIntegerField(default=0)
@@ -88,12 +95,13 @@ class ModelWithHealthStats(models.Model):
def increment_health_stats(self, success: bool):
"""Atomically increment success or failure counter using F() expression."""
field = 'num_uses_succeeded' if success else 'num_uses_failed'
field = "num_uses_succeeded" if success else "num_uses_failed"
type(self).objects.filter(pk=self.pk).update(**{field: F(field) + 1})
class ModelWithConfig(models.Model):
"""Mixin for models with a JSON config field."""
config = models.JSONField(default=dict, null=True, blank=True, editable=True)
class Meta(TypedModelMeta):
@@ -111,7 +119,7 @@ class ModelWithOutputDir(ModelWithUUID):
@property
def output_dir_parent(self) -> str:
return f'{self._meta.model_name}s'
return f"{self._meta.model_name}s"
@property
def output_dir_name(self) -> str:
@@ -119,7 +127,7 @@ class ModelWithOutputDir(ModelWithUUID):
@property
def output_dir_str(self) -> str:
return f'{self.output_dir_parent}/{self.output_dir_name}'
return f"{self.output_dir_parent}/{self.output_dir_name}"
@property
def output_dir(self) -> Path:

View File

@@ -1,5 +1,5 @@
__package__ = 'archivebox.cli'
__command__ = 'archivebox'
__package__ = "archivebox.cli"
__command__ = "archivebox"
import os
import sys
from importlib import import_module
@@ -10,55 +10,55 @@ from rich import print
from archivebox.config.version import VERSION
if '--debug' in sys.argv:
os.environ['DEBUG'] = 'True'
sys.argv.remove('--debug')
if "--debug" in sys.argv:
os.environ["DEBUG"] = "True"
sys.argv.remove("--debug")
class ArchiveBoxGroup(click.Group):
"""lazy loading click group for archivebox commands"""
meta_commands = {
'help': 'archivebox.cli.archivebox_help.main',
'version': 'archivebox.cli.archivebox_version.main',
'mcp': 'archivebox.cli.archivebox_mcp.main',
"help": "archivebox.cli.archivebox_help.main",
"version": "archivebox.cli.archivebox_version.main",
"mcp": "archivebox.cli.archivebox_mcp.main",
}
setup_commands = {
'init': 'archivebox.cli.archivebox_init.main',
'install': 'archivebox.cli.archivebox_install.main',
"init": "archivebox.cli.archivebox_init.main",
"install": "archivebox.cli.archivebox_install.main",
}
# Model commands (CRUD operations via subcommands)
model_commands = {
'crawl': 'archivebox.cli.archivebox_crawl.main',
'snapshot': 'archivebox.cli.archivebox_snapshot.main',
'archiveresult': 'archivebox.cli.archivebox_archiveresult.main',
'tag': 'archivebox.cli.archivebox_tag.main',
'binary': 'archivebox.cli.archivebox_binary.main',
'process': 'archivebox.cli.archivebox_process.main',
'machine': 'archivebox.cli.archivebox_machine.main',
'persona': 'archivebox.cli.archivebox_persona.main',
"crawl": "archivebox.cli.archivebox_crawl.main",
"snapshot": "archivebox.cli.archivebox_snapshot.main",
"archiveresult": "archivebox.cli.archivebox_archiveresult.main",
"tag": "archivebox.cli.archivebox_tag.main",
"binary": "archivebox.cli.archivebox_binary.main",
"process": "archivebox.cli.archivebox_process.main",
"machine": "archivebox.cli.archivebox_machine.main",
"persona": "archivebox.cli.archivebox_persona.main",
}
archive_commands = {
# High-level commands
'add': 'archivebox.cli.archivebox_add.main',
'extract': 'archivebox.cli.archivebox_extract.main',
'list': 'archivebox.cli.archivebox_list.main',
'remove': 'archivebox.cli.archivebox_remove.main',
'run': 'archivebox.cli.archivebox_run.main',
'update': 'archivebox.cli.archivebox_update.main',
'status': 'archivebox.cli.archivebox_status.main',
'search': 'archivebox.cli.archivebox_search.main',
'config': 'archivebox.cli.archivebox_config.main',
'schedule': 'archivebox.cli.archivebox_schedule.main',
'server': 'archivebox.cli.archivebox_server.main',
'shell': 'archivebox.cli.archivebox_shell.main',
'manage': 'archivebox.cli.archivebox_manage.main',
"add": "archivebox.cli.archivebox_add.main",
"extract": "archivebox.cli.archivebox_extract.main",
"list": "archivebox.cli.archivebox_list.main",
"remove": "archivebox.cli.archivebox_remove.main",
"run": "archivebox.cli.archivebox_run.main",
"update": "archivebox.cli.archivebox_update.main",
"status": "archivebox.cli.archivebox_status.main",
"search": "archivebox.cli.archivebox_search.main",
"config": "archivebox.cli.archivebox_config.main",
"schedule": "archivebox.cli.archivebox_schedule.main",
"server": "archivebox.cli.archivebox_server.main",
"shell": "archivebox.cli.archivebox_shell.main",
"manage": "archivebox.cli.archivebox_manage.main",
# Introspection commands
'pluginmap': 'archivebox.cli.archivebox_pluginmap.main',
"pluginmap": "archivebox.cli.archivebox_pluginmap.main",
}
legacy_model_commands = {
'crawl': 'archivebox.cli.archivebox_crawl_compat.main',
'snapshot': 'archivebox.cli.archivebox_snapshot_compat.main',
"crawl": "archivebox.cli.archivebox_crawl_compat.main",
"snapshot": "archivebox.cli.archivebox_snapshot_compat.main",
}
all_subcommands = {
**meta_commands,
@@ -67,15 +67,15 @@ class ArchiveBoxGroup(click.Group):
**archive_commands,
}
renamed_commands = {
'setup': 'install',
'import': 'add',
'archive': 'add',
"setup": "install",
"import": "add",
"archive": "add",
}
legacy_model_subcommands = {
'crawl': {'create', 'list', 'update', 'delete'},
'snapshot': {'create', 'list', 'update', 'delete'},
"crawl": {"create", "list", "update", "delete"},
"snapshot": {"create", "list", "update", "delete"},
}
@classmethod
def get_canonical_name(cls, cmd_name):
return cls.renamed_commands.get(cmd_name, cmd_name)
@@ -90,23 +90,22 @@ class ArchiveBoxGroup(click.Group):
except ValueError:
return False
remaining_args = sys.argv[arg_idx + 1:]
remaining_args = sys.argv[arg_idx + 1 :]
if not remaining_args:
return False
first_arg = remaining_args[0]
if first_arg in ('-h', '--help'):
if first_arg in ("-h", "--help"):
return False
return first_arg not in cls.legacy_model_subcommands[cmd_name]
def get_command(self, ctx, cmd_name):
# handle renamed commands
if cmd_name in self.renamed_commands:
new_name = self.renamed_commands[cmd_name]
print(
f' [violet]Hint:[/violet] `archivebox {cmd_name}` has been renamed to `archivebox {new_name}`',
f" [violet]Hint:[/violet] `archivebox {cmd_name}` has been renamed to `archivebox {new_name}`",
file=sys.stderr,
)
cmd_name = new_name
@@ -114,11 +113,11 @@ class ArchiveBoxGroup(click.Group):
if self._should_use_legacy_model_command(cmd_name):
return self._lazy_load(self.legacy_model_commands[cmd_name])
# handle lazy loading of commands
if cmd_name in self.all_subcommands:
return self._lazy_load(cmd_name)
# fall-back to using click's default command lookup
return super().get_command(ctx, cmd_name)
@@ -127,72 +126,74 @@ class ArchiveBoxGroup(click.Group):
import_path = cls.all_subcommands.get(cmd_name_or_path)
if import_path is None:
import_path = cmd_name_or_path
modname, funcname = import_path.rsplit('.', 1)
modname, funcname = import_path.rsplit(".", 1)
# print(f'LAZY LOADING {import_path}')
mod = import_module(modname)
func = getattr(mod, funcname)
if not hasattr(func, '__doc__'):
raise ValueError(f'lazy loading of {import_path} failed - no docstring found on method')
if not hasattr(func, "__doc__"):
raise ValueError(f"lazy loading of {import_path} failed - no docstring found on method")
# if not isinstance(cmd, click.BaseCommand):
# raise ValueError(f'lazy loading of {import_path} failed - not a click command')
# raise ValueError(f'lazy loading of {import_path} failed - not a click command')
return func
@click.group(cls=ArchiveBoxGroup, invoke_without_command=True)
@click.option('--help', '-h', is_flag=True, help='Show help')
@click.version_option(VERSION, '-v', '--version', package_name='archivebox', message='%(version)s')
@click.option("--help", "-h", is_flag=True, help="Show help")
@click.version_option(VERSION, "-v", "--version", package_name="archivebox", message="%(version)s")
@click.pass_context
def cli(ctx, help=False):
"""ArchiveBox: The self-hosted internet archive"""
subcommand = ArchiveBoxGroup.get_canonical_name(ctx.invoked_subcommand)
# if --help is passed or no subcommand is given, show custom help message
if help or ctx.invoked_subcommand is None:
ctx.invoke(ctx.command.get_command(ctx, 'help'))
ctx.invoke(ctx.command.get_command(ctx, "help"))
# if the subcommand is in archive_commands or model_commands,
# then we need to set up the django environment and check that we're in a valid data folder
if subcommand in ArchiveBoxGroup.archive_commands or subcommand in ArchiveBoxGroup.model_commands:
# print('SETUP DJANGO AND CHECK DATA FOLDER')
try:
if subcommand == 'server':
run_in_debug = '--reload' in sys.argv or os.environ.get('DEBUG') in ('1', 'true', 'True', 'TRUE', 'yes')
if subcommand == "server":
run_in_debug = "--reload" in sys.argv or os.environ.get("DEBUG") in ("1", "true", "True", "TRUE", "yes")
if run_in_debug:
os.environ['ARCHIVEBOX_RUNSERVER'] = '1'
if '--reload' in sys.argv:
os.environ['ARCHIVEBOX_AUTORELOAD'] = '1'
os.environ["ARCHIVEBOX_RUNSERVER"] = "1"
if "--reload" in sys.argv:
os.environ["ARCHIVEBOX_AUTORELOAD"] = "1"
from archivebox.config.common import STORAGE_CONFIG
os.environ['ARCHIVEBOX_RUNSERVER_PIDFILE'] = str(STORAGE_CONFIG.TMP_DIR / 'runserver.pid')
os.environ["ARCHIVEBOX_RUNSERVER_PIDFILE"] = str(STORAGE_CONFIG.TMP_DIR / "runserver.pid")
from archivebox.config.django import setup_django
from archivebox.misc.checks import check_data_folder
setup_django()
check_data_folder()
except Exception as e:
print(f'[red][X] Error setting up Django or checking data folder: {e}[/red]', file=sys.stderr)
if subcommand not in ('manage', 'shell'): # not all management commands need django to be setup beforehand
print(f"[red][X] Error setting up Django or checking data folder: {e}[/red]", file=sys.stderr)
if subcommand not in ("manage", "shell"): # not all management commands need django to be setup beforehand
raise
def main(args=None, prog_name=None, stdin=None):
# show `docker run archivebox xyz` in help messages if running in docker
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
IN_DOCKER = os.environ.get("IN_DOCKER", False) in ("1", "true", "True", "TRUE", "yes")
IS_TTY = sys.stdin.isatty()
prog_name = prog_name or (f'docker compose run{"" if IS_TTY else " -T"} archivebox' if IN_DOCKER else 'archivebox')
prog_name = prog_name or (f"docker compose run{'' if IS_TTY else ' -T'} archivebox" if IN_DOCKER else "archivebox")
# stdin param allows passing input data from caller (used by __main__.py)
# currently not used by click-based CLI, but kept for backwards compatibility
try:
cli(args=args, prog_name=prog_name)
except KeyboardInterrupt:
print('\n\n[red][X] Got CTRL+C. Exiting...[/red]')
print("\n\n[red][X] Got CTRL+C. Exiting...[/red]")
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox add'
__package__ = "archivebox.cli"
__command__ = "archivebox add"
import sys
from pathlib import Path
@@ -14,6 +14,7 @@ from django.utils import timezone
from django.db.models import QuerySet
from archivebox.misc.util import enforce_types, docstring
from archivebox.misc.util import parse_filesize_to_bytes
from archivebox import CONSTANTS
from archivebox.config.common import ARCHIVING_CONFIG, SERVER_CONFIG
from archivebox.config.permissions import USER, HOSTNAME
@@ -29,34 +30,38 @@ def _collect_input_urls(args: tuple[str, ...]) -> list[str]:
urls: list[str] = []
for record in read_args_or_stdin(args):
url = record.get('url')
url = record.get("url")
if isinstance(url, str) and url:
urls.append(url)
urls_field = record.get('urls')
urls_field = record.get("urls")
if isinstance(urls_field, str):
for line in urls_field.splitlines():
line = line.strip()
if line and not line.startswith('#'):
if line and not line.startswith("#"):
urls.append(line)
return urls
@enforce_types
def add(urls: str | list[str],
depth: int | str=0,
tag: str='',
url_allowlist: str='',
url_denylist: str='',
parser: str="auto",
plugins: str="",
persona: str='Default',
overwrite: bool=False,
update: bool | None=None,
index_only: bool=False,
bg: bool=False,
created_by_id: int | None=None) -> tuple['Crawl', QuerySet['Snapshot']]:
def add(
urls: str | list[str],
depth: int | str = 0,
max_urls: int = 0,
max_size: int | str = 0,
tag: str = "",
url_allowlist: str = "",
url_denylist: str = "",
parser: str = "auto",
plugins: str = "",
persona: str = "Default",
overwrite: bool = False,
update: bool | None = None,
index_only: bool = False,
bg: bool = False,
created_by_id: int | None = None,
) -> tuple["Crawl", QuerySet["Snapshot"]]:
"""Add a new URL or list of URLs to your archive.
The flow is:
@@ -72,8 +77,15 @@ def add(urls: str | list[str],
from rich import print
depth = int(depth)
max_urls = int(max_urls or 0)
max_size = parse_filesize_to_bytes(max_size)
assert depth in (0, 1, 2, 3, 4), 'Depth must be 0-4'
if depth not in (0, 1, 2, 3, 4):
raise ValueError("Depth must be 0-4")
if max_urls < 0:
raise ValueError("max_urls must be >= 0")
if max_size < 0:
raise ValueError("max_size must be >= 0")
# import models once django is set up
from archivebox.core.models import Snapshot
@@ -91,47 +103,49 @@ def add(urls: str | list[str],
update = not ARCHIVING_CONFIG.ONLY_NEW
# 1. Save the provided URLs to sources/2024-11-05__23-59-59__cli_add.txt
sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__cli_add.txt'
sources_file = CONSTANTS.SOURCES_DIR / f"{timezone.now().strftime('%Y-%m-%d__%H-%M-%S')}__cli_add.txt"
sources_file.parent.mkdir(parents=True, exist_ok=True)
sources_file.write_text(urls if isinstance(urls, str) else '\n'.join(urls))
sources_file.write_text(urls if isinstance(urls, str) else "\n".join(urls))
# 2. Create a new Crawl with inline URLs
cli_args = [*sys.argv]
if cli_args[0].lower().endswith('archivebox'):
cli_args[0] = 'archivebox'
cmd_str = ' '.join(cli_args)
if cli_args[0].lower().endswith("archivebox"):
cli_args[0] = "archivebox"
cmd_str = " ".join(cli_args)
timestamp = timezone.now().strftime("%Y-%m-%d__%H-%M-%S")
# Read URLs directly into crawl
urls_content = sources_file.read_text()
persona_name = (persona or 'Default').strip() or 'Default'
plugins = plugins or str(get_config().get('PLUGINS') or '')
persona_name = (persona or "Default").strip() or "Default"
plugins = plugins or str(get_config().get("PLUGINS") or "")
persona_obj, _ = Persona.objects.get_or_create(name=persona_name)
persona_obj.ensure_dirs()
crawl = Crawl.objects.create(
urls=urls_content,
max_depth=depth,
max_urls=max_urls,
max_size=max_size,
tags_str=tag,
persona_id=persona_obj.id,
label=f'{USER}@{HOSTNAME} $ {cmd_str} [{timestamp}]',
label=f"{USER}@{HOSTNAME} $ {cmd_str} [{timestamp}]",
created_by_id=created_by_id,
config={
'ONLY_NEW': not update,
'INDEX_ONLY': index_only,
'OVERWRITE': overwrite,
'PLUGINS': plugins,
'DEFAULT_PERSONA': persona_name,
'PARSER': parser,
**({'URL_ALLOWLIST': url_allowlist} if url_allowlist else {}),
**({'URL_DENYLIST': url_denylist} if url_denylist else {}),
}
"ONLY_NEW": not update,
"INDEX_ONLY": index_only,
"OVERWRITE": overwrite,
"PLUGINS": plugins,
"DEFAULT_PERSONA": persona_name,
"PARSER": parser,
**({"URL_ALLOWLIST": url_allowlist} if url_allowlist else {}),
**({"URL_DENYLIST": url_denylist} if url_denylist else {}),
},
)
print(f'[green]\\[+] Created Crawl {crawl.id} with max_depth={depth}[/green]')
first_url = crawl.get_urls_list()[0] if crawl.get_urls_list() else ''
print(f' [dim]First URL: {first_url}[/dim]')
print(f"[green]\\[+] Created Crawl {crawl.id} with max_depth={depth}[/green]")
first_url = crawl.get_urls_list()[0] if crawl.get_urls_list() else ""
print(f" [dim]First URL: {first_url}[/dim]")
# 3. The CrawlMachine will create Snapshots from all URLs when started
# Parser extractors run on snapshots and discover more URLs
@@ -139,20 +153,21 @@ def add(urls: str | list[str],
if index_only:
# Just create the crawl but don't start processing
print('[yellow]\\[*] Index-only mode - crawl created but not started[/yellow]')
print("[yellow]\\[*] Index-only mode - crawl created but not started[/yellow]")
# Create snapshots for all URLs in the crawl
for url in crawl.get_urls_list():
snapshot, _ = Snapshot.objects.update_or_create(
crawl=crawl, url=url,
crawl=crawl,
url=url,
defaults={
'status': Snapshot.INITIAL_STATE,
'retry_at': timezone.now(),
'timestamp': str(timezone.now().timestamp()),
'depth': 0,
"status": Snapshot.INITIAL_STATE,
"retry_at": timezone.now(),
"timestamp": str(timezone.now().timestamp()),
"depth": 0,
},
)
if tag:
snapshot.save_tags(tag.split(','))
snapshot.save_tags(tag.split(","))
snapshot.ensure_crawl_symlink()
return crawl, crawl.snapshot_set.all()
@@ -168,10 +183,12 @@ def add(urls: str | list[str],
if bg:
# Background mode: just queue work and return (background runner via server will pick it up)
print('[yellow]\\[*] URLs queued. The background runner will process them (run `archivebox server` or `archivebox run --daemon` if not already running).[/yellow]')
print(
"[yellow]\\[*] URLs queued. The background runner will process them (run `archivebox server` or `archivebox run --daemon` if not already running).[/yellow]",
)
else:
# Foreground mode: run full crawl runner until all work is done
print('[green]\\[*] Starting crawl runner to process crawl...[/green]')
print("[green]\\[*] Starting crawl runner to process crawl...[/green]")
run_crawl(str(crawl.id))
# Print summary for foreground runs
@@ -179,7 +196,10 @@ def add(urls: str | list[str],
crawl.refresh_from_db()
snapshots_count = crawl.snapshot_set.count()
try:
total_bytes = sum(s.archive_size for s in crawl.snapshot_set.all())
from django.db.models import Count, Sum
totals = crawl.snapshot_set.aggregate(snapshot_count=Count("id"), total_bytes=Sum("archiveresult__output_size"))
total_bytes = int(totals["total_bytes"] or 0) if totals["snapshot_count"] else 0
except Exception:
total_bytes, _, _ = get_dir_size(crawl.output_dir)
total_size = printable_filesize(total_bytes)
@@ -197,23 +217,23 @@ def add(urls: str | list[str],
# Output dir relative to DATA_DIR
try:
rel_output = Path(crawl.output_dir).relative_to(CONSTANTS.DATA_DIR)
rel_output_str = f'./{rel_output}'
rel_output_str = f"./{rel_output}"
except Exception:
rel_output_str = str(crawl.output_dir)
bind_addr = SERVER_CONFIG.BIND_ADDR or '127.0.0.1:8000'
if bind_addr.startswith('http://') or bind_addr.startswith('https://'):
bind_addr = SERVER_CONFIG.BIND_ADDR or "127.0.0.1:8000"
if bind_addr.startswith("http://") or bind_addr.startswith("https://"):
base_url = bind_addr
else:
base_url = f'http://{bind_addr}'
admin_url = f'{base_url}/admin/crawls/crawl/{crawl.id}/change/'
base_url = f"http://{bind_addr}"
admin_url = f"{base_url}/admin/crawls/crawl/{crawl.id}/change/"
print('\n[bold]crawl output saved to:[/bold]')
print(f' {rel_output_str}')
print(f' {admin_url}')
print(f'\n[bold]total urls snapshotted:[/bold] {snapshots_count}')
print(f'[bold]total size:[/bold] {total_size}')
print(f'[bold]total time:[/bold] {duration_str}')
print("\n[bold]crawl output saved to:[/bold]")
print(f" {rel_output_str}")
print(f" {admin_url}")
print(f"\n[bold]total urls snapshotted:[/bold] {snapshots_count}")
print(f"[bold]total size:[/bold] {total_size}")
print(f"[bold]total time:[/bold] {duration_str}")
except Exception:
# Summary is best-effort; avoid failing the command if something goes wrong
pass
@@ -224,29 +244,43 @@ def add(urls: str | list[str],
@click.command()
@click.option('--depth', '-d', type=click.Choice([str(i) for i in range(5)]), default='0', help='Recursively archive linked pages up to N hops away')
@click.option('--tag', '-t', default='', help='Comma-separated list of tags to add to each snapshot e.g. tag1,tag2,tag3')
@click.option('--url-allowlist', '--domain-allowlist', default='', help='Comma-separated URL/domain allowlist for this crawl')
@click.option('--url-denylist', '--domain-denylist', default='', help='Comma-separated URL/domain denylist for this crawl')
@click.option('--parser', default='auto', help='Parser for reading input URLs (auto, txt, html, rss, json, jsonl, netscape, ...)')
@click.option('--plugins', '-p', default='', help='Comma-separated list of plugins to run e.g. title,favicon,screenshot,singlefile,...')
@click.option('--persona', default='Default', help='Authentication profile to use when archiving')
@click.option('--overwrite', '-F', is_flag=True, help='Overwrite existing data if URLs have been archived previously')
@click.option('--update', is_flag=True, default=None, help='Retry any previously skipped/failed URLs when re-adding them')
@click.option('--index-only', is_flag=True, help='Just add the URLs to the index without archiving them now')
@click.option('--bg', is_flag=True, help='Run archiving in background (queue work and return immediately)')
@click.argument('urls', nargs=-1, type=click.Path())
@click.option(
"--depth",
"-d",
type=click.Choice([str(i) for i in range(5)]),
default="0",
help="Recursively archive linked pages up to N hops away",
)
@click.option("--max-urls", type=int, default=0, help="Maximum number of URLs to snapshot for this crawl (0 = unlimited)")
@click.option("--max-size", default="0", help="Maximum total crawl size in bytes or units like 45mb / 1gb (0 = unlimited)")
@click.option("--tag", "-t", default="", help="Comma-separated list of tags to add to each snapshot e.g. tag1,tag2,tag3")
@click.option("--url-allowlist", "--domain-allowlist", default="", help="Comma-separated URL/domain allowlist for this crawl")
@click.option("--url-denylist", "--domain-denylist", default="", help="Comma-separated URL/domain denylist for this crawl")
@click.option("--parser", default="auto", help="Parser for reading input URLs (auto, txt, html, rss, json, jsonl, netscape, ...)")
@click.option("--plugins", "-p", default="", help="Comma-separated list of plugins to run e.g. title,favicon,screenshot,singlefile,...")
@click.option("--persona", default="Default", help="Authentication profile to use when archiving")
@click.option("--overwrite", "-F", is_flag=True, help="Overwrite existing data if URLs have been archived previously")
@click.option("--update", is_flag=True, default=None, help="Retry any previously skipped/failed URLs when re-adding them")
@click.option("--index-only", is_flag=True, help="Just add the URLs to the index without archiving them now")
@click.option("--bg", is_flag=True, help="Run archiving in background (queue work and return immediately)")
@click.argument("urls", nargs=-1, type=click.Path())
@docstring(add.__doc__)
def main(**kwargs):
"""Add a new URL or list of URLs to your archive"""
raw_urls = kwargs.pop('urls')
raw_urls = kwargs.pop("urls")
urls = _collect_input_urls(raw_urls)
if not urls:
raise click.UsageError('No URLs provided. Pass URLs as arguments or via stdin.')
raise click.UsageError("No URLs provided. Pass URLs as arguments or via stdin.")
if int(kwargs.get("max_urls") or 0) < 0:
raise click.BadParameter("max_urls must be 0 or a positive integer.", param_hint="--max-urls")
try:
kwargs["max_size"] = parse_filesize_to_bytes(kwargs.get("max_size"))
except ValueError as err:
raise click.BadParameter(str(err), param_hint="--max-size") from err
add(urls=urls, **kwargs)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -30,11 +30,10 @@ Examples:
archivebox archiveresult list --status=failed | archivebox run
"""
__package__ = 'archivebox.cli'
__command__ = 'archivebox archiveresult'
__package__ = "archivebox.cli"
__command__ = "archivebox archiveresult"
import sys
from typing import Optional
import rich_click as click
from rich import print as rprint
@@ -42,13 +41,13 @@ from rich import print as rprint
from archivebox.cli.cli_utils import apply_filters
def build_archiveresult_request(snapshot_id: str, plugin: str, hook_name: str = '', status: str = 'queued') -> dict:
def build_archiveresult_request(snapshot_id: str, plugin: str, hook_name: str = "", status: str = "queued") -> dict:
return {
'type': 'ArchiveResult',
'snapshot_id': str(snapshot_id),
'plugin': plugin,
'hook_name': hook_name,
'status': status,
"type": "ArchiveResult",
"snapshot_id": str(snapshot_id),
"plugin": plugin,
"hook_name": hook_name,
"status": status,
}
@@ -56,10 +55,11 @@ def build_archiveresult_request(snapshot_id: str, plugin: str, hook_name: str =
# CREATE
# =============================================================================
def create_archiveresults(
snapshot_id: Optional[str] = None,
plugin: Optional[str] = None,
status: str = 'queued',
snapshot_id: str | None = None,
plugin: str | None = None,
status: str = "queued",
) -> int:
"""
Create ArchiveResult request records for Snapshots.
@@ -86,13 +86,13 @@ def create_archiveresults(
snapshots = [Snapshot.objects.get(id=snapshot_id)]
pass_through_records = []
except Snapshot.DoesNotExist:
rprint(f'[red]Snapshot not found: {snapshot_id}[/red]', file=sys.stderr)
rprint(f"[red]Snapshot not found: {snapshot_id}[/red]", file=sys.stderr)
return 1
else:
# Read from stdin
records = list(read_stdin())
if not records:
rprint('[yellow]No Snapshot records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No Snapshot records provided via stdin[/yellow]", file=sys.stderr)
return 1
# Separate snapshot records from pass-through records
@@ -100,17 +100,17 @@ def create_archiveresults(
pass_through_records = []
for record in records:
record_type = record.get('type', '')
record_type = record.get("type", "")
if record_type == TYPE_SNAPSHOT:
# Pass through the Snapshot record itself
pass_through_records.append(record)
if record.get('id'):
snapshot_ids.append(record['id'])
if record.get("id"):
snapshot_ids.append(record["id"])
elif record_type == TYPE_ARCHIVERESULT:
# ArchiveResult records: pass through if they have an id
if record.get('id'):
if record.get("id"):
pass_through_records.append(record)
# If no id, we could create it, but for now just pass through
else:
@@ -120,9 +120,9 @@ def create_archiveresults(
# Other typed records (Crawl, Tag, etc): pass through
pass_through_records.append(record)
elif record.get('id'):
elif record.get("id"):
# Untyped record with id - assume it's a snapshot ID
snapshot_ids.append(record['id'])
snapshot_ids.append(record["id"])
# Output pass-through records first
if not is_tty:
@@ -131,15 +131,15 @@ def create_archiveresults(
if not snapshot_ids:
if pass_through_records:
rprint(f'[dim]Passed through {len(pass_through_records)} records, no new snapshots to process[/dim]', file=sys.stderr)
rprint(f"[dim]Passed through {len(pass_through_records)} records, no new snapshots to process[/dim]", file=sys.stderr)
return 0
rprint('[yellow]No valid Snapshot IDs in input[/yellow]', file=sys.stderr)
rprint("[yellow]No valid Snapshot IDs in input[/yellow]", file=sys.stderr)
return 1
snapshots = list(Snapshot.objects.filter(id__in=snapshot_ids))
if not snapshots:
rprint('[yellow]No matching snapshots found[/yellow]', file=sys.stderr)
rprint("[yellow]No matching snapshots found[/yellow]", file=sys.stderr)
return 0 if pass_through_records else 1
created_count = 0
@@ -150,7 +150,7 @@ def create_archiveresults(
created_count += 1
else:
config = get_config(crawl=snapshot.crawl, snapshot=snapshot)
hooks = discover_hooks('Snapshot', config=config)
hooks = discover_hooks("Snapshot", config=config)
for hook_path in hooks:
hook_name = hook_path.name
plugin_name = hook_path.parent.name
@@ -158,7 +158,7 @@ def create_archiveresults(
write_record(build_archiveresult_request(snapshot.id, plugin_name, hook_name=hook_name, status=status))
created_count += 1
rprint(f'[green]Created {created_count} archive result request records[/green]', file=sys.stderr)
rprint(f"[green]Created {created_count} archive result request records[/green]", file=sys.stderr)
return 0
@@ -166,11 +166,12 @@ def create_archiveresults(
# LIST
# =============================================================================
def list_archiveresults(
status: Optional[str] = None,
plugin: Optional[str] = None,
snapshot_id: Optional[str] = None,
limit: Optional[int] = None,
status: str | None = None,
plugin: str | None = None,
snapshot_id: str | None = None,
limit: int | None = None,
) -> int:
"""
List ArchiveResults as JSONL with optional filters.
@@ -183,13 +184,13 @@ def list_archiveresults(
is_tty = sys.stdout.isatty()
queryset = ArchiveResult.objects.all().order_by('-start_ts')
queryset = ArchiveResult.objects.all().order_by("-start_ts")
# Apply filters
filter_kwargs = {
'status': status,
'plugin': plugin,
'snapshot_id': snapshot_id,
"status": status,
"plugin": plugin,
"snapshot_id": snapshot_id,
}
queryset = apply_filters(queryset, filter_kwargs, limit=limit)
@@ -197,20 +198,22 @@ def list_archiveresults(
for result in queryset:
if is_tty:
status_color = {
'queued': 'yellow',
'started': 'blue',
'succeeded': 'green',
'failed': 'red',
'skipped': 'dim',
'noresults': 'dim',
'backoff': 'magenta',
}.get(result.status, 'dim')
rprint(f'[{status_color}]{result.status:10}[/{status_color}] {result.plugin:15} [dim]{result.id}[/dim] {result.snapshot.url[:40]}')
"queued": "yellow",
"started": "blue",
"succeeded": "green",
"failed": "red",
"skipped": "dim",
"noresults": "dim",
"backoff": "magenta",
}.get(result.status, "dim")
rprint(
f"[{status_color}]{result.status:10}[/{status_color}] {result.plugin:15} [dim]{result.id}[/dim] {result.snapshot.url[:40]}",
)
else:
write_record(result.to_json())
count += 1
rprint(f'[dim]Listed {count} archive results[/dim]', file=sys.stderr)
rprint(f"[dim]Listed {count} archive results[/dim]", file=sys.stderr)
return 0
@@ -218,8 +221,9 @@ def list_archiveresults(
# UPDATE
# =============================================================================
def update_archiveresults(
status: Optional[str] = None,
status: str | None = None,
) -> int:
"""
Update ArchiveResults from stdin JSONL.
@@ -238,12 +242,12 @@ def update_archiveresults(
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
updated_count = 0
for record in records:
result_id = record.get('id')
result_id = record.get("id")
if not result_id:
continue
@@ -261,10 +265,10 @@ def update_archiveresults(
write_record(result.to_json())
except ArchiveResult.DoesNotExist:
rprint(f'[yellow]ArchiveResult not found: {result_id}[/yellow]', file=sys.stderr)
rprint(f"[yellow]ArchiveResult not found: {result_id}[/yellow]", file=sys.stderr)
continue
rprint(f'[green]Updated {updated_count} archive results[/green]', file=sys.stderr)
rprint(f"[green]Updated {updated_count} archive results[/green]", file=sys.stderr)
return 0
@@ -272,6 +276,7 @@ def update_archiveresults(
# DELETE
# =============================================================================
def delete_archiveresults(yes: bool = False, dry_run: bool = False) -> int:
"""
Delete ArchiveResults from stdin JSONL.
@@ -287,37 +292,37 @@ def delete_archiveresults(yes: bool = False, dry_run: bool = False) -> int:
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
result_ids = [r.get('id') for r in records if r.get('id')]
result_ids = [r.get("id") for r in records if r.get("id")]
if not result_ids:
rprint('[yellow]No valid archive result IDs in input[/yellow]', file=sys.stderr)
rprint("[yellow]No valid archive result IDs in input[/yellow]", file=sys.stderr)
return 1
results = ArchiveResult.objects.filter(id__in=result_ids)
count = results.count()
if count == 0:
rprint('[yellow]No matching archive results found[/yellow]', file=sys.stderr)
rprint("[yellow]No matching archive results found[/yellow]", file=sys.stderr)
return 0
if dry_run:
rprint(f'[yellow]Would delete {count} archive results (dry run)[/yellow]', file=sys.stderr)
rprint(f"[yellow]Would delete {count} archive results (dry run)[/yellow]", file=sys.stderr)
for result in results[:10]:
rprint(f' [dim]{result.id}[/dim] {result.plugin} {result.snapshot.url[:40]}', file=sys.stderr)
rprint(f" [dim]{result.id}[/dim] {result.plugin} {result.snapshot.url[:40]}", file=sys.stderr)
if count > 10:
rprint(f' ... and {count - 10} more', file=sys.stderr)
rprint(f" ... and {count - 10} more", file=sys.stderr)
return 0
if not yes:
rprint('[red]Use --yes to confirm deletion[/red]', file=sys.stderr)
rprint("[red]Use --yes to confirm deletion[/red]", file=sys.stderr)
return 1
# Perform deletion
deleted_count, _ = results.delete()
rprint(f'[green]Deleted {deleted_count} archive results[/green]', file=sys.stderr)
rprint(f"[green]Deleted {deleted_count} archive results[/green]", file=sys.stderr)
return 0
@@ -325,51 +330,58 @@ def delete_archiveresults(yes: bool = False, dry_run: bool = False) -> int:
# CLI Commands
# =============================================================================
@click.group()
def main():
"""Manage ArchiveResult records (plugin extraction results)."""
pass
@main.command('create')
@click.option('--snapshot-id', help='Snapshot ID to create results for')
@click.option('--plugin', '-p', help='Plugin name (e.g., screenshot, singlefile)')
@click.option('--status', '-s', default='queued', help='Initial status (default: queued)')
def create_cmd(snapshot_id: Optional[str], plugin: Optional[str], status: str):
@main.command("create")
@click.option("--snapshot-id", help="Snapshot ID to create results for")
@click.option("--plugin", "-p", help="Plugin name (e.g., screenshot, singlefile)")
@click.option("--status", "-s", default="queued", help="Initial status (default: queued)")
def create_cmd(snapshot_id: str | None, plugin: str | None, status: str):
"""Create ArchiveResults for Snapshots from stdin JSONL."""
sys.exit(create_archiveresults(snapshot_id=snapshot_id, plugin=plugin, status=status))
@main.command('list')
@click.option('--status', '-s', help='Filter by status (queued, started, succeeded, failed, skipped)')
@click.option('--plugin', '-p', help='Filter by plugin name')
@click.option('--snapshot-id', help='Filter by snapshot ID')
@click.option('--limit', '-n', type=int, help='Limit number of results')
def list_cmd(status: Optional[str], plugin: Optional[str],
snapshot_id: Optional[str], limit: Optional[int]):
@main.command("list")
@click.option("--status", "-s", help="Filter by status (queued, started, succeeded, failed, skipped)")
@click.option("--plugin", "-p", help="Filter by plugin name")
@click.option("--snapshot-id", help="Filter by snapshot ID")
@click.option("--limit", "-n", type=int, help="Limit number of results")
def list_cmd(
status: str | None,
plugin: str | None,
snapshot_id: str | None,
limit: int | None,
):
"""List ArchiveResults as JSONL."""
sys.exit(list_archiveresults(
status=status,
plugin=plugin,
snapshot_id=snapshot_id,
limit=limit,
))
sys.exit(
list_archiveresults(
status=status,
plugin=plugin,
snapshot_id=snapshot_id,
limit=limit,
),
)
@main.command('update')
@click.option('--status', '-s', help='Set status')
def update_cmd(status: Optional[str]):
@main.command("update")
@click.option("--status", "-s", help="Set status")
def update_cmd(status: str | None):
"""Update ArchiveResults from stdin JSONL."""
sys.exit(update_archiveresults(status=status))
@main.command('delete')
@click.option('--yes', '-y', is_flag=True, help='Confirm deletion')
@click.option('--dry-run', is_flag=True, help='Show what would be deleted')
@main.command("delete")
@click.option("--yes", "-y", is_flag=True, help="Confirm deletion")
@click.option("--dry-run", is_flag=True, help="Show what would be deleted")
def delete_cmd(yes: bool, dry_run: bool):
"""Delete ArchiveResults from stdin JSONL."""
sys.exit(delete_archiveresults(yes=yes, dry_run=dry_run))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -25,11 +25,10 @@ Examples:
archivebox binary list --name=chrome | archivebox binary delete --yes
"""
__package__ = 'archivebox.cli'
__command__ = 'archivebox binary'
__package__ = "archivebox.cli"
__command__ = "archivebox binary"
import sys
from typing import Optional
import rich_click as click
from rich import print as rprint
@@ -41,10 +40,11 @@ from archivebox.cli.cli_utils import apply_filters
# CREATE
# =============================================================================
def create_binary(
name: str,
abspath: str,
version: str = '',
version: str = "",
) -> int:
"""
Create/register a Binary.
@@ -59,7 +59,7 @@ def create_binary(
is_tty = sys.stdout.isatty()
if not name or not abspath:
rprint('[red]Both --name and --abspath are required[/red]', file=sys.stderr)
rprint("[red]Both --name and --abspath are required[/red]", file=sys.stderr)
return 1
try:
@@ -76,28 +76,30 @@ def create_binary(
# Mirror the Binary model lifecycle used elsewhere in the system so CLI
# records are owned by the current machine and can be safely piped into
# `archivebox run` without creating invalid rows missing machine_id.
binary = Binary.from_json({
'name': name,
'abspath': abspath,
'version': version,
'binproviders': 'env',
'binprovider': 'env',
})
binary = Binary.from_json(
{
"name": name,
"abspath": abspath,
"version": version,
"binproviders": "env",
"binprovider": "env",
},
)
if binary is None:
raise ValueError('failed to create binary record')
raise ValueError("failed to create binary record")
if not is_tty:
write_record(binary.to_json())
if created:
rprint(f'[green]Created binary: {name} at {abspath}[/green]', file=sys.stderr)
rprint(f"[green]Created binary: {name} at {abspath}[/green]", file=sys.stderr)
else:
rprint(f'[dim]Binary already exists: {name} at {abspath}[/dim]', file=sys.stderr)
rprint(f"[dim]Binary already exists: {name} at {abspath}[/dim]", file=sys.stderr)
return 0
except Exception as e:
rprint(f'[red]Error creating binary: {e}[/red]', file=sys.stderr)
rprint(f"[red]Error creating binary: {e}[/red]", file=sys.stderr)
return 1
@@ -105,11 +107,12 @@ def create_binary(
# LIST
# =============================================================================
def list_binaries(
name: Optional[str] = None,
abspath__icontains: Optional[str] = None,
version__icontains: Optional[str] = None,
limit: Optional[int] = None,
name: str | None = None,
abspath__icontains: str | None = None,
version__icontains: str | None = None,
limit: int | None = None,
) -> int:
"""
List Binaries as JSONL with optional filters.
@@ -122,25 +125,25 @@ def list_binaries(
is_tty = sys.stdout.isatty()
queryset = Binary.objects.all().order_by('name', '-modified_at', '-created_at')
queryset = Binary.objects.all().order_by("name", "-modified_at", "-created_at")
# Apply filters
filter_kwargs = {
'name': name,
'abspath__icontains': abspath__icontains,
'version__icontains': version__icontains,
"name": name,
"abspath__icontains": abspath__icontains,
"version__icontains": version__icontains,
}
queryset = apply_filters(queryset, filter_kwargs, limit=limit)
count = 0
for binary in queryset:
if is_tty:
rprint(f'[cyan]{binary.name:20}[/cyan] [dim]{binary.version:15}[/dim] {binary.abspath}')
rprint(f"[cyan]{binary.name:20}[/cyan] [dim]{binary.version:15}[/dim] {binary.abspath}")
else:
write_record(binary.to_json())
count += 1
rprint(f'[dim]Listed {count} binaries[/dim]', file=sys.stderr)
rprint(f"[dim]Listed {count} binaries[/dim]", file=sys.stderr)
return 0
@@ -148,9 +151,10 @@ def list_binaries(
# UPDATE
# =============================================================================
def update_binaries(
version: Optional[str] = None,
abspath: Optional[str] = None,
version: str | None = None,
abspath: str | None = None,
) -> int:
"""
Update Binaries from stdin JSONL.
@@ -169,12 +173,12 @@ def update_binaries(
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
updated_count = 0
for record in records:
binary_id = record.get('id')
binary_id = record.get("id")
if not binary_id:
continue
@@ -194,10 +198,10 @@ def update_binaries(
write_record(binary.to_json())
except Binary.DoesNotExist:
rprint(f'[yellow]Binary not found: {binary_id}[/yellow]', file=sys.stderr)
rprint(f"[yellow]Binary not found: {binary_id}[/yellow]", file=sys.stderr)
continue
rprint(f'[green]Updated {updated_count} binaries[/green]', file=sys.stderr)
rprint(f"[green]Updated {updated_count} binaries[/green]", file=sys.stderr)
return 0
@@ -205,6 +209,7 @@ def update_binaries(
# DELETE
# =============================================================================
def delete_binaries(yes: bool = False, dry_run: bool = False) -> int:
"""
Delete Binaries from stdin JSONL.
@@ -220,35 +225,35 @@ def delete_binaries(yes: bool = False, dry_run: bool = False) -> int:
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
binary_ids = [r.get('id') for r in records if r.get('id')]
binary_ids = [r.get("id") for r in records if r.get("id")]
if not binary_ids:
rprint('[yellow]No valid binary IDs in input[/yellow]', file=sys.stderr)
rprint("[yellow]No valid binary IDs in input[/yellow]", file=sys.stderr)
return 1
binaries = Binary.objects.filter(id__in=binary_ids)
count = binaries.count()
if count == 0:
rprint('[yellow]No matching binaries found[/yellow]', file=sys.stderr)
rprint("[yellow]No matching binaries found[/yellow]", file=sys.stderr)
return 0
if dry_run:
rprint(f'[yellow]Would delete {count} binaries (dry run)[/yellow]', file=sys.stderr)
rprint(f"[yellow]Would delete {count} binaries (dry run)[/yellow]", file=sys.stderr)
for binary in binaries:
rprint(f' {binary.name} {binary.abspath}', file=sys.stderr)
rprint(f" {binary.name} {binary.abspath}", file=sys.stderr)
return 0
if not yes:
rprint('[red]Use --yes to confirm deletion[/red]', file=sys.stderr)
rprint("[red]Use --yes to confirm deletion[/red]", file=sys.stderr)
return 1
# Perform deletion
deleted_count, _ = binaries.delete()
rprint(f'[green]Deleted {deleted_count} binaries[/green]', file=sys.stderr)
rprint(f"[green]Deleted {deleted_count} binaries[/green]", file=sys.stderr)
return 0
@@ -256,52 +261,59 @@ def delete_binaries(yes: bool = False, dry_run: bool = False) -> int:
# CLI Commands
# =============================================================================
@click.group()
def main():
"""Manage Binary records (detected executables)."""
pass
@main.command('create')
@click.option('--name', '-n', required=True, help='Binary name (e.g., chrome, wget)')
@click.option('--abspath', '-p', required=True, help='Absolute path to binary')
@click.option('--version', '-v', default='', help='Binary version')
@main.command("create")
@click.option("--name", "-n", required=True, help="Binary name (e.g., chrome, wget)")
@click.option("--abspath", "-p", required=True, help="Absolute path to binary")
@click.option("--version", "-v", default="", help="Binary version")
def create_cmd(name: str, abspath: str, version: str):
"""Create/register a Binary."""
sys.exit(create_binary(name=name, abspath=abspath, version=version))
@main.command('list')
@click.option('--name', '-n', help='Filter by name')
@click.option('--abspath__icontains', help='Filter by path contains')
@click.option('--version__icontains', help='Filter by version contains')
@click.option('--limit', type=int, help='Limit number of results')
def list_cmd(name: Optional[str], abspath__icontains: Optional[str],
version__icontains: Optional[str], limit: Optional[int]):
@main.command("list")
@click.option("--name", "-n", help="Filter by name")
@click.option("--abspath__icontains", help="Filter by path contains")
@click.option("--version__icontains", help="Filter by version contains")
@click.option("--limit", type=int, help="Limit number of results")
def list_cmd(
name: str | None,
abspath__icontains: str | None,
version__icontains: str | None,
limit: int | None,
):
"""List Binaries as JSONL."""
sys.exit(list_binaries(
name=name,
abspath__icontains=abspath__icontains,
version__icontains=version__icontains,
limit=limit,
))
sys.exit(
list_binaries(
name=name,
abspath__icontains=abspath__icontains,
version__icontains=version__icontains,
limit=limit,
),
)
@main.command('update')
@click.option('--version', '-v', help='Set version')
@click.option('--abspath', '-p', help='Set path')
def update_cmd(version: Optional[str], abspath: Optional[str]):
@main.command("update")
@click.option("--version", "-v", help="Set version")
@click.option("--abspath", "-p", help="Set path")
def update_cmd(version: str | None, abspath: str | None):
"""Update Binaries from stdin JSONL."""
sys.exit(update_binaries(version=version, abspath=abspath))
@main.command('delete')
@click.option('--yes', '-y', is_flag=True, help='Confirm deletion')
@click.option('--dry-run', is_flag=True, help='Show what would be deleted')
@main.command("delete")
@click.option("--yes", "-y", is_flag=True, help="Confirm deletion")
@click.option("--dry-run", is_flag=True, help="Show what would be deleted")
def delete_cmd(yes: bool, dry_run: bool):
"""Delete Binaries from stdin JSONL."""
sys.exit(delete_binaries(yes=yes, dry_run=dry_run))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__package__ = "archivebox.cli"
import sys
import rich_click as click
@@ -12,12 +12,14 @@ from archivebox.misc.toml_util import CustomTOMLEncoder
@enforce_types
def config(*keys,
get: bool=False,
set: bool=False,
search: bool=False,
reset: bool=False,
**kwargs) -> None:
def config(
*keys,
get: bool = False,
set: bool = False,
search: bool = False,
reset: bool = False,
**kwargs,
) -> None:
"""Get and set your ArchiveBox project configuration values"""
from archivebox.misc.checks import check_data_folder
@@ -29,8 +31,8 @@ def config(*keys,
FLAT_CONFIG = get_flat_config()
CONFIGS = get_all_configs()
config_options: list[str] = list(kwargs.pop('key=value', []) or keys or [f'{key}={val}' for key, val in kwargs.items()])
config_options: list[str] = list(kwargs.pop("key=value", []) or keys or [f"{key}={val}" for key, val in kwargs.items()])
no_args = not (get or set or reset or config_options)
matching_config = {}
@@ -39,19 +41,19 @@ def config(*keys,
config_options = [get_real_name(key) for key in config_options]
matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
for config_section in CONFIGS.values():
aliases = getattr(config_section, 'aliases', {})
aliases = getattr(config_section, "aliases", {})
for search_key in config_options:
# search all aliases in the section
for alias_key, key in aliases.items():
if search_key.lower() in alias_key.lower():
matching_config[key] = dict(config_section)[key]
# search all keys and values in the section
for existing_key, value in dict(config_section).items():
if search_key.lower() in existing_key.lower() or search_key.lower() in str(value).lower():
matching_config[existing_key] = value
print(printable_config(matching_config))
raise SystemExit(not matching_config)
@@ -61,23 +63,23 @@ def config(*keys,
matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
failed_config = [key for key in config_options if key not in FLAT_CONFIG]
if failed_config:
print('\n[red][X] These options failed to get[/red]')
print(' {}'.format('\n '.join(config_options)))
print("\n[red][X] These options failed to get[/red]")
print(" {}".format("\n ".join(config_options)))
raise SystemExit(1)
else:
matching_config = FLAT_CONFIG
# Display core config sections
for config_section in CONFIGS.values():
section_header = getattr(config_section, 'toml_section_header', '')
section_header = getattr(config_section, "toml_section_header", "")
if isinstance(section_header, str) and section_header:
print(f'[grey53]\\[{section_header}][/grey53]')
print(f"[grey53]\\[{section_header}][/grey53]")
else:
print('[grey53]\\[CONSTANTS] # (read-only)[/grey53]')
print("[grey53]\\[CONSTANTS] # (read-only)[/grey53]")
kv_in_section = {key: val for key, val in dict(config_section).items() if key in matching_config}
print(benedict(kv_in_section).to_toml(encoder=CustomTOMLEncoder()).strip().replace('\n\n', '\n'))
print('[grey53]################################################################[/grey53]')
print(benedict(kv_in_section).to_toml(encoder=CustomTOMLEncoder()).strip().replace("\n\n", "\n"))
print("[grey53]################################################################[/grey53]")
# Display plugin config section
from archivebox.hooks import discover_plugin_configs
@@ -87,17 +89,17 @@ def config(*keys,
# Collect all plugin config keys
for plugin_name, schema in plugin_configs.items():
if 'properties' not in schema:
if "properties" not in schema:
continue
for key in schema['properties'].keys():
for key in schema["properties"].keys():
if key in matching_config:
plugin_keys[key] = matching_config[key]
# Display all plugin config in single [PLUGINS] section
if plugin_keys:
print('[grey53]\\[PLUGINS][/grey53]')
print(benedict(plugin_keys).to_toml(encoder=CustomTOMLEncoder()).strip().replace('\n\n', '\n'))
print('[grey53]################################################################[/grey53]')
print("[grey53]\\[PLUGINS][/grey53]")
print(benedict(plugin_keys).to_toml(encoder=CustomTOMLEncoder()).strip().replace("\n\n", "\n"))
print("[grey53]################################################################[/grey53]")
raise SystemExit(not matching_config)
@@ -105,18 +107,20 @@ def config(*keys,
new_config = {}
failed_options = []
for line in config_options:
if line.startswith('#') or not line.strip():
if line.startswith("#") or not line.strip():
continue
if '=' not in line:
print('[red][X] Config KEY=VALUE must have an = sign in it[/red]')
print(f' {line}')
if "=" not in line:
print("[red][X] Config KEY=VALUE must have an = sign in it[/red]")
print(f" {line}")
raise SystemExit(2)
raw_key, val = line.split('=', 1)
raw_key, val = line.split("=", 1)
raw_key = raw_key.upper().strip()
key = get_real_name(raw_key)
if key != raw_key:
print(f'[yellow][i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.[/yellow]')
print(
f"[yellow][i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.[/yellow]",
)
if key in FLAT_CONFIG:
new_config[key] = val.strip()
@@ -136,38 +140,38 @@ def config(*keys,
if side_effect_changes:
print(file=sys.stderr)
print('[yellow][i] Note: This change also affected these other options that depended on it:[/yellow]', file=sys.stderr)
print(' {}'.format(printable_config(side_effect_changes, prefix=' ')), file=sys.stderr)
print("[yellow][i] Note: This change also affected these other options that depended on it:[/yellow]", file=sys.stderr)
print(" {}".format(printable_config(side_effect_changes, prefix=" ")), file=sys.stderr)
if failed_options:
print()
print('[red][X] These options failed to set (check for typos):[/red]')
print(' {}'.format('\n '.join(failed_options)))
print("[red][X] These options failed to set (check for typos):[/red]")
print(" {}".format("\n ".join(failed_options)))
raise SystemExit(1)
elif reset:
print('[red][X] This command is not implemented yet.[/red]')
print(' Please manually remove the relevant lines from your config file:')
print("[red][X] This command is not implemented yet.[/red]")
print(" Please manually remove the relevant lines from your config file:")
raise SystemExit(2)
else:
print('[red][X] You must pass either --get or --set, or no arguments to get the whole config.[/red]')
print(' archivebox config')
print(' archivebox config --get SOME_KEY')
print(' archivebox config --set SOME_KEY=SOME_VALUE')
print("[red][X] You must pass either --get or --set, or no arguments to get the whole config.[/red]")
print(" archivebox config")
print(" archivebox config --get SOME_KEY")
print(" archivebox config --set SOME_KEY=SOME_VALUE")
raise SystemExit(2)
@click.command()
@click.option('--search', is_flag=True, help='Search config KEYs, VALUEs, and ALIASES for the given term')
@click.option('--get', is_flag=True, help='Get the value for the given config KEYs')
@click.option('--set', is_flag=True, help='Set the given KEY=VALUE config values')
@click.option('--reset', is_flag=True, help='Reset the given KEY config values to their defaults')
@click.argument('KEY=VALUE', nargs=-1, type=str)
@click.option("--search", is_flag=True, help="Search config KEYs, VALUEs, and ALIASES for the given term")
@click.option("--get", is_flag=True, help="Get the value for the given config KEYs")
@click.option("--set", is_flag=True, help="Set the given KEY=VALUE config values")
@click.option("--reset", is_flag=True, help="Reset the given KEY config values to their defaults")
@click.argument("KEY=VALUE", nargs=-1, type=str)
@docstring(config.__doc__)
def main(**kwargs) -> None:
config(**kwargs)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -30,11 +30,11 @@ Examples:
archivebox crawl create https://example.com | archivebox snapshot create | archivebox run
"""
__package__ = 'archivebox.cli'
__command__ = 'archivebox crawl'
__package__ = "archivebox.cli"
__command__ = "archivebox crawl"
import sys
from typing import Optional, Iterable
from collections.abc import Iterable
import rich_click as click
from rich import print as rprint
@@ -46,12 +46,13 @@ from archivebox.cli.cli_utils import apply_filters
# CREATE
# =============================================================================
def create_crawl(
urls: Iterable[str],
depth: int = 0,
tag: str = '',
status: str = 'queued',
created_by_id: Optional[int] = None,
tag: str = "",
status: str = "queued",
created_by_id: int | None = None,
) -> int:
"""
Create a Crawl job from URLs.
@@ -74,7 +75,7 @@ def create_crawl(
records = list(read_args_or_stdin(urls))
if not records:
rprint('[yellow]No URLs provided. Pass URLs as arguments or via stdin.[/yellow]', file=sys.stderr)
rprint("[yellow]No URLs provided. Pass URLs as arguments or via stdin.[/yellow]", file=sys.stderr)
return 1
# Separate pass-through records from URL records
@@ -82,29 +83,29 @@ def create_crawl(
pass_through_records = []
for record in records:
record_type = record.get('type', '')
record_type = record.get("type", "")
# Pass-through: output records that aren't URL/Crawl types
if record_type and record_type != TYPE_CRAWL and not record.get('url') and not record.get('urls'):
if record_type and record_type != TYPE_CRAWL and not record.get("url") and not record.get("urls"):
pass_through_records.append(record)
continue
# Handle existing Crawl records (just pass through with id)
if record_type == TYPE_CRAWL and record.get('id'):
if record_type == TYPE_CRAWL and record.get("id"):
pass_through_records.append(record)
continue
# Collect URLs
url = record.get('url')
url = record.get("url")
if url:
url_list.append(url)
# Handle 'urls' field (newline-separated)
urls_field = record.get('urls')
urls_field = record.get("urls")
if urls_field:
for line in urls_field.split('\n'):
for line in urls_field.split("\n"):
line = line.strip()
if line and not line.startswith('#'):
if line and not line.startswith("#"):
url_list.append(line)
# Output pass-through records first
@@ -115,44 +116,44 @@ def create_crawl(
if not url_list:
if pass_through_records:
# If we had pass-through records but no URLs, that's OK
rprint(f'[dim]Passed through {len(pass_through_records)} records, no new URLs[/dim]', file=sys.stderr)
rprint(f"[dim]Passed through {len(pass_through_records)} records, no new URLs[/dim]", file=sys.stderr)
return 0
rprint('[red]No valid URLs found[/red]', file=sys.stderr)
rprint("[red]No valid URLs found[/red]", file=sys.stderr)
return 1
try:
# Build crawl record with all URLs as newline-separated string
crawl_record = {
'urls': '\n'.join(url_list),
'max_depth': depth,
'tags_str': tag,
'status': status,
'label': '',
"urls": "\n".join(url_list),
"max_depth": depth,
"tags_str": tag,
"status": status,
"label": "",
}
crawl = Crawl.from_json(crawl_record, overrides={'created_by_id': created_by_id})
crawl = Crawl.from_json(crawl_record, overrides={"created_by_id": created_by_id})
if not crawl:
rprint('[red]Failed to create crawl[/red]', file=sys.stderr)
rprint("[red]Failed to create crawl[/red]", file=sys.stderr)
return 1
# Output JSONL record (only when piped)
if not is_tty:
write_record(crawl.to_json())
rprint(f'[green]Created crawl with {len(url_list)} URLs[/green]', file=sys.stderr)
rprint(f"[green]Created crawl with {len(url_list)} URLs[/green]", file=sys.stderr)
# If TTY, show human-readable output
if is_tty:
rprint(f' [dim]{crawl.id}[/dim]', file=sys.stderr)
rprint(f" [dim]{crawl.id}[/dim]", file=sys.stderr)
for url in url_list[:5]: # Show first 5 URLs
rprint(f' {url[:70]}', file=sys.stderr)
rprint(f" {url[:70]}", file=sys.stderr)
if len(url_list) > 5:
rprint(f' ... and {len(url_list) - 5} more', file=sys.stderr)
rprint(f" ... and {len(url_list) - 5} more", file=sys.stderr)
return 0
except Exception as e:
rprint(f'[red]Error creating crawl: {e}[/red]', file=sys.stderr)
rprint(f"[red]Error creating crawl: {e}[/red]", file=sys.stderr)
return 1
@@ -160,11 +161,12 @@ def create_crawl(
# LIST
# =============================================================================
def list_crawls(
status: Optional[str] = None,
urls__icontains: Optional[str] = None,
max_depth: Optional[int] = None,
limit: Optional[int] = None,
status: str | None = None,
urls__icontains: str | None = None,
max_depth: int | None = None,
limit: int | None = None,
) -> int:
"""
List Crawls as JSONL with optional filters.
@@ -177,13 +179,13 @@ def list_crawls(
is_tty = sys.stdout.isatty()
queryset = Crawl.objects.all().order_by('-created_at')
queryset = Crawl.objects.all().order_by("-created_at")
# Apply filters
filter_kwargs = {
'status': status,
'urls__icontains': urls__icontains,
'max_depth': max_depth,
"status": status,
"urls__icontains": urls__icontains,
"max_depth": max_depth,
}
queryset = apply_filters(queryset, filter_kwargs, limit=limit)
@@ -191,17 +193,17 @@ def list_crawls(
for crawl in queryset:
if is_tty:
status_color = {
'queued': 'yellow',
'started': 'blue',
'sealed': 'green',
}.get(crawl.status, 'dim')
url_preview = crawl.urls[:50].replace('\n', ' ')
rprint(f'[{status_color}]{crawl.status:8}[/{status_color}] [dim]{crawl.id}[/dim] {url_preview}...')
"queued": "yellow",
"started": "blue",
"sealed": "green",
}.get(crawl.status, "dim")
url_preview = crawl.urls[:50].replace("\n", " ")
rprint(f"[{status_color}]{crawl.status:8}[/{status_color}] [dim]{crawl.id}[/dim] {url_preview}...")
else:
write_record(crawl.to_json())
count += 1
rprint(f'[dim]Listed {count} crawls[/dim]', file=sys.stderr)
rprint(f"[dim]Listed {count} crawls[/dim]", file=sys.stderr)
return 0
@@ -209,9 +211,10 @@ def list_crawls(
# UPDATE
# =============================================================================
def update_crawls(
status: Optional[str] = None,
max_depth: Optional[int] = None,
status: str | None = None,
max_depth: int | None = None,
) -> int:
"""
Update Crawls from stdin JSONL.
@@ -232,12 +235,12 @@ def update_crawls(
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
updated_count = 0
for record in records:
crawl_id = record.get('id')
crawl_id = record.get("id")
if not crawl_id:
continue
@@ -258,10 +261,10 @@ def update_crawls(
write_record(crawl.to_json())
except Crawl.DoesNotExist:
rprint(f'[yellow]Crawl not found: {crawl_id}[/yellow]', file=sys.stderr)
rprint(f"[yellow]Crawl not found: {crawl_id}[/yellow]", file=sys.stderr)
continue
rprint(f'[green]Updated {updated_count} crawls[/green]', file=sys.stderr)
rprint(f"[green]Updated {updated_count} crawls[/green]", file=sys.stderr)
return 0
@@ -269,6 +272,7 @@ def update_crawls(
# DELETE
# =============================================================================
def delete_crawls(yes: bool = False, dry_run: bool = False) -> int:
"""
Delete Crawls from stdin JSONL.
@@ -284,36 +288,36 @@ def delete_crawls(yes: bool = False, dry_run: bool = False) -> int:
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
crawl_ids = [r.get('id') for r in records if r.get('id')]
crawl_ids = [r.get("id") for r in records if r.get("id")]
if not crawl_ids:
rprint('[yellow]No valid crawl IDs in input[/yellow]', file=sys.stderr)
rprint("[yellow]No valid crawl IDs in input[/yellow]", file=sys.stderr)
return 1
crawls = Crawl.objects.filter(id__in=crawl_ids)
count = crawls.count()
if count == 0:
rprint('[yellow]No matching crawls found[/yellow]', file=sys.stderr)
rprint("[yellow]No matching crawls found[/yellow]", file=sys.stderr)
return 0
if dry_run:
rprint(f'[yellow]Would delete {count} crawls (dry run)[/yellow]', file=sys.stderr)
rprint(f"[yellow]Would delete {count} crawls (dry run)[/yellow]", file=sys.stderr)
for crawl in crawls:
url_preview = crawl.urls[:50].replace('\n', ' ')
rprint(f' [dim]{crawl.id}[/dim] {url_preview}...', file=sys.stderr)
url_preview = crawl.urls[:50].replace("\n", " ")
rprint(f" [dim]{crawl.id}[/dim] {url_preview}...", file=sys.stderr)
return 0
if not yes:
rprint('[red]Use --yes to confirm deletion[/red]', file=sys.stderr)
rprint("[red]Use --yes to confirm deletion[/red]", file=sys.stderr)
return 1
# Perform deletion
deleted_count, _ = crawls.delete()
rprint(f'[green]Deleted {deleted_count} crawls[/green]', file=sys.stderr)
rprint(f"[green]Deleted {deleted_count} crawls[/green]", file=sys.stderr)
return 0
@@ -321,53 +325,60 @@ def delete_crawls(yes: bool = False, dry_run: bool = False) -> int:
# CLI Commands
# =============================================================================
@click.group()
def main():
"""Manage Crawl records."""
pass
@main.command('create')
@click.argument('urls', nargs=-1)
@click.option('--depth', '-d', type=int, default=0, help='Max crawl depth (default: 0)')
@click.option('--tag', '-t', default='', help='Comma-separated tags to add')
@click.option('--status', '-s', default='queued', help='Initial status (default: queued)')
@main.command("create")
@click.argument("urls", nargs=-1)
@click.option("--depth", "-d", type=int, default=0, help="Max crawl depth (default: 0)")
@click.option("--tag", "-t", default="", help="Comma-separated tags to add")
@click.option("--status", "-s", default="queued", help="Initial status (default: queued)")
def create_cmd(urls: tuple, depth: int, tag: str, status: str):
"""Create a Crawl job from URLs or stdin."""
sys.exit(create_crawl(urls, depth=depth, tag=tag, status=status))
@main.command('list')
@click.option('--status', '-s', help='Filter by status (queued, started, sealed)')
@click.option('--urls__icontains', help='Filter by URLs contains')
@click.option('--max-depth', type=int, help='Filter by max depth')
@click.option('--limit', '-n', type=int, help='Limit number of results')
def list_cmd(status: Optional[str], urls__icontains: Optional[str],
max_depth: Optional[int], limit: Optional[int]):
@main.command("list")
@click.option("--status", "-s", help="Filter by status (queued, started, sealed)")
@click.option("--urls__icontains", help="Filter by URLs contains")
@click.option("--max-depth", type=int, help="Filter by max depth")
@click.option("--limit", "-n", type=int, help="Limit number of results")
def list_cmd(
status: str | None,
urls__icontains: str | None,
max_depth: int | None,
limit: int | None,
):
"""List Crawls as JSONL."""
sys.exit(list_crawls(
status=status,
urls__icontains=urls__icontains,
max_depth=max_depth,
limit=limit,
))
sys.exit(
list_crawls(
status=status,
urls__icontains=urls__icontains,
max_depth=max_depth,
limit=limit,
),
)
@main.command('update')
@click.option('--status', '-s', help='Set status')
@click.option('--max-depth', type=int, help='Set max depth')
def update_cmd(status: Optional[str], max_depth: Optional[int]):
@main.command("update")
@click.option("--status", "-s", help="Set status")
@click.option("--max-depth", type=int, help="Set max depth")
def update_cmd(status: str | None, max_depth: int | None):
"""Update Crawls from stdin JSONL."""
sys.exit(update_crawls(status=status, max_depth=max_depth))
@main.command('delete')
@click.option('--yes', '-y', is_flag=True, help='Confirm deletion')
@click.option('--dry-run', is_flag=True, help='Show what would be deleted')
@main.command("delete")
@click.option("--yes", "-y", is_flag=True, help="Confirm deletion")
@click.option("--dry-run", is_flag=True, help="Show what would be deleted")
def delete_cmd(yes: bool, dry_run: bool):
"""Delete Crawls from stdin JSONL."""
sys.exit(delete_crawls(yes=yes, dry_run=dry_run))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox crawl'
__package__ = "archivebox.cli"
__command__ = "archivebox crawl"
import sys
@@ -10,12 +10,12 @@ import rich_click as click
from archivebox.cli.archivebox_add import add
@click.command(context_settings={'ignore_unknown_options': True})
@click.option('--depth', '-d', type=int, default=0, help='Max crawl depth (default: 0)')
@click.option('--tag', '-t', default='', help='Comma-separated tags to add')
@click.option('--status', '-s', default='queued', help='Initial status (default: queued)')
@click.option('--wait/--no-wait', 'wait', default=True, help='Accepted for backwards compatibility')
@click.argument('urls', nargs=-1)
@click.command(context_settings={"ignore_unknown_options": True})
@click.option("--depth", "-d", type=int, default=0, help="Max crawl depth (default: 0)")
@click.option("--tag", "-t", default="", help="Comma-separated tags to add")
@click.option("--status", "-s", default="queued", help="Initial status (default: queued)")
@click.option("--wait/--no-wait", "wait", default=True, help="Accepted for backwards compatibility")
@click.argument("urls", nargs=-1)
def main(depth: int, tag: str, status: str, wait: bool, urls: tuple[str, ...]):
"""Backwards-compatible `archivebox crawl URL...` entrypoint."""
del status, wait
@@ -23,5 +23,5 @@ def main(depth: int, tag: str, status: str, wait: bool, urls: tuple[str, ...]):
sys.exit(0)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -27,8 +27,8 @@ Examples:
archivebox crawl https://example.com | archivebox snapshot | archivebox extract
"""
__package__ = 'archivebox.cli'
__command__ = 'archivebox extract'
__package__ = "archivebox.cli"
__command__ = "archivebox extract"
import sys
from collections import defaultdict
@@ -52,51 +52,52 @@ def process_archiveresult_by_id(archiveresult_id: str) -> int:
try:
archiveresult = ArchiveResult.objects.get(id=archiveresult_id)
except ArchiveResult.DoesNotExist:
rprint(f'[red]ArchiveResult {archiveresult_id} not found[/red]', file=sys.stderr)
rprint(f"[red]ArchiveResult {archiveresult_id} not found[/red]", file=sys.stderr)
return 1
rprint(f'[blue]Extracting {archiveresult.plugin} for {archiveresult.snapshot.url}[/blue]', file=sys.stderr)
rprint(f"[blue]Extracting {archiveresult.plugin} for {archiveresult.snapshot.url}[/blue]", file=sys.stderr)
try:
archiveresult.reset_for_retry()
snapshot = archiveresult.snapshot
snapshot.status = snapshot.StatusChoices.QUEUED
snapshot.retry_at = timezone.now()
snapshot.save(update_fields=['status', 'retry_at', 'modified_at'])
snapshot.save(update_fields=["status", "retry_at", "modified_at"])
crawl = snapshot.crawl
if crawl.status != crawl.StatusChoices.STARTED:
crawl.status = crawl.StatusChoices.QUEUED
crawl.retry_at = timezone.now()
crawl.save(update_fields=['status', 'retry_at', 'modified_at'])
crawl.save(update_fields=["status", "retry_at", "modified_at"])
run_crawl(str(crawl.id), snapshot_ids=[str(snapshot.id)], selected_plugins=[archiveresult.plugin])
archiveresult.refresh_from_db()
if archiveresult.status == ArchiveResult.StatusChoices.SUCCEEDED:
print(f'[green]Extraction succeeded: {archiveresult.output_str}[/green]')
print(f"[green]Extraction succeeded: {archiveresult.output_str}[/green]")
return 0
elif archiveresult.status == ArchiveResult.StatusChoices.NORESULTS:
print(f'[dim]Extraction completed with no results: {archiveresult.output_str}[/dim]')
print(f"[dim]Extraction completed with no results: {archiveresult.output_str}[/dim]")
return 0
elif archiveresult.status == ArchiveResult.StatusChoices.FAILED:
print(f'[red]Extraction failed: {archiveresult.output_str}[/red]', file=sys.stderr)
print(f"[red]Extraction failed: {archiveresult.output_str}[/red]", file=sys.stderr)
return 1
else:
# Still in progress or backoff - not a failure
print(f'[yellow]Extraction status: {archiveresult.status}[/yellow]')
print(f"[yellow]Extraction status: {archiveresult.status}[/yellow]")
return 0
except Exception as e:
print(f'[red]Extraction error: {type(e).__name__}: {e}[/red]', file=sys.stderr)
print(f"[red]Extraction error: {type(e).__name__}: {e}[/red]", file=sys.stderr)
return 1
def run_plugins(
args: tuple,
records: list[dict] | None = None,
plugins: str = '',
plugins: str = "",
wait: bool = True,
emit_results: bool = True,
) -> int:
"""
Run plugins on Snapshots from input.
@@ -111,16 +112,18 @@ def run_plugins(
from django.utils import timezone
from archivebox.misc.jsonl import (
read_args_or_stdin, write_record,
TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
read_args_or_stdin,
write_record,
TYPE_SNAPSHOT,
TYPE_ARCHIVERESULT,
)
from archivebox.core.models import Snapshot, ArchiveResult
from archivebox.core.models import Snapshot
from archivebox.services.runner import run_crawl
is_tty = sys.stdout.isatty()
# Parse comma-separated plugins list once (reused in creation and filtering)
plugins_list = [p.strip() for p in plugins.split(',') if p.strip()] if plugins else []
plugins_list = [p.strip() for p in plugins.split(",") if p.strip()] if plugins else []
# Parse stdin/args exactly once per CLI invocation.
# `main()` may already have consumed stdin to distinguish Snapshot input from
@@ -130,41 +133,41 @@ def run_plugins(
records = list(read_args_or_stdin(args))
if not records:
rprint('[yellow]No snapshots provided. Pass snapshot IDs as arguments or via stdin.[/yellow]', file=sys.stderr)
rprint("[yellow]No snapshots provided. Pass snapshot IDs as arguments or via stdin.[/yellow]", file=sys.stderr)
return 1
# Gather snapshot IDs and optional plugin constraints to process
snapshot_ids = set()
requested_plugins_by_snapshot: dict[str, set[str]] = defaultdict(set)
for record in records:
record_type = record.get('type')
record_type = record.get("type")
if record_type == TYPE_SNAPSHOT:
snapshot_id = record.get('id')
snapshot_id = record.get("id")
if snapshot_id:
snapshot_ids.add(snapshot_id)
elif record.get('url'):
elif record.get("url"):
# Look up by URL (get most recent if multiple exist)
snap = Snapshot.objects.filter(url=record['url']).order_by('-created_at').first()
snap = Snapshot.objects.filter(url=record["url"]).order_by("-created_at").first()
if snap:
snapshot_ids.add(str(snap.id))
else:
rprint(f'[yellow]Snapshot not found for URL: {record["url"]}[/yellow]', file=sys.stderr)
rprint(f"[yellow]Snapshot not found for URL: {record['url']}[/yellow]", file=sys.stderr)
elif record_type == TYPE_ARCHIVERESULT:
snapshot_id = record.get('snapshot_id')
snapshot_id = record.get("snapshot_id")
if snapshot_id:
snapshot_ids.add(snapshot_id)
plugin_name = record.get('plugin')
plugin_name = record.get("plugin")
if plugin_name and not plugins_list:
requested_plugins_by_snapshot[str(snapshot_id)].add(str(plugin_name))
elif 'id' in record:
elif "id" in record:
# Assume it's a snapshot ID
snapshot_ids.add(record['id'])
snapshot_ids.add(record["id"])
if not snapshot_ids:
rprint('[red]No valid snapshot IDs found in input[/red]', file=sys.stderr)
rprint("[red]No valid snapshot IDs found in input[/red]", file=sys.stderr)
return 1
# Get snapshots and ensure they have pending ArchiveResults
@@ -173,17 +176,13 @@ def run_plugins(
try:
snapshot = Snapshot.objects.get(id=snapshot_id)
except Snapshot.DoesNotExist:
rprint(f'[yellow]Snapshot {snapshot_id} not found[/yellow]', file=sys.stderr)
rprint(f"[yellow]Snapshot {snapshot_id} not found[/yellow]", file=sys.stderr)
continue
for plugin_name in requested_plugins_by_snapshot.get(str(snapshot.id), set()):
existing_result = snapshot.archiveresult_set.filter(plugin=plugin_name).order_by('-created_at').first()
if existing_result and existing_result.status in [
ArchiveResult.StatusChoices.FAILED,
ArchiveResult.StatusChoices.SKIPPED,
ArchiveResult.StatusChoices.NORESULTS,
ArchiveResult.StatusChoices.BACKOFF,
]:
requested_plugin_names = set(plugins_list) | requested_plugins_by_snapshot.get(str(snapshot.id), set())
for plugin_name in requested_plugin_names:
existing_result = snapshot.archiveresult_set.filter(plugin=plugin_name).order_by("-created_at").first()
if existing_result:
existing_result.reset_for_retry()
# Reset snapshot status to allow processing
@@ -195,34 +194,39 @@ def run_plugins(
processed_count += 1
if processed_count == 0:
rprint('[red]No snapshots to process[/red]', file=sys.stderr)
rprint("[red]No snapshots to process[/red]", file=sys.stderr)
return 1
rprint(f'[blue]Queued {processed_count} snapshots for extraction[/blue]', file=sys.stderr)
rprint(f"[blue]Queued {processed_count} snapshots for extraction[/blue]", file=sys.stderr)
# Run orchestrator if --wait (default)
if wait:
rprint('[blue]Running plugins...[/blue]', file=sys.stderr)
rprint("[blue]Running plugins...[/blue]", file=sys.stderr)
snapshot_ids_by_crawl: dict[str, set[str]] = defaultdict(set)
for snapshot_id in snapshot_ids:
try:
snapshot = Snapshot.objects.only('id', 'crawl_id').get(id=snapshot_id)
snapshot = Snapshot.objects.only("id", "crawl_id").get(id=snapshot_id)
except Snapshot.DoesNotExist:
continue
snapshot_ids_by_crawl[str(snapshot.crawl_id)].add(str(snapshot.id))
for crawl_id, crawl_snapshot_ids in snapshot_ids_by_crawl.items():
selected_plugins = plugins_list or sorted({
plugin
for snapshot_id in crawl_snapshot_ids
for plugin in requested_plugins_by_snapshot.get(str(snapshot_id), set())
}) or None
selected_plugins = (
plugins_list
or sorted(
{plugin for snapshot_id in crawl_snapshot_ids for plugin in requested_plugins_by_snapshot.get(str(snapshot_id), set())},
)
or None
)
run_crawl(
crawl_id,
snapshot_ids=sorted(crawl_snapshot_ids),
selected_plugins=selected_plugins,
)
if not emit_results:
return 0
# Output results as JSONL (when piped) or human-readable (when TTY)
for snapshot_id in snapshot_ids:
try:
@@ -234,11 +238,14 @@ def run_plugins(
for result in results:
if is_tty:
status_color = {
'succeeded': 'green',
'failed': 'red',
'skipped': 'yellow',
}.get(result.status, 'dim')
rprint(f' [{status_color}]{result.status}[/{status_color}] {result.plugin}{result.output_str or ""}', file=sys.stderr)
"succeeded": "green",
"failed": "red",
"skipped": "yellow",
}.get(result.status, "dim")
rprint(
f" [{status_color}]{result.status}[/{status_color}] {result.plugin}{result.output_str or ''}",
file=sys.stderr,
)
else:
write_record(result.to_json())
except Snapshot.DoesNotExist:
@@ -250,18 +257,20 @@ def run_plugins(
def is_archiveresult_id(value: str) -> bool:
"""Check if value looks like an ArchiveResult UUID."""
import re
uuid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.I)
uuid_pattern = re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", re.I)
if not uuid_pattern.match(value):
return False
# Verify it's actually an ArchiveResult (not a Snapshot or other object)
from archivebox.core.models import ArchiveResult
return ArchiveResult.objects.filter(id=value).exists()
@click.command()
@click.option('--plugins', '--plugin', '-p', default='', help='Comma-separated list of plugins to run (e.g., screenshot,singlefile)')
@click.option('--wait/--no-wait', default=True, help='Wait for plugins to complete (default: wait)')
@click.argument('args', nargs=-1)
@click.option("--plugins", "--plugin", "-p", default="", help="Comma-separated list of plugins to run (e.g., screenshot,singlefile)")
@click.option("--wait/--no-wait", default=True, help="Wait for plugins to complete (default: wait)")
@click.argument("args", nargs=-1)
def main(plugins: str, wait: bool, args: tuple):
"""Run plugins on Snapshots, or process existing ArchiveResults by ID"""
from archivebox.misc.jsonl import read_args_or_stdin
@@ -271,14 +280,12 @@ def main(plugins: str, wait: bool, args: tuple):
if not records:
from rich import print as rprint
rprint('[yellow]No Snapshot IDs or ArchiveResult IDs provided. Pass as arguments or via stdin.[/yellow]', file=sys.stderr)
rprint("[yellow]No Snapshot IDs or ArchiveResult IDs provided. Pass as arguments or via stdin.[/yellow]", file=sys.stderr)
sys.exit(1)
# Check if input looks like existing ArchiveResult IDs to process
all_are_archiveresult_ids = all(
is_archiveresult_id(r.get('id') or r.get('url', ''))
for r in records
)
all_are_archiveresult_ids = all(is_archiveresult_id(r.get("id") or r.get("url", "")) for r in records)
if all_are_archiveresult_ids:
# Process existing ArchiveResults by ID
@@ -286,9 +293,9 @@ def main(plugins: str, wait: bool, args: tuple):
exit_code = 0
for record in records:
archiveresult_id = record.get('id') or record.get('url')
archiveresult_id = record.get("id") or record.get("url")
if not isinstance(archiveresult_id, str):
rprint(f'[red]Invalid ArchiveResult input: {record}[/red]', file=sys.stderr)
rprint(f"[red]Invalid ArchiveResult input: {record}[/red]", file=sys.stderr)
exit_code = 1
continue
result = process_archiveresult_by_id(archiveresult_id)
@@ -300,5 +307,5 @@ def main(plugins: str, wait: bool, args: tuple):
sys.exit(run_plugins(args, records=records, plugins=plugins, wait=wait))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox help'
__package__ = "archivebox.cli"
__command__ = "archivebox help"
import os
import os
from pathlib import Path
import click
@@ -17,33 +17,44 @@ def help() -> None:
from archivebox.config import CONSTANTS
from archivebox.config.permissions import IN_DOCKER
from archivebox.misc.logging_util import log_cli_command
log_cli_command('help', [], None, '.')
COMMANDS_HELP_TEXT = '\n '.join(
f'[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}'
for cmd in ArchiveBoxGroup.meta_commands.keys()
) + '\n\n ' + '\n '.join(
f'[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}'
for cmd in ArchiveBoxGroup.setup_commands.keys()
) + '\n\n ' + '\n '.join(
f'[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}'
for cmd in ArchiveBoxGroup.archive_commands.keys()
log_cli_command("help", [], None, ".")
COMMANDS_HELP_TEXT = (
"\n ".join(
f"[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}" for cmd in ArchiveBoxGroup.meta_commands.keys()
)
+ "\n\n "
+ "\n ".join(
f"[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}" for cmd in ArchiveBoxGroup.setup_commands.keys()
)
+ "\n\n "
+ "\n ".join(
f"[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}" for cmd in ArchiveBoxGroup.archive_commands.keys()
)
)
DOCKER_USAGE = '''
DOCKER_USAGE = (
"""
[dodger_blue3]Docker Usage:[/dodger_blue3]
[grey53]# using Docker Compose:[/grey53]
[blue]docker compose run[/blue] [dark_green]archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
[grey53]# using Docker:[/grey53]
[blue]docker run[/blue] -v [light_slate_blue]$PWD:/data[/light_slate_blue] [grey53]-p 8000:8000[/grey53] -it [dark_green]archivebox/archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
''' if IN_DOCKER else ''
DOCKER_DOCS = '\n [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Docker[/link]' if IN_DOCKER else ''
DOCKER_OUTSIDE_HINT = "\n [grey53]# outside of Docker:[/grey53]" if IN_DOCKER else ''
DOCKER_CMD_PREFIX = "[blue]docker ... [/blue]" if IN_DOCKER else ''
"""
if IN_DOCKER
else ""
)
DOCKER_DOCS = (
"\n [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Docker[/link]"
if IN_DOCKER
else ""
)
DOCKER_OUTSIDE_HINT = "\n [grey53]# outside of Docker:[/grey53]" if IN_DOCKER else ""
DOCKER_CMD_PREFIX = "[blue]docker ... [/blue]" if IN_DOCKER else ""
print(f'''{DOCKER_USAGE}
print(f"""{DOCKER_USAGE}
[deep_sky_blue4]Usage:[/deep_sky_blue4]{DOCKER_OUTSIDE_HINT}
[dark_green]archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
@@ -54,12 +65,11 @@ def help() -> None:
[link=https://github.com/ArchiveBox/ArchiveBox/wiki]https://github.com/ArchiveBox/ArchiveBox/wiki[/link]{DOCKER_DOCS}
[link=https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#cli-usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Usage[/link]
[link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration[/link]
''')
""")
if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) and CONSTANTS.ARCHIVE_DIR.is_dir():
pretty_out_dir = str(CONSTANTS.DATA_DIR).replace(str(Path('~').expanduser()), '~')
EXAMPLE_USAGE = f'''
pretty_out_dir = str(CONSTANTS.DATA_DIR).replace(str(Path("~").expanduser()), "~")
EXAMPLE_USAGE = f"""
[light_slate_blue]DATA DIR[/light_slate_blue]: [yellow]{pretty_out_dir}[/yellow]
[violet]Hint:[/violet] [i]Common maintenance tasks:[/i]
@@ -73,33 +83,49 @@ def help() -> None:
[dark_green]archivebox[/dark_green] [green]list[/green] --sort=timestamp --csv=timestamp,downloaded_at,url,title
[dark_green]archivebox[/dark_green] [green]schedule[/green] --every=day --depth=1 "https://example.com/some/feed.rss"
[dark_green]archivebox[/dark_green] [green]server[/green] [blue]0.0.0.0:8000[/blue] [grey53]# Start the Web UI / API server[/grey53]
'''
print(Panel(EXAMPLE_USAGE, expand=False, border_style='grey53', title='[green3]:white_check_mark: A collection [light_slate_blue]DATA DIR[/light_slate_blue] is currently active[/green3]', subtitle='Commands run inside this dir will only apply to this collection.'))
"""
print(
Panel(
EXAMPLE_USAGE,
expand=False,
border_style="grey53",
title="[green3]:white_check_mark: A collection [light_slate_blue]DATA DIR[/light_slate_blue] is currently active[/green3]",
subtitle="Commands run inside this dir will only apply to this collection.",
),
)
else:
DATA_SETUP_HELP = '\n'
DATA_SETUP_HELP = "\n"
if IN_DOCKER:
DATA_SETUP_HELP += '[violet]Hint:[/violet] When using Docker, you need to mount a volume to use as your data dir:\n'
DATA_SETUP_HELP += ' docker run [violet]-v /some/path/data:/data[/violet] archivebox/archivebox ...\n\n'
DATA_SETUP_HELP += 'To load an [dark_blue]existing[/dark_blue] collection:\n'
DATA_SETUP_HELP += ' 1. [green]cd[/green] ~/archivebox/data [grey53]# go into existing [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n'
DATA_SETUP_HELP += f' 2. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# migrate to latest version (safe to run multiple times)[/grey53]\n'
DATA_SETUP_HELP += f' 3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# auto-update all plugins (wget, chrome, singlefile, etc.)[/grey53]\n'
DATA_SETUP_HELP += f' 4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green] [grey53]# ...get help with next steps... [/grey53]\n\n'
DATA_SETUP_HELP += 'To start a [sea_green1]new[/sea_green1] collection:\n'
DATA_SETUP_HELP += ' 1. [green]mkdir[/green] ~/archivebox/data [grey53]# create a new, empty [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n'
DATA_SETUP_HELP += ' 2. [green]cd[/green] ~/archivebox/data [grey53]# cd into the new directory[/grey53]\n'
DATA_SETUP_HELP += f' 3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# initialize ArchiveBox in the new data dir[/grey53]\n'
DATA_SETUP_HELP += f' 4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# auto-install all plugins (wget, chrome, singlefile, etc.)[/grey53]\n'
DATA_SETUP_HELP += f' 5. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green] [grey53]# ... get help with next steps... [/grey53]\n'
print(Panel(DATA_SETUP_HELP, expand=False, border_style='grey53', title='[red]:cross_mark: No collection is currently active[/red]', subtitle='All archivebox [green]commands[/green] should be run from inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]'))
DATA_SETUP_HELP += "[violet]Hint:[/violet] When using Docker, you need to mount a volume to use as your data dir:\n"
DATA_SETUP_HELP += " docker run [violet]-v /some/path/data:/data[/violet] archivebox/archivebox ...\n\n"
DATA_SETUP_HELP += "To load an [dark_blue]existing[/dark_blue] collection:\n"
DATA_SETUP_HELP += " 1. [green]cd[/green] ~/archivebox/data [grey53]# go into existing [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n"
DATA_SETUP_HELP += f" 2. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# migrate to latest version (safe to run multiple times)[/grey53]\n"
DATA_SETUP_HELP += f" 3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# auto-update all plugins (wget, chrome, singlefile, etc.)[/grey53]\n"
DATA_SETUP_HELP += f" 4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green] [grey53]# ...get help with next steps... [/grey53]\n\n"
DATA_SETUP_HELP += "To start a [sea_green1]new[/sea_green1] collection:\n"
DATA_SETUP_HELP += " 1. [green]mkdir[/green] ~/archivebox/data [grey53]# create a new, empty [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n"
DATA_SETUP_HELP += " 2. [green]cd[/green] ~/archivebox/data [grey53]# cd into the new directory[/grey53]\n"
DATA_SETUP_HELP += f" 3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# initialize ArchiveBox in the new data dir[/grey53]\n"
DATA_SETUP_HELP += f" 4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# auto-install all plugins (wget, chrome, singlefile, etc.)[/grey53]\n"
DATA_SETUP_HELP += f" 5. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green] [grey53]# ... get help with next steps... [/grey53]\n"
print(
Panel(
DATA_SETUP_HELP,
expand=False,
border_style="grey53",
title="[red]:cross_mark: No collection is currently active[/red]",
subtitle="All archivebox [green]commands[/green] should be run from inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]",
),
)
@click.command()
@click.option('--help', '-h', is_flag=True, help='Show help')
@click.option("--help", "-h", is_flag=True, help="Show help")
def main(**kwargs):
"""Print the ArchiveBox help message and usage"""
return help()
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,11 +1,11 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__package__ = "archivebox.cli"
import os
import sys
from pathlib import Path
from typing import Mapping
from collections.abc import Mapping
from rich import print
import rich_click as click
@@ -14,12 +14,12 @@ from archivebox.misc.util import docstring, enforce_types
def _normalize_snapshot_record(link_dict: Mapping[str, object]) -> tuple[str, dict[str, object]] | None:
url = link_dict.get('url')
url = link_dict.get("url")
if not isinstance(url, str) or not url:
return None
record: dict[str, object] = {'url': url}
for key in ('timestamp', 'title', 'tags', 'sources'):
record: dict[str, object] = {"url": url}
for key in ("timestamp", "title", "tags", "sources"):
value = link_dict.get(key)
if value is not None:
record[key] = value
@@ -27,15 +27,15 @@ def _normalize_snapshot_record(link_dict: Mapping[str, object]) -> tuple[str, di
@enforce_types
def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
def init(force: bool = False, quick: bool = False, install: bool = False) -> None:
"""Initialize a new ArchiveBox collection in the current directory"""
from archivebox.config import CONSTANTS, VERSION, DATA_DIR
from archivebox.config.common import SERVER_CONFIG
from archivebox.config.collection import write_config_file
from archivebox.misc.legacy import parse_json_main_index, parse_json_links_details
from archivebox.misc.db import apply_migrations
# if os.access(out_dir / CONSTANTS.JSON_INDEX_FILENAME, os.F_OK):
# print("[red]:warning: This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.[/red]", file=sys.stderr)
# print("[red] You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.[/red]", file=sys.stderr)
@@ -43,69 +43,71 @@ def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
is_empty = not len(set(os.listdir(DATA_DIR)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
existing_index = os.path.isfile(CONSTANTS.DATABASE_FILE)
if is_empty and not existing_index:
print(f'[turquoise4][+] Initializing a new ArchiveBox v{VERSION} collection...[/turquoise4]')
print('[green]----------------------------------------------------------------------[/green]')
print(f"[turquoise4][+] Initializing a new ArchiveBox v{VERSION} collection...[/turquoise4]")
print("[green]----------------------------------------------------------------------[/green]")
elif existing_index:
# TODO: properly detect and print the existing version in current index as well
print(f'[green][*] Verifying and updating existing ArchiveBox collection to v{VERSION}...[/green]')
print('[green]----------------------------------------------------------------------[/green]')
print(f"[green][*] Verifying and updating existing ArchiveBox collection to v{VERSION}...[/green]")
print("[green]----------------------------------------------------------------------[/green]")
else:
if force:
print('[red][!] This folder appears to already have files in it, but no index.sqlite3 is present.[/red]')
print('[red] Because --force was passed, ArchiveBox will initialize anyway (which may overwrite existing files).[/red]')
print("[red][!] This folder appears to already have files in it, but no index.sqlite3 is present.[/red]")
print("[red] Because --force was passed, ArchiveBox will initialize anyway (which may overwrite existing files).[/red]")
else:
print(
("[red][X] This folder appears to already have files in it, but no index.sqlite3 present.[/red]\n\n"
"[red][X] This folder appears to already have files in it, but no index.sqlite3 present.[/red]\n\n"
" You must run init in a completely empty directory, or an existing data folder.\n\n"
" [violet]Hint:[/violet] To import an existing data folder make sure to cd into the folder first, \n"
" then run and run 'archivebox init' to pick up where you left off.\n\n"
" (Always make sure your data folder is backed up first before updating ArchiveBox)"
)
" (Always make sure your data folder is backed up first before updating ArchiveBox)",
)
raise SystemExit(2)
if existing_index:
print('\n[green][*] Verifying archive folder structure...[/green]')
print("\n[green][*] Verifying archive folder structure...[/green]")
else:
print('\n[green][+] Building archive folder structure...[/green]')
print(f' + ./{CONSTANTS.ARCHIVE_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.SOURCES_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.LOGS_DIR.relative_to(DATA_DIR)}...')
print("\n[green][+] Building archive folder structure...[/green]")
print(
f" + ./{CONSTANTS.ARCHIVE_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.SOURCES_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.LOGS_DIR.relative_to(DATA_DIR)}...",
)
Path(CONSTANTS.SOURCES_DIR).mkdir(exist_ok=True)
Path(CONSTANTS.ARCHIVE_DIR).mkdir(exist_ok=True)
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
print(f' + ./{CONSTANTS.CONFIG_FILE.relative_to(DATA_DIR)}...')
print(f" + ./{CONSTANTS.CONFIG_FILE.relative_to(DATA_DIR)}...")
# create the .archivebox_id file with a unique ID for this collection
from archivebox.config.paths import _get_collection_id
_get_collection_id(DATA_DIR, force_create=True)
# create the ArchiveBox.conf file
write_config_file({'SECRET_KEY': SERVER_CONFIG.SECRET_KEY})
_get_collection_id(DATA_DIR, force_create=True)
# create the ArchiveBox.conf file
write_config_file({"SECRET_KEY": SERVER_CONFIG.SECRET_KEY})
if os.access(CONSTANTS.DATABASE_FILE, os.F_OK):
print('\n[green][*] Verifying main SQL index and running any migrations needed...[/green]')
print("\n[green][*] Verifying main SQL index and running any migrations needed...[/green]")
else:
print('\n[green][+] Building main SQL index and running initial migrations...[/green]')
print("\n[green][+] Building main SQL index and running initial migrations...[/green]")
from archivebox.config.django import setup_django
setup_django()
for migration_line in apply_migrations(DATA_DIR):
sys.stdout.write(f' {migration_line}\n')
sys.stdout.write(f" {migration_line}\n")
assert os.path.isfile(CONSTANTS.DATABASE_FILE) and os.access(CONSTANTS.DATABASE_FILE, os.R_OK)
print()
print(f' √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}')
print(f" √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}")
# from django.contrib.auth.models import User
# if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exclude(username='system').exists():
# print('{green}[+] Creating admin user account...{reset}'.format(**SHELL_CONFIG.ANSI))
# call_command("createsuperuser", interactive=True)
print()
print('[dodger_blue3][*] Checking links from indexes and archive folders (safe to Ctrl+C)...[/dodger_blue3]')
print("[dodger_blue3][*] Checking links from indexes and archive folders (safe to Ctrl+C)...[/dodger_blue3]")
from archivebox.core.models import Snapshot
@@ -114,10 +116,10 @@ def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
if existing_index:
all_links = Snapshot.objects.all()
print(f' √ Loaded {all_links.count()} links from existing main index.')
print(f" √ Loaded {all_links.count()} links from existing main index.")
if quick:
print(' > Skipping orphan snapshot import (quick mode)')
print(" > Skipping orphan snapshot import (quick mode)")
else:
try:
# Import orphaned links from legacy JSON indexes
@@ -131,7 +133,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
orphaned_json_links[url] = record
if orphaned_json_links:
pending_links.update(orphaned_json_links)
print(f' [yellow]√ Added {len(orphaned_json_links)} orphaned links from existing JSON index...[/yellow]')
print(f" [yellow]√ Added {len(orphaned_json_links)} orphaned links from existing JSON index...[/yellow]")
orphaned_data_dir_links: dict[str, dict[str, object]] = {}
for link_dict in parse_json_links_details(DATA_DIR):
@@ -143,7 +145,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
orphaned_data_dir_links[url] = record
if orphaned_data_dir_links:
pending_links.update(orphaned_data_dir_links)
print(f' [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]')
print(f" [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]")
if pending_links:
for link_dict in pending_links.values():
@@ -151,42 +153,44 @@ def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
# Hint for orphaned snapshot directories
print()
print(' [violet]Hint:[/violet] To import orphaned snapshot directories and reconcile filesystem state, run:')
print(' archivebox update')
print(" [violet]Hint:[/violet] To import orphaned snapshot directories and reconcile filesystem state, run:")
print(" archivebox update")
except (KeyboardInterrupt, SystemExit):
print(file=sys.stderr)
print('[yellow]:stop_sign: Stopped checking archive directories due to Ctrl-C/SIGTERM[/yellow]', file=sys.stderr)
print(' Your archive data is safe, but you should re-run `archivebox init` to finish the process later.', file=sys.stderr)
print("[yellow]:stop_sign: Stopped checking archive directories due to Ctrl-C/SIGTERM[/yellow]", file=sys.stderr)
print(" Your archive data is safe, but you should re-run `archivebox init` to finish the process later.", file=sys.stderr)
print(file=sys.stderr)
print(' [violet]Hint:[/violet] In the future you can run a quick init without checking dirs like so:', file=sys.stderr)
print(' archivebox init --quick', file=sys.stderr)
print(" [violet]Hint:[/violet] In the future you can run a quick init without checking dirs like so:", file=sys.stderr)
print(" archivebox init --quick", file=sys.stderr)
raise SystemExit(1)
print('\n[green]----------------------------------------------------------------------[/green]')
print("\n[green]----------------------------------------------------------------------[/green]")
from django.contrib.auth.models import User
if (SERVER_CONFIG.ADMIN_USERNAME and SERVER_CONFIG.ADMIN_PASSWORD) and not User.objects.filter(username=SERVER_CONFIG.ADMIN_USERNAME).exists():
print('[green][+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.[/green]')
if (SERVER_CONFIG.ADMIN_USERNAME and SERVER_CONFIG.ADMIN_PASSWORD) and not User.objects.filter(
username=SERVER_CONFIG.ADMIN_USERNAME,
).exists():
print("[green][+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.[/green]")
User.objects.create_superuser(username=SERVER_CONFIG.ADMIN_USERNAME, password=SERVER_CONFIG.ADMIN_PASSWORD)
if existing_index:
print('[green][√] Done. Verified and updated the existing ArchiveBox collection.[/green]')
print("[green][√] Done. Verified and updated the existing ArchiveBox collection.[/green]")
else:
print(f'[green][√] Done. A new ArchiveBox collection was initialized ({len(all_links) + len(pending_links)} links).[/green]')
print(f"[green][√] Done. A new ArchiveBox collection was initialized ({len(all_links) + len(pending_links)} links).[/green]")
CONSTANTS.PERSONAS_DIR.mkdir(parents=True, exist_ok=True)
CONSTANTS.DEFAULT_TMP_DIR.mkdir(parents=True, exist_ok=True)
CONSTANTS.DEFAULT_LIB_DIR.mkdir(parents=True, exist_ok=True)
(CONSTANTS.DEFAULT_LIB_DIR / 'bin').mkdir(parents=True, exist_ok=True)
(CONSTANTS.DEFAULT_LIB_DIR / "bin").mkdir(parents=True, exist_ok=True)
from archivebox.config.common import STORAGE_CONFIG
from archivebox.config.paths import get_or_create_working_tmp_dir, get_or_create_working_lib_dir
STORAGE_CONFIG.TMP_DIR.mkdir(parents=True, exist_ok=True)
STORAGE_CONFIG.LIB_DIR.mkdir(parents=True, exist_ok=True)
(STORAGE_CONFIG.LIB_DIR / 'bin').mkdir(parents=True, exist_ok=True)
(STORAGE_CONFIG.LIB_DIR / "bin").mkdir(parents=True, exist_ok=True)
working_tmp_dir = get_or_create_working_tmp_dir(autofix=True, quiet=True)
if working_tmp_dir:
@@ -195,33 +199,35 @@ def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
working_lib_dir = get_or_create_working_lib_dir(autofix=True, quiet=True)
if working_lib_dir:
working_lib_dir.mkdir(parents=True, exist_ok=True)
(working_lib_dir / 'bin').mkdir(parents=True, exist_ok=True)
(working_lib_dir / "bin").mkdir(parents=True, exist_ok=True)
if install:
from archivebox.cli.archivebox_install import install as install_method
install_method()
if Snapshot.objects.count() < 25: # hide the hints for experienced users
if Snapshot.objects.count() < 25: # hide the hints for experienced users
print()
print(' [violet]Hint:[/violet] To view your archive index, run:')
print(' archivebox server # then visit [deep_sky_blue4][link=http://127.0.0.1:8000]http://127.0.0.1:8000[/link][/deep_sky_blue4]')
print(" [violet]Hint:[/violet] To view your archive index, run:")
print(
" archivebox server # then visit [deep_sky_blue4][link=http://127.0.0.1:8000]http://127.0.0.1:8000[/link][/deep_sky_blue4]",
)
print()
print(' To add new links, you can run:')
print(" To add new links, you can run:")
print(" archivebox add < ~/some/path/to/list_of_links.txt")
print()
print(' For more usage and examples, run:')
print(' archivebox help')
print(" For more usage and examples, run:")
print(" archivebox help")
@click.command()
@click.option('--force', '-f', is_flag=True, help='Ignore unrecognized files in current directory and initialize anyway')
@click.option('--quick', '-q', is_flag=True, help='Run any updates or migrations without rechecking all snapshot dirs')
@click.option('--install', '-s', is_flag=True, help='Automatically install dependencies and extras used for archiving')
@click.option("--force", "-f", is_flag=True, help="Ignore unrecognized files in current directory and initialize anyway")
@click.option("--quick", "-q", is_flag=True, help="Run any updates or migrations without rechecking all snapshot dirs")
@click.option("--install", "-s", is_flag=True, help="Automatically install dependencies and extras used for archiving")
@docstring(init.__doc__)
def main(**kwargs) -> None:
init(**kwargs)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__package__ = "archivebox.cli"
import os
@@ -11,7 +11,7 @@ from archivebox.misc.util import docstring, enforce_types
@enforce_types
def install(binaries: tuple[str, ...] = (), binproviders: str = '*', dry_run: bool = False) -> None:
def install(binaries: tuple[str, ...] = (), binproviders: str = "*", dry_run: bool = False) -> None:
"""Detect and install ArchiveBox dependencies by running the abx-dl install flow
Examples:
@@ -31,33 +31,34 @@ def install(binaries: tuple[str, ...] = (), binproviders: str = '*', dry_run: bo
# Show what we're installing
if binaries:
print(f'\n[green][+] Installing specific binaries: {", ".join(binaries)}[/green]')
print(f"\n[green][+] Installing specific binaries: {', '.join(binaries)}[/green]")
else:
print('\n[green][+] Detecting and installing all ArchiveBox dependencies...[/green]')
print("\n[green][+] Detecting and installing all ArchiveBox dependencies...[/green]")
if binproviders != '*':
print(f'[green][+] Using providers: {binproviders}[/green]')
if binproviders != "*":
print(f"[green][+] Using providers: {binproviders}[/green]")
if IS_ROOT:
EUID = os.geteuid()
print()
print(f'[yellow]:warning: Running as UID=[blue]{EUID}[/blue].[/yellow]')
print(f' DATA_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].')
print(f"[yellow]:warning: Running as UID=[blue]{EUID}[/blue].[/yellow]")
print(f" DATA_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].")
print()
if dry_run:
print('[dim]Dry run - would run the abx-dl install flow[/dim]')
print("[dim]Dry run - would run the abx-dl install flow[/dim]")
return
# Set up Django
from archivebox.config.django import setup_django
setup_django()
plugin_names = list(binaries)
if binproviders != '*':
plugin_names.extend(provider.strip() for provider in binproviders.split(',') if provider.strip())
if binproviders != "*":
plugin_names.extend(provider.strip() for provider in binproviders.split(",") if provider.strip())
print('[+] Running installer via abx-dl bus...')
print("[+] Running installer via abx-dl bus...")
print()
from archivebox.services.runner import run_install
@@ -68,28 +69,36 @@ def install(binaries: tuple[str, ...] = (), binproviders: str = '*', dry_run: bo
# Check for superuser
from django.contrib.auth import get_user_model
User = get_user_model()
if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
stderr('\n[+] Don\'t forget to create a new admin user for the Web UI...', color='green')
stderr(' archivebox manage createsuperuser')
if not User.objects.filter(is_superuser=True).exclude(username="system").exists():
stderr("\n[+] Don't forget to create a new admin user for the Web UI...", color="green")
stderr(" archivebox manage createsuperuser")
print()
# Show version to display full status including installed binaries
# Django is already loaded, so just import and call the function directly
from archivebox.cli.archivebox_version import version as show_version
show_version(quiet=False)
@click.command()
@click.argument('binaries', nargs=-1, type=str, required=False)
@click.option('--binproviders', '-p', default='*', help='Comma-separated list of providers to use (pip,npm,brew,apt,env,custom) or * for all', show_default=True)
@click.option('--dry-run', '-d', is_flag=True, help='Show what would happen without actually running', default=False)
@click.argument("binaries", nargs=-1, type=str, required=False)
@click.option(
"--binproviders",
"-p",
default="*",
help="Comma-separated list of providers to use (pip,npm,brew,apt,env,custom) or * for all",
show_default=True,
)
@click.option("--dry-run", "-d", is_flag=True, help="Show what would happen without actually running", default=False)
@docstring(install.__doc__)
def main(**kwargs) -> None:
install(**kwargs)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,10 +1,9 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox list'
__package__ = "archivebox.cli"
__command__ = "archivebox list"
import sys
from typing import Optional
import rich_click as click
@@ -12,31 +11,47 @@ from archivebox.cli.archivebox_snapshot import list_snapshots
@click.command()
@click.option('--status', '-s', help='Filter by status (queued, started, sealed)')
@click.option('--url__icontains', help='Filter by URL contains')
@click.option('--url__istartswith', help='Filter by URL starts with')
@click.option('--tag', '-t', help='Filter by tag name')
@click.option('--crawl-id', help='Filter by crawl ID')
@click.option('--limit', '-n', type=int, help='Limit number of results')
@click.option('--sort', '-o', type=str, help='Field to sort by, e.g. url, created_at, bookmarked_at, downloaded_at')
@click.option('--csv', '-C', type=str, help='Print output as CSV with the provided fields, e.g.: timestamp,url,title')
@click.option('--with-headers', is_flag=True, help='Include column headers in structured output')
def main(status: Optional[str], url__icontains: Optional[str], url__istartswith: Optional[str],
tag: Optional[str], crawl_id: Optional[str], limit: Optional[int],
sort: Optional[str], csv: Optional[str], with_headers: bool) -> None:
@click.option("--status", "-s", help="Filter by status (queued, started, sealed)")
@click.option("--url__icontains", help="Filter by URL contains")
@click.option("--url__istartswith", help="Filter by URL starts with")
@click.option("--tag", "-t", help="Filter by tag name")
@click.option("--crawl-id", help="Filter by crawl ID")
@click.option("--limit", "-n", type=int, help="Limit number of results")
@click.option("--sort", "-o", type=str, help="Field to sort by, e.g. url, created_at, bookmarked_at, downloaded_at")
@click.option("--csv", "-C", type=str, help="Print output as CSV with the provided fields, e.g.: timestamp,url,title")
@click.option("--with-headers", is_flag=True, help="Include column headers in structured output")
@click.option("--search", type=click.Choice(["meta", "content", "contents", "deep"]), help="Search mode to use for the query")
@click.argument("query", nargs=-1)
def main(
status: str | None,
url__icontains: str | None,
url__istartswith: str | None,
tag: str | None,
crawl_id: str | None,
limit: int | None,
sort: str | None,
csv: str | None,
with_headers: bool,
search: str | None,
query: tuple[str, ...],
) -> None:
"""List Snapshots."""
sys.exit(list_snapshots(
status=status,
url__icontains=url__icontains,
url__istartswith=url__istartswith,
tag=tag,
crawl_id=crawl_id,
limit=limit,
sort=sort,
csv=csv,
with_headers=with_headers,
))
sys.exit(
list_snapshots(
status=status,
url__icontains=url__icontains,
url__istartswith=url__istartswith,
tag=tag,
crawl_id=crawl_id,
limit=limit,
sort=sort,
csv=csv,
with_headers=with_headers,
search=search,
query=" ".join(query),
),
)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -19,11 +19,10 @@ Examples:
archivebox machine list --hostname__icontains=myserver
"""
__package__ = 'archivebox.cli'
__command__ = 'archivebox machine'
__package__ = "archivebox.cli"
__command__ = "archivebox machine"
import sys
from typing import Optional
import rich_click as click
from rich import print as rprint
@@ -35,10 +34,11 @@ from archivebox.cli.cli_utils import apply_filters
# LIST
# =============================================================================
def list_machines(
hostname__icontains: Optional[str] = None,
os_platform: Optional[str] = None,
limit: Optional[int] = None,
hostname__icontains: str | None = None,
os_platform: str | None = None,
limit: int | None = None,
) -> int:
"""
List Machines as JSONL with optional filters.
@@ -51,24 +51,24 @@ def list_machines(
is_tty = sys.stdout.isatty()
queryset = Machine.objects.all().order_by('-created_at')
queryset = Machine.objects.all().order_by("-created_at")
# Apply filters
filter_kwargs = {
'hostname__icontains': hostname__icontains,
'os_platform': os_platform,
"hostname__icontains": hostname__icontains,
"os_platform": os_platform,
}
queryset = apply_filters(queryset, filter_kwargs, limit=limit)
count = 0
for machine in queryset:
if is_tty:
rprint(f'[cyan]{machine.hostname:30}[/cyan] [dim]{machine.os_platform:10}[/dim] {machine.id}')
rprint(f"[cyan]{machine.hostname:30}[/cyan] [dim]{machine.os_platform:10}[/dim] {machine.id}")
else:
write_record(machine.to_json())
count += 1
rprint(f'[dim]Listed {count} machines[/dim]', file=sys.stderr)
rprint(f"[dim]Listed {count} machines[/dim]", file=sys.stderr)
return 0
@@ -76,24 +76,27 @@ def list_machines(
# CLI Commands
# =============================================================================
@click.group()
def main():
"""Manage Machine records (read-only, system-managed)."""
pass
@main.command('list')
@click.option('--hostname__icontains', help='Filter by hostname contains')
@click.option('--os-platform', help='Filter by OS platform')
@click.option('--limit', '-n', type=int, help='Limit number of results')
def list_cmd(hostname__icontains: Optional[str], os_platform: Optional[str], limit: Optional[int]):
@main.command("list")
@click.option("--hostname__icontains", help="Filter by hostname contains")
@click.option("--os-platform", help="Filter by OS platform")
@click.option("--limit", "-n", type=int, help="Limit number of results")
def list_cmd(hostname__icontains: str | None, os_platform: str | None, limit: int | None):
"""List Machines as JSONL."""
sys.exit(list_machines(
hostname__icontains=hostname__icontains,
os_platform=os_platform,
limit=limit,
))
sys.exit(
list_machines(
hostname__icontains=hostname__icontains,
os_platform=os_platform,
limit=limit,
),
)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,33 +1,34 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__package__ = "archivebox.cli"
import rich_click as click
from archivebox.misc.util import docstring, enforce_types
@enforce_types
def manage(args: list[str] | None=None) -> None:
def manage(args: list[str] | None = None) -> None:
"""Run an ArchiveBox Django management command"""
from archivebox.config.common import SHELL_CONFIG
from archivebox.misc.logging import stderr
if (args and "createsuperuser" in args) and (SHELL_CONFIG.IN_DOCKER and not SHELL_CONFIG.IS_TTY):
stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
stderr(' docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow')
stderr('')
stderr("[!] Warning: you need to pass -it to use interactive commands in docker", color="lightyellow")
stderr(" docker run -it archivebox manage {}".format(" ".join(args or ["..."])), color="lightyellow")
stderr("")
from django.core.management import execute_from_command_line
execute_from_command_line(['manage.py', *(args or ['help'])])
execute_from_command_line(["manage.py", *(args or ["help"])])
@click.command(add_help_option=False, context_settings=dict(ignore_unknown_options=True))
@click.argument('args', nargs=-1)
@click.argument("args", nargs=-1)
@docstring(manage.__doc__)
def main(args: list[str] | None=None) -> None:
def main(args: list[str] | None = None) -> None:
manage(args=args)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -6,8 +6,8 @@ Start the Model Context Protocol (MCP) server in stdio mode.
Exposes all ArchiveBox CLI commands as MCP tools for AI agents.
"""
__package__ = 'archivebox.cli'
__command__ = 'archivebox mcp'
__package__ = "archivebox.cli"
__command__ = "archivebox mcp"
import rich_click as click
@@ -45,5 +45,5 @@ def main(**kwargs):
mcp()
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -24,8 +24,8 @@ Examples:
archivebox persona list --name=old | archivebox persona delete --yes
"""
__package__ = 'archivebox.cli'
__command__ = 'archivebox persona'
__package__ = "archivebox.cli"
__command__ = "archivebox persona"
import os
import sys
@@ -35,7 +35,7 @@ import subprocess
import tempfile
import json
from pathlib import Path
from typing import Optional, Iterable
from collections.abc import Iterable
from collections import OrderedDict
import rich_click as click
@@ -49,134 +49,145 @@ from archivebox.personas import importers as persona_importers
# Browser Profile Locations
# =============================================================================
def get_chrome_user_data_dir() -> Optional[Path]:
def get_chrome_user_data_dir() -> Path | None:
"""Get the default Chrome user data directory for the current platform."""
system = platform.system()
home = Path.home()
if system == 'Darwin': # macOS
if system == "Darwin": # macOS
candidates = [
home / 'Library' / 'Application Support' / 'Google' / 'Chrome',
home / 'Library' / 'Application Support' / 'Chromium',
home / "Library" / "Application Support" / "Google" / "Chrome",
home / "Library" / "Application Support" / "Chromium",
]
elif system == 'Linux':
elif system == "Linux":
candidates = [
home / '.config' / 'google-chrome',
home / '.config' / 'chromium',
home / '.config' / 'chrome',
home / 'snap' / 'chromium' / 'common' / 'chromium',
home / ".config" / "google-chrome",
home / ".config" / "chromium",
home / ".config" / "chrome",
home / "snap" / "chromium" / "common" / "chromium",
]
elif system == 'Windows':
local_app_data = Path(os.environ.get('LOCALAPPDATA', home / 'AppData' / 'Local'))
elif system == "Windows":
local_app_data = Path(os.environ.get("LOCALAPPDATA", home / "AppData" / "Local"))
candidates = [
local_app_data / 'Google' / 'Chrome' / 'User Data',
local_app_data / 'Chromium' / 'User Data',
local_app_data / "Google" / "Chrome" / "User Data",
local_app_data / "Chromium" / "User Data",
]
else:
candidates = []
for candidate in candidates:
if candidate.exists() and (candidate / 'Default').exists():
if candidate.exists() and (candidate / "Default").exists():
return candidate
return None
def get_brave_user_data_dir() -> Optional[Path]:
def get_brave_user_data_dir() -> Path | None:
"""Get the default Brave user data directory for the current platform."""
system = platform.system()
home = Path.home()
if system == 'Darwin':
if system == "Darwin":
candidates = [
home / 'Library' / 'Application Support' / 'BraveSoftware' / 'Brave-Browser',
home / "Library" / "Application Support" / "BraveSoftware" / "Brave-Browser",
]
elif system == 'Linux':
elif system == "Linux":
candidates = [
home / '.config' / 'BraveSoftware' / 'Brave-Browser',
home / ".config" / "BraveSoftware" / "Brave-Browser",
]
elif system == 'Windows':
local_app_data = Path(os.environ.get('LOCALAPPDATA', home / 'AppData' / 'Local'))
elif system == "Windows":
local_app_data = Path(os.environ.get("LOCALAPPDATA", home / "AppData" / "Local"))
candidates = [
local_app_data / 'BraveSoftware' / 'Brave-Browser' / 'User Data',
local_app_data / "BraveSoftware" / "Brave-Browser" / "User Data",
]
else:
candidates = []
for candidate in candidates:
if candidate.exists() and (candidate / 'Default').exists():
if candidate.exists() and (candidate / "Default").exists():
return candidate
return None
def get_edge_user_data_dir() -> Optional[Path]:
def get_edge_user_data_dir() -> Path | None:
"""Get the default Edge user data directory for the current platform."""
system = platform.system()
home = Path.home()
if system == 'Darwin':
if system == "Darwin":
candidates = [
home / 'Library' / 'Application Support' / 'Microsoft Edge',
home / "Library" / "Application Support" / "Microsoft Edge",
]
elif system == 'Linux':
elif system == "Linux":
candidates = [
home / '.config' / 'microsoft-edge',
home / '.config' / 'microsoft-edge-beta',
home / '.config' / 'microsoft-edge-dev',
home / ".config" / "microsoft-edge",
home / ".config" / "microsoft-edge-beta",
home / ".config" / "microsoft-edge-dev",
]
elif system == 'Windows':
local_app_data = Path(os.environ.get('LOCALAPPDATA', home / 'AppData' / 'Local'))
elif system == "Windows":
local_app_data = Path(os.environ.get("LOCALAPPDATA", home / "AppData" / "Local"))
candidates = [
local_app_data / 'Microsoft' / 'Edge' / 'User Data',
local_app_data / "Microsoft" / "Edge" / "User Data",
]
else:
candidates = []
for candidate in candidates:
if candidate.exists() and (candidate / 'Default').exists():
if candidate.exists() and (candidate / "Default").exists():
return candidate
return None
def get_browser_binary(browser: str) -> Optional[str]:
def get_browser_binary(browser: str) -> str | None:
system = platform.system()
home = Path.home()
browser = browser.lower()
if system == 'Darwin':
if system == "Darwin":
candidates = {
'chrome': ['/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'],
'chromium': ['/Applications/Chromium.app/Contents/MacOS/Chromium'],
'brave': ['/Applications/Brave Browser.app/Contents/MacOS/Brave Browser'],
'edge': ['/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge'],
"chrome": ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"],
"chromium": ["/Applications/Chromium.app/Contents/MacOS/Chromium"],
"brave": ["/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"],
"edge": ["/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"],
}.get(browser, [])
elif system == 'Linux':
elif system == "Linux":
candidates = {
'chrome': ['/usr/bin/google-chrome', '/usr/bin/google-chrome-stable', '/usr/bin/google-chrome-beta', '/usr/bin/google-chrome-unstable'],
'chromium': ['/usr/bin/chromium', '/usr/bin/chromium-browser'],
'brave': ['/usr/bin/brave-browser', '/usr/bin/brave-browser-beta', '/usr/bin/brave-browser-nightly'],
'edge': ['/usr/bin/microsoft-edge', '/usr/bin/microsoft-edge-stable', '/usr/bin/microsoft-edge-beta', '/usr/bin/microsoft-edge-dev'],
"chrome": [
"/usr/bin/google-chrome",
"/usr/bin/google-chrome-stable",
"/usr/bin/google-chrome-beta",
"/usr/bin/google-chrome-unstable",
],
"chromium": ["/usr/bin/chromium", "/usr/bin/chromium-browser"],
"brave": ["/usr/bin/brave-browser", "/usr/bin/brave-browser-beta", "/usr/bin/brave-browser-nightly"],
"edge": [
"/usr/bin/microsoft-edge",
"/usr/bin/microsoft-edge-stable",
"/usr/bin/microsoft-edge-beta",
"/usr/bin/microsoft-edge-dev",
],
}.get(browser, [])
elif system == 'Windows':
local_app_data = Path(os.environ.get('LOCALAPPDATA', home / 'AppData' / 'Local'))
elif system == "Windows":
local_app_data = Path(os.environ.get("LOCALAPPDATA", home / "AppData" / "Local"))
candidates = {
'chrome': [
str(local_app_data / 'Google' / 'Chrome' / 'Application' / 'chrome.exe'),
'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
"chrome": [
str(local_app_data / "Google" / "Chrome" / "Application" / "chrome.exe"),
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
],
'chromium': [str(local_app_data / 'Chromium' / 'Application' / 'chrome.exe')],
'brave': [
str(local_app_data / 'BraveSoftware' / 'Brave-Browser' / 'Application' / 'brave.exe'),
'C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe',
'C:\\Program Files (x86)\\BraveSoftware\\Brave-Browser\\Application\\brave.exe',
"chromium": [str(local_app_data / "Chromium" / "Application" / "chrome.exe")],
"brave": [
str(local_app_data / "BraveSoftware" / "Brave-Browser" / "Application" / "brave.exe"),
"C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe",
"C:\\Program Files (x86)\\BraveSoftware\\Brave-Browser\\Application\\brave.exe",
],
'edge': [
str(local_app_data / 'Microsoft' / 'Edge' / 'Application' / 'msedge.exe'),
'C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe',
'C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe',
"edge": [
str(local_app_data / "Microsoft" / "Edge" / "Application" / "msedge.exe"),
"C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe",
"C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe",
],
}.get(browser, [])
else:
@@ -190,13 +201,13 @@ def get_browser_binary(browser: str) -> Optional[str]:
BROWSER_PROFILE_FINDERS = {
'chrome': get_chrome_user_data_dir,
'chromium': get_chrome_user_data_dir, # Same locations
'brave': get_brave_user_data_dir,
'edge': get_edge_user_data_dir,
"chrome": get_chrome_user_data_dir,
"chromium": get_chrome_user_data_dir, # Same locations
"brave": get_brave_user_data_dir,
"edge": get_edge_user_data_dir,
}
CHROMIUM_BROWSERS = {'chrome', 'chromium', 'brave', 'edge'}
CHROMIUM_BROWSERS = {"chrome", "chromium", "brave", "edge"}
# =============================================================================
@@ -204,12 +215,12 @@ CHROMIUM_BROWSERS = {'chrome', 'chromium', 'brave', 'edge'}
# =============================================================================
NETSCAPE_COOKIE_HEADER = [
'# Netscape HTTP Cookie File',
'# https://curl.se/docs/http-cookies.html',
'# This file was generated by ArchiveBox persona cookie extraction',
'#',
'# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue',
'',
"# Netscape HTTP Cookie File",
"# https://curl.se/docs/http-cookies.html",
"# This file was generated by ArchiveBox persona cookie extraction",
"#",
"# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue",
"",
]
@@ -219,9 +230,9 @@ def _parse_netscape_cookies(path: Path) -> "OrderedDict[tuple[str, str, str], tu
return cookies
for line in path.read_text().splitlines():
if not line or line.startswith('#'):
if not line or line.startswith("#"):
continue
parts = line.split('\t')
parts = line.split("\t")
if len(parts) < 7:
continue
domain, include_subdomains, cookie_path, secure, expiry, name, value = parts[:7]
@@ -233,8 +244,8 @@ def _parse_netscape_cookies(path: Path) -> "OrderedDict[tuple[str, str, str], tu
def _write_netscape_cookies(path: Path, cookies: "OrderedDict[tuple[str, str, str], tuple[str, str, str, str, str, str, str]]") -> None:
lines = list(NETSCAPE_COOKIE_HEADER)
for cookie in cookies.values():
lines.append('\t'.join(cookie))
path.write_text('\n'.join(lines) + '\n')
lines.append("\t".join(cookie))
path.write_text("\n".join(lines) + "\n")
def _merge_netscape_cookies(existing_file: Path, new_file: Path) -> None:
@@ -259,52 +270,52 @@ def extract_cookies_via_cdp(
from archivebox.config.common import STORAGE_CONFIG
# Find the cookie extraction script
chrome_plugin_dir = Path(__file__).parent.parent / 'plugins' / 'chrome'
extract_script = chrome_plugin_dir / 'extract_cookies.js'
chrome_plugin_dir = Path(__file__).parent.parent / "plugins" / "chrome"
extract_script = chrome_plugin_dir / "extract_cookies.js"
if not extract_script.exists():
rprint(f'[yellow]Cookie extraction script not found at {extract_script}[/yellow]', file=sys.stderr)
rprint(f"[yellow]Cookie extraction script not found at {extract_script}[/yellow]", file=sys.stderr)
return False
# Get node modules dir
node_modules_dir = STORAGE_CONFIG.LIB_DIR / 'npm' / 'node_modules'
node_modules_dir = STORAGE_CONFIG.LIB_DIR / "npm" / "node_modules"
# Set up environment
env = os.environ.copy()
env['NODE_MODULES_DIR'] = str(node_modules_dir)
env['CHROME_USER_DATA_DIR'] = str(user_data_dir)
env['CHROME_HEADLESS'] = 'true'
env["NODE_MODULES_DIR"] = str(node_modules_dir)
env["CHROME_USER_DATA_DIR"] = str(user_data_dir)
env["CHROME_HEADLESS"] = "true"
if chrome_binary:
env['CHROME_BINARY'] = str(chrome_binary)
env["CHROME_BINARY"] = str(chrome_binary)
output_path = output_file
temp_output = None
temp_dir = None
if output_file.exists():
temp_dir = Path(tempfile.mkdtemp(prefix='ab_cookies_'))
temp_output = temp_dir / 'cookies.txt'
temp_dir = Path(tempfile.mkdtemp(prefix="ab_cookies_"))
temp_output = temp_dir / "cookies.txt"
output_path = temp_output
if profile_dir:
extra_arg = f'--profile-directory={profile_dir}'
existing_extra = env.get('CHROME_ARGS_EXTRA', '').strip()
extra_arg = f"--profile-directory={profile_dir}"
existing_extra = env.get("CHROME_ARGS_EXTRA", "").strip()
args_list = []
if existing_extra:
if existing_extra.startswith('['):
if existing_extra.startswith("["):
try:
parsed = json.loads(existing_extra)
if isinstance(parsed, list):
args_list.extend(str(x) for x in parsed)
except Exception:
args_list.extend([s.strip() for s in existing_extra.split(',') if s.strip()])
args_list.extend([s.strip() for s in existing_extra.split(",") if s.strip()])
else:
args_list.extend([s.strip() for s in existing_extra.split(',') if s.strip()])
args_list.extend([s.strip() for s in existing_extra.split(",") if s.strip()])
args_list.append(extra_arg)
env['CHROME_ARGS_EXTRA'] = json.dumps(args_list)
env["CHROME_ARGS_EXTRA"] = json.dumps(args_list)
env['COOKIES_OUTPUT_FILE'] = str(output_path)
env["COOKIES_OUTPUT_FILE"] = str(output_path)
try:
result = subprocess.run(
['node', str(extract_script)],
["node", str(extract_script)],
env=env,
capture_output=True,
text=True,
@@ -316,17 +327,17 @@ def extract_cookies_via_cdp(
_merge_netscape_cookies(output_file, temp_output)
return True
else:
rprint(f'[yellow]Cookie extraction failed: {result.stderr}[/yellow]', file=sys.stderr)
rprint(f"[yellow]Cookie extraction failed: {result.stderr}[/yellow]", file=sys.stderr)
return False
except subprocess.TimeoutExpired:
rprint('[yellow]Cookie extraction timed out[/yellow]', file=sys.stderr)
rprint("[yellow]Cookie extraction timed out[/yellow]", file=sys.stderr)
return False
except FileNotFoundError:
rprint('[yellow]Node.js not found. Cannot extract cookies.[/yellow]', file=sys.stderr)
rprint("[yellow]Node.js not found. Cannot extract cookies.[/yellow]", file=sys.stderr)
return False
except Exception as e:
rprint(f'[yellow]Cookie extraction error: {e}[/yellow]', file=sys.stderr)
rprint(f"[yellow]Cookie extraction error: {e}[/yellow]", file=sys.stderr)
return False
finally:
if temp_dir and temp_dir.exists():
@@ -337,6 +348,7 @@ def extract_cookies_via_cdp(
# Validation Helpers
# =============================================================================
def validate_persona_name(name: str) -> tuple[bool, str]:
"""
Validate persona name to prevent path traversal attacks.
@@ -348,19 +360,19 @@ def validate_persona_name(name: str) -> tuple[bool, str]:
return False, "Persona name cannot be empty"
# Check for path separators
if '/' in name or '\\' in name:
if "/" in name or "\\" in name:
return False, "Persona name cannot contain path separators (/ or \\)"
# Check for parent directory references
if '..' in name:
if ".." in name:
return False, "Persona name cannot contain parent directory references (..)"
# Check for hidden files/directories
if name.startswith('.'):
if name.startswith("."):
return False, "Persona name cannot start with a dot (.)"
# Ensure name doesn't contain null bytes or other dangerous chars
if '\x00' in name or '\n' in name or '\r' in name:
if "\x00" in name or "\n" in name or "\r" in name:
return False, "Persona name contains invalid characters"
return True, ""
@@ -394,10 +406,11 @@ def ensure_path_within_personas_dir(persona_path: Path) -> bool:
# CREATE
# =============================================================================
def create_personas(
names: Iterable[str],
import_from: Optional[str] = None,
profile: Optional[str] = None,
import_from: str | None = None,
profile: str | None = None,
) -> int:
"""
Create Personas from names.
@@ -416,7 +429,7 @@ def create_personas(
name_list = list(names) if names else []
if not name_list:
rprint('[yellow]No persona names provided. Pass names as arguments.[/yellow]', file=sys.stderr)
rprint("[yellow]No persona names provided. Pass names as arguments.[/yellow]", file=sys.stderr)
return 1
# Validate import source if specified
@@ -424,23 +437,23 @@ def create_personas(
if import_from:
import_from = import_from.lower()
if import_from not in BROWSER_PROFILE_FINDERS:
rprint(f'[red]Unknown browser: {import_from}[/red]', file=sys.stderr)
rprint(f'[dim]Supported browsers: {", ".join(BROWSER_PROFILE_FINDERS.keys())}[/dim]', file=sys.stderr)
rprint(f"[red]Unknown browser: {import_from}[/red]", file=sys.stderr)
rprint(f"[dim]Supported browsers: {', '.join(BROWSER_PROFILE_FINDERS.keys())}[/dim]", file=sys.stderr)
return 1
source_profile_dir = BROWSER_PROFILE_FINDERS[import_from]()
if not source_profile_dir:
rprint(f'[red]Could not find {import_from} profile directory[/red]', file=sys.stderr)
rprint(f"[red]Could not find {import_from} profile directory[/red]", file=sys.stderr)
return 1
rprint(f'[dim]Found {import_from} profile: {source_profile_dir}[/dim]', file=sys.stderr)
rprint(f"[dim]Found {import_from} profile: {source_profile_dir}[/dim]", file=sys.stderr)
if profile is None and (source_profile_dir / 'Default').exists():
profile = 'Default'
if profile is None and (source_profile_dir / "Default").exists():
profile = "Default"
browser_binary = get_browser_binary(import_from)
if browser_binary:
rprint(f'[dim]Using {import_from} binary: {browser_binary}[/dim]', file=sys.stderr)
rprint(f"[dim]Using {import_from} binary: {browser_binary}[/dim]", file=sys.stderr)
created_count = 0
for name in name_list:
@@ -459,11 +472,11 @@ def create_personas(
if created:
persona.ensure_dirs()
created_count += 1
rprint(f'[green]Created persona: {name}[/green]', file=sys.stderr)
rprint(f"[green]Created persona: {name}[/green]", file=sys.stderr)
else:
rprint(f'[dim]Persona already exists: {name}[/dim]', file=sys.stderr)
rprint(f"[dim]Persona already exists: {name}[/dim]", file=sys.stderr)
cookies_file = Path(persona.path) / 'cookies.txt'
cookies_file = Path(persona.path) / "cookies.txt"
# Import browser profile if requested
if import_from in CHROMIUM_BROWSERS and source_profile_dir is not None:
@@ -477,29 +490,31 @@ def create_personas(
capture_storage=False,
)
except Exception as e:
rprint(f'[red]Failed to import browser profile: {e}[/red]', file=sys.stderr)
rprint(f"[red]Failed to import browser profile: {e}[/red]", file=sys.stderr)
return 1
if import_result.profile_copied:
rprint('[green]Copied browser profile to persona[/green]', file=sys.stderr)
rprint("[green]Copied browser profile to persona[/green]", file=sys.stderr)
if import_result.cookies_imported:
rprint(f'[green]Extracted cookies to {cookies_file}[/green]', file=sys.stderr)
rprint(f"[green]Extracted cookies to {cookies_file}[/green]", file=sys.stderr)
elif not import_result.profile_copied:
rprint('[yellow]Could not import cookies automatically.[/yellow]', file=sys.stderr)
rprint("[yellow]Could not import cookies automatically.[/yellow]", file=sys.stderr)
for warning in import_result.warnings:
rprint(f'[yellow]{warning}[/yellow]', file=sys.stderr)
rprint(f"[yellow]{warning}[/yellow]", file=sys.stderr)
if not is_tty:
write_record({
'id': str(persona.id) if hasattr(persona, 'id') else None,
'name': persona.name,
'path': str(persona.path),
'CHROME_USER_DATA_DIR': persona.CHROME_USER_DATA_DIR,
'COOKIES_FILE': persona.COOKIES_FILE,
})
write_record(
{
"id": str(persona.id) if hasattr(persona, "id") else None,
"name": persona.name,
"path": str(persona.path),
"CHROME_USER_DATA_DIR": persona.CHROME_USER_DATA_DIR,
"COOKIES_FILE": persona.COOKIES_FILE,
},
)
rprint(f'[green]Created {created_count} new persona(s)[/green]', file=sys.stderr)
rprint(f"[green]Created {created_count} new persona(s)[/green]", file=sys.stderr)
return 0
@@ -507,10 +522,11 @@ def create_personas(
# LIST
# =============================================================================
def list_personas(
name: Optional[str] = None,
name__icontains: Optional[str] = None,
limit: Optional[int] = None,
name: str | None = None,
name__icontains: str | None = None,
limit: int | None = None,
) -> int:
"""
List Personas as JSONL with optional filters.
@@ -523,33 +539,35 @@ def list_personas(
is_tty = sys.stdout.isatty()
queryset = Persona.objects.all().order_by('name')
queryset = Persona.objects.all().order_by("name")
# Apply filters
filter_kwargs = {
'name': name,
'name__icontains': name__icontains,
"name": name,
"name__icontains": name__icontains,
}
queryset = apply_filters(queryset, filter_kwargs, limit=limit)
count = 0
for persona in queryset:
cookies_status = '[green]✓[/green]' if persona.COOKIES_FILE else '[dim]✗[/dim]'
chrome_status = '[green]✓[/green]' if Path(persona.CHROME_USER_DATA_DIR).exists() else '[dim]✗[/dim]'
cookies_status = "[green]✓[/green]" if persona.COOKIES_FILE else "[dim]✗[/dim]"
chrome_status = "[green]✓[/green]" if Path(persona.CHROME_USER_DATA_DIR).exists() else "[dim]✗[/dim]"
if is_tty:
rprint(f'[cyan]{persona.name:20}[/cyan] cookies:{cookies_status} chrome:{chrome_status} [dim]{persona.path}[/dim]')
rprint(f"[cyan]{persona.name:20}[/cyan] cookies:{cookies_status} chrome:{chrome_status} [dim]{persona.path}[/dim]")
else:
write_record({
'id': str(persona.id) if hasattr(persona, 'id') else None,
'name': persona.name,
'path': str(persona.path),
'CHROME_USER_DATA_DIR': persona.CHROME_USER_DATA_DIR,
'COOKIES_FILE': persona.COOKIES_FILE,
})
write_record(
{
"id": str(persona.id) if hasattr(persona, "id") else None,
"name": persona.name,
"path": str(persona.path),
"CHROME_USER_DATA_DIR": persona.CHROME_USER_DATA_DIR,
"COOKIES_FILE": persona.COOKIES_FILE,
},
)
count += 1
rprint(f'[dim]Listed {count} persona(s)[/dim]', file=sys.stderr)
rprint(f"[dim]Listed {count} persona(s)[/dim]", file=sys.stderr)
return 0
@@ -557,7 +575,8 @@ def list_personas(
# UPDATE
# =============================================================================
def update_personas(name: Optional[str] = None) -> int:
def update_personas(name: str | None = None) -> int:
"""
Update Personas from stdin JSONL.
@@ -575,13 +594,13 @@ def update_personas(name: Optional[str] = None) -> int:
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
updated_count = 0
for record in records:
persona_id = record.get('id')
old_name = record.get('name')
persona_id = record.get("id")
old_name = record.get("name")
if not persona_id and not old_name:
continue
@@ -613,17 +632,19 @@ def update_personas(name: Optional[str] = None) -> int:
updated_count += 1
if not is_tty:
write_record({
'id': str(persona.id) if hasattr(persona, 'id') else None,
'name': persona.name,
'path': str(persona.path),
})
write_record(
{
"id": str(persona.id) if hasattr(persona, "id") else None,
"name": persona.name,
"path": str(persona.path),
},
)
except Persona.DoesNotExist:
rprint(f'[yellow]Persona not found: {persona_id or old_name}[/yellow]', file=sys.stderr)
rprint(f"[yellow]Persona not found: {persona_id or old_name}[/yellow]", file=sys.stderr)
continue
rprint(f'[green]Updated {updated_count} persona(s)[/green]', file=sys.stderr)
rprint(f"[green]Updated {updated_count} persona(s)[/green]", file=sys.stderr)
return 0
@@ -631,6 +652,7 @@ def update_personas(name: Optional[str] = None) -> int:
# DELETE
# =============================================================================
def delete_personas(yes: bool = False, dry_run: bool = False) -> int:
"""
Delete Personas from stdin JSONL.
@@ -646,23 +668,24 @@ def delete_personas(yes: bool = False, dry_run: bool = False) -> int:
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
# Collect persona IDs or names
persona_ids = []
persona_names = []
for r in records:
if r.get('id'):
persona_ids.append(r['id'])
elif r.get('name'):
persona_names.append(r['name'])
if r.get("id"):
persona_ids.append(r["id"])
elif r.get("name"):
persona_names.append(r["name"])
if not persona_ids and not persona_names:
rprint('[yellow]No valid persona IDs or names in input[/yellow]', file=sys.stderr)
rprint("[yellow]No valid persona IDs or names in input[/yellow]", file=sys.stderr)
return 1
from django.db.models import Q
query = Q()
if persona_ids:
query |= Q(id__in=persona_ids)
@@ -673,17 +696,17 @@ def delete_personas(yes: bool = False, dry_run: bool = False) -> int:
count = personas.count()
if count == 0:
rprint('[yellow]No matching personas found[/yellow]', file=sys.stderr)
rprint("[yellow]No matching personas found[/yellow]", file=sys.stderr)
return 0
if dry_run:
rprint(f'[yellow]Would delete {count} persona(s) (dry run)[/yellow]', file=sys.stderr)
rprint(f"[yellow]Would delete {count} persona(s) (dry run)[/yellow]", file=sys.stderr)
for persona in personas:
rprint(f' {persona.name} ({persona.path})', file=sys.stderr)
rprint(f" {persona.name} ({persona.path})", file=sys.stderr)
return 0
if not yes:
rprint('[red]Use --yes to confirm deletion[/red]', file=sys.stderr)
rprint("[red]Use --yes to confirm deletion[/red]", file=sys.stderr)
return 1
# Delete persona directories and database records
@@ -701,7 +724,7 @@ def delete_personas(yes: bool = False, dry_run: bool = False) -> int:
persona.delete()
deleted_count += 1
rprint(f'[green]Deleted {deleted_count} persona(s)[/green]', file=sys.stderr)
rprint(f"[green]Deleted {deleted_count} persona(s)[/green]", file=sys.stderr)
return 0
@@ -709,44 +732,45 @@ def delete_personas(yes: bool = False, dry_run: bool = False) -> int:
# CLI Commands
# =============================================================================
@click.group()
def main():
"""Manage Persona records (browser profiles)."""
pass
@main.command('create')
@click.argument('names', nargs=-1)
@click.option('--import', 'import_from', help='Import profile from browser (chrome, chromium, brave, edge)')
@click.option('--profile', help='Profile directory name under the user data dir (e.g. Default, Profile 1)')
def create_cmd(names: tuple, import_from: Optional[str], profile: Optional[str]):
@main.command("create")
@click.argument("names", nargs=-1)
@click.option("--import", "import_from", help="Import profile from browser (chrome, chromium, brave, edge)")
@click.option("--profile", help="Profile directory name under the user data dir (e.g. Default, Profile 1)")
def create_cmd(names: tuple, import_from: str | None, profile: str | None):
"""Create Personas, optionally importing from a browser profile."""
sys.exit(create_personas(names, import_from=import_from, profile=profile))
@main.command('list')
@click.option('--name', help='Filter by exact name')
@click.option('--name__icontains', help='Filter by name contains')
@click.option('--limit', '-n', type=int, help='Limit number of results')
def list_cmd(name: Optional[str], name__icontains: Optional[str], limit: Optional[int]):
@main.command("list")
@click.option("--name", help="Filter by exact name")
@click.option("--name__icontains", help="Filter by name contains")
@click.option("--limit", "-n", type=int, help="Limit number of results")
def list_cmd(name: str | None, name__icontains: str | None, limit: int | None):
"""List Personas as JSONL."""
sys.exit(list_personas(name=name, name__icontains=name__icontains, limit=limit))
@main.command('update')
@click.option('--name', '-n', help='Set new name')
def update_cmd(name: Optional[str]):
@main.command("update")
@click.option("--name", "-n", help="Set new name")
def update_cmd(name: str | None):
"""Update Personas from stdin JSONL."""
sys.exit(update_personas(name=name))
@main.command('delete')
@click.option('--yes', '-y', is_flag=True, help='Confirm deletion')
@click.option('--dry-run', is_flag=True, help='Show what would be deleted')
@main.command("delete")
@click.option("--yes", "-y", is_flag=True, help="Confirm deletion")
@click.option("--dry-run", is_flag=True, help="Show what would be deleted")
def delete_cmd(yes: bool, dry_run: bool):
"""Delete Personas from stdin JSONL."""
sys.exit(delete_personas(yes=yes, dry_run=dry_run))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,8 +1,7 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__package__ = "archivebox.cli"
from typing import Optional
import rich_click as click
@@ -137,7 +136,7 @@ BINARY_MACHINE_DIAGRAM = """
@enforce_types
def pluginmap(
show_disabled: bool = False,
model: Optional[str] = None,
model: str | None = None,
quiet: bool = False,
) -> dict:
"""
@@ -164,25 +163,25 @@ def pluginmap(
# Model event types that can have hooks
model_events = {
'Crawl': {
'description': 'Hooks run when a Crawl starts (QUEUED→STARTED)',
'machine': 'CrawlMachine',
'diagram': CRAWL_MACHINE_DIAGRAM,
"Crawl": {
"description": "Hooks run when a Crawl starts (QUEUED→STARTED)",
"machine": "CrawlMachine",
"diagram": CRAWL_MACHINE_DIAGRAM,
},
'CrawlEnd': {
'description': 'Hooks run when a Crawl finishes (STARTED→SEALED)',
'machine': 'CrawlMachine',
'diagram': None, # Part of CrawlMachine
"CrawlEnd": {
"description": "Hooks run when a Crawl finishes (STARTED→SEALED)",
"machine": "CrawlMachine",
"diagram": None, # Part of CrawlMachine
},
'Snapshot': {
'description': 'Hooks run for each Snapshot (creates ArchiveResults)',
'machine': 'SnapshotMachine',
'diagram': SNAPSHOT_MACHINE_DIAGRAM,
"Snapshot": {
"description": "Hooks run for each Snapshot (creates ArchiveResults)",
"machine": "SnapshotMachine",
"diagram": SNAPSHOT_MACHINE_DIAGRAM,
},
'Binary': {
'description': 'Hooks for installing binary dependencies (providers)',
'machine': 'BinaryMachine',
'diagram': BINARY_MACHINE_DIAGRAM,
"Binary": {
"description": "Hooks for installing binary dependencies (providers)",
"machine": "BinaryMachine",
"diagram": BINARY_MACHINE_DIAGRAM,
},
}
@@ -195,16 +194,16 @@ def pluginmap(
model_events = {model: model_events[model]}
result = {
'models': {},
'plugins_dir': str(BUILTIN_PLUGINS_DIR),
'user_plugins_dir': str(USER_PLUGINS_DIR),
"models": {},
"plugins_dir": str(BUILTIN_PLUGINS_DIR),
"user_plugins_dir": str(USER_PLUGINS_DIR),
}
if not quiet:
prnt()
prnt('[bold cyan]ArchiveBox Plugin Map[/bold cyan]')
prnt(f'[dim]Built-in plugins: {BUILTIN_PLUGINS_DIR}[/dim]')
prnt(f'[dim]User plugins: {USER_PLUGINS_DIR}[/dim]')
prnt("[bold cyan]ArchiveBox Plugin Map[/bold cyan]")
prnt(f"[dim]Built-in plugins: {BUILTIN_PLUGINS_DIR}[/dim]")
prnt(f"[dim]User plugins: {USER_PLUGINS_DIR}[/dim]")
prnt()
for event_name, info in model_events.items():
@@ -218,88 +217,93 @@ def pluginmap(
plugin_name = hook_path.parent.name
is_bg = is_background_hook(hook_path.name)
hook_infos.append({
'path': str(hook_path),
'name': hook_path.name,
'plugin': plugin_name,
'is_background': is_bg,
'extension': hook_path.suffix,
})
hook_infos.append(
{
"path": str(hook_path),
"name": hook_path.name,
"plugin": plugin_name,
"is_background": is_bg,
"extension": hook_path.suffix,
},
)
result['models'][event_name] = {
'description': info['description'],
'machine': info['machine'],
'hooks': hook_infos,
'hook_count': len(hook_infos),
result["models"][event_name] = {
"description": info["description"],
"machine": info["machine"],
"hooks": hook_infos,
"hook_count": len(hook_infos),
}
if not quiet:
# Show diagram if this model has one
if info.get('diagram'):
assert info['diagram'] is not None
prnt(Panel(
info['diagram'],
title=f'[bold green]{info["machine"]}[/bold green]',
border_style='green',
expand=False,
))
if info.get("diagram"):
assert info["diagram"] is not None
prnt(
Panel(
info["diagram"],
title=f"[bold green]{info['machine']}[/bold green]",
border_style="green",
expand=False,
),
)
prnt()
# Create hooks table
table = Table(
title=f'[bold yellow]on_{event_name}__* Hooks[/bold yellow] ({len(hooks)} found)',
title=f"[bold yellow]on_{event_name}__* Hooks[/bold yellow] ({len(hooks)} found)",
box=box.ROUNDED,
show_header=True,
header_style='bold magenta',
header_style="bold magenta",
)
table.add_column('Plugin', style='cyan', width=20)
table.add_column('Hook Name', style='green')
table.add_column('BG', justify='center', width=4)
table.add_column('Type', justify='center', width=5)
table.add_column("Plugin", style="cyan", width=20)
table.add_column("Hook Name", style="green")
table.add_column("BG", justify="center", width=4)
table.add_column("Type", justify="center", width=5)
# Sort lexicographically by hook name
sorted_hooks = sorted(hook_infos, key=lambda h: h['name'])
sorted_hooks = sorted(hook_infos, key=lambda h: h["name"])
for hook in sorted_hooks:
bg_marker = '[yellow]bg[/yellow]' if hook['is_background'] else ''
ext = hook['extension'].lstrip('.')
bg_marker = "[yellow]bg[/yellow]" if hook["is_background"] else ""
ext = hook["extension"].lstrip(".")
table.add_row(
hook['plugin'],
hook['name'],
hook["plugin"],
hook["name"],
bg_marker,
ext,
)
prnt(table)
prnt()
prnt(f'[dim]{info["description"]}[/dim]')
prnt(f"[dim]{info['description']}[/dim]")
prnt()
# Summary
if not quiet:
total_hooks = sum(m['hook_count'] for m in result['models'].values())
prnt(f'[bold]Total hooks discovered: {total_hooks}[/bold]')
total_hooks = sum(m["hook_count"] for m in result["models"].values())
prnt(f"[bold]Total hooks discovered: {total_hooks}[/bold]")
prnt()
prnt('[dim]Hook naming convention: on_{Model}__{XX}_{description}[.bg].{ext}[/dim]')
prnt('[dim] - XX: Two-digit lexicographic order (00-99)[/dim]')
prnt('[dim] - .bg: Background hook (non-blocking)[/dim]')
prnt('[dim] - ext: py, sh, or js[/dim]')
prnt("[dim]Hook naming convention: on_{Model}__{XX}_{description}[.bg].{ext}[/dim]")
prnt("[dim] - XX: Two-digit lexicographic order (00-99)[/dim]")
prnt("[dim] - .bg: Background hook (non-blocking)[/dim]")
prnt("[dim] - ext: py, sh, or js[/dim]")
prnt()
return result
@click.command()
@click.option('--show-disabled', '-a', is_flag=True, help='Show hooks from disabled plugins too')
@click.option('--model', '-m', type=str, default=None, help='Filter to specific model (Crawl, Snapshot, Binary, CrawlEnd)')
@click.option('--quiet', '-q', is_flag=True, help='Output JSON only, no ASCII diagrams')
@click.option("--show-disabled", "-a", is_flag=True, help="Show hooks from disabled plugins too")
@click.option("--model", "-m", type=str, default=None, help="Filter to specific model (Crawl, Snapshot, Binary, CrawlEnd)")
@click.option("--quiet", "-q", is_flag=True, help="Output JSON only, no ASCII diagrams")
@docstring(pluginmap.__doc__)
def main(**kwargs):
import json
result = pluginmap(**kwargs)
if kwargs.get('quiet'):
if kwargs.get("quiet"):
print(json.dumps(result, indent=2))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -22,11 +22,10 @@ Examples:
archivebox process list --limit=10
"""
__package__ = 'archivebox.cli'
__command__ = 'archivebox process'
__package__ = "archivebox.cli"
__command__ = "archivebox process"
import sys
from typing import Optional
import rich_click as click
from rich import print as rprint
@@ -38,10 +37,11 @@ from archivebox.cli.cli_utils import apply_filters
# LIST
# =============================================================================
def list_processes(
binary_name: Optional[str] = None,
machine_id: Optional[str] = None,
limit: Optional[int] = None,
binary_name: str | None = None,
machine_id: str | None = None,
limit: int | None = None,
) -> int:
"""
List Processes as JSONL with optional filters.
@@ -54,29 +54,29 @@ def list_processes(
is_tty = sys.stdout.isatty()
queryset = Process.objects.all().select_related('binary', 'machine').order_by('-start_ts')
queryset = Process.objects.all().select_related("binary", "machine").order_by("-start_ts")
# Apply filters
filter_kwargs = {}
if binary_name:
filter_kwargs['binary__name'] = binary_name
filter_kwargs["binary__name"] = binary_name
if machine_id:
filter_kwargs['machine_id'] = machine_id
filter_kwargs["machine_id"] = machine_id
queryset = apply_filters(queryset, filter_kwargs, limit=limit)
count = 0
for process in queryset:
if is_tty:
binary_name_str = process.binary.name if process.binary else 'unknown'
exit_code = process.exit_code if process.exit_code is not None else '?'
status_color = 'green' if process.exit_code == 0 else 'red' if process.exit_code else 'yellow'
rprint(f'[{status_color}]exit={exit_code:3}[/{status_color}] [cyan]{binary_name_str:15}[/cyan] [dim]{process.id}[/dim]')
binary_name_str = process.binary.name if process.binary else "unknown"
exit_code = process.exit_code if process.exit_code is not None else "?"
status_color = "green" if process.exit_code == 0 else "red" if process.exit_code else "yellow"
rprint(f"[{status_color}]exit={exit_code:3}[/{status_color}] [cyan]{binary_name_str:15}[/cyan] [dim]{process.id}[/dim]")
else:
write_record(process.to_json())
count += 1
rprint(f'[dim]Listed {count} processes[/dim]', file=sys.stderr)
rprint(f"[dim]Listed {count} processes[/dim]", file=sys.stderr)
return 0
@@ -84,24 +84,27 @@ def list_processes(
# CLI Commands
# =============================================================================
@click.group()
def main():
"""Manage Process records (read-only, system-managed)."""
pass
@main.command('list')
@click.option('--binary-name', '-b', help='Filter by binary name')
@click.option('--machine-id', '-m', help='Filter by machine ID')
@click.option('--limit', '-n', type=int, help='Limit number of results')
def list_cmd(binary_name: Optional[str], machine_id: Optional[str], limit: Optional[int]):
@main.command("list")
@click.option("--binary-name", "-b", help="Filter by binary name")
@click.option("--machine-id", "-m", help="Filter by machine ID")
@click.option("--limit", "-n", type=int, help="Limit number of results")
def list_cmd(binary_name: str | None, machine_id: str | None, limit: int | None):
"""List Processes as JSONL."""
sys.exit(list_processes(
binary_name=binary_name,
machine_id=machine_id,
limit=limit,
))
sys.exit(
list_processes(
binary_name=binary_name,
machine_id=machine_id,
limit=limit,
),
)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,11 +1,11 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox remove'
__package__ = "archivebox.cli"
__command__ = "archivebox remove"
import shutil
from pathlib import Path
from typing import Iterable
from collections.abc import Iterable
import rich_click as click
@@ -26,25 +26,27 @@ from archivebox.misc.logging_util import (
@enforce_types
def remove(filter_patterns: Iterable[str]=(),
filter_type: str='exact',
snapshots: QuerySet | None=None,
after: float | None=None,
before: float | None=None,
yes: bool=False,
delete: bool=False,
out_dir: Path=DATA_DIR) -> QuerySet:
def remove(
filter_patterns: Iterable[str] = (),
filter_type: str = "exact",
snapshots: QuerySet | None = None,
after: float | None = None,
before: float | None = None,
yes: bool = False,
delete: bool = False,
out_dir: Path = DATA_DIR,
) -> QuerySet:
"""Remove the specified URLs from the archive"""
setup_django()
check_data_folder()
from archivebox.cli.archivebox_search import get_snapshots
pattern_list = list(filter_patterns)
log_list_started(pattern_list or None, filter_type)
timer = TimedProgress(360, prefix=' ')
timer = TimedProgress(360, prefix=" ")
try:
snapshots = get_snapshots(
snapshots=snapshots,
@@ -63,7 +65,7 @@ def remove(filter_patterns: Iterable[str]=(),
log_list_finished(snapshots)
log_removal_started(snapshots, yes=yes, delete=delete)
timer = TimedProgress(360, prefix=' ')
timer = TimedProgress(360, prefix=" ")
try:
for snapshot in snapshots:
if delete:
@@ -88,17 +90,23 @@ def remove(filter_patterns: Iterable[str]=(),
@click.command()
@click.option('--yes', is_flag=True, help='Remove links instantly without prompting to confirm')
@click.option('--delete', is_flag=True, help='Delete the archived content and metadata folder in addition to removing from index')
@click.option('--before', type=float, help='Remove only URLs bookmarked before timestamp')
@click.option('--after', type=float, help='Remove only URLs bookmarked after timestamp')
@click.option('--filter-type', '-f', type=click.Choice(('exact', 'substring', 'domain', 'regex', 'tag')), default='exact', help='Type of pattern matching to use when filtering URLs')
@click.argument('filter_patterns', nargs=-1)
@click.option("--yes", is_flag=True, help="Remove links instantly without prompting to confirm")
@click.option("--delete", is_flag=True, help="Delete the archived content and metadata folder in addition to removing from index")
@click.option("--before", type=float, help="Remove only URLs bookmarked before timestamp")
@click.option("--after", type=float, help="Remove only URLs bookmarked after timestamp")
@click.option(
"--filter-type",
"-f",
type=click.Choice(("exact", "substring", "domain", "regex", "tag")),
default="exact",
help="Type of pattern matching to use when filtering URLs",
)
@click.argument("filter_patterns", nargs=-1)
@docstring(remove.__doc__)
def main(**kwargs):
"""Remove the specified URLs from the archive"""
remove(**kwargs)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -37,8 +37,8 @@ Examples:
archivebox run --binary-id=019b7e90-5a8e-712c-9877-2c70eebe80ad
"""
__package__ = 'archivebox.cli'
__command__ = 'archivebox run'
__package__ = "archivebox.cli"
__command__ = "archivebox run"
import sys
from collections import defaultdict
@@ -87,8 +87,8 @@ def process_stdin_records() -> int:
binary_ids: list[str] = []
for record in records:
record_type = record.get('type', '')
record_id = record.get('id')
record_type = record.get("type", "")
record_id = record.get("id")
try:
if record_type == TYPE_CRAWL:
@@ -97,10 +97,10 @@ def process_stdin_records() -> int:
try:
crawl = Crawl.objects.get(id=record_id)
except Crawl.DoesNotExist:
crawl = Crawl.from_json(record, overrides={'created_by_id': created_by_id})
crawl = Crawl.from_json(record, overrides={"created_by_id": created_by_id})
else:
# New crawl - create it
crawl = Crawl.from_json(record, overrides={'created_by_id': created_by_id})
crawl = Crawl.from_json(record, overrides={"created_by_id": created_by_id})
if crawl:
crawl.retry_at = timezone.now()
@@ -112,16 +112,16 @@ def process_stdin_records() -> int:
output_records.append(crawl.to_json())
queued_count += 1
elif record_type == TYPE_SNAPSHOT or (record.get('url') and not record_type):
elif record_type == TYPE_SNAPSHOT or (record.get("url") and not record_type):
if record_id:
# Existing snapshot - re-queue
try:
snapshot = Snapshot.objects.get(id=record_id)
except Snapshot.DoesNotExist:
snapshot = Snapshot.from_json(record, overrides={'created_by_id': created_by_id})
snapshot = Snapshot.from_json(record, overrides={"created_by_id": created_by_id})
else:
# New snapshot - create it
snapshot = Snapshot.from_json(record, overrides={'created_by_id': created_by_id})
snapshot = Snapshot.from_json(record, overrides={"created_by_id": created_by_id})
if snapshot:
snapshot.retry_at = timezone.now()
@@ -132,7 +132,7 @@ def process_stdin_records() -> int:
crawl.retry_at = timezone.now()
if crawl.status != Crawl.StatusChoices.STARTED:
crawl.status = Crawl.StatusChoices.QUEUED
crawl.save(update_fields=['status', 'retry_at', 'modified_at'])
crawl.save(update_fields=["status", "retry_at", "modified_at"])
crawl_id = str(snapshot.crawl_id)
snapshot_ids_by_crawl[crawl_id].add(str(snapshot.id))
run_all_plugins_for_crawl.add(crawl_id)
@@ -149,11 +149,16 @@ def process_stdin_records() -> int:
else:
archiveresult = None
snapshot_id = record.get('snapshot_id')
plugin_name = record.get('plugin')
snapshot_id = record.get("snapshot_id")
plugin_name = record.get("plugin")
snapshot = None
if archiveresult:
if archiveresult.status in [ArchiveResult.StatusChoices.FAILED, ArchiveResult.StatusChoices.SKIPPED, ArchiveResult.StatusChoices.NORESULTS, ArchiveResult.StatusChoices.BACKOFF]:
if archiveresult.status in [
ArchiveResult.StatusChoices.FAILED,
ArchiveResult.StatusChoices.SKIPPED,
ArchiveResult.StatusChoices.NORESULTS,
ArchiveResult.StatusChoices.BACKOFF,
]:
archiveresult.reset_for_retry()
snapshot = archiveresult.snapshot
plugin_name = plugin_name or archiveresult.plugin
@@ -167,12 +172,12 @@ def process_stdin_records() -> int:
snapshot.retry_at = timezone.now()
if snapshot.status != Snapshot.StatusChoices.STARTED:
snapshot.status = Snapshot.StatusChoices.QUEUED
snapshot.save(update_fields=['status', 'retry_at', 'modified_at'])
snapshot.save(update_fields=["status", "retry_at", "modified_at"])
crawl = snapshot.crawl
crawl.retry_at = timezone.now()
if crawl.status != Crawl.StatusChoices.STARTED:
crawl.status = Crawl.StatusChoices.QUEUED
crawl.save(update_fields=['status', 'retry_at', 'modified_at'])
crawl.save(update_fields=["status", "retry_at", "modified_at"])
crawl_id = str(snapshot.crawl_id)
snapshot_ids_by_crawl[crawl_id].add(str(snapshot.id))
if plugin_name:
@@ -203,7 +208,7 @@ def process_stdin_records() -> int:
output_records.append(record)
except Exception as e:
rprint(f'[yellow]Error processing record: {e}[/yellow]', file=sys.stderr)
rprint(f"[yellow]Error processing record: {e}[/yellow]", file=sys.stderr)
continue
# Output all processed records (for chaining)
@@ -212,10 +217,10 @@ def process_stdin_records() -> int:
write_record(rec)
if queued_count == 0:
rprint('[yellow]No records to process[/yellow]', file=sys.stderr)
rprint("[yellow]No records to process[/yellow]", file=sys.stderr)
return 0
rprint(f'[blue]Processing {queued_count} records...[/blue]', file=sys.stderr)
rprint(f"[blue]Processing {queued_count} records...[/blue]", file=sys.stderr)
for binary_id in binary_ids:
run_binary(binary_id)
@@ -245,13 +250,14 @@ def run_runner(daemon: bool = False) -> int:
from archivebox.services.runner import recover_orphaned_crawls, recover_orphaned_snapshots, run_pending_crawls
Process.cleanup_stale_running()
Process.cleanup_orphaned_workers()
recover_orphaned_snapshots()
recover_orphaned_crawls()
Machine.current()
current = Process.current()
if current.process_type != Process.TypeChoices.ORCHESTRATOR:
current.process_type = Process.TypeChoices.ORCHESTRATOR
current.save(update_fields=['process_type', 'modified_at'])
current.save(update_fields=["process_type", "modified_at"])
try:
run_pending_crawls(daemon=daemon)
@@ -259,21 +265,21 @@ def run_runner(daemon: bool = False) -> int:
except KeyboardInterrupt:
return 0
except Exception as e:
rprint(f'[red]Runner error: {type(e).__name__}: {e}[/red]', file=sys.stderr)
rprint(f"[red]Runner error: {type(e).__name__}: {e}[/red]", file=sys.stderr)
return 1
finally:
current.refresh_from_db()
if current.status != Process.StatusChoices.EXITED:
current.status = Process.StatusChoices.EXITED
current.ended_at = current.ended_at or timezone.now()
current.save(update_fields=['status', 'ended_at', 'modified_at'])
current.save(update_fields=["status", "ended_at", "modified_at"])
@click.command()
@click.option('--daemon', '-d', is_flag=True, help="Run forever (don't exit on idle)")
@click.option('--crawl-id', help="Run the crawl runner for a specific crawl only")
@click.option('--snapshot-id', help="Run one snapshot through its crawl")
@click.option('--binary-id', help="Run one queued binary install directly on the bus")
@click.option("--daemon", "-d", is_flag=True, help="Run forever (don't exit on idle)")
@click.option("--crawl-id", help="Run the crawl runner for a specific crawl only")
@click.option("--snapshot-id", help="Run one snapshot through its crawl")
@click.option("--binary-id", help="Run one queued binary install directly on the bus")
def main(daemon: bool, crawl_id: str, snapshot_id: str, binary_id: str):
"""
Process queued work.
@@ -297,21 +303,24 @@ def main(daemon: bool, crawl_id: str, snapshot_id: str, binary_id: str):
except KeyboardInterrupt:
sys.exit(0)
except Exception as e:
rprint(f'[red]Runner error: {type(e).__name__}: {e}[/red]', file=sys.stderr)
rprint(f"[red]Runner error: {type(e).__name__}: {e}[/red]", file=sys.stderr)
import traceback
traceback.print_exc()
sys.exit(1)
if crawl_id:
try:
from archivebox.services.runner import run_crawl
run_crawl(crawl_id)
sys.exit(0)
except KeyboardInterrupt:
sys.exit(0)
except Exception as e:
rprint(f'[red]Runner error: {type(e).__name__}: {e}[/red]', file=sys.stderr)
rprint(f"[red]Runner error: {type(e).__name__}: {e}[/red]", file=sys.stderr)
import traceback
traceback.print_exc()
sys.exit(1)
@@ -333,17 +342,18 @@ def run_snapshot_worker(snapshot_id: str) -> int:
from archivebox.services.runner import run_crawl
try:
snapshot = Snapshot.objects.select_related('crawl').get(id=snapshot_id)
snapshot = Snapshot.objects.select_related("crawl").get(id=snapshot_id)
run_crawl(str(snapshot.crawl_id), snapshot_ids=[str(snapshot.id)])
return 0
except KeyboardInterrupt:
return 0
except Exception as e:
rprint(f'[red]Runner error: {type(e).__name__}: {e}[/red]', file=sys.stderr)
rprint(f"[red]Runner error: {type(e).__name__}: {e}[/red]", file=sys.stderr)
import traceback
traceback.print_exc()
return 1
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__package__ = "archivebox.cli"
import rich_click as click
from rich import print
@@ -10,18 +10,20 @@ from archivebox.config.common import ARCHIVING_CONFIG
@enforce_types
def schedule(add: bool = False,
show: bool = False,
clear: bool = False,
foreground: bool = False,
run_all: bool = False,
quiet: bool = False,
every: str | None = None,
tag: str = '',
depth: int | str = 0,
overwrite: bool = False,
update: bool = not ARCHIVING_CONFIG.ONLY_NEW,
import_path: str | None = None):
def schedule(
add: bool = False,
show: bool = False,
clear: bool = False,
foreground: bool = False,
run_all: bool = False,
quiet: bool = False,
every: str | None = None,
tag: str = "",
depth: int | str = 0,
overwrite: bool = False,
update: bool = not ARCHIVING_CONFIG.ONLY_NEW,
import_path: str | None = None,
):
"""Manage database-backed scheduled crawls processed by the crawl runner."""
from django.utils import timezone
@@ -33,55 +35,51 @@ def schedule(add: bool = False,
depth = int(depth)
result: dict[str, object] = {
'created_schedule_ids': [],
'disabled_count': 0,
'run_all_enqueued': 0,
'active_schedule_ids': [],
"created_schedule_ids": [],
"disabled_count": 0,
"run_all_enqueued": 0,
"active_schedule_ids": [],
}
def _active_schedules():
return CrawlSchedule.objects.filter(is_enabled=True).select_related('template').order_by('created_at')
return CrawlSchedule.objects.filter(is_enabled=True).select_related("template").order_by("created_at")
if clear:
disabled_count = CrawlSchedule.objects.filter(is_enabled=True).update(
is_enabled=False,
modified_at=timezone.now(),
)
result['disabled_count'] = disabled_count
print(f'[green]\\[√] Disabled {disabled_count} scheduled crawl(s).[/green]')
result["disabled_count"] = disabled_count
print(f"[green]\\[√] Disabled {disabled_count} scheduled crawl(s).[/green]")
if every or add:
schedule_str = (every or 'day').strip()
schedule_str = (every or "day").strip()
validate_schedule(schedule_str)
created_by_id = get_or_create_system_user_pk()
is_update_schedule = not import_path
template_urls = import_path or 'archivebox://update'
template_label = (
f'Scheduled import: {template_urls}'
if import_path else
'Scheduled ArchiveBox update'
)[:64]
template_urls = import_path or "archivebox://update"
template_label = (f"Scheduled import: {template_urls}" if import_path else "Scheduled ArchiveBox update")[:64]
template_notes = (
f'Created by archivebox schedule for {template_urls}'
if import_path else
'Created by archivebox schedule to queue recurring archivebox://update maintenance crawls.'
f"Created by archivebox schedule for {template_urls}"
if import_path
else "Created by archivebox schedule to queue recurring archivebox://update maintenance crawls."
)
template = Crawl.objects.create(
urls=template_urls,
max_depth=0 if is_update_schedule else depth,
tags_str='' if is_update_schedule else tag,
tags_str="" if is_update_schedule else tag,
label=template_label,
notes=template_notes,
created_by_id=created_by_id,
status=Crawl.StatusChoices.SEALED,
retry_at=None,
config={
'ONLY_NEW': not update,
'OVERWRITE': overwrite,
'DEPTH': 0 if is_update_schedule else depth,
'SCHEDULE_KIND': 'update' if is_update_schedule else 'crawl',
"ONLY_NEW": not update,
"OVERWRITE": overwrite,
"DEPTH": 0 if is_update_schedule else depth,
"SCHEDULE_KIND": "update" if is_update_schedule else "crawl",
},
)
crawl_schedule = CrawlSchedule.objects.create(
@@ -92,31 +90,31 @@ def schedule(add: bool = False,
notes=template_notes,
created_by_id=created_by_id,
)
result['created_schedule_ids'] = [str(crawl_schedule.id)]
result["created_schedule_ids"] = [str(crawl_schedule.id)]
schedule_type = 'maintenance update' if is_update_schedule else 'crawl'
print(f'[green]\\[√] Created scheduled {schedule_type}.[/green]')
print(f' id={crawl_schedule.id}')
print(f' every={crawl_schedule.schedule}')
print(f' next_run={crawl_schedule.next_run_at.isoformat()}')
schedule_type = "maintenance update" if is_update_schedule else "crawl"
print(f"[green]\\[√] Created scheduled {schedule_type}.[/green]")
print(f" id={crawl_schedule.id}")
print(f" every={crawl_schedule.schedule}")
print(f" next_run={crawl_schedule.next_run_at.isoformat()}")
if import_path:
print(f' source={import_path}')
print(f" source={import_path}")
schedules = list(_active_schedules())
result['active_schedule_ids'] = [str(schedule.id) for schedule in schedules]
result["active_schedule_ids"] = [str(schedule.id) for schedule in schedules]
if show:
if schedules:
print(f'[green]\\[*] Active scheduled crawls: {len(schedules)}[/green]')
print(f"[green]\\[*] Active scheduled crawls: {len(schedules)}[/green]")
for scheduled_crawl in schedules:
template = scheduled_crawl.template
print(
f' - id={scheduled_crawl.id} every={scheduled_crawl.schedule} '
f'next_run={scheduled_crawl.next_run_at.isoformat()} '
f'source={template.urls.splitlines()[0] if template.urls else ""}'
f" - id={scheduled_crawl.id} every={scheduled_crawl.schedule} "
f"next_run={scheduled_crawl.next_run_at.isoformat()} "
f"source={template.urls.splitlines()[0] if template.urls else ''}",
)
else:
print('[yellow]\\[*] No scheduled crawls are enabled.[/yellow]')
print("[yellow]\\[*] No scheduled crawls are enabled.[/yellow]")
if run_all:
enqueued = 0
@@ -124,13 +122,17 @@ def schedule(add: bool = False,
for scheduled_crawl in schedules:
scheduled_crawl.enqueue(queued_at=now)
enqueued += 1
result['run_all_enqueued'] = enqueued
print(f'[green]\\[*] Enqueued {enqueued} scheduled crawl(s) immediately.[/green]')
result["run_all_enqueued"] = enqueued
print(f"[green]\\[*] Enqueued {enqueued} scheduled crawl(s) immediately.[/green]")
if enqueued:
print('[yellow]\\[*] Start `archivebox server`, `archivebox run --daemon`, or `archivebox schedule --foreground` to process the queued crawls.[/yellow]')
print(
"[yellow]\\[*] Start `archivebox server`, `archivebox run --daemon`, or `archivebox schedule --foreground` to process the queued crawls.[/yellow]",
)
if foreground:
print('[green]\\[*] Starting global crawl runner in foreground mode. It will materialize scheduled crawls and process queued work.[/green]')
print(
"[green]\\[*] Starting global crawl runner in foreground mode. It will materialize scheduled crawls and process queued work.[/green]",
)
run_pending_crawls(daemon=True)
if quiet:
@@ -138,33 +140,38 @@ def schedule(add: bool = False,
if not any((every, add, show, clear, foreground, run_all)):
if schedules:
print('[green]\\[*] Active scheduled crawls:[/green]')
print("[green]\\[*] Active scheduled crawls:[/green]")
for scheduled_crawl in schedules:
print(f' - {scheduled_crawl.id} every={scheduled_crawl.schedule} next_run={scheduled_crawl.next_run_at.isoformat()}')
print(f" - {scheduled_crawl.id} every={scheduled_crawl.schedule} next_run={scheduled_crawl.next_run_at.isoformat()}")
else:
print('[yellow]\\[*] No scheduled crawls are enabled.[/yellow]')
print("[yellow]\\[*] No scheduled crawls are enabled.[/yellow]")
return result
@click.command()
@click.option('--quiet', '-q', is_flag=True, help="Return structured results without extra summary output")
@click.option('--add', is_flag=True, help='Create a new scheduled crawl')
@click.option('--every', type=str, help='Run on an alias like daily/weekly/monthly or a cron expression such as "0 */6 * * *"')
@click.option('--tag', '-t', default='', help='Comma-separated tags to apply to scheduled crawl snapshots')
@click.option('--depth', type=click.Choice([str(i) for i in range(5)]), default='0', help='Recursively archive linked pages up to N hops away')
@click.option('--overwrite', is_flag=True, help='Overwrite existing data if URLs have been archived previously')
@click.option('--update', is_flag=True, help='Retry previously failed/skipped URLs when scheduled crawls run')
@click.option('--clear', is_flag=True, help='Disable all currently enabled schedules')
@click.option('--show', is_flag=True, help='Print all currently enabled schedules')
@click.option('--foreground', '-f', is_flag=True, help='Run the global crawl runner in the foreground (no crontab required)')
@click.option('--run-all', is_flag=True, help='Enqueue all enabled schedules immediately and process them once')
@click.argument('import_path', required=False)
@click.option("--quiet", "-q", is_flag=True, help="Return structured results without extra summary output")
@click.option("--add", is_flag=True, help="Create a new scheduled crawl")
@click.option("--every", type=str, help='Run on an alias like daily/weekly/monthly or a cron expression such as "0 */6 * * *"')
@click.option("--tag", "-t", default="", help="Comma-separated tags to apply to scheduled crawl snapshots")
@click.option(
"--depth",
type=click.Choice([str(i) for i in range(5)]),
default="0",
help="Recursively archive linked pages up to N hops away",
)
@click.option("--overwrite", is_flag=True, help="Overwrite existing data if URLs have been archived previously")
@click.option("--update", is_flag=True, help="Retry previously failed/skipped URLs when scheduled crawls run")
@click.option("--clear", is_flag=True, help="Disable all currently enabled schedules")
@click.option("--show", is_flag=True, help="Print all currently enabled schedules")
@click.option("--foreground", "-f", is_flag=True, help="Run the global crawl runner in the foreground (no crontab required)")
@click.option("--run-all", is_flag=True, help="Enqueue all enabled schedules immediately and process them once")
@click.argument("import_path", required=False)
@docstring(schedule.__doc__)
def main(**kwargs):
"""Manage database-backed scheduled crawls processed by the crawl runner."""
schedule(**kwargs)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,11 +1,12 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox search'
__package__ = "archivebox.cli"
__command__ = "archivebox search"
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Callable
from typing import TYPE_CHECKING
from collections.abc import Callable
import rich_click as click
@@ -20,30 +21,28 @@ if TYPE_CHECKING:
# Filter types for URL matching
LINK_FILTERS: dict[str, Callable[[str], Q]] = {
'exact': lambda pattern: Q(url=pattern),
'substring': lambda pattern: Q(url__icontains=pattern),
'regex': lambda pattern: Q(url__iregex=pattern),
'domain': lambda pattern: (
Q(url__istartswith=f'http://{pattern}')
| Q(url__istartswith=f'https://{pattern}')
| Q(url__istartswith=f'ftp://{pattern}')
"exact": lambda pattern: Q(url=pattern),
"substring": lambda pattern: Q(url__icontains=pattern),
"regex": lambda pattern: Q(url__iregex=pattern),
"domain": lambda pattern: (
Q(url__istartswith=f"http://{pattern}") | Q(url__istartswith=f"https://{pattern}") | Q(url__istartswith=f"ftp://{pattern}")
),
'tag': lambda pattern: Q(tags__name=pattern),
'timestamp': lambda pattern: Q(timestamp=pattern),
"tag": lambda pattern: Q(tags__name=pattern),
"timestamp": lambda pattern: Q(timestamp=pattern),
}
STATUS_CHOICES = ['indexed', 'archived', 'unarchived']
STATUS_CHOICES = ["indexed", "archived", "unarchived"]
def _apply_pattern_filters(
snapshots: QuerySet['Snapshot', 'Snapshot'],
snapshots: QuerySet["Snapshot", "Snapshot"],
filter_patterns: list[str],
filter_type: str,
) -> QuerySet['Snapshot', 'Snapshot']:
) -> QuerySet["Snapshot", "Snapshot"]:
filter_builder = LINK_FILTERS.get(filter_type)
if filter_builder is None:
stderr()
stderr(f'[X] Got invalid pattern for --filter-type={filter_type}', color='red')
stderr(f"[X] Got invalid pattern for --filter-type={filter_type}", color="red")
raise SystemExit(2)
query = Q()
@@ -53,7 +52,7 @@ def _apply_pattern_filters(
def _snapshots_to_json(
snapshots: QuerySet['Snapshot', 'Snapshot'],
snapshots: QuerySet["Snapshot", "Snapshot"],
*,
with_headers: bool,
) -> str:
@@ -63,31 +62,35 @@ def _snapshots_to_json(
from archivebox.config.common import SERVER_CONFIG
from archivebox.misc.util import to_json
main_index_header = {
'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
'schema': 'archivebox.index.json',
'copyright_info': SERVER_CONFIG.FOOTER_INFO,
'meta': {
'project': 'ArchiveBox',
'version': VERSION,
'git_sha': VERSION,
'website': 'https://ArchiveBox.io',
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
'source': 'https://github.com/ArchiveBox/ArchiveBox',
'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
'dependencies': {},
},
} if with_headers else {}
main_index_header = (
{
"info": "This is an index of site data archived by ArchiveBox: The self-hosted web archive.",
"schema": "archivebox.index.json",
"copyright_info": SERVER_CONFIG.FOOTER_INFO,
"meta": {
"project": "ArchiveBox",
"version": VERSION,
"git_sha": VERSION,
"website": "https://ArchiveBox.io",
"docs": "https://github.com/ArchiveBox/ArchiveBox/wiki",
"source": "https://github.com/ArchiveBox/ArchiveBox",
"issues": "https://github.com/ArchiveBox/ArchiveBox/issues",
"dependencies": {},
},
}
if with_headers
else {}
)
snapshot_dicts = [snapshot.to_dict(extended=True) for snapshot in snapshots.iterator(chunk_size=500)]
output: dict[str, object] | list[dict[str, object]]
if with_headers:
output = {
**main_index_header,
'num_links': len(snapshot_dicts),
'updated': datetime.now(tz.utc),
'last_run_cmd': sys.argv,
'links': snapshot_dicts,
"num_links": len(snapshot_dicts),
"updated": datetime.now(tz.utc),
"last_run_cmd": sys.argv,
"links": snapshot_dicts,
}
else:
output = snapshot_dicts
@@ -96,18 +99,18 @@ def _snapshots_to_json(
def _snapshots_to_csv(
snapshots: QuerySet['Snapshot', 'Snapshot'],
snapshots: QuerySet["Snapshot", "Snapshot"],
*,
cols: list[str],
with_headers: bool,
) -> str:
header = ','.join(cols) if with_headers else ''
rows = [snapshot.to_csv(cols=cols, separator=',') for snapshot in snapshots.iterator(chunk_size=500)]
return '\n'.join((header, *rows))
header = ",".join(cols) if with_headers else ""
rows = [snapshot.to_csv(cols=cols, separator=",") for snapshot in snapshots.iterator(chunk_size=500)]
return "\n".join((header, *rows))
def _snapshots_to_html(
snapshots: QuerySet['Snapshot', 'Snapshot'],
snapshots: QuerySet["Snapshot", "Snapshot"],
*,
with_headers: bool,
) -> str:
@@ -119,26 +122,31 @@ def _snapshots_to_html(
from archivebox.config.common import SERVER_CONFIG
from archivebox.config.version import get_COMMIT_HASH
template = 'static_index.html' if with_headers else 'minimal_index.html'
template = "static_index.html" if with_headers else "minimal_index.html"
snapshot_list = list(snapshots.iterator(chunk_size=500))
return render_to_string(template, {
'version': VERSION,
'git_sha': get_COMMIT_HASH() or VERSION,
'num_links': str(len(snapshot_list)),
'date_updated': datetime.now(tz.utc).strftime('%Y-%m-%d'),
'time_updated': datetime.now(tz.utc).strftime('%Y-%m-%d %H:%M'),
'links': snapshot_list,
'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
})
return render_to_string(
template,
{
"version": VERSION,
"git_sha": get_COMMIT_HASH() or VERSION,
"num_links": str(len(snapshot_list)),
"date_updated": datetime.now(tz.utc).strftime("%Y-%m-%d"),
"time_updated": datetime.now(tz.utc).strftime("%Y-%m-%d %H:%M"),
"links": snapshot_list,
"FOOTER_INFO": SERVER_CONFIG.FOOTER_INFO,
},
)
def get_snapshots(snapshots: QuerySet['Snapshot', 'Snapshot'] | None=None,
filter_patterns: list[str] | None=None,
filter_type: str='substring',
after: float | None=None,
before: float | None=None,
out_dir: Path=DATA_DIR) -> QuerySet['Snapshot', 'Snapshot']:
def get_snapshots(
snapshots: QuerySet["Snapshot", "Snapshot"] | None = None,
filter_patterns: list[str] | None = None,
filter_type: str = "substring",
after: float | None = None,
before: float | None = None,
out_dir: Path = DATA_DIR,
) -> QuerySet["Snapshot", "Snapshot"]:
"""Filter and return Snapshots matching the given criteria."""
from archivebox.core.models import Snapshot
@@ -155,29 +163,31 @@ def get_snapshots(snapshots: QuerySet['Snapshot', 'Snapshot'] | None=None,
result = _apply_pattern_filters(result, filter_patterns, filter_type)
# Prefetch crawl relationship to avoid N+1 queries when accessing output_dir
result = result.select_related('crawl', 'crawl__created_by')
result = result.select_related("crawl", "crawl__created_by")
if not result.exists():
stderr('[!] No Snapshots matched your filters:', filter_patterns, f'({filter_type})', color='lightyellow')
stderr("[!] No Snapshots matched your filters:", filter_patterns, f"({filter_type})", color="lightyellow")
return result
@enforce_types
def search(filter_patterns: list[str] | None=None,
filter_type: str='substring',
status: str='indexed',
before: float | None=None,
after: float | None=None,
sort: str | None=None,
json: bool=False,
html: bool=False,
csv: str | None=None,
with_headers: bool=False):
def search(
filter_patterns: list[str] | None = None,
filter_type: str = "substring",
status: str = "indexed",
before: float | None = None,
after: float | None = None,
sort: str | None = None,
json: bool = False,
html: bool = False,
csv: str | None = None,
with_headers: bool = False,
):
"""List, filter, and export information about archive entries"""
if with_headers and not (json or html or csv):
stderr('[X] --with-headers requires --json, --html or --csv\n', color='red')
stderr("[X] --with-headers requires --json, --html or --csv\n", color="red")
raise SystemExit(2)
# Query DB directly - no filesystem scanning
@@ -189,9 +199,9 @@ def search(filter_patterns: list[str] | None=None,
)
# Apply status filter
if status == 'archived':
if status == "archived":
snapshots = snapshots.filter(downloaded_at__isnull=False)
elif status == 'unarchived':
elif status == "unarchived":
snapshots = snapshots.filter(downloaded_at__isnull=True)
# 'indexed' = all snapshots (no filter)
@@ -204,9 +214,10 @@ def search(filter_patterns: list[str] | None=None,
elif html:
output = _snapshots_to_html(snapshots, with_headers=with_headers)
elif csv:
output = _snapshots_to_csv(snapshots, cols=csv.split(','), with_headers=with_headers)
output = _snapshots_to_csv(snapshots, cols=csv.split(","), with_headers=with_headers)
else:
from archivebox.misc.logging_util import printable_folders
# Convert to dict for printable_folders
folders: dict[str, Snapshot | None] = {str(snapshot.output_dir): snapshot for snapshot in snapshots}
output = printable_folders(folders, with_headers)
@@ -214,28 +225,33 @@ def search(filter_patterns: list[str] | None=None,
# Structured exports must be written directly to stdout.
# rich.print() reflows long lines to console width, which corrupts JSON/CSV/HTML output.
sys.stdout.write(output)
if not output.endswith('\n'):
sys.stdout.write('\n')
if not output.endswith("\n"):
sys.stdout.write("\n")
return output
@click.command()
@click.option('--filter-type', '-f', type=click.Choice(['search', *LINK_FILTERS.keys()]), default='substring', help='Pattern matching type for filtering URLs')
@click.option('--status', '-s', type=click.Choice(STATUS_CHOICES), default='indexed', help='List snapshots with the given status')
@click.option('--before', '-b', type=float, help='List snapshots bookmarked before the given UNIX timestamp')
@click.option('--after', '-a', type=float, help='List snapshots bookmarked after the given UNIX timestamp')
@click.option('--sort', '-o', type=str, help='Field to sort by, e.g. url, created_at, bookmarked_at, downloaded_at')
@click.option('--json', '-J', is_flag=True, help='Print output in JSON format')
@click.option('--html', '-M', is_flag=True, help='Print output in HTML format (suitable for viewing statically without a server)')
@click.option('--csv', '-C', type=str, help='Print output as CSV with the provided fields, e.g.: created_at,url,title')
@click.option('--with-headers', '-H', is_flag=True, help='Include extra CSV/HTML headers in the output')
@click.help_option('--help', '-h')
@click.argument('filter_patterns', nargs=-1)
@click.option(
"--filter-type",
"-f",
type=click.Choice(["search", *LINK_FILTERS.keys()]),
default="substring",
help="Pattern matching type for filtering URLs",
)
@click.option("--status", "-s", type=click.Choice(STATUS_CHOICES), default="indexed", help="List snapshots with the given status")
@click.option("--before", "-b", type=float, help="List snapshots bookmarked before the given UNIX timestamp")
@click.option("--after", "-a", type=float, help="List snapshots bookmarked after the given UNIX timestamp")
@click.option("--sort", "-o", type=str, help="Field to sort by, e.g. url, created_at, bookmarked_at, downloaded_at")
@click.option("--json", "-J", is_flag=True, help="Print output in JSON format")
@click.option("--html", "-M", is_flag=True, help="Print output in HTML format (suitable for viewing statically without a server)")
@click.option("--csv", "-C", type=str, help="Print output as CSV with the provided fields, e.g.: created_at,url,title")
@click.option("--with-headers", "-H", is_flag=True, help="Include extra CSV/HTML headers in the output")
@click.help_option("--help", "-h")
@click.argument("filter_patterns", nargs=-1)
@docstring(search.__doc__)
def main(**kwargs):
return search(**kwargs)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__package__ = "archivebox.cli"
from typing import Iterable
from collections.abc import Iterable
import sys
import rich_click as click
@@ -15,20 +15,23 @@ from archivebox.config.common import SERVER_CONFIG
def stop_existing_background_runner(*, machine, process_model, supervisor=None, stop_worker_fn=None, log=print) -> int:
"""Stop any existing orchestrator process so the server can take ownership."""
process_model.cleanup_stale_running(machine=machine)
process_model.cleanup_orphaned_workers()
running_runners = list(process_model.objects.filter(
machine=machine,
status=process_model.StatusChoices.RUNNING,
process_type=process_model.TypeChoices.ORCHESTRATOR,
).order_by('created_at'))
running_runners = list(
process_model.objects.filter(
machine=machine,
status=process_model.StatusChoices.RUNNING,
process_type=process_model.TypeChoices.ORCHESTRATOR,
).order_by("created_at"),
)
if not running_runners:
return 0
log('[yellow][*] Stopping existing ArchiveBox background runner...[/yellow]')
log("[yellow][*] Stopping existing ArchiveBox background runner...[/yellow]")
if supervisor is not None and stop_worker_fn is not None:
for worker_name in ('worker_runner', 'worker_runner_watch'):
for worker_name in ("worker_runner", "worker_runner_watch"):
try:
stop_worker_fn(supervisor, worker_name)
except Exception:
@@ -47,23 +50,70 @@ def stop_existing_background_runner(*, machine, process_model, supervisor=None,
return len(running_runners)
def _read_supervisor_worker_command(worker_name: str) -> str:
from archivebox.workers.supervisord_util import WORKERS_DIR_NAME, get_sock_file
worker_conf = get_sock_file().parent / WORKERS_DIR_NAME / f"{worker_name}.conf"
if not worker_conf.exists():
return ""
for line in worker_conf.read_text().splitlines():
if line.startswith("command="):
return line.removeprefix("command=").strip()
return ""
def _worker_command_matches_bind(command: str, host: str, port: str) -> bool:
if not command:
return False
return f"{host}:{port}" in command or (f"--bind={host}" in command and f"--port={port}" in command)
def stop_existing_server_workers(*, supervisor, stop_worker_fn, host: str, port: str, log=print) -> int:
"""Stop existing ArchiveBox web workers if they already own the requested bind."""
stopped = 0
for worker_name in ("worker_runserver", "worker_daphne"):
try:
proc = supervisor.getProcessInfo(worker_name) if supervisor else None
except Exception:
proc = None
if not isinstance(proc, dict) or proc.get("statename") != "RUNNING":
continue
command = _read_supervisor_worker_command(worker_name)
if not _worker_command_matches_bind(command, host, port):
continue
if stopped == 0:
log("[yellow][*] Taking over existing ArchiveBox web server on same port...[/yellow]")
stop_worker_fn(supervisor, worker_name)
stopped += 1
return stopped
@enforce_types
def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
reload: bool=False,
init: bool=False,
debug: bool=False,
daemonize: bool=False,
nothreading: bool=False) -> None:
def server(
runserver_args: Iterable[str] = (SERVER_CONFIG.BIND_ADDR,),
reload: bool = False,
init: bool = False,
debug: bool = False,
daemonize: bool = False,
nothreading: bool = False,
) -> None:
"""Run the ArchiveBox HTTP server"""
runserver_args = list(runserver_args)
if init:
from archivebox.cli.archivebox_init import init as archivebox_init
archivebox_init(quick=True)
print()
from archivebox.misc.checks import check_data_folder
check_data_folder()
from archivebox.config.common import SHELL_CONFIG
@@ -73,22 +123,24 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
SHELL_CONFIG.DEBUG = True
from django.contrib.auth.models import User
if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
if not User.objects.filter(is_superuser=True).exclude(username="system").exists():
print()
print('[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:')
print(' [green]archivebox manage createsuperuser[/green]')
print(
"[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:",
)
print(" [green]archivebox manage createsuperuser[/green]")
print()
host = '127.0.0.1'
port = '8000'
host = "127.0.0.1"
port = "8000"
try:
host_and_port = [arg for arg in runserver_args if arg.replace('.', '').replace(':', '').isdigit()][0]
if ':' in host_and_port:
host, port = host_and_port.split(':')
host_and_port = [arg for arg in runserver_args if arg.replace(".", "").replace(":", "").isdigit()][0]
if ":" in host_and_port:
host, port = host_and_port.split(":")
else:
if '.' in host_and_port:
if "." in host_and_port:
host = host_and_port
else:
port = host_and_port
@@ -104,66 +156,80 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
)
from archivebox.machine.models import Machine, Process
# Check if port is already in use
if is_port_in_use(host, int(port)):
print(f'[red][X] Error: Port {port} is already in use[/red]')
print(f' Another process (possibly daphne or runserver) is already listening on {host}:{port}')
print(' Stop the conflicting process or choose a different port')
sys.exit(1)
machine = Machine.current()
supervisor = get_existing_supervisord_process()
stop_existing_background_runner(
machine=machine,
process_model=Process,
supervisor=get_existing_supervisord_process(),
supervisor=supervisor,
stop_worker_fn=stop_worker,
)
if supervisor:
stop_existing_server_workers(
supervisor=supervisor,
stop_worker_fn=stop_worker,
host=host,
port=port,
)
# Check if port is already in use
if is_port_in_use(host, int(port)):
print(f"[red][X] Error: Port {port} is already in use[/red]")
print(f" Another process (possibly daphne or runserver) is already listening on {host}:{port}")
print(" Stop the conflicting process or choose a different port")
sys.exit(1)
supervisor = get_existing_supervisord_process()
if supervisor:
server_worker_name = 'worker_runserver' if run_in_debug else 'worker_daphne'
server_worker_name = "worker_runserver" if run_in_debug else "worker_daphne"
server_proc = get_worker(supervisor, server_worker_name)
server_state = server_proc.get('statename') if isinstance(server_proc, dict) else None
if server_state == 'RUNNING':
runner_proc = get_worker(supervisor, 'worker_runner')
runner_watch_proc = get_worker(supervisor, 'worker_runner_watch')
runner_state = runner_proc.get('statename') if isinstance(runner_proc, dict) else None
runner_watch_state = runner_watch_proc.get('statename') if isinstance(runner_watch_proc, dict) else None
print('[red][X] Error: ArchiveBox server is already running[/red]')
print(f' [green]√[/green] Web server ({server_worker_name}) is RUNNING on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
if runner_state == 'RUNNING':
print(' [green]√[/green] Background runner (worker_runner) is RUNNING')
if runner_watch_state == 'RUNNING':
print(' [green]√[/green] Reload watcher (worker_runner_watch) is RUNNING')
server_state = server_proc.get("statename") if isinstance(server_proc, dict) else None
if server_state == "RUNNING":
runner_proc = get_worker(supervisor, "worker_runner")
runner_watch_proc = get_worker(supervisor, "worker_runner_watch")
runner_state = runner_proc.get("statename") if isinstance(runner_proc, dict) else None
runner_watch_state = runner_watch_proc.get("statename") if isinstance(runner_watch_proc, dict) else None
print("[red][X] Error: ArchiveBox server is already running[/red]")
print(
f" [green]√[/green] Web server ({server_worker_name}) is RUNNING on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]",
)
if runner_state == "RUNNING":
print(" [green]√[/green] Background runner (worker_runner) is RUNNING")
if runner_watch_state == "RUNNING":
print(" [green]√[/green] Reload watcher (worker_runner_watch) is RUNNING")
print()
print('[yellow]To stop the existing server, run:[/yellow]')
print("[yellow]To stop the existing server, run:[/yellow]")
print(' pkill -f "archivebox server"')
print(' pkill -f supervisord')
print(" pkill -f supervisord")
sys.exit(1)
if run_in_debug:
print('[green][+] Starting ArchiveBox webserver in DEBUG mode...[/green]')
print("[green][+] Starting ArchiveBox webserver in DEBUG mode...[/green]")
else:
print('[green][+] Starting ArchiveBox webserver...[/green]')
print(f' [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
print(f' [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
print(' > Writing ArchiveBox error log to ./logs/errors.log')
print("[green][+] Starting ArchiveBox webserver...[/green]")
print(
f" [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]",
)
print(
f" [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]",
)
print(" > Writing ArchiveBox error log to ./logs/errors.log")
print()
start_server_workers(host=host, port=port, daemonize=daemonize, debug=run_in_debug, reload=reload, nothreading=nothreading)
print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
@click.command()
@click.argument('runserver_args', nargs=-1)
@click.option('--reload', is_flag=True, help='Enable auto-reloading when code or templates change')
@click.option('--debug', is_flag=True, help='Enable DEBUG=True mode with more verbose errors')
@click.option('--nothreading', is_flag=True, help='Force runserver to run in single-threaded mode')
@click.option('--init', is_flag=True, help='Run a full archivebox init/upgrade before starting the server')
@click.option('--daemonize', is_flag=True, help='Run the server in the background as a daemon')
@click.argument("runserver_args", nargs=-1)
@click.option("--reload", is_flag=True, help="Enable auto-reloading when code or templates change")
@click.option("--debug", is_flag=True, help="Enable DEBUG=True mode with more verbose errors")
@click.option("--nothreading", is_flag=True, help="Force runserver to run in single-threaded mode")
@click.option("--init", is_flag=True, help="Run a full archivebox init/upgrade before starting the server")
@click.option("--daemonize", is_flag=True, help="Run the server in the background as a daemon")
@docstring(server.__doc__)
def main(**kwargs):
server(**kwargs)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,27 +1,28 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__package__ = "archivebox.cli"
from typing import Iterable
from collections.abc import Iterable
import rich_click as click
from archivebox.misc.util import docstring
def shell(args: Iterable[str]=()) -> None:
def shell(args: Iterable[str] = ()) -> None:
"""Enter an interactive ArchiveBox Django shell"""
from django.core.management import call_command
call_command("shell_plus", *args)
@click.command(add_help_option=False, context_settings=dict(ignore_unknown_options=True))
@click.argument('args', nargs=-1)
@click.argument("args", nargs=-1)
@docstring(shell.__doc__)
def main(args: Iterable[str]=()) -> None:
def main(args: Iterable[str] = ()) -> None:
shell(args=args)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -27,14 +27,16 @@ Examples:
archivebox snapshot list --url__icontains=spam.com | archivebox snapshot delete --yes
"""
__package__ = 'archivebox.cli'
__command__ = 'archivebox snapshot'
__package__ = "archivebox.cli"
__command__ = "archivebox snapshot"
import sys
from typing import Optional, Iterable
from collections.abc import Iterable
import rich_click as click
from rich import print as rprint
from django.db.models import Q, Sum
from django.db.models.functions import Coalesce
from archivebox.cli.cli_utils import apply_filters
@@ -43,12 +45,13 @@ from archivebox.cli.cli_utils import apply_filters
# CREATE
# =============================================================================
def create_snapshots(
urls: Iterable[str],
tag: str = '',
status: str = 'queued',
tag: str = "",
status: str = "queued",
depth: int = 0,
created_by_id: Optional[int] = None,
created_by_id: int | None = None,
) -> int:
"""
Create Snapshots from URLs or stdin JSONL (Crawl or Snapshot records).
@@ -59,8 +62,10 @@ def create_snapshots(
1: Failure
"""
from archivebox.misc.jsonl import (
read_args_or_stdin, write_record,
TYPE_SNAPSHOT, TYPE_CRAWL
read_args_or_stdin,
write_record,
TYPE_SNAPSHOT,
TYPE_CRAWL,
)
from archivebox.base_models.models import get_or_create_system_user_pk
from archivebox.core.models import Snapshot
@@ -73,7 +78,7 @@ def create_snapshots(
records = list(read_args_or_stdin(urls))
if not records:
rprint('[yellow]No URLs or Crawls provided. Pass URLs as arguments or via stdin.[/yellow]', file=sys.stderr)
rprint("[yellow]No URLs or Crawls provided. Pass URLs as arguments or via stdin.[/yellow]", file=sys.stderr)
return 1
# Process each record - handle Crawls and plain URLs/Snapshots
@@ -81,7 +86,7 @@ def create_snapshots(
pass_through_count = 0
for record in records:
record_type = record.get('type', '')
record_type = record.get("type", "")
try:
if record_type == TYPE_CRAWL:
@@ -91,14 +96,14 @@ def create_snapshots(
# Input is a Crawl - get or create it, then create Snapshots for its URLs
crawl = None
crawl_id = record.get('id')
crawl_id = record.get("id")
if crawl_id:
try:
crawl = Crawl.objects.get(id=crawl_id)
except Crawl.DoesNotExist:
crawl = Crawl.from_json(record, overrides={'created_by_id': created_by_id})
crawl = Crawl.from_json(record, overrides={"created_by_id": created_by_id})
else:
crawl = Crawl.from_json(record, overrides={'created_by_id': created_by_id})
crawl = Crawl.from_json(record, overrides={"created_by_id": created_by_id})
if not crawl:
continue
@@ -109,27 +114,27 @@ def create_snapshots(
if tag:
merged_tags = f"{merged_tags},{tag}" if merged_tags else tag
snapshot_record = {
'url': url,
'tags': merged_tags,
'crawl_id': str(crawl.id),
'depth': depth,
'status': status,
"url": url,
"tags": merged_tags,
"crawl_id": str(crawl.id),
"depth": depth,
"status": status,
}
snapshot = Snapshot.from_json(snapshot_record, overrides={'created_by_id': created_by_id})
snapshot = Snapshot.from_json(snapshot_record, overrides={"created_by_id": created_by_id})
if snapshot:
created_snapshots.append(snapshot)
if not is_tty:
write_record(snapshot.to_json())
elif record_type == TYPE_SNAPSHOT or record.get('url'):
elif record_type == TYPE_SNAPSHOT or record.get("url"):
# Input is a Snapshot or plain URL
if tag and not record.get('tags'):
record['tags'] = tag
if tag and not record.get("tags"):
record["tags"] = tag
if status:
record['status'] = status
record['depth'] = record.get('depth', depth)
record["status"] = status
record["depth"] = record.get("depth", depth)
snapshot = Snapshot.from_json(record, overrides={'created_by_id': created_by_id})
snapshot = Snapshot.from_json(record, overrides={"created_by_id": created_by_id})
if snapshot:
created_snapshots.append(snapshot)
if not is_tty:
@@ -142,21 +147,21 @@ def create_snapshots(
pass_through_count += 1
except Exception as e:
rprint(f'[red]Error creating snapshot: {e}[/red]', file=sys.stderr)
rprint(f"[red]Error creating snapshot: {e}[/red]", file=sys.stderr)
continue
if not created_snapshots:
if pass_through_count > 0:
rprint(f'[dim]Passed through {pass_through_count} records, no new snapshots[/dim]', file=sys.stderr)
rprint(f"[dim]Passed through {pass_through_count} records, no new snapshots[/dim]", file=sys.stderr)
return 0
rprint('[red]No snapshots created[/red]', file=sys.stderr)
rprint("[red]No snapshots created[/red]", file=sys.stderr)
return 1
rprint(f'[green]Created {len(created_snapshots)} snapshots[/green]', file=sys.stderr)
rprint(f"[green]Created {len(created_snapshots)} snapshots[/green]", file=sys.stderr)
if is_tty:
for snapshot in created_snapshots:
rprint(f' [dim]{snapshot.id}[/dim] {snapshot.url[:60]}', file=sys.stderr)
rprint(f" [dim]{snapshot.id}[/dim] {snapshot.url[:60]}", file=sys.stderr)
return 0
@@ -165,16 +170,19 @@ def create_snapshots(
# LIST
# =============================================================================
def list_snapshots(
status: Optional[str] = None,
url__icontains: Optional[str] = None,
url__istartswith: Optional[str] = None,
tag: Optional[str] = None,
crawl_id: Optional[str] = None,
limit: Optional[int] = None,
sort: Optional[str] = None,
csv: Optional[str] = None,
status: str | None = None,
url__icontains: str | None = None,
url__istartswith: str | None = None,
tag: str | None = None,
crawl_id: str | None = None,
limit: int | None = None,
sort: str | None = None,
csv: str | None = None,
with_headers: bool = False,
search: str | None = None,
query: str | None = None,
) -> int:
"""
List Snapshots as JSONL with optional filters.
@@ -184,64 +192,106 @@ def list_snapshots(
"""
from archivebox.misc.jsonl import write_record
from archivebox.core.models import Snapshot
from archivebox.search import (
get_default_search_mode,
get_search_mode,
prioritize_metadata_matches,
query_search_index,
)
if with_headers and not csv:
rprint('[red]--with-headers requires --csv[/red]', file=sys.stderr)
rprint("[red]--with-headers requires --csv[/red]", file=sys.stderr)
return 2
is_tty = sys.stdout.isatty() and not csv
queryset = Snapshot.objects.all().order_by('-created_at')
queryset = Snapshot.objects.annotate(output_size_sum=Coalesce(Sum("archiveresult__output_size"), 0)).order_by("-created_at")
# Apply filters
filter_kwargs = {
'status': status,
'url__icontains': url__icontains,
'url__istartswith': url__istartswith,
'crawl_id': crawl_id,
"status": status,
"url__icontains": url__icontains,
"url__istartswith": url__istartswith,
"crawl_id": crawl_id,
}
queryset = apply_filters(queryset, filter_kwargs, limit=limit)
queryset = apply_filters(queryset, filter_kwargs)
# Tag filter requires special handling (M2M)
if tag:
queryset = queryset.filter(tags__name__iexact=tag)
query = (query or "").strip()
if query:
metadata_qs = queryset.filter(
Q(title__icontains=query) | Q(url__icontains=query) | Q(timestamp__icontains=query) | Q(tags__name__icontains=query),
)
requested_search_mode = (search or "").strip().lower()
if requested_search_mode == "content":
requested_search_mode = "contents"
search_mode = get_default_search_mode() if not requested_search_mode else get_search_mode(requested_search_mode)
if search_mode == "meta":
queryset = metadata_qs
else:
try:
deep_qsearch = None
if search_mode == "deep":
qsearch = query_search_index(query, search_mode="contents")
deep_qsearch = query_search_index(query, search_mode="deep")
else:
qsearch = query_search_index(query, search_mode=search_mode)
queryset = prioritize_metadata_matches(
queryset,
metadata_qs,
qsearch,
deep_queryset=deep_qsearch,
ordering=("-created_at",) if not sort else None,
)
except Exception as err:
rprint(
f"[yellow]Search backend error, falling back to metadata search: {err}[/yellow]",
file=sys.stderr,
)
queryset = metadata_qs
if sort:
queryset = queryset.order_by(sort)
if limit:
queryset = queryset[:limit]
count = 0
if csv:
cols = [col.strip() for col in csv.split(',') if col.strip()]
cols = [col.strip() for col in csv.split(",") if col.strip()]
if not cols:
rprint('[red]No CSV columns provided[/red]', file=sys.stderr)
rprint("[red]No CSV columns provided[/red]", file=sys.stderr)
return 2
rows: list[str] = []
if with_headers:
rows.append(','.join(cols))
rows.append(",".join(cols))
for snapshot in queryset.iterator(chunk_size=500):
rows.append(snapshot.to_csv(cols=cols, separator=','))
rows.append(snapshot.to_csv(cols=cols, separator=","))
count += 1
output = '\n'.join(rows)
output = "\n".join(rows)
if output:
sys.stdout.write(output)
if not output.endswith('\n'):
sys.stdout.write('\n')
rprint(f'[dim]Listed {count} snapshots[/dim]', file=sys.stderr)
if not output.endswith("\n"):
sys.stdout.write("\n")
rprint(f"[dim]Listed {count} snapshots[/dim]", file=sys.stderr)
return 0
for snapshot in queryset:
if is_tty:
status_color = {
'queued': 'yellow',
'started': 'blue',
'sealed': 'green',
}.get(snapshot.status, 'dim')
rprint(f'[{status_color}]{snapshot.status:8}[/{status_color}] [dim]{snapshot.id}[/dim] {snapshot.url[:60]}')
"queued": "yellow",
"started": "blue",
"sealed": "green",
}.get(snapshot.status, "dim")
rprint(f"[{status_color}]{snapshot.status:8}[/{status_color}] [dim]{snapshot.id}[/dim] {snapshot.url[:60]}")
else:
write_record(snapshot.to_json())
count += 1
rprint(f'[dim]Listed {count} snapshots[/dim]', file=sys.stderr)
rprint(f"[dim]Listed {count} snapshots[/dim]", file=sys.stderr)
return 0
@@ -249,9 +299,10 @@ def list_snapshots(
# UPDATE
# =============================================================================
def update_snapshots(
status: Optional[str] = None,
tag: Optional[str] = None,
status: str | None = None,
tag: str | None = None,
) -> int:
"""
Update Snapshots from stdin JSONL.
@@ -272,12 +323,12 @@ def update_snapshots(
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
updated_count = 0
for record in records:
snapshot_id = record.get('id')
snapshot_id = record.get("id")
if not snapshot_id:
continue
@@ -292,6 +343,7 @@ def update_snapshots(
# Add tag to existing tags
snapshot.save() # Ensure saved before M2M
from archivebox.core.models import Tag
tag_obj, _ = Tag.objects.get_or_create(name=tag)
snapshot.tags.add(tag_obj)
@@ -302,10 +354,10 @@ def update_snapshots(
write_record(snapshot.to_json())
except Snapshot.DoesNotExist:
rprint(f'[yellow]Snapshot not found: {snapshot_id}[/yellow]', file=sys.stderr)
rprint(f"[yellow]Snapshot not found: {snapshot_id}[/yellow]", file=sys.stderr)
continue
rprint(f'[green]Updated {updated_count} snapshots[/green]', file=sys.stderr)
rprint(f"[green]Updated {updated_count} snapshots[/green]", file=sys.stderr)
return 0
@@ -313,6 +365,7 @@ def update_snapshots(
# DELETE
# =============================================================================
def delete_snapshots(yes: bool = False, dry_run: bool = False) -> int:
"""
Delete Snapshots from stdin JSONL.
@@ -328,35 +381,35 @@ def delete_snapshots(yes: bool = False, dry_run: bool = False) -> int:
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
snapshot_ids = [r.get('id') for r in records if r.get('id')]
snapshot_ids = [r.get("id") for r in records if r.get("id")]
if not snapshot_ids:
rprint('[yellow]No valid snapshot IDs in input[/yellow]', file=sys.stderr)
rprint("[yellow]No valid snapshot IDs in input[/yellow]", file=sys.stderr)
return 1
snapshots = Snapshot.objects.filter(id__in=snapshot_ids)
count = snapshots.count()
if count == 0:
rprint('[yellow]No matching snapshots found[/yellow]', file=sys.stderr)
rprint("[yellow]No matching snapshots found[/yellow]", file=sys.stderr)
return 0
if dry_run:
rprint(f'[yellow]Would delete {count} snapshots (dry run)[/yellow]', file=sys.stderr)
rprint(f"[yellow]Would delete {count} snapshots (dry run)[/yellow]", file=sys.stderr)
for snapshot in snapshots:
rprint(f' [dim]{snapshot.id}[/dim] {snapshot.url[:60]}', file=sys.stderr)
rprint(f" [dim]{snapshot.id}[/dim] {snapshot.url[:60]}", file=sys.stderr)
return 0
if not yes:
rprint('[red]Use --yes to confirm deletion[/red]', file=sys.stderr)
rprint("[red]Use --yes to confirm deletion[/red]", file=sys.stderr)
return 1
# Perform deletion
deleted_count, _ = snapshots.delete()
rprint(f'[green]Deleted {deleted_count} snapshots[/green]', file=sys.stderr)
rprint(f"[green]Deleted {deleted_count} snapshots[/green]", file=sys.stderr)
return 0
@@ -364,57 +417,81 @@ def delete_snapshots(yes: bool = False, dry_run: bool = False) -> int:
# CLI Commands
# =============================================================================
@click.group()
def main():
"""Manage Snapshot records."""
pass
@main.command('create')
@click.argument('urls', nargs=-1)
@click.option('--tag', '-t', default='', help='Comma-separated tags to add')
@click.option('--status', '-s', default='queued', help='Initial status (default: queued)')
@click.option('--depth', '-d', type=int, default=0, help='Crawl depth (default: 0)')
@main.command("create")
@click.argument("urls", nargs=-1)
@click.option("--tag", "-t", default="", help="Comma-separated tags to add")
@click.option("--status", "-s", default="queued", help="Initial status (default: queued)")
@click.option("--depth", "-d", type=int, default=0, help="Crawl depth (default: 0)")
def create_cmd(urls: tuple, tag: str, status: str, depth: int):
"""Create Snapshots from URLs or stdin JSONL."""
sys.exit(create_snapshots(urls, tag=tag, status=status, depth=depth))
@main.command('list')
@click.option('--status', '-s', help='Filter by status (queued, started, sealed)')
@click.option('--url__icontains', help='Filter by URL contains')
@click.option('--url__istartswith', help='Filter by URL starts with')
@click.option('--tag', '-t', help='Filter by tag name')
@click.option('--crawl-id', help='Filter by crawl ID')
@click.option('--limit', '-n', type=int, help='Limit number of results')
def list_cmd(status: Optional[str], url__icontains: Optional[str], url__istartswith: Optional[str],
tag: Optional[str], crawl_id: Optional[str], limit: Optional[int]):
@main.command("list")
@click.option("--status", "-s", help="Filter by status (queued, started, sealed)")
@click.option("--url__icontains", help="Filter by URL contains")
@click.option("--url__istartswith", help="Filter by URL starts with")
@click.option("--tag", "-t", help="Filter by tag name")
@click.option("--crawl-id", help="Filter by crawl ID")
@click.option("--limit", "-n", type=int, help="Limit number of results")
@click.option("--sort", "-o", type=str, help="Field to sort by, e.g. url, created_at, bookmarked_at, downloaded_at")
@click.option("--csv", "-C", type=str, help="Print output as CSV with the provided fields, e.g.: timestamp,url,title")
@click.option("--with-headers", is_flag=True, help="Include column headers in structured output")
@click.option("--search", type=click.Choice(["meta", "content", "contents", "deep"]), help="Search mode to use for the query")
@click.argument("query", nargs=-1)
def list_cmd(
status: str | None,
url__icontains: str | None,
url__istartswith: str | None,
tag: str | None,
crawl_id: str | None,
limit: int | None,
sort: str | None,
csv: str | None,
with_headers: bool,
search: str | None,
query: tuple[str, ...],
):
"""List Snapshots as JSONL."""
sys.exit(list_snapshots(
status=status,
url__icontains=url__icontains,
url__istartswith=url__istartswith,
tag=tag,
crawl_id=crawl_id,
limit=limit,
))
sys.exit(
list_snapshots(
status=status,
url__icontains=url__icontains,
url__istartswith=url__istartswith,
tag=tag,
crawl_id=crawl_id,
limit=limit,
sort=sort,
csv=csv,
with_headers=with_headers,
search=search,
query=" ".join(query),
),
)
@main.command('update')
@click.option('--status', '-s', help='Set status')
@click.option('--tag', '-t', help='Add tag')
def update_cmd(status: Optional[str], tag: Optional[str]):
@main.command("update")
@click.option("--status", "-s", help="Set status")
@click.option("--tag", "-t", help="Add tag")
def update_cmd(status: str | None, tag: str | None):
"""Update Snapshots from stdin JSONL."""
sys.exit(update_snapshots(status=status, tag=tag))
@main.command('delete')
@click.option('--yes', '-y', is_flag=True, help='Confirm deletion')
@click.option('--dry-run', is_flag=True, help='Show what would be deleted')
@main.command("delete")
@click.option("--yes", "-y", is_flag=True, help="Confirm deletion")
@click.option("--dry-run", is_flag=True, help="Show what would be deleted")
def delete_cmd(yes: bool, dry_run: bool):
"""Delete Snapshots from stdin JSONL."""
sys.exit(delete_snapshots(yes=yes, dry_run=dry_run))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox snapshot'
__package__ = "archivebox.cli"
__command__ = "archivebox snapshot"
import sys
@@ -10,15 +10,15 @@ import rich_click as click
from archivebox.cli.archivebox_snapshot import create_snapshots
@click.command(context_settings={'ignore_unknown_options': True})
@click.option('--tag', '-t', default='', help='Comma-separated tags to add')
@click.option('--status', '-s', default='queued', help='Initial status (default: queued)')
@click.option('--depth', '-d', type=int, default=0, help='Crawl depth (default: 0)')
@click.argument('urls', nargs=-1)
@click.command(context_settings={"ignore_unknown_options": True})
@click.option("--tag", "-t", default="", help="Comma-separated tags to add")
@click.option("--status", "-s", default="queued", help="Initial status (default: queued)")
@click.option("--depth", "-d", type=int, default=0, help="Crawl depth (default: 0)")
@click.argument("urls", nargs=-1)
def main(tag: str, status: str, depth: int, urls: tuple[str, ...]):
"""Backwards-compatible `archivebox snapshot URL...` entrypoint."""
sys.exit(create_snapshots(urls, tag=tag, status=status, depth=depth))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__package__ = "archivebox.cli"
from pathlib import Path
@@ -16,31 +16,34 @@ from archivebox.misc.logging_util import printable_filesize
@enforce_types
def status(out_dir: Path=DATA_DIR) -> None:
def status(out_dir: Path = DATA_DIR) -> None:
"""Print out some info and statistics about the archive collection"""
from django.contrib.auth import get_user_model
from django.db.models import Sum
from django.db.models.functions import Coalesce
from archivebox.core.models import Snapshot
User = get_user_model()
print('[green]\\[*] Scanning archive main index...[/green]')
print(f'[yellow] {out_dir}/*[/yellow]')
num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.')
print("[green]\\[*] Scanning archive main index...[/green]")
print(f"[yellow] {out_dir}/*[/yellow]")
num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern="index.")
size = printable_filesize(num_bytes)
print(f' Index size: {size} across {num_files} files')
print(f" Index size: {size} across {num_files} files")
print()
links = list(Snapshot.objects.all())
links = list(Snapshot.objects.annotate(output_size_sum=Coalesce(Sum("archiveresult__output_size"), 0)))
num_sql_links = len(links)
num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {CONSTANTS.SQL_INDEX_FILENAME})')
print(f' > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR.name}/*/index.json)')
print(f" > SQL Main Index: {num_sql_links} links".ljust(36), f"(found in {CONSTANTS.SQL_INDEX_FILENAME})")
print(f" > JSON Link Details: {num_link_details} links".ljust(36), f"(found in {ARCHIVE_DIR.name}/*/index.json)")
print()
print('[green]\\[*] Scanning archive data directories...[/green]')
users_dir = out_dir / 'users'
print("[green]\\[*] Scanning archive data directories...[/green]")
users_dir = out_dir / "users"
scan_roots = [root for root in (ARCHIVE_DIR, users_dir) if root.exists()]
scan_roots_display = ', '.join(str(root) for root in scan_roots) if scan_roots else str(ARCHIVE_DIR)
print(f'[yellow] {scan_roots_display}[/yellow]')
scan_roots_display = ", ".join(str(root) for root in scan_roots) if scan_roots else str(ARCHIVE_DIR)
print(f"[yellow] {scan_roots_display}[/yellow]")
num_bytes = num_dirs = num_files = 0
for root in scan_roots:
root_bytes, root_dirs, root_files = get_dir_size(root)
@@ -48,80 +51,66 @@ def status(out_dir: Path=DATA_DIR) -> None:
num_dirs += root_dirs
num_files += root_files
size = printable_filesize(num_bytes)
print(f' Size: {size} across {num_files} files in {num_dirs} directories')
print(f" Size: {size} across {num_files} files in {num_dirs} directories")
# Use DB as source of truth for snapshot status
num_indexed = len(links)
num_archived = sum(1 for snapshot in links if snapshot.is_archived)
num_unarchived = max(num_indexed - num_archived, 0)
print(f' > indexed: {num_indexed}'.ljust(36), '(total snapshots in DB)')
print(f' > archived: {num_archived}'.ljust(36), '(snapshots with archived content)')
print(f' > unarchived: {num_unarchived}'.ljust(36), '(snapshots pending archiving)')
print(f" > indexed: {num_indexed}".ljust(36), "(total snapshots in DB)")
print(f" > archived: {num_archived}".ljust(36), "(snapshots with archived content)")
print(f" > unarchived: {num_unarchived}".ljust(36), "(snapshots pending archiving)")
# Count snapshot directories on filesystem across both legacy and current layouts.
expected_snapshot_dirs = {
str(Path(snapshot.output_dir).resolve())
for snapshot in links
if Path(snapshot.output_dir).exists()
}
expected_snapshot_dirs = {str(Path(snapshot.output_dir).resolve()) for snapshot in links if Path(snapshot.output_dir).exists()}
discovered_snapshot_dirs = set()
if ARCHIVE_DIR.exists():
discovered_snapshot_dirs.update(
str(entry.resolve())
for entry in ARCHIVE_DIR.iterdir()
if entry.is_dir()
)
discovered_snapshot_dirs.update(str(entry.resolve()) for entry in ARCHIVE_DIR.iterdir() if entry.is_dir())
if users_dir.exists():
discovered_snapshot_dirs.update(
str(entry.resolve())
for entry in users_dir.glob('*/snapshots/*/*/*')
if entry.is_dir()
)
discovered_snapshot_dirs.update(str(entry.resolve()) for entry in users_dir.glob("*/snapshots/*/*/*") if entry.is_dir())
orphaned_dirs = sorted(discovered_snapshot_dirs - expected_snapshot_dirs)
num_present = len(discovered_snapshot_dirs)
num_valid = len(discovered_snapshot_dirs & expected_snapshot_dirs)
print()
print(f' > present: {num_present}'.ljust(36), '(snapshot directories on disk)')
print(f' > [green]valid:[/green] {num_valid}'.ljust(36), ' (directories with matching DB entry)')
print(f" > present: {num_present}".ljust(36), "(snapshot directories on disk)")
print(f" > [green]valid:[/green] {num_valid}".ljust(36), " (directories with matching DB entry)")
num_orphaned = len(orphaned_dirs)
print(f' > [red]orphaned:[/red] {num_orphaned}'.ljust(36), ' (directories without matching DB entry)')
print(f" > [red]orphaned:[/red] {num_orphaned}".ljust(36), " (directories without matching DB entry)")
if num_indexed:
print(' [violet]Hint:[/violet] You can list snapshots by status like so:')
print(' [green]archivebox list --status=<status> (e.g. archived, queued, etc.)[/green]')
print(" [violet]Hint:[/violet] You can list snapshots by status like so:")
print(" [green]archivebox list --status=<status> (e.g. archived, queued, etc.)[/green]")
if orphaned_dirs:
print(' [violet]Hint:[/violet] To automatically import orphaned data directories into the main index, run:')
print(' [green]archivebox init[/green]')
print(" [violet]Hint:[/violet] To automatically import orphaned data directories into the main index, run:")
print(" [green]archivebox init[/green]")
print()
print('[green]\\[*] Scanning recent archive changes and user logins:[/green]')
print(f'[yellow] {CONSTANTS.LOGS_DIR}/*[/yellow]')
admin_users = User.objects.filter(is_superuser=True).exclude(username='system')
print("[green]\\[*] Scanning recent archive changes and user logins:[/green]")
print(f"[yellow] {CONSTANTS.LOGS_DIR}/*[/yellow]")
admin_users = User.objects.filter(is_superuser=True).exclude(username="system")
users = [user.get_username() for user in admin_users]
print(f' UI users {len(users)}: {", ".join(users)}')
last_login = admin_users.order_by('last_login').last()
print(f" UI users {len(users)}: {', '.join(users)}")
last_login = admin_users.order_by("last_login").last()
if last_login:
print(f' Last UI login: {last_login.get_username()} @ {str(last_login.last_login)[:16]}')
last_downloaded = Snapshot.objects.order_by('downloaded_at').last()
print(f" Last UI login: {last_login.get_username()} @ {str(last_login.last_login)[:16]}")
last_downloaded = Snapshot.objects.order_by("downloaded_at").last()
if last_downloaded:
print(f' Last changes: {str(last_downloaded.downloaded_at)[:16]}')
print(f" Last changes: {str(last_downloaded.downloaded_at)[:16]}")
if not users:
print()
print(' [violet]Hint:[/violet] You can create an admin user by running:')
print(' [green]archivebox manage createsuperuser[/green]')
print(" [violet]Hint:[/violet] You can create an admin user by running:")
print(" [green]archivebox manage createsuperuser[/green]")
print()
recent_snapshots = sorted(
links,
key=lambda snapshot: (
snapshot.downloaded_at or snapshot.modified_at or snapshot.created_at
),
key=lambda snapshot: snapshot.downloaded_at or snapshot.modified_at or snapshot.created_at,
reverse=True,
)[:10]
for snapshot in recent_snapshots:
@@ -129,14 +118,14 @@ def status(out_dir: Path=DATA_DIR) -> None:
continue
print(
(
'[grey53] '
f' > {str(snapshot.downloaded_at)[:16]} '
f'[{snapshot.num_outputs} {("X", "")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] '
"[grey53] "
f" > {str(snapshot.downloaded_at)[:16]} "
f"[{snapshot.num_outputs} {('X', '')[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] "
f'"{snapshot.title}": {snapshot.url}'
'[/grey53]'
)[:SHELL_CONFIG.TERM_WIDTH],
"[/grey53]"
)[: SHELL_CONFIG.TERM_WIDTH],
)
print('[grey53] ...')
print("[grey53] ...")
@click.command()
@@ -146,5 +135,5 @@ def main(**kwargs):
status(**kwargs)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -27,11 +27,11 @@ Examples:
archivebox tag list --name=unused | archivebox tag delete --yes
"""
__package__ = 'archivebox.cli'
__command__ = 'archivebox tag'
__package__ = "archivebox.cli"
__command__ = "archivebox tag"
import sys
from typing import Optional, Iterable
from collections.abc import Iterable
import rich_click as click
from rich import print as rprint
@@ -43,6 +43,7 @@ from archivebox.cli.cli_utils import apply_filters
# CREATE
# =============================================================================
def create_tags(names: Iterable[str]) -> int:
"""
Create Tags from names.
@@ -60,7 +61,7 @@ def create_tags(names: Iterable[str]) -> int:
name_list = list(names) if names else []
if not name_list:
rprint('[yellow]No tag names provided. Pass names as arguments.[/yellow]', file=sys.stderr)
rprint("[yellow]No tag names provided. Pass names as arguments.[/yellow]", file=sys.stderr)
return 1
created_count = 0
@@ -76,11 +77,11 @@ def create_tags(names: Iterable[str]) -> int:
if created:
created_count += 1
rprint(f'[green]Created tag: {name}[/green]', file=sys.stderr)
rprint(f"[green]Created tag: {name}[/green]", file=sys.stderr)
else:
rprint(f'[dim]Tag already exists: {name}[/dim]', file=sys.stderr)
rprint(f"[dim]Tag already exists: {name}[/dim]", file=sys.stderr)
rprint(f'[green]Created {created_count} new tags[/green]', file=sys.stderr)
rprint(f"[green]Created {created_count} new tags[/green]", file=sys.stderr)
return 0
@@ -88,10 +89,11 @@ def create_tags(names: Iterable[str]) -> int:
# LIST
# =============================================================================
def list_tags(
name: Optional[str] = None,
name__icontains: Optional[str] = None,
limit: Optional[int] = None,
name: str | None = None,
name__icontains: str | None = None,
limit: int | None = None,
) -> int:
"""
List Tags as JSONL with optional filters.
@@ -104,12 +106,12 @@ def list_tags(
is_tty = sys.stdout.isatty()
queryset = Tag.objects.all().order_by('name')
queryset = Tag.objects.all().order_by("name")
# Apply filters
filter_kwargs = {
'name': name,
'name__icontains': name__icontains,
"name": name,
"name__icontains": name__icontains,
}
queryset = apply_filters(queryset, filter_kwargs, limit=limit)
@@ -117,12 +119,12 @@ def list_tags(
for tag in queryset:
snapshot_count = tag.snapshot_set.count()
if is_tty:
rprint(f'[cyan]{tag.name:30}[/cyan] [dim]({snapshot_count} snapshots)[/dim]')
rprint(f"[cyan]{tag.name:30}[/cyan] [dim]({snapshot_count} snapshots)[/dim]")
else:
write_record(tag.to_json())
count += 1
rprint(f'[dim]Listed {count} tags[/dim]', file=sys.stderr)
rprint(f"[dim]Listed {count} tags[/dim]", file=sys.stderr)
return 0
@@ -130,7 +132,8 @@ def list_tags(
# UPDATE
# =============================================================================
def update_tags(name: Optional[str] = None) -> int:
def update_tags(name: str | None = None) -> int:
"""
Update Tags from stdin JSONL.
@@ -148,13 +151,13 @@ def update_tags(name: Optional[str] = None) -> int:
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
updated_count = 0
for record in records:
tag_id = record.get('id')
old_name = record.get('name')
tag_id = record.get("id")
old_name = record.get("name")
if not tag_id and not old_name:
continue
@@ -176,10 +179,10 @@ def update_tags(name: Optional[str] = None) -> int:
write_record(tag.to_json())
except Tag.DoesNotExist:
rprint(f'[yellow]Tag not found: {tag_id or old_name}[/yellow]', file=sys.stderr)
rprint(f"[yellow]Tag not found: {tag_id or old_name}[/yellow]", file=sys.stderr)
continue
rprint(f'[green]Updated {updated_count} tags[/green]', file=sys.stderr)
rprint(f"[green]Updated {updated_count} tags[/green]", file=sys.stderr)
return 0
@@ -187,6 +190,7 @@ def update_tags(name: Optional[str] = None) -> int:
# DELETE
# =============================================================================
def delete_tags(yes: bool = False, dry_run: bool = False) -> int:
"""
Delete Tags from stdin JSONL.
@@ -202,23 +206,24 @@ def delete_tags(yes: bool = False, dry_run: bool = False) -> int:
records = list(read_stdin())
if not records:
rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
rprint("[yellow]No records provided via stdin[/yellow]", file=sys.stderr)
return 1
# Collect tag IDs or names
tag_ids = []
tag_names = []
for r in records:
if r.get('id'):
tag_ids.append(r['id'])
elif r.get('name'):
tag_names.append(r['name'])
if r.get("id"):
tag_ids.append(r["id"])
elif r.get("name"):
tag_names.append(r["name"])
if not tag_ids and not tag_names:
rprint('[yellow]No valid tag IDs or names in input[/yellow]', file=sys.stderr)
rprint("[yellow]No valid tag IDs or names in input[/yellow]", file=sys.stderr)
return 1
from django.db.models import Q
query = Q()
if tag_ids:
query |= Q(id__in=tag_ids)
@@ -229,22 +234,22 @@ def delete_tags(yes: bool = False, dry_run: bool = False) -> int:
count = tags.count()
if count == 0:
rprint('[yellow]No matching tags found[/yellow]', file=sys.stderr)
rprint("[yellow]No matching tags found[/yellow]", file=sys.stderr)
return 0
if dry_run:
rprint(f'[yellow]Would delete {count} tags (dry run)[/yellow]', file=sys.stderr)
rprint(f"[yellow]Would delete {count} tags (dry run)[/yellow]", file=sys.stderr)
for tag in tags:
rprint(f' {tag.name}', file=sys.stderr)
rprint(f" {tag.name}", file=sys.stderr)
return 0
if not yes:
rprint('[red]Use --yes to confirm deletion[/red]', file=sys.stderr)
rprint("[red]Use --yes to confirm deletion[/red]", file=sys.stderr)
return 1
# Perform deletion
deleted_count, _ = tags.delete()
rprint(f'[green]Deleted {deleted_count} tags[/green]', file=sys.stderr)
rprint(f"[green]Deleted {deleted_count} tags[/green]", file=sys.stderr)
return 0
@@ -252,42 +257,43 @@ def delete_tags(yes: bool = False, dry_run: bool = False) -> int:
# CLI Commands
# =============================================================================
@click.group()
def main():
"""Manage Tag records."""
pass
@main.command('create')
@click.argument('names', nargs=-1)
@main.command("create")
@click.argument("names", nargs=-1)
def create_cmd(names: tuple):
"""Create Tags from names."""
sys.exit(create_tags(names))
@main.command('list')
@click.option('--name', help='Filter by exact name')
@click.option('--name__icontains', help='Filter by name contains')
@click.option('--limit', '-n', type=int, help='Limit number of results')
def list_cmd(name: Optional[str], name__icontains: Optional[str], limit: Optional[int]):
@main.command("list")
@click.option("--name", help="Filter by exact name")
@click.option("--name__icontains", help="Filter by name contains")
@click.option("--limit", "-n", type=int, help="Limit number of results")
def list_cmd(name: str | None, name__icontains: str | None, limit: int | None):
"""List Tags as JSONL."""
sys.exit(list_tags(name=name, name__icontains=name__icontains, limit=limit))
@main.command('update')
@click.option('--name', '-n', help='Set new name')
def update_cmd(name: Optional[str]):
@main.command("update")
@click.option("--name", "-n", help="Set new name")
def update_cmd(name: str | None):
"""Update Tags from stdin JSONL."""
sys.exit(update_tags(name=name))
@main.command('delete')
@click.option('--yes', '-y', is_flag=True, help='Confirm deletion')
@click.option('--dry-run', is_flag=True, help='Show what would be deleted')
@main.command("delete")
@click.option("--yes", "-y", is_flag=True, help="Confirm deletion")
@click.option("--dry-run", is_flag=True, help="Show what would be deleted")
def delete_cmd(yes: bool, dry_run: bool):
"""Delete Tags from stdin JSONL."""
sys.exit(delete_tags(yes=yes, dry_run=dry_run))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,11 +1,12 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__package__ = "archivebox.cli"
import os
import time
from typing import TYPE_CHECKING, Callable, Iterable
from typing import TYPE_CHECKING, Any
from collections.abc import Callable, Iterable
from pathlib import Path
import rich_click as click
@@ -20,24 +21,22 @@ if TYPE_CHECKING:
LINK_FILTERS: dict[str, Callable[[str], Q]] = {
'exact': lambda pattern: Q(url=pattern),
'substring': lambda pattern: Q(url__icontains=pattern),
'regex': lambda pattern: Q(url__iregex=pattern),
'domain': lambda pattern: (
Q(url__istartswith=f'http://{pattern}')
| Q(url__istartswith=f'https://{pattern}')
| Q(url__istartswith=f'ftp://{pattern}')
"exact": lambda pattern: Q(url=pattern),
"substring": lambda pattern: Q(url__icontains=pattern),
"regex": lambda pattern: Q(url__iregex=pattern),
"domain": lambda pattern: (
Q(url__istartswith=f"http://{pattern}") | Q(url__istartswith=f"https://{pattern}") | Q(url__istartswith=f"ftp://{pattern}")
),
'tag': lambda pattern: Q(tags__name=pattern),
'timestamp': lambda pattern: Q(timestamp=pattern),
"tag": lambda pattern: Q(tags__name=pattern),
"timestamp": lambda pattern: Q(timestamp=pattern),
}
def _apply_pattern_filters(
snapshots: QuerySet['Snapshot', 'Snapshot'],
snapshots: QuerySet["Snapshot", "Snapshot"],
filter_patterns: list[str],
filter_type: str,
) -> QuerySet['Snapshot', 'Snapshot']:
) -> QuerySet["Snapshot", "Snapshot"]:
filter_builder = LINK_FILTERS.get(filter_type)
if filter_builder is None:
raise SystemExit(2)
@@ -48,21 +47,120 @@ def _apply_pattern_filters(
return snapshots.filter(query)
def _get_snapshot_crawl(snapshot: 'Snapshot') -> 'Crawl | None':
def _get_snapshot_crawl(snapshot: "Snapshot") -> "Crawl | None":
try:
return snapshot.crawl
except ObjectDoesNotExist:
return None
def _get_search_indexing_plugins() -> list[str]:
from abx_dl.models import discover_plugins
from archivebox.hooks import get_search_backends
available_backends = set(get_search_backends())
plugins = discover_plugins()
return sorted(
plugin_name
for plugin_name, plugin in plugins.items()
if plugin_name.startswith("search_backend_")
and plugin_name.removeprefix("search_backend_") in available_backends
and any("Snapshot" in hook.name and "index" in hook.name.lower() for hook in plugin.hooks)
)
def _build_filtered_snapshots_queryset(
*,
filter_patterns: Iterable[str],
filter_type: str,
before: float | None,
after: float | None,
resume: str | None = None,
):
from archivebox.core.models import Snapshot
from datetime import datetime
snapshots = Snapshot.objects.all()
if filter_patterns:
snapshots = _apply_pattern_filters(snapshots, list(filter_patterns), filter_type)
if before:
snapshots = snapshots.filter(bookmarked_at__lt=datetime.fromtimestamp(before))
if after:
snapshots = snapshots.filter(bookmarked_at__gt=datetime.fromtimestamp(after))
if resume:
snapshots = snapshots.filter(timestamp__lte=resume)
return snapshots.select_related("crawl").order_by("-bookmarked_at")
def reindex_snapshots(
snapshots: QuerySet["Snapshot", "Snapshot"],
*,
search_plugins: list[str],
batch_size: int,
) -> dict[str, int]:
from archivebox.cli.archivebox_extract import run_plugins
stats = {"processed": 0, "reconciled": 0, "queued": 0, "reindexed": 0}
records: list[dict[str, str]] = []
total = snapshots.count()
print(f"[*] Reindexing {total} snapshots with search plugins: {', '.join(search_plugins)}")
for snapshot in snapshots.iterator(chunk_size=batch_size):
stats["processed"] += 1
if _get_snapshot_crawl(snapshot) is None:
continue
output_dir = Path(snapshot.output_dir)
has_directory = output_dir.exists() and output_dir.is_dir()
if has_directory:
snapshot.reconcile_with_index_json()
stats["reconciled"] += 1
for plugin_name in search_plugins:
existing_result = snapshot.archiveresult_set.filter(plugin=plugin_name).order_by("-created_at").first()
if existing_result:
existing_result.reset_for_retry()
records.append(
{
"type": "ArchiveResult",
"snapshot_id": str(snapshot.id),
"plugin": plugin_name,
},
)
stats["queued"] += 1
if not records:
return stats
exit_code = run_plugins(
args=(),
records=records,
wait=True,
emit_results=False,
)
if exit_code != 0:
raise SystemExit(exit_code)
stats["reindexed"] = len(records)
return stats
@enforce_types
def update(filter_patterns: Iterable[str] = (),
filter_type: str = 'exact',
before: float | None = None,
after: float | None = None,
resume: str | None = None,
batch_size: int = 100,
continuous: bool = False) -> None:
def update(
filter_patterns: Iterable[str] = (),
filter_type: str = "exact",
before: float | None = None,
after: float | None = None,
resume: str | None = None,
batch_size: int = 100,
continuous: bool = False,
index_only: bool = False,
) -> None:
"""
Update snapshots: migrate old dirs, reconcile DB, and re-queue for archiving.
@@ -77,41 +175,69 @@ def update(filter_patterns: Iterable[str] = (),
from rich import print
from archivebox.config.django import setup_django
setup_django()
from django.core.management import call_command
# Run migrations first to ensure DB schema is up-to-date
print('[*] Checking for pending migrations...')
print("[*] Checking for pending migrations...")
try:
call_command('migrate', '--no-input', verbosity=0)
call_command("migrate", "--no-input", verbosity=0)
except Exception as e:
print(f'[!] Warning: Migration check failed: {e}')
print(f"[!] Warning: Migration check failed: {e}")
while True:
if filter_patterns or before or after:
if index_only:
search_plugins = _get_search_indexing_plugins()
if not search_plugins:
print("[*] No search indexing plugins are available, nothing to backfill.")
break
if not (filter_patterns or before or after):
print("[*] Phase 1: Draining old archive/ directories (0.8.x → 0.9.x migration)...")
drain_old_archive_dirs(
resume_from=resume,
batch_size=batch_size,
)
snapshots = _build_filtered_snapshots_queryset(
filter_patterns=filter_patterns,
filter_type=filter_type,
before=before,
after=after,
resume=resume,
)
stats = reindex_snapshots(
snapshots,
search_plugins=search_plugins,
batch_size=batch_size,
)
print_index_stats(stats)
elif filter_patterns or before or after:
# Filtered mode: query DB only
print('[*] Processing filtered snapshots from database...')
print("[*] Processing filtered snapshots from database...")
stats = process_filtered_snapshots(
filter_patterns=filter_patterns,
filter_type=filter_type,
before=before,
after=after,
batch_size=batch_size
resume=resume,
batch_size=batch_size,
)
print_stats(stats)
else:
# Full mode: drain old dirs + process DB
stats_combined = {'phase1': {}, 'phase2': {}}
stats_combined = {"phase1": {}, "phase2": {}}
print('[*] Phase 1: Draining old archive/ directories (0.8.x → 0.9.x migration)...')
stats_combined['phase1'] = drain_old_archive_dirs(
print("[*] Phase 1: Draining old archive/ directories (0.8.x → 0.9.x migration)...")
stats_combined["phase1"] = drain_old_archive_dirs(
resume_from=resume,
batch_size=batch_size
batch_size=batch_size,
)
print('[*] Phase 2: Processing all database snapshots (most recent first)...')
stats_combined['phase2'] = process_all_db_snapshots(batch_size=batch_size)
print("[*] Phase 2: Processing all database snapshots (most recent first)...")
stats_combined["phase2"] = process_all_db_snapshots(batch_size=batch_size, resume=resume)
# Phase 3: Deduplication (disabled for now)
# print('[*] Phase 3: Deduplicating...')
@@ -122,7 +248,7 @@ def update(filter_patterns: Iterable[str] = (),
if not continuous:
break
print('[yellow]Sleeping 60s before next pass...[/yellow]')
print("[yellow]Sleeping 60s before next pass...[/yellow]")
time.sleep(60)
resume = None
@@ -144,34 +270,34 @@ def drain_old_archive_dirs(resume_from: str | None = None, batch_size: int = 100
from archivebox.config import CONSTANTS
from django.db import transaction
stats = {'processed': 0, 'migrated': 0, 'skipped': 0, 'invalid': 0}
stats = {"processed": 0, "migrated": 0, "skipped": 0, "invalid": 0}
archive_dir = CONSTANTS.ARCHIVE_DIR
if not archive_dir.exists():
return stats
print('[DEBUG Phase1] Scanning for old directories in archive/...')
print("[DEBUG Phase1] Scanning for old directories in archive/...")
# Scan for real directories only (skip symlinks - they're already migrated)
all_entries = list(os.scandir(archive_dir))
print(f'[DEBUG Phase1] Total entries in archive/: {len(all_entries)}')
print(f"[DEBUG Phase1] Total entries in archive/: {len(all_entries)}")
entries = [
(e.stat().st_mtime, e.path)
for e in all_entries
if e.is_dir(follow_symlinks=False) # Skip symlinks
]
entries.sort(reverse=True) # Newest first
print(f'[DEBUG Phase1] Real directories (not symlinks): {len(entries)}')
print(f'[*] Found {len(entries)} old directories to drain')
print(f"[DEBUG Phase1] Real directories (not symlinks): {len(entries)}")
print(f"[*] Found {len(entries)} old directories to drain")
for mtime, entry_path in entries:
entry_path = Path(entry_path)
# Resume from timestamp if specified
if resume_from and entry_path.name < resume_from:
if resume_from and entry_path.name > resume_from:
continue
stats['processed'] += 1
stats["processed"] += 1
# Try to load existing snapshot from DB
snapshot = Snapshot.load_from_directory(entry_path)
@@ -182,16 +308,16 @@ def drain_old_archive_dirs(resume_from: str | None = None, batch_size: int = 100
if not snapshot:
# Invalid directory - move to invalid/
Snapshot.move_directory_to_invalid(entry_path)
stats['invalid'] += 1
stats["invalid"] += 1
print(f" [{stats['processed']}] Invalid: {entry_path.name}")
continue
try:
snapshot.save()
stats['migrated'] += 1
stats["migrated"] += 1
print(f" [{stats['processed']}] Imported orphaned snapshot: {entry_path.name}")
except Exception as e:
stats['skipped'] += 1
stats["skipped"] += 1
print(f" [{stats['processed']}] Skipped (error: {e}): {entry_path.name}")
continue
@@ -201,30 +327,35 @@ def drain_old_archive_dirs(resume_from: str | None = None, batch_size: int = 100
if not has_valid_crawl:
# Create a new crawl (created_by will default to system user)
from archivebox.crawls.models import Crawl
crawl = Crawl.objects.create(urls=snapshot.url)
# Use queryset update to avoid triggering save() hooks
from archivebox.core.models import Snapshot as SnapshotModel
SnapshotModel.objects.filter(pk=snapshot.pk).update(crawl=crawl)
# Refresh the instance
snapshot.crawl = crawl
print(f"[DEBUG Phase1] Created missing crawl for snapshot {str(snapshot.id)[:8]}")
# Check if needs migration (0.8.x → 0.9.x)
print(f"[DEBUG Phase1] Snapshot {str(snapshot.id)[:8]}: fs_version={snapshot.fs_version}, needs_migration={snapshot.fs_migration_needed}")
print(
f"[DEBUG Phase1] Snapshot {str(snapshot.id)[:8]}: fs_version={snapshot.fs_version}, needs_migration={snapshot.fs_migration_needed}",
)
if snapshot.fs_migration_needed:
try:
# Calculate paths using actual directory (entry_path), not snapshot.timestamp
# because snapshot.timestamp might be truncated
old_dir = entry_path
new_dir = snapshot.get_storage_path_for_version('0.9.0')
new_dir = snapshot.get_storage_path_for_version("0.9.0")
print(f"[DEBUG Phase1] Migrating {old_dir.name}{new_dir}")
# Manually migrate files
if not new_dir.exists() and old_dir.exists():
new_dir.mkdir(parents=True, exist_ok=True)
import shutil
file_count = 0
for old_file in old_dir.rglob('*'):
for old_file in old_dir.rglob("*"):
if old_file.is_file():
rel_path = old_file.relative_to(old_dir)
new_file = new_dir / rel_path
@@ -236,7 +367,8 @@ def drain_old_archive_dirs(resume_from: str | None = None, batch_size: int = 100
# Update only fs_version field using queryset update (bypasses validation)
from archivebox.core.models import Snapshot as SnapshotModel
SnapshotModel.objects.filter(pk=snapshot.pk).update(fs_version='0.9.0')
SnapshotModel.objects.filter(pk=snapshot.pk).update(fs_version="0.9.0")
# Commit the transaction
transaction.commit()
@@ -245,22 +377,22 @@ def drain_old_archive_dirs(resume_from: str | None = None, batch_size: int = 100
if old_dir.exists() and old_dir != new_dir:
snapshot._cleanup_old_migration_dir(old_dir, new_dir)
stats['migrated'] += 1
stats["migrated"] += 1
print(f" [{stats['processed']}] Migrated: {entry_path.name}")
except Exception as e:
stats['skipped'] += 1
stats["skipped"] += 1
print(f" [{stats['processed']}] Skipped (error: {e}): {entry_path.name}")
else:
stats['skipped'] += 1
stats["skipped"] += 1
if stats['processed'] % batch_size == 0:
if stats["processed"] % batch_size == 0:
transaction.commit()
transaction.commit()
return stats
def process_all_db_snapshots(batch_size: int = 100) -> dict[str, int]:
def process_all_db_snapshots(batch_size: int = 100, resume: str | None = None) -> dict[str, int]:
"""
O(n) scan over entire DB from most recent to least recent.
@@ -275,24 +407,30 @@ def process_all_db_snapshots(batch_size: int = 100) -> dict[str, int]:
from django.db import transaction
from django.utils import timezone
stats = {'processed': 0, 'reconciled': 0, 'queued': 0}
stats = {"processed": 0, "reconciled": 0, "queued": 0}
total = Snapshot.objects.count()
print(f'[*] Processing {total} snapshots from database (most recent first)...')
queryset = Snapshot.objects.all()
if resume:
queryset = queryset.filter(timestamp__lte=resume)
total = queryset.count()
print(f"[*] Processing {total} snapshots from database (most recent first)...")
# Process from most recent to least recent
for snapshot in Snapshot.objects.select_related('crawl').order_by('-bookmarked_at').iterator(chunk_size=batch_size):
stats['processed'] += 1
for snapshot in queryset.select_related("crawl").order_by("-bookmarked_at").iterator(chunk_size=batch_size):
stats["processed"] += 1
# Skip snapshots with missing crawl references (orphaned by migration errors)
if _get_snapshot_crawl(snapshot) is None:
continue
try:
print(f"[DEBUG Phase2] Snapshot {str(snapshot.id)[:8]}: fs_version={snapshot.fs_version}, needs_migration={snapshot.fs_migration_needed}")
print(
f"[DEBUG Phase2] Snapshot {str(snapshot.id)[:8]}: fs_version={snapshot.fs_version}, needs_migration={snapshot.fs_migration_needed}",
)
# Check if snapshot has a directory on disk
from pathlib import Path
output_dir = Path(snapshot.output_dir)
has_directory = output_dir.exists() and output_dir.is_dir()
@@ -313,22 +451,23 @@ def process_all_db_snapshots(batch_size: int = 100) -> dict[str, int]:
print(f"[DEBUG Phase2] Orphan snapshot {str(snapshot.id)[:8]} - marking as migrated without filesystem operation")
# Use queryset update to set fs_version without triggering save() hooks
from archivebox.core.models import Snapshot as SnapshotModel
SnapshotModel.objects.filter(pk=snapshot.pk).update(fs_version='0.9.0')
snapshot.fs_version = '0.9.0'
SnapshotModel.objects.filter(pk=snapshot.pk).update(fs_version="0.9.0")
snapshot.fs_version = "0.9.0"
# Queue for archiving (state machine will handle it)
snapshot.status = Snapshot.StatusChoices.QUEUED
snapshot.retry_at = timezone.now()
snapshot.save()
stats['reconciled'] += 1 if has_directory else 0
stats['queued'] += 1
stats["reconciled"] += 1 if has_directory else 0
stats["queued"] += 1
except Exception as e:
# Skip snapshots that can't be processed (e.g., missing crawl)
print(f" [!] Skipping snapshot {snapshot.id}: {e}")
continue
if stats['processed'] % batch_size == 0:
if stats["processed"] % batch_size == 0:
transaction.commit()
print(f" [{stats['processed']}/{total}] Processed...")
@@ -341,31 +480,28 @@ def process_filtered_snapshots(
filter_type: str,
before: float | None,
after: float | None,
batch_size: int
resume: str | None,
batch_size: int,
) -> dict[str, int]:
"""Process snapshots matching filters (DB query only)."""
from archivebox.core.models import Snapshot
from django.db import transaction
from django.utils import timezone
from datetime import datetime
stats = {'processed': 0, 'reconciled': 0, 'queued': 0}
stats = {"processed": 0, "reconciled": 0, "queued": 0}
snapshots = Snapshot.objects.all()
if filter_patterns:
snapshots = _apply_pattern_filters(snapshots, list(filter_patterns), filter_type)
if before:
snapshots = snapshots.filter(bookmarked_at__lt=datetime.fromtimestamp(before))
if after:
snapshots = snapshots.filter(bookmarked_at__gt=datetime.fromtimestamp(after))
snapshots = _build_filtered_snapshots_queryset(
filter_patterns=filter_patterns,
filter_type=filter_type,
before=before,
after=after,
resume=resume,
)
total = snapshots.count()
print(f'[*] Found {total} matching snapshots')
print(f"[*] Found {total} matching snapshots")
for snapshot in snapshots.select_related('crawl').iterator(chunk_size=batch_size):
stats['processed'] += 1
for snapshot in snapshots.select_related("crawl").iterator(chunk_size=batch_size):
stats["processed"] += 1
# Skip snapshots with missing crawl references
if _get_snapshot_crawl(snapshot) is None:
@@ -384,14 +520,14 @@ def process_filtered_snapshots(
snapshot.retry_at = timezone.now()
snapshot.save()
stats['reconciled'] += 1
stats['queued'] += 1
stats["reconciled"] += 1
stats["queued"] += 1
except Exception as e:
# Skip snapshots that can't be processed
print(f" [!] Skipping snapshot {snapshot.id}: {e}")
continue
if stats['processed'] % batch_size == 0:
if stats["processed"] % batch_size == 0:
transaction.commit()
print(f" [{stats['processed']}/{total}] Processed...")
@@ -405,9 +541,9 @@ def print_stats(stats: dict):
print(f"""
[green]Update Complete[/green]
Processed: {stats['processed']}
Reconciled: {stats['reconciled']}
Queued: {stats['queued']}
Processed: {stats["processed"]}
Reconciled: {stats["reconciled"]}
Queued: {stats["queued"]}
""")
@@ -415,37 +551,50 @@ def print_combined_stats(stats_combined: dict):
"""Print statistics for full mode."""
from rich import print
s1 = stats_combined['phase1']
s2 = stats_combined['phase2']
s1 = stats_combined["phase1"]
s2 = stats_combined["phase2"]
print(f"""
[green]Archive Update Complete[/green]
Phase 1 (Drain Old Dirs):
Checked: {s1.get('processed', 0)}
Migrated: {s1.get('migrated', 0)}
Skipped: {s1.get('skipped', 0)}
Invalid: {s1.get('invalid', 0)}
Checked: {s1.get("processed", 0)}
Migrated: {s1.get("migrated", 0)}
Skipped: {s1.get("skipped", 0)}
Invalid: {s1.get("invalid", 0)}
Phase 2 (Process DB):
Processed: {s2.get('processed', 0)}
Reconciled: {s2.get('reconciled', 0)}
Queued: {s2.get('queued', 0)}
Processed: {s2.get("processed", 0)}
Reconciled: {s2.get("reconciled", 0)}
Queued: {s2.get("queued", 0)}
""")
def print_index_stats(stats: dict[str, Any]) -> None:
from rich import print
print(f"""
[green]Search Reindex Complete[/green]
Processed: {stats["processed"]}
Reconciled: {stats["reconciled"]}
Queued: {stats["queued"]}
Reindexed: {stats["reindexed"]}
""")
@click.command()
@click.option('--resume', type=str, help='Resume from timestamp')
@click.option('--before', type=float, help='Only snapshots before timestamp')
@click.option('--after', type=float, help='Only snapshots after timestamp')
@click.option('--filter-type', '-t', type=click.Choice(['exact', 'substring', 'regex', 'domain', 'tag', 'timestamp']), default='exact')
@click.option('--batch-size', type=int, default=100, help='Commit every N snapshots')
@click.option('--continuous', is_flag=True, help='Run continuously as background worker')
@click.argument('filter_patterns', nargs=-1)
@click.option("--resume", type=str, help="Resume from timestamp")
@click.option("--before", type=float, help="Only snapshots before timestamp")
@click.option("--after", type=float, help="Only snapshots after timestamp")
@click.option("--filter-type", "-t", type=click.Choice(["exact", "substring", "regex", "domain", "tag", "timestamp"]), default="exact")
@click.option("--batch-size", type=int, default=100, help="Commit every N snapshots")
@click.option("--continuous", is_flag=True, help="Run continuously as background worker")
@click.option("--index-only", is_flag=True, help="Backfill available search indexes from existing archived content")
@click.argument("filter_patterns", nargs=-1)
@docstring(update.__doc__)
def main(**kwargs):
update(**kwargs)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,12 +1,12 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__package__ = "archivebox.cli"
import sys
import os
import platform
from pathlib import Path
from typing import Iterable
from collections.abc import Iterable
import rich_click as click
@@ -14,19 +14,22 @@ from archivebox.misc.util import docstring, enforce_types
@enforce_types
def version(quiet: bool=False,
binaries: Iterable[str]=()) -> list[str]:
def version(
quiet: bool = False,
binaries: Iterable[str] = (),
) -> list[str]:
"""Print the ArchiveBox version, debug metadata, and installed dependency versions"""
# fast path for just getting the version and exiting, dont do any slower imports
from archivebox.config.version import VERSION
print(VERSION)
if quiet or '--version' in sys.argv:
if quiet or "--version" in sys.argv:
return []
from rich.panel import Panel
from rich.console import Console
from archivebox.config import CONSTANTS, DATA_DIR
from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID, IN_DOCKER
@@ -34,78 +37,89 @@ def version(quiet: bool=False,
from archivebox.config.common import SHELL_CONFIG, STORAGE_CONFIG, SEARCH_BACKEND_CONFIG
from archivebox.misc.logging_util import printable_folder_status
from archivebox.config.configset import get_config
console = Console()
prnt = console.print
# Check if LDAP is enabled (simple config lookup)
config = get_config()
LDAP_ENABLED = config.get('LDAP_ENABLED', False)
LDAP_ENABLED = config.get("LDAP_ENABLED", False)
p = platform.uname()
COMMIT_HASH = get_COMMIT_HASH()
prnt(
'[dark_green]ArchiveBox[/dark_green] [dark_goldenrod]v{}[/dark_goldenrod]'.format(CONSTANTS.VERSION),
f'COMMIT_HASH={COMMIT_HASH[:7] if COMMIT_HASH else "unknown"}',
f'BUILD_TIME={get_BUILD_TIME()}',
f"[dark_green]ArchiveBox[/dark_green] [dark_goldenrod]v{CONSTANTS.VERSION}[/dark_goldenrod]",
f"COMMIT_HASH={COMMIT_HASH[:7] if COMMIT_HASH else 'unknown'}",
f"BUILD_TIME={get_BUILD_TIME()}",
)
prnt(
f'IN_DOCKER={IN_DOCKER}',
f'IN_QEMU={SHELL_CONFIG.IN_QEMU}',
f'ARCH={p.machine}',
f'OS={p.system}',
f'PLATFORM={platform.platform()}',
f'PYTHON={sys.implementation.name.title()}' + (' (venv)' if CONSTANTS.IS_INSIDE_VENV else ''),
f"IN_DOCKER={IN_DOCKER}",
f"IN_QEMU={SHELL_CONFIG.IN_QEMU}",
f"ARCH={p.machine}",
f"OS={p.system}",
f"PLATFORM={platform.platform()}",
f"PYTHON={sys.implementation.name.title()}" + (" (venv)" if CONSTANTS.IS_INSIDE_VENV else ""),
)
try:
OUTPUT_IS_REMOTE_FS = get_data_locations().DATA_DIR.is_mount or get_data_locations().ARCHIVE_DIR.is_mount
except Exception:
OUTPUT_IS_REMOTE_FS = False
try:
DATA_DIR_STAT = CONSTANTS.DATA_DIR.stat()
prnt(
f'EUID={os.geteuid()}:{os.getegid()} UID={RUNNING_AS_UID}:{RUNNING_AS_GID} PUID={ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}',
f'FS_UID={DATA_DIR_STAT.st_uid}:{DATA_DIR_STAT.st_gid}',
f'FS_PERMS={STORAGE_CONFIG.OUTPUT_PERMISSIONS}',
f'FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}',
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
f"EUID={os.geteuid()}:{os.getegid()} UID={RUNNING_AS_UID}:{RUNNING_AS_GID} PUID={ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}",
f"FS_UID={DATA_DIR_STAT.st_uid}:{DATA_DIR_STAT.st_gid}",
f"FS_PERMS={STORAGE_CONFIG.OUTPUT_PERMISSIONS}",
f"FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}",
f"FS_REMOTE={OUTPUT_IS_REMOTE_FS}",
)
except Exception:
prnt(
f'EUID={os.geteuid()}:{os.getegid()} UID={RUNNING_AS_UID}:{RUNNING_AS_GID} PUID={ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}',
f"EUID={os.geteuid()}:{os.getegid()} UID={RUNNING_AS_UID}:{RUNNING_AS_GID} PUID={ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}",
)
prnt(
f'DEBUG={SHELL_CONFIG.DEBUG}',
f'IS_TTY={SHELL_CONFIG.IS_TTY}',
f'SUDO={CONSTANTS.IS_ROOT}',
f'ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}',
f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}',
f'LDAP={LDAP_ENABLED}',
f"DEBUG={SHELL_CONFIG.DEBUG}",
f"IS_TTY={SHELL_CONFIG.IS_TTY}",
f"SUDO={CONSTANTS.IS_ROOT}",
f"ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}",
f"SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}",
f"LDAP={LDAP_ENABLED}",
)
prnt()
if not (os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) and os.access(CONSTANTS.CONFIG_FILE, os.R_OK)):
PANEL_TEXT = '\n'.join((
'',
'[violet]Hint:[/violet] [green]cd[/green] into a collection [blue]DATA_DIR[/blue] and run [green]archivebox version[/green] again...',
' [grey53]OR[/grey53] run [green]archivebox init[/green] to create a new collection in the current dir.',
'',
' [i][grey53](this is [red]REQUIRED[/red] if you are opening a Github Issue to get help)[/grey53][/i]',
'',
))
prnt(Panel(PANEL_TEXT, expand=False, border_style='grey53', title='[red]:exclamation: No collection [blue]DATA_DIR[/blue] is currently active[/red]', subtitle='Full version info is only available when inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]'))
PANEL_TEXT = "\n".join(
(
"",
"[violet]Hint:[/violet] [green]cd[/green] into a collection [blue]DATA_DIR[/blue] and run [green]archivebox version[/green] again...",
" [grey53]OR[/grey53] run [green]archivebox init[/green] to create a new collection in the current dir.",
"",
" [i][grey53](this is [red]REQUIRED[/red] if you are opening a Github Issue to get help)[/grey53][/i]",
"",
),
)
prnt(
Panel(
PANEL_TEXT,
expand=False,
border_style="grey53",
title="[red]:exclamation: No collection [blue]DATA_DIR[/blue] is currently active[/red]",
subtitle="Full version info is only available when inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]",
),
)
prnt()
return []
prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]')
prnt("[pale_green1][i] Binary Dependencies:[/pale_green1]")
failures = []
# Setup Django before importing models
try:
from archivebox.config.django import setup_django
setup_django()
from archivebox.machine.models import Machine, Binary
@@ -113,12 +127,17 @@ def version(quiet: bool=False,
machine = Machine.current()
# Get all binaries from the database with timeout protection
all_installed = Binary.objects.filter(
machine=machine
).exclude(abspath='').exclude(abspath__isnull=True).order_by('name')
all_installed = (
Binary.objects.filter(
machine=machine,
)
.exclude(abspath="")
.exclude(abspath__isnull=True)
.order_by("name")
)
if not all_installed.exists():
prnt('', '[grey53]No binaries detected. Run [green]archivebox install[/green] to detect dependencies.[/grey53]')
prnt("", "[grey53]No binaries detected. Run [green]archivebox install[/green] to detect dependencies.[/grey53]")
else:
for installed in all_installed:
# Skip if user specified specific binaries and this isn't one
@@ -126,71 +145,91 @@ def version(quiet: bool=False,
continue
if installed.is_valid:
display_path = installed.abspath.replace(str(DATA_DIR), '.').replace(str(Path('~').expanduser()), '~')
version_str = (installed.version or 'unknown')[:15]
provider = (installed.binprovider or 'env')[:8]
prnt('', '[green]√[/green]', '', installed.name.ljust(18), version_str.ljust(16), provider.ljust(8), display_path, overflow='ignore', crop=False)
display_path = installed.abspath.replace(str(DATA_DIR), ".").replace(str(Path("~").expanduser()), "~")
version_str = (installed.version or "unknown")[:15]
provider = (installed.binprovider or "env")[:8]
prnt(
"",
"[green]√[/green]",
"",
installed.name.ljust(18),
version_str.ljust(16),
provider.ljust(8),
display_path,
overflow="ignore",
crop=False,
)
else:
prnt('', '[red]X[/red]', '', installed.name.ljust(18), '[grey53]not installed[/grey53]', overflow='ignore', crop=False)
prnt("", "[red]X[/red]", "", installed.name.ljust(18), "[grey53]not installed[/grey53]", overflow="ignore", crop=False)
failures.append(installed.name)
# Show hint if no binaries are installed yet
has_any_installed = Binary.objects.filter(machine=machine).exclude(abspath='').exists()
has_any_installed = Binary.objects.filter(machine=machine).exclude(abspath="").exists()
if not has_any_installed:
prnt()
prnt('', '[grey53]Run [green]archivebox install[/green] to detect and install dependencies.[/grey53]')
prnt("", "[grey53]Run [green]archivebox install[/green] to detect and install dependencies.[/grey53]")
except Exception as e:
# Handle database errors gracefully (locked, missing, etc.)
prnt()
prnt('', f'[yellow]Warning: Could not query binaries from database: {e}[/yellow]')
prnt('', '[grey53]Run [green]archivebox init[/green] and [green]archivebox install[/green] to set up dependencies.[/grey53]')
prnt("", f"[yellow]Warning: Could not query binaries from database: {e}[/yellow]")
prnt("", "[grey53]Run [green]archivebox init[/green] and [green]archivebox install[/green] to set up dependencies.[/grey53]")
if not binaries:
# Show code and data locations
prnt()
prnt('[deep_sky_blue3][i] Code locations:[/deep_sky_blue3]')
prnt("[deep_sky_blue3][i] Code locations:[/deep_sky_blue3]")
try:
for name, path in get_code_locations().items():
if isinstance(name, str) and isinstance(path, dict):
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
prnt(printable_folder_status(name, path), overflow="ignore", crop=False)
except Exception as e:
prnt(f' [red]Error getting code locations: {e}[/red]')
prnt(f" [red]Error getting code locations: {e}[/red]")
prnt()
if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) or os.access(CONSTANTS.CONFIG_FILE, os.R_OK):
prnt('[bright_yellow][i] Data locations:[/bright_yellow]')
prnt("[bright_yellow][i] Data locations:[/bright_yellow]")
try:
for name, path in get_data_locations().items():
if isinstance(name, str) and isinstance(path, dict):
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
prnt(printable_folder_status(name, path), overflow="ignore", crop=False)
except Exception as e:
prnt(f' [red]Error getting data locations: {e}[/red]')
prnt(f" [red]Error getting data locations: {e}[/red]")
try:
from archivebox.misc.checks import check_data_dir_permissions
check_data_dir_permissions()
except Exception:
pass
else:
prnt()
prnt('[red][i] Data locations:[/red] (not in a data directory)')
prnt("[red][i] Data locations:[/red] (not in a data directory)")
prnt()
if failures:
prnt('[red]Error:[/red] [yellow]Failed to detect the following binaries:[/yellow]')
prnt(f' [red]{", ".join(failures)}[/red]')
prnt("[red]Error:[/red] [yellow]Failed to detect the following binaries:[/yellow]")
prnt(f" [red]{', '.join(failures)}[/red]")
prnt()
prnt('[violet]Hint:[/violet] To install missing binaries automatically, run:')
prnt(' [green]archivebox install[/green]')
prnt("[violet]Hint:[/violet] To install missing binaries automatically, run:")
prnt(" [green]archivebox install[/green]")
prnt()
return failures
@click.command()
@click.option('--quiet', '-q', is_flag=True, help='Only print ArchiveBox version number and nothing else. (equivalent to archivebox --version)')
@click.option('--binaries', '-b', help='Select binaries to detect DEFAULT=curl,wget,git,yt-dlp,chrome,single-file,readability-extractor,postlight-parser,... (all)')
@click.option(
"--quiet",
"-q",
is_flag=True,
help="Only print ArchiveBox version number and nothing else. (equivalent to archivebox --version)",
)
@click.option(
"--binaries",
"-b",
help="Select binaries to detect DEFAULT=curl,wget,git,yt-dlp,chrome,single-file,readability-extractor,postlight-parser,... (all)",
)
@docstring(version.__doc__)
def main(**kwargs):
failures = version(**kwargs)
@@ -198,5 +237,5 @@ def main(**kwargs):
raise SystemExit(1)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -5,12 +5,10 @@ This module contains common utilities used across multiple CLI commands,
extracted to avoid code duplication.
"""
__package__ = 'archivebox.cli'
from typing import Optional
__package__ = "archivebox.cli"
def apply_filters(queryset, filter_kwargs: dict, limit: Optional[int] = None):
def apply_filters(queryset, filter_kwargs: dict, limit: int | None = None):
"""
Apply Django-style filters from CLI kwargs to a QuerySet.
@@ -31,11 +29,11 @@ def apply_filters(queryset, filter_kwargs: dict, limit: Optional[int] = None):
"""
filters = {}
for key, value in filter_kwargs.items():
if value is None or key in ('limit', 'offset'):
if value is None or key in ("limit", "offset"):
continue
# Handle CSV lists for __in filters
if key.endswith('__in') and isinstance(value, str):
value = [v.strip() for v in value.split(',')]
if key.endswith("__in") and isinstance(value, str):
value = [v.strip() for v in value.split(",")]
filters[key] = value
if filters:

View File

@@ -5,16 +5,16 @@ This module provides backwards-compatible config exports for extractors
and other modules that expect to import config values directly.
"""
__package__ = 'archivebox.config'
__package__ = "archivebox.config"
__order__ = 200
from .paths import (
PACKAGE_DIR, # noqa
DATA_DIR, # noqa
ARCHIVE_DIR, # noqa
PACKAGE_DIR,
DATA_DIR,
ARCHIVE_DIR,
)
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
from .version import VERSION # noqa
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
from .version import VERSION # noqa
###############################################################################
@@ -22,15 +22,18 @@ from .version import VERSION # noqa
# These provide backwards compatibility with extractors that import from ..config
###############################################################################
def _get_config():
"""Lazy import to avoid circular imports."""
from .common import ARCHIVING_CONFIG, STORAGE_CONFIG
return ARCHIVING_CONFIG, STORAGE_CONFIG
# Direct exports (evaluated at import time for backwards compat)
# These are recalculated each time the module attribute is accessed
def __getattr__(name: str):
"""
Module-level __getattr__ for lazy config loading.
@@ -40,38 +43,38 @@ def __getattr__(name: str):
"""
# Generic timeout settings (used by multiple plugins)
if name == 'TIMEOUT':
if name == "TIMEOUT":
cfg, _ = _get_config()
return cfg.TIMEOUT
# Generic SSL/Security settings (used by multiple plugins)
if name == 'CHECK_SSL_VALIDITY':
if name == "CHECK_SSL_VALIDITY":
cfg, _ = _get_config()
return cfg.CHECK_SSL_VALIDITY
# Generic storage settings (used by multiple plugins)
if name == 'RESTRICT_FILE_NAMES':
if name == "RESTRICT_FILE_NAMES":
_, storage = _get_config()
return storage.RESTRICT_FILE_NAMES
# Generic user agent / cookies (used by multiple plugins)
if name == 'COOKIES_FILE':
if name == "COOKIES_FILE":
cfg, _ = _get_config()
return cfg.COOKIES_FILE
if name == 'USER_AGENT':
if name == "USER_AGENT":
cfg, _ = _get_config()
return cfg.USER_AGENT
# Generic resolution settings (used by multiple plugins)
if name == 'RESOLUTION':
if name == "RESOLUTION":
cfg, _ = _get_config()
return cfg.RESOLUTION
# Allowlist/Denylist patterns (compiled regexes)
if name == 'SAVE_ALLOWLIST_PTN':
if name == "SAVE_ALLOWLIST_PTN":
cfg, _ = _get_config()
return cfg.SAVE_ALLOWLIST_PTNS
if name == 'SAVE_DENYLIST_PTN':
if name == "SAVE_DENYLIST_PTN":
cfg, _ = _get_config()
return cfg.SAVE_DENYLIST_PTNS
@@ -90,12 +93,13 @@ def get_CONFIG():
SEARCH_BACKEND_CONFIG,
)
from .ldap import LDAP_CONFIG
return {
'SHELL_CONFIG': SHELL_CONFIG,
'STORAGE_CONFIG': STORAGE_CONFIG,
'GENERAL_CONFIG': GENERAL_CONFIG,
'SERVER_CONFIG': SERVER_CONFIG,
'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
'LDAP_CONFIG': LDAP_CONFIG,
"SHELL_CONFIG": SHELL_CONFIG,
"STORAGE_CONFIG": STORAGE_CONFIG,
"GENERAL_CONFIG": GENERAL_CONFIG,
"SERVER_CONFIG": SERVER_CONFIG,
"ARCHIVING_CONFIG": ARCHIVING_CONFIG,
"SEARCHBACKEND_CONFIG": SEARCH_BACKEND_CONFIG,
"LDAP_CONFIG": LDAP_CONFIG,
}

View File

@@ -1,8 +1,8 @@
__package__ = 'archivebox.config'
__package__ = "archivebox.config"
import os
import json
from typing import Any, Optional, Type, Tuple, Dict
from typing import Any
from pathlib import Path
from configparser import ConfigParser
@@ -27,13 +27,15 @@ def get_real_name(key: str) -> str:
return key
def load_config_val(key: str,
default: Any=None,
type: Optional[Type]=None,
aliases: Optional[Tuple[str, ...]]=None,
config: Optional[benedict]=None,
env_vars: Optional[os._Environ]=None,
config_file_vars: Optional[Dict[str, str]]=None) -> Any:
def load_config_val(
key: str,
default: Any = None,
type: type | None = None,
aliases: tuple[str, ...] | None = None,
config: benedict | None = None,
env_vars: os._Environ | None = None,
config_file_vars: dict[str, str] | None = None,
) -> Any:
"""parse bool, int, and str key=value pairs from env"""
assert isinstance(config, dict)
@@ -67,8 +69,8 @@ def load_config_val(key: str,
assert isinstance(val, str)
# calculate value based on expected type
BOOL_TRUEIES = ('true', 'yes', '1')
BOOL_FALSEIES = ('false', 'no', '0')
BOOL_TRUEIES = ("true", "yes", "1")
BOOL_FALSEIES = ("false", "no", "0")
if type is bool:
if val.lower() in BOOL_TRUEIES:
@@ -76,28 +78,28 @@ def load_config_val(key: str,
elif val.lower() in BOOL_FALSEIES:
return False
else:
raise ValueError(f'Invalid configuration option {key}={val} (expected a boolean: True/False)')
raise ValueError(f"Invalid configuration option {key}={val} (expected a boolean: True/False)")
elif type is str:
if val.lower() in (*BOOL_TRUEIES, *BOOL_FALSEIES):
raise ValueError(f'Invalid configuration option {key}={val} (expected a string, but value looks like a boolean)')
raise ValueError(f"Invalid configuration option {key}={val} (expected a string, but value looks like a boolean)")
return val.strip()
elif type is int:
if not val.strip().isdigit():
raise ValueError(f'Invalid configuration option {key}={val} (expected an integer)')
raise ValueError(f"Invalid configuration option {key}={val} (expected an integer)")
return int(val.strip())
elif type is list or type is dict:
return json.loads(val)
elif type is Path:
return Path(val)
raise Exception('Config values can only be str, bool, int, or json')
raise Exception("Config values can only be str, bool, int, or json")
def load_config_file() -> Optional[benedict]:
def load_config_file() -> benedict | None:
"""load the ini-formatted config file from DATA_DIR/Archivebox.conf"""
config_path = CONSTANTS.CONFIG_FILE
@@ -105,17 +107,16 @@ def load_config_file() -> Optional[benedict]:
config_file = CaseConfigParser()
config_file.read(config_path)
# flatten into one namespace
config_file_vars = benedict({
key.upper(): val
for section, options in config_file.items()
for key, val in options.items()
})
config_file_vars = benedict({key.upper(): val for section, options in config_file.items() for key, val in options.items()})
# print('[i] Loaded config file', os.path.abspath(config_path))
# print(config_file_vars)
return config_file_vars
return None
class PluginConfigSection:
"""Pseudo-section for all plugin config keys written to [PLUGINS] section in ArchiveBox.conf"""
toml_section_header = "PLUGINS"
def __init__(self, key: str):
@@ -144,8 +145,14 @@ def section_for_key(key: str) -> Any:
)
# First check core config sections
for section in [SHELL_CONFIG, STORAGE_CONFIG, GENERAL_CONFIG,
SERVER_CONFIG, ARCHIVING_CONFIG, SEARCH_BACKEND_CONFIG]:
for section in [
SHELL_CONFIG,
STORAGE_CONFIG,
GENERAL_CONFIG,
SERVER_CONFIG,
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
]:
if hasattr(section, key):
return section
@@ -154,20 +161,19 @@ def section_for_key(key: str) -> Any:
plugin_configs = discover_plugin_configs()
for plugin_name, schema in plugin_configs.items():
if 'properties' in schema and key in schema['properties']:
if "properties" in schema and key in schema["properties"]:
# All plugin config goes to [PLUGINS] section
return PluginConfigSection(key)
raise ValueError(f'No config section found for key: {key}')
raise ValueError(f"No config section found for key: {key}")
def write_config_file(config: Dict[str, str]) -> benedict:
def write_config_file(config: dict[str, str]) -> benedict:
"""load the ini-formatted config file from DATA_DIR/Archivebox.conf"""
from archivebox.misc.system import atomic_write
CONFIG_HEADER = (
"""# This is the config file for your ArchiveBox collection.
CONFIG_HEADER = """# This is the config file for your ArchiveBox collection.
#
# You can add options here manually in INI format, or automatically by running:
# archivebox config --set KEY=VALUE
@@ -178,7 +184,7 @@ def write_config_file(config: Dict[str, str]) -> benedict:
# A list of all possible config with documentation and examples can be found here:
# https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
""")
"""
config_path = CONSTANTS.CONFIG_FILE
@@ -188,57 +194,56 @@ def write_config_file(config: Dict[str, str]) -> benedict:
config_file = CaseConfigParser()
config_file.read(config_path)
with open(config_path, 'r', encoding='utf-8') as old:
atomic_write(f'{config_path}.bak', old.read())
with open(config_path, encoding="utf-8") as old:
atomic_write(f"{config_path}.bak", old.read())
# Set up sections in empty config file
for key, val in config.items():
section = section_for_key(key)
assert section is not None
if not hasattr(section, 'toml_section_header'):
raise ValueError(f'{key} is read-only (defined in {type(section).__module__}.{type(section).__name__}). Refusing to set.')
if not hasattr(section, "toml_section_header"):
raise ValueError(f"{key} is read-only (defined in {type(section).__module__}.{type(section).__name__}). Refusing to set.")
section_name = section.toml_section_header
if section_name in config_file:
existing_config = dict(config_file[section_name])
else:
existing_config = {}
config_file[section_name] = benedict({**existing_config, key: val})
section.update_in_place(warn=False, persist=False, **{key: val})
with open(config_path, 'w+', encoding='utf-8') as new:
with open(config_path, "w+", encoding="utf-8") as new:
config_file.write(new)
updated_config = {}
try:
# validate the updated_config by attempting to re-parse it
from archivebox.config.configset import get_flat_config
updated_config = {**load_all_config(), **get_flat_config()}
except BaseException: # lgtm [py/catch-base-exception]
except BaseException: # lgtm [py/catch-base-exception]
# something went horribly wrong, revert to the previous version
with open(f'{config_path}.bak', 'r', encoding='utf-8') as old:
with open(f"{config_path}.bak", encoding="utf-8") as old:
atomic_write(config_path, old.read())
raise
if os.access(f'{config_path}.bak', os.F_OK):
os.remove(f'{config_path}.bak')
if os.access(f"{config_path}.bak", os.F_OK):
os.remove(f"{config_path}.bak")
return benedict({
key.upper(): updated_config.get(key.upper())
for key in config.keys()
})
return benedict({key.upper(): updated_config.get(key.upper()) for key in config.keys()})
def load_config(defaults: Dict[str, Any],
config: Optional[benedict]=None,
out_dir: Optional[str]=None,
env_vars: Optional[os._Environ]=None,
config_file_vars: Optional[Dict[str, str]]=None) -> benedict:
def load_config(
defaults: dict[str, Any],
config: benedict | None = None,
out_dir: str | None = None,
env_vars: os._Environ | None = None,
config_file_vars: dict[str, str] | None = None,
) -> benedict:
env_vars = env_vars or os.environ
config_file_vars = config_file_vars or load_config_file()
@@ -249,9 +254,9 @@ def load_config(defaults: Dict[str, Any],
# print('LOADING CONFIG KEY:', key, 'DEFAULT=', default)
extended_config[key] = load_config_val(
key,
default=default['default'],
type=default.get('type'),
aliases=default.get('aliases'),
default=default["default"],
type=default.get("type"),
aliases=default.get("aliases"),
config=extended_config,
env_vars=env_vars,
config_file_vars=config_file_vars,
@@ -260,19 +265,20 @@ def load_config(defaults: Dict[str, Any],
raise SystemExit(0)
except Exception as e:
stderr()
stderr(f'[X] Error while loading configuration value: {key}', color='red', config=extended_config)
stderr(' {}: {}'.format(e.__class__.__name__, e))
stderr(f"[X] Error while loading configuration value: {key}", color="red", config=extended_config)
stderr(f" {e.__class__.__name__}: {e}")
stderr()
stderr(' Check your config for mistakes and try again (your archive data is unaffected).')
stderr(" Check your config for mistakes and try again (your archive data is unaffected).")
stderr()
stderr(' For config documentation and examples see:')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration')
stderr(" For config documentation and examples see:")
stderr(" https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration")
stderr()
# raise
# raise SystemExit(2)
return benedict(extended_config)
def load_all_config():
"""Load all config sections and return as a flat dict."""
from archivebox.config.common import (
@@ -283,11 +289,17 @@ def load_all_config():
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
)
flat_config = benedict()
for config_section in [SHELL_CONFIG, STORAGE_CONFIG, GENERAL_CONFIG,
SERVER_CONFIG, ARCHIVING_CONFIG, SEARCH_BACKEND_CONFIG]:
for config_section in [
SHELL_CONFIG,
STORAGE_CONFIG,
GENERAL_CONFIG,
SERVER_CONFIG,
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
]:
flat_config.update(dict(config_section))
return flat_config

View File

@@ -4,7 +4,7 @@ import re
import secrets
import sys
import shutil
from typing import ClassVar, Dict, Optional, List
from typing import ClassVar
from pathlib import Path
from rich.console import Console
@@ -39,8 +39,8 @@ class ShellConfig(BaseConfigSet):
IN_DOCKER: bool = Field(default=IN_DOCKER)
IN_QEMU: bool = Field(default=False)
ANSI: Dict[str, str] = Field(
default_factory=lambda: CONSTANTS.DEFAULT_CLI_COLORS if sys.stdout.isatty() else CONSTANTS.DISABLED_CLI_COLORS
ANSI: dict[str, str] = Field(
default_factory=lambda: CONSTANTS.DEFAULT_CLI_COLORS if sys.stdout.isatty() else CONSTANTS.DISABLED_CLI_COLORS,
)
@property
@@ -50,7 +50,7 @@ class ShellConfig(BaseConfigSet):
return shutil.get_terminal_size((140, 10)).columns
@property
def COMMIT_HASH(self) -> Optional[str]:
def COMMIT_HASH(self) -> str | None:
return get_COMMIT_HASH()
@property
@@ -112,7 +112,7 @@ class ServerConfig(BaseConfigSet):
"danger-onedomain-fullreplay",
)
SECRET_KEY: str = Field(default_factory=lambda: ''.join(secrets.choice("abcdefghijklmnopqrstuvwxyz0123456789_") for _ in range(50)))
SECRET_KEY: str = Field(default_factory=lambda: "".join(secrets.choice("abcdefghijklmnopqrstuvwxyz0123456789_") for _ in range(50)))
BIND_ADDR: str = Field(default="127.0.0.1:8000")
LISTEN_HOST: str = Field(default="archivebox.localhost:8000")
ADMIN_BASE_URL: str = Field(default="")
@@ -124,7 +124,7 @@ class ServerConfig(BaseConfigSet):
SNAPSHOTS_PER_PAGE: int = Field(default=40)
PREVIEW_ORIGINALS: bool = Field(default=True)
FOOTER_INFO: str = Field(
default="Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
default="Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.",
)
# CUSTOM_TEMPLATES_DIR: Path = Field(default=None) # this is now a constant
@@ -132,8 +132,8 @@ class ServerConfig(BaseConfigSet):
PUBLIC_SNAPSHOTS: bool = Field(default=True)
PUBLIC_ADD_VIEW: bool = Field(default=False)
ADMIN_USERNAME: Optional[str] = Field(default=None)
ADMIN_PASSWORD: Optional[str] = Field(default=None)
ADMIN_USERNAME: str | None = Field(default=None)
ADMIN_PASSWORD: str | None = Field(default=None)
REVERSE_PROXY_USER_HEADER: str = Field(default="Remote-User")
REVERSE_PROXY_WHITELIST: str = Field(default="")
@@ -234,22 +234,22 @@ class ArchivingConfig(BaseConfigSet):
RESOLUTION: str = Field(default="1440,2000")
CHECK_SSL_VALIDITY: bool = Field(default=True)
USER_AGENT: str = Field(
default=f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)"
default=f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)",
)
COOKIES_FILE: Path | None = Field(default=None)
URL_DENYLIST: str = Field(default=r"\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$", alias="URL_BLACKLIST")
URL_ALLOWLIST: str | None = Field(default=None, alias="URL_WHITELIST")
SAVE_ALLOWLIST: Dict[str, List[str]] = Field(default={}) # mapping of regex patterns to list of archive methods
SAVE_DENYLIST: Dict[str, List[str]] = Field(default={})
SAVE_ALLOWLIST: dict[str, list[str]] = Field(default={}) # mapping of regex patterns to list of archive methods
SAVE_DENYLIST: dict[str, list[str]] = Field(default={})
DEFAULT_PERSONA: str = Field(default="Default")
def warn_if_invalid(self) -> None:
if int(self.TIMEOUT) < 5:
rprint(f"[red][!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={self.TIMEOUT} seconds)[/red]", file=sys.stderr)
rprint(" You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.", file=sys.stderr)
rprint(" You must allow *at least* 5 seconds for indexing and archive methods to run successfully.", file=sys.stderr)
rprint(" (Setting it to somewhere between 30 and 3000 seconds is recommended)", file=sys.stderr)
rprint(file=sys.stderr)
rprint(" If you want to make ArchiveBox run faster, disable specific archive methods instead:", file=sys.stderr)
@@ -274,7 +274,7 @@ class ArchivingConfig(BaseConfigSet):
return re.compile(self.URL_DENYLIST, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)
@property
def SAVE_ALLOWLIST_PTNS(self) -> Dict[re.Pattern, List[str]]:
def SAVE_ALLOWLIST_PTNS(self) -> dict[re.Pattern, list[str]]:
return (
{
# regexp: methods list
@@ -286,7 +286,7 @@ class ArchivingConfig(BaseConfigSet):
)
@property
def SAVE_DENYLIST_PTNS(self) -> Dict[re.Pattern, List[str]]:
def SAVE_DENYLIST_PTNS(self) -> dict[re.Pattern, list[str]]:
return (
{
# regexp: methods list

View File

@@ -11,7 +11,7 @@ __package__ = "archivebox.config"
import os
import json
from pathlib import Path
from typing import Any, Dict, Optional, Type, Tuple
from typing import Any
from configparser import ConfigParser
from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict
@@ -28,17 +28,18 @@ class IniConfigSettingsSource(PydanticBaseSettingsSource):
Flattens all sections into a single namespace.
"""
def get_field_value(self, field: Any, field_name: str) -> Tuple[Any, str, bool]:
def get_field_value(self, field: Any, field_name: str) -> tuple[Any, str, bool]:
config_vals = self._load_config_file()
field_value = config_vals.get(field_name.upper())
return field_value, field_name, False
def __call__(self) -> Dict[str, Any]:
def __call__(self) -> dict[str, Any]:
return self._load_config_file()
def _load_config_file(self) -> Dict[str, Any]:
def _load_config_file(self) -> dict[str, Any]:
try:
from archivebox.config.constants import CONSTANTS
config_path = CONSTANTS.CONFIG_FILE
except ImportError:
return {}
@@ -78,25 +79,25 @@ class BaseConfigSet(BaseSettings):
@classmethod
def settings_customise_sources(
cls,
settings_cls: Type[BaseSettings],
settings_cls: type[BaseSettings],
init_settings: PydanticBaseSettingsSource,
env_settings: PydanticBaseSettingsSource,
dotenv_settings: PydanticBaseSettingsSource,
file_secret_settings: PydanticBaseSettingsSource,
) -> Tuple[PydanticBaseSettingsSource, ...]:
) -> tuple[PydanticBaseSettingsSource, ...]:
"""
Define the order of settings sources (first = highest priority).
"""
return (
init_settings, # 1. Passed to __init__
env_settings, # 2. Environment variables
init_settings, # 1. Passed to __init__
env_settings, # 2. Environment variables
IniConfigSettingsSource(settings_cls), # 3. ArchiveBox.conf file
# dotenv_settings, # Skip .env files
# file_secret_settings, # Skip secrets files
)
@classmethod
def load_from_file(cls, config_path: Path) -> Dict[str, str]:
def load_from_file(cls, config_path: Path) -> dict[str, str]:
"""Load config values from INI file."""
if not config_path.exists():
return {}
@@ -120,14 +121,14 @@ class BaseConfigSet(BaseSettings):
def get_config(
defaults: Optional[Dict] = None,
defaults: dict | None = None,
persona: Any = None,
user: Any = None,
crawl: Any = None,
snapshot: Any = None,
archiveresult: Any = None,
machine: Any = None,
) -> Dict[str, Any]:
) -> dict[str, Any]:
"""
Get merged config from all sources.
@@ -176,7 +177,7 @@ def get_config(
if persona_id:
persona = Persona.objects.filter(id=persona_id).first()
if persona is None:
raise Persona.DoesNotExist(f'Crawl {getattr(crawl, "id", None)} references missing Persona {persona_id}')
raise Persona.DoesNotExist(f"Crawl {getattr(crawl, 'id', None)} references missing Persona {persona_id}")
if persona is None:
crawl_config = getattr(crawl, "config", None) or {}
@@ -200,6 +201,7 @@ def get_config(
# Add plugin config defaults from JSONSchema config.json files
try:
from archivebox.hooks import get_config_defaults_from_plugins
plugin_defaults = get_config_defaults_from_plugins()
config.update(plugin_defaults)
except ImportError:
@@ -224,6 +226,7 @@ def get_config(
# Default to current machine if not provided
try:
from archivebox.machine.models import Machine
machine = Machine.current()
except Exception:
pass # Machine might not be available during early init
@@ -246,16 +249,17 @@ def get_config(
# Also check plugin config aliases in environment
try:
from archivebox.hooks import discover_plugin_configs
plugin_configs = discover_plugin_configs()
for plugin_name, schema in plugin_configs.items():
for key, prop_schema in schema.get('properties', {}).items():
for key, prop_schema in schema.get("properties", {}).items():
# Check x-aliases
for alias in prop_schema.get('x-aliases', []):
for alias in prop_schema.get("x-aliases", []):
if alias in os.environ and key not in os.environ:
config[key] = _parse_env_value(os.environ[alias], config.get(key))
break
# Check x-fallback
fallback = prop_schema.get('x-fallback')
fallback = prop_schema.get("x-fallback")
if fallback and fallback in config and key not in config:
config[key] = config[fallback]
except ImportError:
@@ -275,33 +279,34 @@ def get_config(
# Add crawl path aliases for hooks that need shared crawl state.
if crawl and hasattr(crawl, "output_dir"):
config['CRAWL_OUTPUT_DIR'] = str(crawl.output_dir)
config['CRAWL_DIR'] = str(crawl.output_dir)
config['CRAWL_ID'] = str(getattr(crawl, "id", "")) if getattr(crawl, "id", None) else config.get('CRAWL_ID')
config["CRAWL_OUTPUT_DIR"] = str(crawl.output_dir)
config["CRAWL_DIR"] = str(crawl.output_dir)
config["CRAWL_ID"] = str(getattr(crawl, "id", "")) if getattr(crawl, "id", None) else config.get("CRAWL_ID")
# Apply snapshot config overrides (highest priority)
if snapshot and hasattr(snapshot, "config") and snapshot.config:
config.update(snapshot.config)
if snapshot:
config['SNAPSHOT_ID'] = str(getattr(snapshot, "id", "")) if getattr(snapshot, "id", None) else config.get('SNAPSHOT_ID')
config['SNAPSHOT_DEPTH'] = int(getattr(snapshot, "depth", 0) or 0)
config["SNAPSHOT_ID"] = str(getattr(snapshot, "id", "")) if getattr(snapshot, "id", None) else config.get("SNAPSHOT_ID")
config["SNAPSHOT_DEPTH"] = int(getattr(snapshot, "depth", 0) or 0)
if hasattr(snapshot, "output_dir"):
config['SNAP_DIR'] = str(snapshot.output_dir)
config["SNAP_DIR"] = str(snapshot.output_dir)
if getattr(snapshot, "crawl_id", None):
config['CRAWL_ID'] = str(snapshot.crawl_id)
config["CRAWL_ID"] = str(snapshot.crawl_id)
# Normalize all aliases to canonical names (after all sources merged)
# This handles aliases that came from user/crawl/snapshot configs, not just env
try:
from archivebox.hooks import discover_plugin_configs
plugin_configs = discover_plugin_configs()
aliases_to_normalize = {} # {alias_key: canonical_key}
# Build alias mapping from all plugin schemas
for plugin_name, schema in plugin_configs.items():
for canonical_key, prop_schema in schema.get('properties', {}).items():
for alias in prop_schema.get('x-aliases', []):
for canonical_key, prop_schema in schema.get("properties", {}).items():
for alias in prop_schema.get("x-aliases", []):
aliases_to_normalize[alias] = canonical_key
# Normalize: copy alias values to canonical keys (aliases take precedence)
@@ -314,10 +319,14 @@ def get_config(
except ImportError:
pass
if not config.get("DATA_DIR"):
config["DATA_DIR"] = str(CONSTANTS.DATA_DIR)
config["ABX_RUNTIME"] = "archivebox"
return config
def get_flat_config() -> Dict[str, Any]:
def get_flat_config() -> dict[str, Any]:
"""
Get a flat dictionary of all config values.
@@ -326,20 +335,24 @@ def get_flat_config() -> Dict[str, Any]:
return get_config()
def get_all_configs() -> Dict[str, BaseConfigSet]:
def get_all_configs() -> dict[str, BaseConfigSet]:
"""
Get all config section objects as a dictionary.
Replaces abx.pm.hook.get_CONFIGS()
"""
from archivebox.config.common import (
SHELL_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG, SEARCH_BACKEND_CONFIG
SHELL_CONFIG,
SERVER_CONFIG,
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
)
return {
'SHELL_CONFIG': SHELL_CONFIG,
'SERVER_CONFIG': SERVER_CONFIG,
'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
'SEARCH_BACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
"SHELL_CONFIG": SHELL_CONFIG,
"SERVER_CONFIG": SERVER_CONFIG,
"ARCHIVING_CONFIG": ARCHIVING_CONFIG,
"SEARCH_BACKEND_CONFIG": SEARCH_BACKEND_CONFIG,
}
@@ -394,7 +407,7 @@ DEFAULT_WORKER_CONCURRENCY = {
}
def get_worker_concurrency() -> Dict[str, int]:
def get_worker_concurrency() -> dict[str, int]:
"""
Get worker concurrency settings.

View File

@@ -5,17 +5,16 @@ Constants are for things that never change at runtime.
DATA_DIR will never change at runtime, but you can run
archivebox from inside a different DATA_DIR on the same machine.
This is loaded very early in the archivebox startup flow, so nothing in this file
or imported from this file should import anything from archivebox.config.common,
This is loaded very early in the archivebox startup flow, so nothing in this file
or imported from this file should import anything from archivebox.config.common,
django, other INSTALLED_APPS, or anything else that is not in a standard library.
"""
__package__ = 'archivebox.config'
__package__ = "archivebox.config"
import re
import sys
from typing import Dict
from pathlib import Path
from benedict import benedict
@@ -46,184 +45,235 @@ from .version import detect_installed_version
class ConstantsDict:
PACKAGE_DIR: Path = PACKAGE_DIR
DATA_DIR: Path = DATA_DIR
ARCHIVE_DIR: Path = ARCHIVE_DIR
MACHINE_TYPE: str = get_machine_type()
MACHINE_ID: str = get_machine_id()
COLLECTION_ID: str = get_collection_id(DATA_DIR)
PACKAGE_DIR: Path = PACKAGE_DIR
DATA_DIR: Path = DATA_DIR
ARCHIVE_DIR: Path = ARCHIVE_DIR
MACHINE_TYPE: str = get_machine_type()
MACHINE_ID: str = get_machine_id()
COLLECTION_ID: str = get_collection_id(DATA_DIR)
# Host system
VERSION: str = detect_installed_version(PACKAGE_DIR)
IN_DOCKER: bool = IN_DOCKER
VERSION: str = detect_installed_version(PACKAGE_DIR)
IN_DOCKER: bool = IN_DOCKER
# Permissions
IS_ROOT: bool = IS_ROOT
ARCHIVEBOX_USER: int = ARCHIVEBOX_USER
ARCHIVEBOX_GROUP: int = ARCHIVEBOX_GROUP
RUNNING_AS_UID: int = RUNNING_AS_UID
RUNNING_AS_GID: int = RUNNING_AS_GID
DEFAULT_PUID: int = DEFAULT_PUID
DEFAULT_PGID: int = DEFAULT_PGID
IS_INSIDE_VENV: bool = sys.prefix != sys.base_prefix
IS_ROOT: bool = IS_ROOT
ARCHIVEBOX_USER: int = ARCHIVEBOX_USER
ARCHIVEBOX_GROUP: int = ARCHIVEBOX_GROUP
RUNNING_AS_UID: int = RUNNING_AS_UID
RUNNING_AS_GID: int = RUNNING_AS_GID
DEFAULT_PUID: int = DEFAULT_PUID
DEFAULT_PGID: int = DEFAULT_PGID
IS_INSIDE_VENV: bool = sys.prefix != sys.base_prefix
# Source code dirs
PACKAGE_DIR_NAME: str = PACKAGE_DIR.name
TEMPLATES_DIR_NAME: str = 'templates'
TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME
STATIC_DIR_NAME: str = 'static'
STATIC_DIR: Path = TEMPLATES_DIR / STATIC_DIR_NAME
PACKAGE_DIR_NAME: str = PACKAGE_DIR.name
TEMPLATES_DIR_NAME: str = "templates"
TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME
STATIC_DIR_NAME: str = "static"
STATIC_DIR: Path = TEMPLATES_DIR / STATIC_DIR_NAME
# Data dirs
ARCHIVE_DIR_NAME: str = 'archive'
SOURCES_DIR_NAME: str = 'sources'
PERSONAS_DIR_NAME: str = 'personas'
CACHE_DIR_NAME: str = 'cache'
LOGS_DIR_NAME: str = 'logs'
CUSTOM_PLUGINS_DIR_NAME: str = 'custom_plugins'
CUSTOM_TEMPLATES_DIR_NAME: str = 'custom_templates'
ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME
LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME
CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME
CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
USER_PLUGINS_DIR: Path = DATA_DIR / CUSTOM_PLUGINS_DIR_NAME
ARCHIVE_DIR_NAME: str = "archive"
SOURCES_DIR_NAME: str = "sources"
PERSONAS_DIR_NAME: str = "personas"
CACHE_DIR_NAME: str = "cache"
LOGS_DIR_NAME: str = "logs"
CUSTOM_PLUGINS_DIR_NAME: str = "custom_plugins"
CUSTOM_TEMPLATES_DIR_NAME: str = "custom_templates"
ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME
LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME
CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME
CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
USER_PLUGINS_DIR: Path = DATA_DIR / CUSTOM_PLUGINS_DIR_NAME
# Data dir files
CONFIG_FILENAME: str = 'ArchiveBox.conf'
SQL_INDEX_FILENAME: str = 'index.sqlite3'
CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
JSON_INDEX_FILENAME: str = 'index.json'
JSONL_INDEX_FILENAME: str = 'index.jsonl'
HTML_INDEX_FILENAME: str = 'index.html'
ROBOTS_TXT_FILENAME: str = 'robots.txt'
FAVICON_FILENAME: str = 'favicon.ico'
# Runtime dirs
TMP_DIR_NAME: str = 'tmp'
DEFAULT_TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME / MACHINE_ID # ./data/tmp/abc3244323
CONFIG_FILENAME: str = "ArchiveBox.conf"
SQL_INDEX_FILENAME: str = "index.sqlite3"
CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
LIB_DIR_NAME: str = 'lib'
DEFAULT_LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / MACHINE_TYPE # ./data/lib/arm64-linux-docker
DEFAULT_LIB_BIN_DIR: Path = DEFAULT_LIB_DIR / 'bin' # ./data/lib/arm64-linux-docker/bin
JSON_INDEX_FILENAME: str = "index.json"
JSONL_INDEX_FILENAME: str = "index.jsonl"
HTML_INDEX_FILENAME: str = "index.html"
ROBOTS_TXT_FILENAME: str = "robots.txt"
FAVICON_FILENAME: str = "favicon.ico"
# Runtime dirs
TMP_DIR_NAME: str = "tmp"
DEFAULT_TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME / MACHINE_ID # ./data/tmp/abc3244323
LIB_DIR_NAME: str = "lib"
DEFAULT_LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / MACHINE_TYPE # ./data/lib/arm64-linux-docker
DEFAULT_LIB_BIN_DIR: Path = DEFAULT_LIB_DIR / "bin" # ./data/lib/arm64-linux-docker/bin
# Config constants
TIMEZONE: str = 'UTC'
DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS
DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS})
TIMEZONE: str = "UTC"
DEFAULT_CLI_COLORS: dict[str, str] = DEFAULT_CLI_COLORS
DISABLED_CLI_COLORS: dict[str, str] = benedict({k: "" for k in DEFAULT_CLI_COLORS})
# Hard safety limits (seconds)
MAX_HOOK_RUNTIME_SECONDS: int = 60 * 60 * 12 # 12 hours
MAX_SNAPSHOT_RUNTIME_SECONDS: int = 60 * 60 * 12 # 12 hours
MAX_HOOK_RUNTIME_SECONDS: int = 60 * 60 * 12 # 12 hours
MAX_SNAPSHOT_RUNTIME_SECONDS: int = 60 * 60 * 12 # 12 hours
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
STATICFILE_EXTENSIONS: frozenset[str] = frozenset((
# 99.999% of the time, URLs ending in these extensions are static files
# that can be downloaded as-is, not html pages that need to be rendered
'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp',
'svg', 'svgz', 'webp', 'ps', 'eps', 'ai',
'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v',
'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8',
'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
'atom', 'rss', 'css', 'js', 'json',
'dmg', 'iso', 'img',
'rar', 'war', 'hqx', 'zip', 'gz', 'bz2', '7z',
STATICFILE_EXTENSIONS: frozenset[str] = frozenset(
(
# 99.999% of the time, URLs ending in these extensions are static files
# that can be downloaded as-is, not html pages that need to be rendered
"gif",
"jpeg",
"jpg",
"png",
"tif",
"tiff",
"wbmp",
"ico",
"jng",
"bmp",
"svg",
"svgz",
"webp",
"ps",
"eps",
"ai",
"mp3",
"mp4",
"m4a",
"mpeg",
"mpg",
"mkv",
"mov",
"webm",
"m4v",
"flv",
"wmv",
"avi",
"ogg",
"ts",
"m3u8",
"pdf",
"txt",
"rtf",
"rtfd",
"doc",
"docx",
"ppt",
"pptx",
"xls",
"xlsx",
"atom",
"rss",
"css",
"js",
"json",
"dmg",
"iso",
"img",
"rar",
"war",
"hqx",
"zip",
"gz",
"bz2",
"7z",
# Less common extensions to consider adding later
# jar, swf, bin, com, exe, dll, deb
# ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm,
# pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf,
# ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml
# These are always treated as pages, not as static files, never add them:
# html, htm, shtml, xhtml, xml, aspx, php, cgi
),
)
# Less common extensions to consider adding later
# jar, swf, bin, com, exe, dll, deb
# ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm,
# pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf,
# ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml
# These are always treated as pages, not as static files, never add them:
# html, htm, shtml, xhtml, xml, aspx, php, cgi
))
PIP_RELATED_NAMES: frozenset[str] = frozenset((
".venv",
"venv",
"virtualenv",
".virtualenv",
))
NPM_RELATED_NAMES: frozenset[str] = frozenset((
"node_modules",
"package.json",
"package-lock.json",
"yarn.lock",
))
PIP_RELATED_NAMES: frozenset[str] = frozenset(
(
".venv",
"venv",
"virtualenv",
".virtualenv",
),
)
NPM_RELATED_NAMES: frozenset[str] = frozenset(
(
"node_modules",
"package.json",
"package-lock.json",
"yarn.lock",
),
)
# When initializing archivebox in a new directory, we check to make sure the dir is
# actually empty so that we dont clobber someone's home directory or desktop by accident.
# These files are exceptions to the is_empty check when we're trying to init a new dir,
# as they could be from a previous archivebox version, system artifacts, dependencies, etc.
ALLOWED_IN_DATA_DIR: frozenset[str] = frozenset((
*PIP_RELATED_NAMES,
*NPM_RELATED_NAMES,
### Dirs:
ARCHIVE_DIR_NAME,
SOURCES_DIR_NAME,
LOGS_DIR_NAME,
CACHE_DIR_NAME,
LIB_DIR_NAME,
TMP_DIR_NAME,
PERSONAS_DIR_NAME,
CUSTOM_TEMPLATES_DIR_NAME,
CUSTOM_PLUGINS_DIR_NAME,
"invalid",
"users",
"machine",
# Backwards compatibility with old directory names
"user_plugins", # old name for USER_PLUGINS_DIR (now 'plugins')
"user_templates", # old name for CUSTOM_TEMPLATES_DIR (now 'templates')
"static", # created by old static exports <v0.6.0
"sonic", # created by docker bind mount / sonic FTS process
".git",
".svn",
### Files:
CONFIG_FILENAME,
SQL_INDEX_FILENAME,
f"{SQL_INDEX_FILENAME}-wal",
f"{SQL_INDEX_FILENAME}-shm",
"search.sqlite3",
"queue.sqlite3",
"queue.sqlite3-wal",
"queue.sqlite3-shm",
JSON_INDEX_FILENAME,
JSONL_INDEX_FILENAME,
HTML_INDEX_FILENAME,
ROBOTS_TXT_FILENAME,
FAVICON_FILENAME,
CONFIG_FILENAME,
f"{CONFIG_FILENAME}.bak",
f".{CONFIG_FILENAME}.bak",
"static_index.json",
".DS_Store",
".gitignore",
"lost+found",
".DS_Store",
".env",
".collection_id",
".archivebox_id",
"Dockerfile",
))
ALLOWED_IN_DATA_DIR: frozenset[str] = frozenset(
(
*PIP_RELATED_NAMES,
*NPM_RELATED_NAMES,
### Dirs:
ARCHIVE_DIR_NAME,
SOURCES_DIR_NAME,
LOGS_DIR_NAME,
CACHE_DIR_NAME,
LIB_DIR_NAME,
TMP_DIR_NAME,
PERSONAS_DIR_NAME,
CUSTOM_TEMPLATES_DIR_NAME,
CUSTOM_PLUGINS_DIR_NAME,
"invalid",
"users",
"machine",
# Backwards compatibility with old directory names
"user_plugins", # old name for USER_PLUGINS_DIR (now 'plugins')
"user_templates", # old name for CUSTOM_TEMPLATES_DIR (now 'templates')
"static", # created by old static exports <v0.6.0
"sonic", # created by docker bind mount / sonic FTS process
".git",
".svn",
### Files:
CONFIG_FILENAME,
SQL_INDEX_FILENAME,
f"{SQL_INDEX_FILENAME}-wal",
f"{SQL_INDEX_FILENAME}-shm",
"search.sqlite3",
"queue.sqlite3",
"queue.sqlite3-wal",
"queue.sqlite3-shm",
JSON_INDEX_FILENAME,
JSONL_INDEX_FILENAME,
HTML_INDEX_FILENAME,
ROBOTS_TXT_FILENAME,
FAVICON_FILENAME,
CONFIG_FILENAME,
f"{CONFIG_FILENAME}.bak",
f".{CONFIG_FILENAME}.bak",
"static_index.json",
".DS_Store",
".gitignore",
"lost+found",
".DS_Store",
".env",
".collection_id",
".archivebox_id",
"Dockerfile",
),
)
@classmethod
def __getitem__(cls, key: str):
# so it behaves like a dict[key] == dict.key or object attr
return getattr(cls, key)
@classmethod
def __benedict__(cls):
# when casting to benedict, only include uppercase keys that don't start with an underscore
return benedict({key: value for key, value in cls.__dict__.items() if key.isupper() and not key.startswith('_')})
return benedict({key: value for key, value in cls.__dict__.items() if key.isupper() and not key.startswith("_")})
CONSTANTS = ConstantsDict

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.config'
__package__ = "archivebox.config"
import os
import sys
@@ -17,9 +17,9 @@ from .common import SHELL_CONFIG
if not SHELL_CONFIG.USE_COLOR:
os.environ['NO_COLOR'] = '1'
os.environ["NO_COLOR"] = "1"
if not SHELL_CONFIG.SHOW_PROGRESS:
os.environ['TERM'] = 'dumb'
os.environ["TERM"] = "dumb"
# recreate rich console obj based on new config values
STDOUT = CONSOLE = Console()
@@ -32,7 +32,8 @@ def setup_django_minimal():
# os.environ.setdefault('ARCHIVEBOX_DATA_DIR', str(CONSTANTS.DATA_DIR))
# os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
# django.setup()
raise Exception('dont use this anymore')
raise Exception("dont use this anymore")
DJANGO_SET_UP = False
@@ -61,15 +62,18 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
# This warning can be triggered during django.setup() but is safe to ignore
# since we're doing intentional setup operations
import warnings
warnings.filterwarnings('ignore',
message='.*Accessing the database during app initialization.*',
category=RuntimeWarning)
warnings.filterwarnings(
"ignore",
message=".*Accessing the database during app initialization.*",
category=RuntimeWarning,
)
try:
from django.core.management import call_command
if in_memory_db:
raise Exception('dont use this anymore')
raise Exception("dont use this anymore")
# some commands dont store a long-lived sqlite3 db file on disk.
# in those cases we create a temporary in-memory db and run the migrations
@@ -84,19 +88,22 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
try:
django.setup()
except Exception as e:
is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version'))
is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ("help", "version", "--help", "--version"))
if not is_using_meta_cmd:
# show error message to user only if they're not running a meta command / just trying to get help
STDERR.print()
STDERR.print(Panel(
f'\n[red]{e.__class__.__name__}[/red]: [yellow]{e}[/yellow]\nPlease check your config and [blue]DATA_DIR[/blue] permissions.\n',
title='\n\n[red][X] Error while trying to load database![/red]',
subtitle='[grey53]NO WRITES CAN BE PERFORMED[/grey53]',
expand=False,
style='bold red',
))
STDERR.print(
Panel(
f"\n[red]{e.__class__.__name__}[/red]: [yellow]{e}[/yellow]\nPlease check your config and [blue]DATA_DIR[/blue] permissions.\n",
title="\n\n[red][X] Error while trying to load database![/red]",
subtitle="[grey53]NO WRITES CAN BE PERFORMED[/grey53]",
expand=False,
style="bold red",
),
)
STDERR.print()
import traceback
traceback.print_exc()
return
@@ -104,28 +111,29 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
from archivebox.core.settings_logging import ERROR_LOG as DEFAULT_ERROR_LOG
# log startup message to the error log
error_log = getattr(settings, 'ERROR_LOG', DEFAULT_ERROR_LOG)
with open(error_log, "a", encoding='utf-8') as f:
command = ' '.join(sys.argv)
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
error_log = getattr(settings, "ERROR_LOG", DEFAULT_ERROR_LOG)
with open(error_log, "a", encoding="utf-8") as f:
command = " ".join(sys.argv)
ts = datetime.now(timezone.utc).strftime("%Y-%m-%d__%H:%M:%S")
f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")
if check_db:
# make sure the data dir is owned by a non-root user
if CONSTANTS.DATA_DIR.stat().st_uid == 0:
STDERR.print('[red][X] Error: ArchiveBox DATA_DIR cannot be owned by root![/red]')
STDERR.print(f' {CONSTANTS.DATA_DIR}')
STDERR.print("[red][X] Error: ArchiveBox DATA_DIR cannot be owned by root![/red]")
STDERR.print(f" {CONSTANTS.DATA_DIR}")
STDERR.print()
STDERR.print('[violet]Hint:[/violet] Are you running archivebox in the right folder? (and as a non-root user?)')
STDERR.print(' cd path/to/your/archive/data')
STDERR.print(' archivebox [command]')
STDERR.print("[violet]Hint:[/violet] Are you running archivebox in the right folder? (and as a non-root user?)")
STDERR.print(" cd path/to/your/archive/data")
STDERR.print(" archivebox [command]")
STDERR.print()
raise SystemExit(9)
# Create cache table in DB if needed
try:
from django.core.cache import cache
cache.get('test', None)
cache.get("test", None)
except django.db.utils.OperationalError:
call_command("createcachetable", verbosity=0)
@@ -133,12 +141,14 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
# the sqlite3 whenever we init from scratch to avoid multiple threads
# sharing the same connection by accident
from django.db import connections
for conn in connections.all():
conn.close_if_unusable_or_obsolete()
sql_index_path = CONSTANTS.DATABASE_FILE
assert os.access(sql_index_path, os.F_OK), (
f'No database file {sql_index_path} found in: {CONSTANTS.DATA_DIR} (Are you in an ArchiveBox collection directory?)')
f"No database file {sql_index_path} found in: {CONSTANTS.DATA_DIR} (Are you in an ArchiveBox collection directory?)"
)
# https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
# if settings.DEBUG_LOGFIRE:

View File

@@ -1,6 +1,5 @@
__package__ = "archivebox.config"
from typing import Optional
from pydantic import Field
from archivebox.config.configset import BaseConfigSet
@@ -13,13 +12,14 @@ class LDAPConfig(BaseConfigSet):
Only loads and validates if django-auth-ldap is installed.
These settings integrate with Django's LDAP authentication backend.
"""
toml_section_header: str = "LDAP_CONFIG"
LDAP_ENABLED: bool = Field(default=False)
LDAP_SERVER_URI: Optional[str] = Field(default=None)
LDAP_BIND_DN: Optional[str] = Field(default=None)
LDAP_BIND_PASSWORD: Optional[str] = Field(default=None)
LDAP_USER_BASE: Optional[str] = Field(default=None)
LDAP_SERVER_URI: str | None = Field(default=None)
LDAP_BIND_DN: str | None = Field(default=None)
LDAP_BIND_PASSWORD: str | None = Field(default=None)
LDAP_USER_BASE: str | None = Field(default=None)
LDAP_USER_FILTER: str = Field(default="(uid=%(user)s)")
LDAP_USERNAME_ATTR: str = Field(default="username")
LDAP_FIRSTNAME_ATTR: str = Field(default="givenName")

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.config'
__package__ = "archivebox.config"
import os
import socket
@@ -15,24 +15,25 @@ from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER
#############################################################################################
PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir
DATA_DIR: Path = Path(os.environ.get('DATA_DIR', os.getcwd())).resolve() # archivebox user data dir
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir
DATA_DIR: Path = Path(os.environ.get("DATA_DIR", os.getcwd())).resolve() # archivebox user data dir
ARCHIVE_DIR: Path = DATA_DIR / "archive" # archivebox snapshot data dir
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
IN_DOCKER = os.environ.get("IN_DOCKER", False) in ("1", "true", "True", "TRUE", "yes")
DATABASE_FILE = DATA_DIR / 'index.sqlite3'
DATABASE_FILE = DATA_DIR / "index.sqlite3"
#############################################################################################
def _get_collection_id(DATA_DIR=DATA_DIR, force_create=False) -> str:
collection_id_file = DATA_DIR / '.archivebox_id'
collection_id_file = DATA_DIR / ".archivebox_id"
try:
return collection_id_file.read_text().strip()
except (OSError, FileNotFoundError, PermissionError):
pass
# hash the machine_id + collection dir path + creation time to get a unique collection_id
machine_id = get_machine_id()
collection_path = DATA_DIR.resolve()
@@ -40,55 +41,60 @@ def _get_collection_id(DATA_DIR=DATA_DIR, force_create=False) -> str:
creation_date = DATA_DIR.stat().st_ctime
except Exception:
creation_date = datetime.now().isoformat()
collection_id = hashlib.sha256(f'{machine_id}:{collection_path}@{creation_date}'.encode()).hexdigest()[:8]
collection_id = hashlib.sha256(f"{machine_id}:{collection_path}@{creation_date}".encode()).hexdigest()[:8]
try:
# only persist collection_id file if we already have an index.sqlite3 file present
# otherwise we might be running in a directory that is not a collection, no point creating cruft files
collection_is_active = os.path.isfile(DATABASE_FILE) and os.path.isdir(ARCHIVE_DIR) and os.access(DATA_DIR, os.W_OK)
if collection_is_active or force_create:
collection_id_file.write_text(collection_id)
# if we're running as root right now, make sure the collection_id file is owned by the archivebox user
if IS_ROOT:
with SudoPermission(uid=0):
if ARCHIVEBOX_USER == 0:
os.system(f'chmod 777 "{collection_id_file}"')
else:
else:
os.system(f'chown {ARCHIVEBOX_USER} "{collection_id_file}"')
except (OSError, FileNotFoundError, PermissionError):
pass
return collection_id
@cache
def get_collection_id(DATA_DIR=DATA_DIR) -> str:
"""Get a short, stable, unique ID for the current collection (e.g. abc45678)"""
return _get_collection_id(DATA_DIR=DATA_DIR)
@cache
def get_machine_id() -> str:
"""Get a short, stable, unique ID for the current machine (e.g. abc45678)"""
MACHINE_ID = 'unknown'
MACHINE_ID = "unknown"
try:
import machineid
MACHINE_ID = machineid.hashed_id('archivebox')[:8]
MACHINE_ID = machineid.hashed_id("archivebox")[:8]
except Exception:
try:
import uuid
import hashlib
MACHINE_ID = hashlib.sha256(str(uuid.getnode()).encode()).hexdigest()[:8]
except Exception:
pass
return MACHINE_ID
@cache
def get_machine_type() -> str:
"""Get a short, stable, unique type identifier for the current machine (e.g. linux-x86_64-docker)"""
OS: str = platform.system().lower() # darwin, linux, etc.
ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc.
LIB_DIR_SCOPE: str = f'{ARCH}-{OS}-docker' if IN_DOCKER else f'{ARCH}-{OS}'
OS: str = platform.system().lower() # darwin, linux, etc.
ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc.
LIB_DIR_SCOPE: str = f"{ARCH}-{OS}-docker" if IN_DOCKER else f"{ARCH}-{OS}"
return LIB_DIR_SCOPE
@@ -97,27 +103,28 @@ def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = No
current_uid, current_gid = os.geteuid(), os.getegid()
uid, gid = uid or current_uid, gid or current_gid
test_file = dir_path / '.permissions_test'
test_file = dir_path / ".permissions_test"
try:
with SudoPermission(uid=uid, fallback=fallback):
test_file.exists()
test_file.write_text(f'Checking if PUID={uid} PGID={gid} can write to dir')
test_file.write_text(f"Checking if PUID={uid} PGID={gid} can write to dir")
test_file.unlink()
return True
except (IOError, OSError, PermissionError):
if chown:
except (OSError, PermissionError):
if chown:
# try fixing it using sudo permissions
with SudoPermission(uid=uid, fallback=fallback):
os.system(f'chown {uid}:{gid} "{dir_path}" 2>/dev/null')
return dir_is_writable(dir_path, uid=uid, gid=gid, fallback=fallback, chown=False)
return False
def assert_dir_can_contain_unix_sockets(dir_path: Path) -> bool:
"""Check if a given directory can contain unix sockets (e.g. /tmp/supervisord.sock)"""
from archivebox.misc.logging_util import pretty_path
try:
socket_path = str(dir_path / '.test_socket.sock')
socket_path = str(dir_path / ".test_socket.sock")
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
try:
os.remove(socket_path)
@@ -130,8 +137,8 @@ def assert_dir_can_contain_unix_sockets(dir_path: Path) -> bool:
except OSError:
pass
except Exception as e:
raise Exception(f'ArchiveBox failed to create a test UNIX socket file in {pretty_path(dir_path, color=False)}') from e
raise Exception(f"ArchiveBox failed to create a test UNIX socket file in {pretty_path(dir_path, color=False)}") from e
return True
@@ -143,8 +150,9 @@ def create_and_chown_dir(dir_path: Path) -> None:
def tmp_dir_socket_path_is_short_enough(dir_path: Path) -> bool:
socket_file = dir_path.absolute().resolve() / 'supervisord.sock'
return len(f'file://{socket_file}') <= 96
socket_file = dir_path.absolute().resolve() / "supervisord.sock"
return len(f"file://{socket_file}") <= 96
@cache
def get_or_create_working_tmp_dir(autofix=True, quiet=True):
@@ -154,14 +162,18 @@ def get_or_create_working_tmp_dir(autofix=True, quiet=True):
# try a few potential directories in order of preference
CANDIDATES = [
STORAGE_CONFIG.TMP_DIR, # <user-specified>
CONSTANTS.DEFAULT_TMP_DIR, # ./data/tmp/<machine_id>
Path('/var/run/archivebox') / get_collection_id(), # /var/run/archivebox/abc5d8512
Path('/tmp') / 'archivebox' / get_collection_id(), # /tmp/archivebox/abc5d8512
Path('~/.tmp/archivebox').expanduser() / get_collection_id(), # ~/.tmp/archivebox/abc5d8512
Path(tempfile.gettempdir()) / 'archivebox' / get_collection_id(), # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d8512
Path(tempfile.gettempdir()) / 'archivebox' / get_collection_id()[:4], # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d
Path(tempfile.gettempdir()) / 'abx' / get_collection_id()[:4], # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/abx/abc5
STORAGE_CONFIG.TMP_DIR, # <user-specified>
CONSTANTS.DEFAULT_TMP_DIR, # ./data/tmp/<machine_id>
Path("/var/run/archivebox") / get_collection_id(), # /var/run/archivebox/abc5d8512
Path("/tmp") / "archivebox" / get_collection_id(), # /tmp/archivebox/abc5d8512
Path("~/.tmp/archivebox").expanduser() / get_collection_id(), # ~/.tmp/archivebox/abc5d8512
Path(tempfile.gettempdir())
/ "archivebox"
/ get_collection_id(), # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d8512
Path(tempfile.gettempdir())
/ "archivebox"
/ get_collection_id()[:4], # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d
Path(tempfile.gettempdir()) / "abx" / get_collection_id()[:4], # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/abx/abc5
]
fallback_candidate = None
for candidate in CANDIDATES:
@@ -174,7 +186,12 @@ def get_or_create_working_tmp_dir(autofix=True, quiet=True):
STORAGE_CONFIG.update_in_place(TMP_DIR=candidate)
return candidate
try:
if fallback_candidate is None and candidate.exists() and dir_is_writable(candidate) and tmp_dir_socket_path_is_short_enough(candidate):
if (
fallback_candidate is None
and candidate.exists()
and dir_is_writable(candidate)
and tmp_dir_socket_path_is_short_enough(candidate)
):
fallback_candidate = candidate
except Exception:
pass
@@ -186,25 +203,28 @@ def get_or_create_working_tmp_dir(autofix=True, quiet=True):
if autofix and STORAGE_CONFIG.TMP_DIR != fallback_candidate:
STORAGE_CONFIG.update_in_place(TMP_DIR=fallback_candidate)
return fallback_candidate
if not quiet:
raise OSError(f'ArchiveBox is unable to find a writable TMP_DIR, tried {CANDIDATES}!')
raise OSError(f"ArchiveBox is unable to find a writable TMP_DIR, tried {CANDIDATES}!")
@cache
def get_or_create_working_lib_dir(autofix=True, quiet=False):
from archivebox import CONSTANTS
from archivebox.config.common import STORAGE_CONFIG
from archivebox.misc.checks import check_lib_dir
# try a few potential directories in order of preference
CANDIDATES = [
STORAGE_CONFIG.LIB_DIR, # <user-specified>
CONSTANTS.DEFAULT_LIB_DIR, # ./data/lib/arm64-linux-docker
Path('/usr/local/share/archivebox') / get_collection_id(), # /usr/local/share/archivebox/abc5
*([Path('/opt/homebrew/share/archivebox') / get_collection_id()] if os.path.isfile('/opt/homebrew/bin/archivebox') else []), # /opt/homebrew/share/archivebox/abc5
Path('~/.local/share/archivebox').expanduser() / get_collection_id(), # ~/.local/share/archivebox/abc5
STORAGE_CONFIG.LIB_DIR, # <user-specified>
CONSTANTS.DEFAULT_LIB_DIR, # ./data/lib/arm64-linux-docker
Path("/usr/local/share/archivebox") / get_collection_id(), # /usr/local/share/archivebox/abc5
*(
[Path("/opt/homebrew/share/archivebox") / get_collection_id()] if os.path.isfile("/opt/homebrew/bin/archivebox") else []
), # /opt/homebrew/share/archivebox/abc5
Path("~/.local/share/archivebox").expanduser() / get_collection_id(), # ~/.local/share/archivebox/abc5
]
for candidate in CANDIDATES:
try:
create_and_chown_dir(candidate)
@@ -214,10 +234,9 @@ def get_or_create_working_lib_dir(autofix=True, quiet=False):
if autofix and STORAGE_CONFIG.LIB_DIR != candidate:
STORAGE_CONFIG.update_in_place(LIB_DIR=candidate)
return candidate
if not quiet:
raise OSError(f'ArchiveBox is unable to find a writable LIB_DIR, tried {CANDIDATES}!')
if not quiet:
raise OSError(f"ArchiveBox is unable to find a writable LIB_DIR, tried {CANDIDATES}!")
@cache
@@ -229,57 +248,68 @@ def get_data_locations():
tmp_dir = get_or_create_working_tmp_dir(autofix=True, quiet=True) or STORAGE_CONFIG.TMP_DIR
except Exception:
tmp_dir = STORAGE_CONFIG.TMP_DIR
return benedict({
"DATA_DIR": {
"path": DATA_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK),
"is_mount": os.path.ismount(DATA_DIR.resolve()),
return benedict(
{
"DATA_DIR": {
"path": DATA_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK),
"is_mount": os.path.ismount(DATA_DIR.resolve()),
},
"CONFIG_FILE": {
"path": CONSTANTS.CONFIG_FILE.resolve(),
"enabled": True,
"is_valid": os.path.isfile(CONSTANTS.CONFIG_FILE)
and os.access(CONSTANTS.CONFIG_FILE, os.R_OK)
and os.access(CONSTANTS.CONFIG_FILE, os.W_OK),
},
"SQL_INDEX": {
"path": DATABASE_FILE.resolve(),
"enabled": True,
"is_valid": os.path.isfile(DATABASE_FILE) and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
"is_mount": os.path.ismount(DATABASE_FILE.resolve()),
},
"ARCHIVE_DIR": {
"path": ARCHIVE_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(ARCHIVE_DIR) and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK),
"is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
},
"SOURCES_DIR": {
"path": CONSTANTS.SOURCES_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(CONSTANTS.SOURCES_DIR)
and os.access(CONSTANTS.SOURCES_DIR, os.R_OK)
and os.access(CONSTANTS.SOURCES_DIR, os.W_OK),
},
"PERSONAS_DIR": {
"path": CONSTANTS.PERSONAS_DIR.resolve(),
"enabled": os.path.isdir(CONSTANTS.PERSONAS_DIR),
"is_valid": os.path.isdir(CONSTANTS.PERSONAS_DIR)
and os.access(CONSTANTS.PERSONAS_DIR, os.R_OK)
and os.access(CONSTANTS.PERSONAS_DIR, os.W_OK), # read + write
},
"LOGS_DIR": {
"path": CONSTANTS.LOGS_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(CONSTANTS.LOGS_DIR)
and os.access(CONSTANTS.LOGS_DIR, os.R_OK)
and os.access(CONSTANTS.LOGS_DIR, os.W_OK), # read + write
},
"TMP_DIR": {
"path": tmp_dir.resolve(),
"enabled": True,
"is_valid": os.path.isdir(tmp_dir) and os.access(tmp_dir, os.R_OK) and os.access(tmp_dir, os.W_OK), # read + write
},
# "CACHE_DIR": {
# "path": CACHE_DIR.resolve(),
# "enabled": True,
# "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK), # read + write
# },
},
"CONFIG_FILE": {
"path": CONSTANTS.CONFIG_FILE.resolve(),
"enabled": True,
"is_valid": os.path.isfile(CONSTANTS.CONFIG_FILE) and os.access(CONSTANTS.CONFIG_FILE, os.R_OK) and os.access(CONSTANTS.CONFIG_FILE, os.W_OK),
},
"SQL_INDEX": {
"path": DATABASE_FILE.resolve(),
"enabled": True,
"is_valid": os.path.isfile(DATABASE_FILE) and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
"is_mount": os.path.ismount(DATABASE_FILE.resolve()),
},
"ARCHIVE_DIR": {
"path": ARCHIVE_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(ARCHIVE_DIR) and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK),
"is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
},
"SOURCES_DIR": {
"path": CONSTANTS.SOURCES_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(CONSTANTS.SOURCES_DIR) and os.access(CONSTANTS.SOURCES_DIR, os.R_OK) and os.access(CONSTANTS.SOURCES_DIR, os.W_OK),
},
"PERSONAS_DIR": {
"path": CONSTANTS.PERSONAS_DIR.resolve(),
"enabled": os.path.isdir(CONSTANTS.PERSONAS_DIR),
"is_valid": os.path.isdir(CONSTANTS.PERSONAS_DIR) and os.access(CONSTANTS.PERSONAS_DIR, os.R_OK) and os.access(CONSTANTS.PERSONAS_DIR, os.W_OK), # read + write
},
"LOGS_DIR": {
"path": CONSTANTS.LOGS_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(CONSTANTS.LOGS_DIR) and os.access(CONSTANTS.LOGS_DIR, os.R_OK) and os.access(CONSTANTS.LOGS_DIR, os.W_OK), # read + write
},
'TMP_DIR': {
'path': tmp_dir.resolve(),
'enabled': True,
'is_valid': os.path.isdir(tmp_dir) and os.access(tmp_dir, os.R_OK) and os.access(tmp_dir, os.W_OK), # read + write
},
# "CACHE_DIR": {
# "path": CACHE_DIR.resolve(),
# "enabled": True,
# "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK), # read + write
# },
})
)
@cache
def get_code_locations():
@@ -291,41 +321,45 @@ def get_code_locations():
except Exception:
lib_dir = STORAGE_CONFIG.LIB_DIR
lib_bin_dir = lib_dir / 'bin'
return benedict({
'PACKAGE_DIR': {
'path': (PACKAGE_DIR).resolve(),
'enabled': True,
'is_valid': os.access(PACKAGE_DIR / '__main__.py', os.X_OK), # executable
},
'TEMPLATES_DIR': {
'path': CONSTANTS.TEMPLATES_DIR.resolve(),
'enabled': True,
'is_valid': os.access(CONSTANTS.STATIC_DIR, os.R_OK) and os.access(CONSTANTS.STATIC_DIR, os.X_OK), # read + list
},
'CUSTOM_TEMPLATES_DIR': {
'path': STORAGE_CONFIG.CUSTOM_TEMPLATES_DIR.resolve(),
'enabled': os.path.isdir(STORAGE_CONFIG.CUSTOM_TEMPLATES_DIR),
'is_valid': os.path.isdir(STORAGE_CONFIG.CUSTOM_TEMPLATES_DIR) and os.access(STORAGE_CONFIG.CUSTOM_TEMPLATES_DIR, os.R_OK), # read
},
'USER_PLUGINS_DIR': {
'path': CONSTANTS.USER_PLUGINS_DIR.resolve(),
'enabled': os.path.isdir(CONSTANTS.USER_PLUGINS_DIR),
'is_valid': os.path.isdir(CONSTANTS.USER_PLUGINS_DIR) and os.access(CONSTANTS.USER_PLUGINS_DIR, os.R_OK), # read
},
'LIB_DIR': {
'path': lib_dir.resolve(),
'enabled': True,
'is_valid': os.path.isdir(lib_dir) and os.access(lib_dir, os.R_OK) and os.access(lib_dir, os.W_OK), # read + write
},
'LIB_BIN_DIR': {
'path': lib_bin_dir.resolve(),
'enabled': True,
'is_valid': os.path.isdir(lib_bin_dir) and os.access(lib_bin_dir, os.R_OK) and os.access(lib_bin_dir, os.W_OK), # read + write
},
})
lib_bin_dir = lib_dir / "bin"
return benedict(
{
"PACKAGE_DIR": {
"path": (PACKAGE_DIR).resolve(),
"enabled": True,
"is_valid": os.access(PACKAGE_DIR / "__main__.py", os.X_OK), # executable
},
"TEMPLATES_DIR": {
"path": CONSTANTS.TEMPLATES_DIR.resolve(),
"enabled": True,
"is_valid": os.access(CONSTANTS.STATIC_DIR, os.R_OK) and os.access(CONSTANTS.STATIC_DIR, os.X_OK), # read + list
},
"CUSTOM_TEMPLATES_DIR": {
"path": STORAGE_CONFIG.CUSTOM_TEMPLATES_DIR.resolve(),
"enabled": os.path.isdir(STORAGE_CONFIG.CUSTOM_TEMPLATES_DIR),
"is_valid": os.path.isdir(STORAGE_CONFIG.CUSTOM_TEMPLATES_DIR)
and os.access(STORAGE_CONFIG.CUSTOM_TEMPLATES_DIR, os.R_OK), # read
},
"USER_PLUGINS_DIR": {
"path": CONSTANTS.USER_PLUGINS_DIR.resolve(),
"enabled": os.path.isdir(CONSTANTS.USER_PLUGINS_DIR),
"is_valid": os.path.isdir(CONSTANTS.USER_PLUGINS_DIR) and os.access(CONSTANTS.USER_PLUGINS_DIR, os.R_OK), # read
},
"LIB_DIR": {
"path": lib_dir.resolve(),
"enabled": True,
"is_valid": os.path.isdir(lib_dir) and os.access(lib_dir, os.R_OK) and os.access(lib_dir, os.W_OK), # read + write
},
"LIB_BIN_DIR": {
"path": lib_bin_dir.resolve(),
"enabled": True,
"is_valid": os.path.isdir(lib_bin_dir)
and os.access(lib_bin_dir, os.R_OK)
and os.access(lib_bin_dir, os.W_OK), # read + write
},
},
)
# @cache
@@ -340,9 +374,9 @@ def get_code_locations():
# - ok to have a long path (doesnt contain SOCKETS)
# """
# from .version import detect_installed_version
# HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
# lib_dir = tempfile.gettempdir()
# try:
# if 'SYSTEM_LIB_DIR' in os.environ:
@@ -350,7 +384,7 @@ def get_code_locations():
# else:
# with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
# lib_dir = HOST_DIRS.site_data_path
# # Docker: /usr/local/share/archivebox/0.8.5
# # Ubuntu: /usr/local/share/archivebox/0.8.5
# # macOS: /Library/Application Support/archivebox
@@ -358,16 +392,16 @@ def get_code_locations():
# with SudoPermission(uid=0, fallback=True):
# lib_dir.mkdir(parents=True, exist_ok=True)
# except PermissionError:
# # our user cannot
# # our user cannot
# lib_dir = HOST_DIRS.user_data_path
# lib_dir.mkdir(parents=True, exist_ok=True)
# if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
# if IS_ROOT:
# # make sure lib dir is owned by the archivebox user, not root
# with SudoPermission(uid=0):
# if ARCHIVEBOX_USER == 0:
# # print(f'[yellow]:warning: Waring: Creating SYSTEM_LIB_DIR {lib_dir} with mode 777 so that non-root archivebox users can share it.[/yellow] (caches shared libs used by archivebox for performance)', file=sys.stderr)
# # print(f'[yellow]:warning: Warning: Creating SYSTEM_LIB_DIR {lib_dir} with mode 777 so that non-root archivebox users can share it.[/yellow] (caches shared libs used by archivebox for performance)', file=sys.stderr)
# os.system(f'chmod -R 777 "{lib_dir}"')
# else:
# os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"')
@@ -376,9 +410,9 @@ def get_code_locations():
# except (PermissionError, AssertionError):
# # raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
# print(f'[red]:cross_mark: ERROR: SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
# return lib_dir
# @cache
# def get_TMP_DIR():
# """
@@ -390,9 +424,9 @@ def get_code_locations():
# - must be cleared on every archivebox version upgrade
# """
# from .version import detect_installed_version
# HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
# # print('DATA_DIR OWNED BY:', ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
# # print('RUNNING AS:', self.PUID, self.PGID)
# run_dir = tempfile.gettempdir()
@@ -405,7 +439,7 @@ def get_code_locations():
# if IS_ROOT:
# with SudoPermission(uid=0, fallback=False):
# if ARCHIVEBOX_USER == 0:
# # print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
# # print(f'[yellow]:warning: Warning: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
# os.system(f'chmod -R 777 "{run_dir}"')
# else:
# os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
@@ -413,30 +447,30 @@ def get_code_locations():
# raise PermissionError()
# assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
# return run_dir
# run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
# try:
# assert len(str(run_dir)) + len('/supervisord.sock') < 95
# except AssertionError:
# run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
# assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
# with SudoPermission(uid=0, fallback=True):
# run_dir.mkdir(parents=True, exist_ok=True)
# if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
# if IS_ROOT:
# with SudoPermission(uid=0):
# if ARCHIVEBOX_USER == 0:
# # print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
# # print(f'[yellow]:warning: Warning: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
# os.system(f'chmod -R 777 "{run_dir}"')
# else:
# os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
# else:
# raise PermissionError()
# except (PermissionError, AssertionError):
# # raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
# print(f'[red]:cross_mark: ERROR: SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
# return run_dir

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.config'
__package__ = "archivebox.config"
import os
import pwd
@@ -17,26 +17,26 @@ from contextlib import contextmanager
DATA_DIR = Path(os.getcwd())
try:
DATA_DIR_STAT = DATA_DIR.stat()
DATA_DIR_UID = DATA_DIR_STAT.st_uid
DATA_DIR_GID = DATA_DIR_STAT.st_gid
DATA_DIR_STAT = DATA_DIR.stat()
DATA_DIR_UID = DATA_DIR_STAT.st_uid
DATA_DIR_GID = DATA_DIR_STAT.st_gid
except PermissionError:
DATA_DIR_UID = 0
DATA_DIR_GID = 0
DATA_DIR_UID = 0
DATA_DIR_GID = 0
DEFAULT_PUID = 911
DEFAULT_PGID = 911
RUNNING_AS_UID = os.getuid()
RUNNING_AS_GID = os.getgid()
EUID = os.geteuid()
EGID = os.getegid()
SUDO_UID = int(os.environ.get('SUDO_UID', 0))
SUDO_GID = int(os.environ.get('SUDO_GID', 0))
USER: str = Path('~').expanduser().resolve().name
HOSTNAME: str = cast(str, max([socket.gethostname(), platform.node()], key=len))
DEFAULT_PUID = 911
DEFAULT_PGID = 911
RUNNING_AS_UID = os.getuid()
RUNNING_AS_GID = os.getgid()
EUID = os.geteuid()
EGID = os.getegid()
SUDO_UID = int(os.environ.get("SUDO_UID", 0))
SUDO_GID = int(os.environ.get("SUDO_GID", 0))
USER: str = Path("~").expanduser().resolve().name
HOSTNAME: str = cast(str, max([socket.gethostname(), platform.node()], key=len))
IS_ROOT = RUNNING_AS_UID == 0
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
IN_DOCKER = os.environ.get("IN_DOCKER", False) in ("1", "true", "True", "TRUE", "yes")
# IN_DOCKER_COMPOSE = # TODO: figure out a way to detect if running in docker compose
@@ -47,74 +47,79 @@ if RUNNING_AS_UID == 0:
# if we are running as root it's really hard to figure out what the correct archivebox user should be
# as a last resort instead of setting DATA_DIR ownership to 0:0 (which breaks it for non-root users)
# check if 911:911 archivebox user exists on host system, and use it instead of 0
if pwd.getpwuid(DEFAULT_PUID).pw_name == 'archivebox':
if pwd.getpwuid(DEFAULT_PUID).pw_name == "archivebox":
FALLBACK_UID = DEFAULT_PUID
FALLBACK_GID = DEFAULT_PGID
except Exception:
pass
os.environ.setdefault('PUID', str(DATA_DIR_UID or EUID or RUNNING_AS_UID or FALLBACK_UID))
os.environ.setdefault('PGID', str(DATA_DIR_GID or EGID or RUNNING_AS_GID or FALLBACK_GID))
os.environ.setdefault("PUID", str(DATA_DIR_UID or EUID or RUNNING_AS_UID or FALLBACK_UID))
os.environ.setdefault("PGID", str(DATA_DIR_GID or EGID or RUNNING_AS_GID or FALLBACK_GID))
ARCHIVEBOX_USER = int(os.environ['PUID'])
ARCHIVEBOX_GROUP = int(os.environ['PGID'])
ARCHIVEBOX_USER = int(os.environ["PUID"])
ARCHIVEBOX_GROUP = int(os.environ["PGID"])
if not USER:
try:
# alternative method 1 to get username
USER = pwd.getpwuid(ARCHIVEBOX_USER).pw_name
except Exception:
pass
if not USER:
try:
# alternative method 2 to get username
import getpass
USER = getpass.getuser()
except Exception:
pass
if not USER:
try:
# alternative method 3 to get username
USER = os.getlogin() or 'archivebox'
USER = os.getlogin() or "archivebox"
except Exception:
USER = 'archivebox'
USER = "archivebox"
ARCHIVEBOX_USER_EXISTS = False
try:
pwd.getpwuid(ARCHIVEBOX_USER)
ARCHIVEBOX_USER_EXISTS = True
except Exception:
ARCHIVEBOX_USER_EXISTS = False
#############################################################################################
def drop_privileges():
"""If running as root, drop privileges to the user that owns the data dir (or PUID)"""
# always run archivebox as the user that owns the data dir, never as root
if os.getuid() == 0:
# drop permissions to the user that owns the data dir / provided PUID
if os.geteuid() != ARCHIVEBOX_USER and ARCHIVEBOX_USER != 0 and ARCHIVEBOX_USER_EXISTS:
# drop our effective UID to the archivebox user's UID
os.seteuid(ARCHIVEBOX_USER)
# update environment variables so that subprocesses dont try to write to /root
pw_record = pwd.getpwuid(ARCHIVEBOX_USER)
os.environ['HOME'] = pw_record.pw_dir
os.environ['LOGNAME'] = pw_record.pw_name
os.environ['USER'] = pw_record.pw_name
os.environ["HOME"] = pw_record.pw_dir
os.environ["LOGNAME"] = pw_record.pw_name
os.environ["USER"] = pw_record.pw_name
if ARCHIVEBOX_USER == 0 or not ARCHIVEBOX_USER_EXISTS:
print('[yellow]:warning: Running as [red]root[/red] is not recommended and may make your [blue]DATA_DIR[/blue] inaccessible to other users on your system.[/yellow] (use [blue]sudo[/blue] instead)', file=sys.stderr)
print(
"[yellow]:warning: Running as [red]root[/red] is not recommended and may make your [blue]DATA_DIR[/blue] inaccessible to other users on your system.[/yellow] (use [blue]sudo[/blue] instead)",
file=sys.stderr,
)
@contextmanager
def SudoPermission(uid=0, fallback=False):
"""Attempt to run code with sudo permissions for a given user (or root)"""
if os.geteuid() == uid:
# no need to change effective UID, we are already that user
yield
@@ -125,7 +130,7 @@ def SudoPermission(uid=0, fallback=False):
os.seteuid(uid)
except PermissionError as err:
if not fallback:
raise PermissionError(f'Not enough permissions to run code as uid={uid}, please retry with sudo') from err
raise PermissionError(f"Not enough permissions to run code as uid={uid}, please retry with sudo") from err
try:
# yield back to the caller so they can run code inside context as root
yield
@@ -135,4 +140,4 @@ def SudoPermission(uid=0, fallback=False):
os.seteuid(ARCHIVEBOX_USER)
except PermissionError as err:
if not fallback:
raise PermissionError(f'Failed to revert uid={uid} back to {ARCHIVEBOX_USER} after running code with sudo') from err
raise PermissionError(f"Failed to revert uid={uid} back to {ARCHIVEBOX_USER} after running code with sudo") from err

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.config'
__package__ = "archivebox.config"
import os
import importlib.metadata
@@ -6,71 +6,71 @@ import importlib.metadata
from pathlib import Path
from functools import cache
from datetime import datetime
from typing import Optional
#############################################################################################
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
IN_DOCKER = os.environ.get("IN_DOCKER", False) in ("1", "true", "True", "TRUE", "yes")
PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir
DATA_DIR: Path = Path(os.environ.get('DATA_DIR', os.getcwd())).resolve() # archivebox user data dir
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir
DATA_DIR: Path = Path(os.environ.get("DATA_DIR", os.getcwd())).resolve() # archivebox user data dir
ARCHIVE_DIR: Path = DATA_DIR / "archive" # archivebox snapshot data dir
#############################################################################################
@cache
def detect_installed_version(PACKAGE_DIR: Path=PACKAGE_DIR):
def detect_installed_version(PACKAGE_DIR: Path = PACKAGE_DIR):
"""Autodetect the installed archivebox version by using pip package metadata, pyproject.toml file, or package.json file"""
try:
# if in production install, use pip-installed package metadata
return importlib.metadata.version('archivebox').strip()
return importlib.metadata.version("archivebox").strip()
except importlib.metadata.PackageNotFoundError:
pass
try:
# if in dev Git repo dir, use pyproject.toml file
pyproject_config = (PACKAGE_DIR.parent / 'pyproject.toml').read_text().split('\n')
pyproject_config = (PACKAGE_DIR.parent / "pyproject.toml").read_text().split("\n")
for line in pyproject_config:
if line.startswith('version = '):
return line.split(' = ', 1)[-1].strip('"').strip()
if line.startswith("version = "):
return line.split(" = ", 1)[-1].strip('"').strip()
except FileNotFoundError:
# building docs, pyproject.toml is not available
pass
# raise Exception('Failed to detect installed archivebox version!')
return 'dev'
return "dev"
@cache
def get_COMMIT_HASH() -> Optional[str]:
def get_COMMIT_HASH() -> str | None:
try:
git_dir = PACKAGE_DIR.parent / '.git'
ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
git_dir = PACKAGE_DIR.parent / ".git"
ref = (git_dir / "HEAD").read_text().strip().split(" ")[-1]
commit_hash = git_dir.joinpath(ref).read_text().strip()
return commit_hash
except Exception:
pass
try:
return list((PACKAGE_DIR.parent / '.git/refs/heads/').glob('*'))[0].read_text().strip()
return list((PACKAGE_DIR.parent / ".git/refs/heads/").glob("*"))[0].read_text().strip()
except Exception:
pass
return None
@cache
def get_BUILD_TIME() -> str:
if IN_DOCKER:
try:
# if we're in the archivebox official docker image, /VERSION.txt will contain the build time
docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
docker_build_end_time = Path("/VERSION.txt").read_text().rsplit("BUILD_END_TIME=")[-1].split("\n", 1)[0]
return docker_build_end_time
except Exception:
pass
src_last_modified_unix_timestamp = (PACKAGE_DIR / 'README.md').stat().st_mtime
return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')
src_last_modified_unix_timestamp = (PACKAGE_DIR / "README.md").stat().st_mtime
return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime("%Y-%m-%d %H:%M:%S %s")
# def get_versions_available_on_github(config):
@@ -78,14 +78,14 @@ def get_BUILD_TIME() -> str:
# returns a dictionary containing the ArchiveBox GitHub release info for
# the recommended upgrade version and the currently installed version
# """
# # we only want to perform the (relatively expensive) check for new versions
# # when its most relevant, e.g. when the user runs a long-running command
# subcommand_run_by_user = sys.argv[3] if len(sys.argv) > 3 else 'help'
# long_running_commands = ('add', 'schedule', 'update', 'status', 'server')
# if subcommand_run_by_user not in long_running_commands:
# return None
# github_releases_api = "https://api.github.com/repos/ArchiveBox/ArchiveBox/releases"
# response = requests.get(github_releases_api)
# if response.status_code != 200:
@@ -104,7 +104,7 @@ def get_BUILD_TIME() -> str:
# break
# current_version = current_version or all_releases[-1]
# # recommended version is whatever comes after current_version in the release list
# # (perhaps too conservative to only recommend upgrading one version at a time, but it's safest)
# try:

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.config'
__package__ = "archivebox.config"
import html
import json
@@ -6,7 +6,8 @@ import os
import inspect
import re
from pathlib import Path
from typing import Any, Callable, Dict
from typing import Any
from collections.abc import Callable
from urllib.parse import quote, urlencode
from django.http import HttpRequest
from django.utils import timezone
@@ -21,30 +22,48 @@ from archivebox.misc.util import parse_date
from archivebox.machine.models import Binary
ABX_PLUGINS_DOCS_BASE_URL = 'https://archivebox.github.io/abx-plugins/'
ABX_PLUGINS_GITHUB_BASE_URL = 'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/'
LIVE_CONFIG_BASE_URL = '/admin/environment/config/'
ENVIRONMENT_BINARIES_BASE_URL = '/admin/environment/binaries/'
INSTALLED_BINARIES_BASE_URL = '/admin/machine/binary/'
ABX_PLUGINS_DOCS_BASE_URL = "https://archivebox.github.io/abx-plugins/"
ABX_PLUGINS_GITHUB_BASE_URL = "https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/"
LIVE_CONFIG_BASE_URL = "/admin/environment/config/"
ENVIRONMENT_BINARIES_BASE_URL = "/admin/environment/binaries/"
INSTALLED_BINARIES_BASE_URL = "/admin/machine/binary/"
# Common binaries to check for
KNOWN_BINARIES = [
'wget', 'curl', 'chromium', 'chrome', 'google-chrome', 'google-chrome-stable',
'node', 'npm', 'npx', 'yt-dlp',
'git', 'singlefile', 'readability-extractor', 'mercury-parser',
'python3', 'python', 'bash', 'zsh',
'ffmpeg', 'ripgrep', 'rg', 'sonic', 'archivebox',
"wget",
"curl",
"chromium",
"chrome",
"google-chrome",
"google-chrome-stable",
"node",
"npm",
"npx",
"yt-dlp",
"git",
"singlefile",
"readability-extractor",
"mercury-parser",
"python3",
"python",
"bash",
"zsh",
"ffmpeg",
"ripgrep",
"rg",
"sonic",
"archivebox",
]
CANONICAL_BINARY_ALIASES = {
'youtube-dl': 'yt-dlp',
'ytdlp': 'yt-dlp',
"youtube-dl": "yt-dlp",
"ytdlp": "yt-dlp",
}
def is_superuser(request: HttpRequest) -> bool:
return bool(getattr(request.user, 'is_superuser', False))
return bool(getattr(request.user, "is_superuser", False))
def format_parsed_datetime(value: object) -> str:
@@ -55,9 +74,9 @@ def format_parsed_datetime(value: object) -> str:
JSON_TOKEN_RE = re.compile(
r'(?P<key>"(?:\\u[a-fA-F0-9]{4}|\\[^u]|[^\\"])*")(?=\s*:)'
r'|(?P<string>"(?:\\u[a-fA-F0-9]{4}|\\[^u]|[^\\"])*")'
r'|(?P<boolean>\btrue\b|\bfalse\b)'
r'|(?P<null>\bnull\b)'
r'|(?P<number>-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)'
r"|(?P<boolean>\btrue\b|\bfalse\b)"
r"|(?P<null>\bnull\b)"
r"|(?P<number>-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)",
)
@@ -65,13 +84,14 @@ def render_code_block(text: str, *, highlighted: bool = False) -> str:
code = html.escape(text, quote=False)
if highlighted:
def _wrap_token(match: re.Match[str]) -> str:
styles = {
'key': 'color: #0550ae;',
'string': 'color: #0a7f45;',
'boolean': 'color: #8250df; font-weight: 600;',
'null': 'color: #6e7781; font-style: italic;',
'number': 'color: #b35900;',
"key": "color: #0550ae;",
"string": "color: #0a7f45;",
"boolean": "color: #8250df; font-weight: 600;",
"null": "color: #6e7781; font-style: italic;",
"number": "color: #b35900;",
}
token_type = next(name for name, value in match.groupdict().items() if value is not None)
return f'<span style="{styles[token_type]}">{match.group(0)}</span>'
@@ -82,9 +102,9 @@ def render_code_block(text: str, *, highlighted: bool = False) -> str:
'<pre style="max-height: 600px; overflow: auto; background: #f6f8fa; '
'border: 1px solid #d0d7de; border-radius: 6px; padding: 12px; margin: 0;">'
'<code style="font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, '
'\'Liberation Mono\', monospace; white-space: pre; line-height: 1.5;">'
f'{code}'
'</code></pre>'
"'Liberation Mono', monospace; white-space: pre; line-height: 1.5;\">"
f"{code}"
"</code></pre>"
)
@@ -93,34 +113,35 @@ def render_highlighted_json_block(value: Any) -> str:
def get_plugin_docs_url(plugin_name: str) -> str:
return f'{ABX_PLUGINS_DOCS_BASE_URL}#{plugin_name}'
return f"{ABX_PLUGINS_DOCS_BASE_URL}#{plugin_name}"
def get_plugin_hook_source_url(plugin_name: str, hook_name: str) -> str:
return f'{ABX_PLUGINS_GITHUB_BASE_URL}{quote(plugin_name)}/{quote(hook_name)}'
return f"{ABX_PLUGINS_GITHUB_BASE_URL}{quote(plugin_name)}/{quote(hook_name)}"
def get_live_config_url(key: str) -> str:
return f'{LIVE_CONFIG_BASE_URL}{quote(key)}/'
return f"{LIVE_CONFIG_BASE_URL}{quote(key)}/"
def get_environment_binary_url(name: str) -> str:
return f'{ENVIRONMENT_BINARIES_BASE_URL}{quote(name)}/'
return f"{ENVIRONMENT_BINARIES_BASE_URL}{quote(name)}/"
def get_installed_binary_change_url(name: str, binary: Any) -> str | None:
binary_id = getattr(binary, 'id', None)
binary_id = getattr(binary, "id", None)
if not binary_id:
return None
base_url = getattr(binary, 'admin_change_url', None) or f'{INSTALLED_BINARIES_BASE_URL}{binary_id}/change/'
changelist_filters = urlencode({'q': canonical_binary_name(name)})
return f'{base_url}?{urlencode({"_changelist_filters": changelist_filters})}'
base_url = getattr(binary, "admin_change_url", None) or f"{INSTALLED_BINARIES_BASE_URL}{binary_id}/change/"
changelist_filters = urlencode({"q": canonical_binary_name(name)})
return f"{base_url}?{urlencode({'_changelist_filters': changelist_filters})}"
def get_machine_admin_url() -> str | None:
try:
from archivebox.machine.models import Machine
return Machine.current().admin_change_url
except Exception:
return None
@@ -130,12 +151,14 @@ def render_code_tag_list(values: list[str]) -> str:
if not values:
return '<span style="color: #6e7781;">(none)</span>'
tags = ''.join(
str(format_html(
'<code style="display: inline-block; margin: 0 6px 6px 0; padding: 2px 6px; '
'background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 999px;">{}</code>',
value,
))
tags = "".join(
str(
format_html(
'<code style="display: inline-block; margin: 0 6px 6px 0; padding: 2px 6px; '
'background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 999px;">{}</code>',
value,
),
)
for value in values
)
return f'<div style="display: flex; flex-wrap: wrap;">{tags}</div>'
@@ -143,22 +166,21 @@ def render_code_tag_list(values: list[str]) -> str:
def render_plugin_metadata_html(config: dict[str, Any]) -> str:
rows = (
('Title', config.get('title') or '(none)'),
('Description', config.get('description') or '(none)'),
('Required Plugins', mark_safe(render_link_tag_list(config.get('required_plugins') or [], get_plugin_docs_url))),
('Required Binaries', mark_safe(render_link_tag_list(config.get('required_binaries') or [], get_environment_binary_url))),
('Output MIME Types', mark_safe(render_code_tag_list(config.get('output_mimetypes') or []))),
("Title", config.get("title") or "(none)"),
("Description", config.get("description") or "(none)"),
("Required Plugins", mark_safe(render_link_tag_list(config.get("required_plugins") or [], get_plugin_docs_url))),
("Required Binaries", mark_safe(render_link_tag_list(config.get("required_binaries") or [], get_environment_binary_url))),
("Output MIME Types", mark_safe(render_code_tag_list(config.get("output_mimetypes") or []))),
)
rendered_rows = ''.join(
str(format_html(
'<div style="margin: 0 0 14px 0;">'
'<div style="font-weight: 600; margin-bottom: 4px;">{}</div>'
'<div>{}</div>'
'</div>',
label,
value,
))
rendered_rows = "".join(
str(
format_html(
'<div style="margin: 0 0 14px 0;"><div style="font-weight: 600; margin-bottom: 4px;">{}</div><div>{}</div></div>',
label,
value,
),
)
for label, value in rows
)
return f'<div style="margin: 4px 0 0 0;">{rendered_rows}</div>'
@@ -171,20 +193,28 @@ def render_link_tag_list(values: list[str], url_resolver: Callable[[str], str] |
tags = []
for value in values:
if url_resolver is None:
tags.append(str(format_html(
'<code style="display: inline-block; margin: 0 6px 6px 0; padding: 2px 6px; '
'background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 999px;">{}</code>',
value,
)))
tags.append(
str(
format_html(
'<code style="display: inline-block; margin: 0 6px 6px 0; padding: 2px 6px; '
'background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 999px;">{}</code>',
value,
),
),
)
else:
tags.append(str(format_html(
'<a href="{}" style="text-decoration: none;">'
'<code style="display: inline-block; margin: 0 6px 6px 0; padding: 2px 6px; '
'background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 999px;">{}</code>'
'</a>',
url_resolver(value),
value,
)))
tags.append(
str(
format_html(
'<a href="{}" style="text-decoration: none;">'
'<code style="display: inline-block; margin: 0 6px 6px 0; padding: 2px 6px; '
'background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 999px;">{}</code>'
"</a>",
url_resolver(value),
value,
),
),
)
return f'<div style="display: flex; flex-wrap: wrap;">{"".join(tags)}</div>'
@@ -195,21 +225,21 @@ def render_property_links(prop_name: str, prop_info: dict[str, Any], machine_adm
if machine_admin_url:
links.append(str(format_html('<a href="{}">Edit override</a>', machine_admin_url)))
fallback = prop_info.get('x-fallback')
fallback = prop_info.get("x-fallback")
if isinstance(fallback, str) and fallback:
links.append(str(format_html('<a href="{}">Fallback: <code>{}</code></a>', get_live_config_url(fallback), fallback)))
aliases = prop_info.get('x-aliases') or []
aliases = prop_info.get("x-aliases") or []
if isinstance(aliases, list):
for alias in aliases:
if isinstance(alias, str) and alias:
links.append(str(format_html('<a href="{}">Alias: <code>{}</code></a>', get_live_config_url(alias), alias)))
default = prop_info.get('default')
if prop_name.endswith('_BINARY') and isinstance(default, str) and default:
default = prop_info.get("default")
if prop_name.endswith("_BINARY") and isinstance(default, str) and default:
links.append(str(format_html('<a href="{}">Binary: <code>{}</code></a>', get_environment_binary_url(default), default)))
return ' &nbsp; '.join(links)
return " &nbsp; ".join(links)
def render_config_properties_html(properties: dict[str, Any], machine_admin_url: str | None) -> str:
@@ -221,42 +251,48 @@ def render_config_properties_html(properties: dict[str, Any], machine_admin_url:
header_links.insert(0, str(format_html('<a href="{}">Machine Config Editor</a>', machine_admin_url)))
cards = [
f'<div style="margin: 0 0 16px 0;">{" &nbsp; | &nbsp; ".join(header_links)}</div>'
f'<div style="margin: 0 0 16px 0;">{" &nbsp; | &nbsp; ".join(header_links)}</div>',
]
for prop_name, prop_info in properties.items():
prop_type = prop_info.get('type', 'unknown')
prop_type = prop_info.get("type", "unknown")
if isinstance(prop_type, list):
prop_type = ' | '.join(str(type_name) for type_name in prop_type)
prop_desc = prop_info.get('description', '')
prop_type = " | ".join(str(type_name) for type_name in prop_type)
prop_desc = prop_info.get("description", "")
default_html = ''
if 'default' in prop_info:
default_html = str(format_html(
'<div style="margin-top: 6px;"><b>Default:</b> <code>{}</code></div>',
prop_info['default'],
))
default_html = ""
if "default" in prop_info:
default_html = str(
format_html(
'<div style="margin-top: 6px;"><b>Default:</b> <code>{}</code></div>',
prop_info["default"],
),
)
description_html = prop_desc or mark_safe('<span style="color: #6e7781;">(no description)</span>')
cards.append(str(format_html(
'<div style="margin: 0 0 14px 0; padding: 12px; background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 6px;">'
'<div style="margin-bottom: 6px;">'
'<a href="{}" style="font-weight: 600;"><code>{}</code></a>'
' <span style="color: #6e7781;">({})</span>'
'</div>'
'<div style="margin-bottom: 6px;">{}</div>'
'<div style="font-size: 0.95em;">{}</div>'
'{}'
'</div>',
get_live_config_url(prop_name),
prop_name,
prop_type,
description_html,
mark_safe(render_property_links(prop_name, prop_info, machine_admin_url)),
mark_safe(default_html),
)))
cards.append(
str(
format_html(
'<div style="margin: 0 0 14px 0; padding: 12px; background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 6px;">'
'<div style="margin-bottom: 6px;">'
'<a href="{}" style="font-weight: 600;"><code>{}</code></a>'
' <span style="color: #6e7781;">({})</span>'
"</div>"
'<div style="margin-bottom: 6px;">{}</div>'
'<div style="font-size: 0.95em;">{}</div>'
"{}"
"</div>",
get_live_config_url(prop_name),
prop_name,
prop_type,
description_html,
mark_safe(render_property_links(prop_name, prop_info, machine_admin_url)),
mark_safe(default_html),
),
),
)
return ''.join(cards)
return "".join(cards)
def render_hook_links_html(plugin_name: str, hooks: list[str], source: str) -> str:
@@ -265,40 +301,47 @@ def render_hook_links_html(plugin_name: str, hooks: list[str], source: str) -> s
items = []
for hook_name in hooks:
if source == 'builtin':
items.append(str(format_html(
'<div style="margin: 0 0 8px 0;">'
'<a href="{}" target="_blank" rel="noopener noreferrer"><code>{}</code></a>'
'</div>',
get_plugin_hook_source_url(plugin_name, hook_name),
hook_name,
)))
if source == "builtin":
items.append(
str(
format_html(
'<div style="margin: 0 0 8px 0;"><a href="{}" target="_blank" rel="noopener noreferrer"><code>{}</code></a></div>',
get_plugin_hook_source_url(plugin_name, hook_name),
hook_name,
),
),
)
else:
items.append(str(format_html(
'<div style="margin: 0 0 8px 0;"><code>{}</code></div>',
hook_name,
)))
return ''.join(items)
items.append(
str(
format_html(
'<div style="margin: 0 0 8px 0;"><code>{}</code></div>',
hook_name,
),
),
)
return "".join(items)
def render_binary_detail_description(name: str, merged: dict[str, Any], db_binary: Any) -> str:
installed_binary_url = get_installed_binary_change_url(name, db_binary)
if installed_binary_url:
return str(format_html(
'<code>{}</code><br/>'
'<a href="{}">View Installed Binary Record</a>',
merged['abspath'],
installed_binary_url,
))
return str(
format_html(
'<code>{}</code><br/><a href="{}">View Installed Binary Record</a>',
merged["abspath"],
installed_binary_url,
),
)
return str(format_html('<code>{}</code>', merged['abspath']))
return str(format_html("<code>{}</code>", merged["abspath"]))
def obj_to_yaml(obj: Any, indent: int = 0) -> str:
indent_str = " " * indent
if indent == 0:
indent_str = '\n' # put extra newline between top-level entries
indent_str = "\n" # put extra newline between top-level entries
if isinstance(obj, dict):
if not obj:
@@ -326,11 +369,11 @@ def obj_to_yaml(obj: Any, indent: int = 0) -> str:
return f" {str(obj)}"
elif callable(obj):
source = '\n'.join(
'' if 'def ' in line else line
for line in inspect.getsource(obj).split('\n')
if line.strip()
).split('lambda: ')[-1].rstrip(',')
source = (
"\n".join("" if "def " in line else line for line in inspect.getsource(obj).split("\n") if line.strip())
.split("lambda: ")[-1]
.rstrip(",")
)
return f" {indent_str} " + source.replace("\n", f"\n{indent_str} ")
else:
@@ -350,67 +393,64 @@ def _binary_sort_key(binary: Binary) -> tuple[int, int, int, Any]:
)
def get_db_binaries_by_name() -> Dict[str, Binary]:
grouped: Dict[str, list[Binary]] = {}
def get_db_binaries_by_name() -> dict[str, Binary]:
grouped: dict[str, list[Binary]] = {}
for binary in Binary.objects.all():
grouped.setdefault(canonical_binary_name(binary.name), []).append(binary)
return {
name: max(records, key=_binary_sort_key)
for name, records in grouped.items()
}
return {name: max(records, key=_binary_sort_key) for name, records in grouped.items()}
def serialize_binary_record(name: str, binary: Binary | None) -> Dict[str, Any]:
def serialize_binary_record(name: str, binary: Binary | None) -> dict[str, Any]:
is_installed = bool(binary and binary.status == Binary.StatusChoices.INSTALLED)
return {
'name': canonical_binary_name(name),
'version': str(getattr(binary, 'version', '') or ''),
'binprovider': str(getattr(binary, 'binprovider', '') or ''),
'abspath': str(getattr(binary, 'abspath', '') or ''),
'sha256': str(getattr(binary, 'sha256', '') or ''),
'status': str(getattr(binary, 'status', '') or ''),
'is_available': is_installed and bool(getattr(binary, 'abspath', '') or ''),
"name": canonical_binary_name(name),
"version": str(getattr(binary, "version", "") or ""),
"binprovider": str(getattr(binary, "binprovider", "") or ""),
"abspath": str(getattr(binary, "abspath", "") or ""),
"sha256": str(getattr(binary, "sha256", "") or ""),
"status": str(getattr(binary, "status", "") or ""),
"is_available": is_installed and bool(getattr(binary, "abspath", "") or ""),
}
def get_filesystem_plugins() -> Dict[str, Dict[str, Any]]:
def get_filesystem_plugins() -> dict[str, dict[str, Any]]:
"""Discover plugins from filesystem directories."""
import json
from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR
plugins = {}
for base_dir, source in [(BUILTIN_PLUGINS_DIR, 'builtin'), (USER_PLUGINS_DIR, 'user')]:
for base_dir, source in [(BUILTIN_PLUGINS_DIR, "builtin"), (USER_PLUGINS_DIR, "user")]:
if not base_dir.exists():
continue
for plugin_dir in base_dir.iterdir():
if plugin_dir.is_dir() and not plugin_dir.name.startswith('_'):
plugin_id = f'{source}.{plugin_dir.name}'
if plugin_dir.is_dir() and not plugin_dir.name.startswith("_"):
plugin_id = f"{source}.{plugin_dir.name}"
# Find hook scripts
hooks = []
for ext in ('sh', 'py', 'js'):
hooks.extend(plugin_dir.glob(f'on_*__*.{ext}'))
for ext in ("sh", "py", "js"):
hooks.extend(plugin_dir.glob(f"on_*__*.{ext}"))
# Load config.json if it exists
config_file = plugin_dir / 'config.json'
config_file = plugin_dir / "config.json"
config_data = None
if config_file.exists():
try:
with open(config_file, 'r') as f:
with open(config_file) as f:
config_data = json.load(f)
except (json.JSONDecodeError, IOError):
except (json.JSONDecodeError, OSError):
config_data = None
plugins[plugin_id] = {
'id': plugin_id,
'name': plugin_dir.name,
'path': str(plugin_dir),
'source': source,
'hooks': [str(h.name) for h in hooks],
'config': config_data,
"id": plugin_id,
"name": plugin_dir.name,
"path": str(plugin_dir),
"source": source,
"hooks": [str(h.name) for h in hooks],
"config": config_data,
}
return plugins
@@ -418,7 +458,7 @@ def get_filesystem_plugins() -> Dict[str, Dict[str, Any]]:
@render_with_table_view
def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
assert is_superuser(request), 'Must be a superuser to view configuration settings.'
assert is_superuser(request), "Must be a superuser to view configuration settings."
rows = {
"Binary Name": [],
@@ -433,16 +473,16 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
for name in all_binary_names:
merged = serialize_binary_record(name, db_binaries.get(name))
rows['Binary Name'].append(ItemLink(name, key=name))
rows["Binary Name"].append(ItemLink(name, key=name))
if merged['is_available']:
rows['Found Version'].append(f"{merged['version']}" if merged['version'] else '✅ found')
rows['Provided By'].append(merged['binprovider'] or '-')
rows['Found Abspath'].append(merged['abspath'] or '-')
if merged["is_available"]:
rows["Found Version"].append(f"{merged['version']}" if merged["version"] else "✅ found")
rows["Provided By"].append(merged["binprovider"] or "-")
rows["Found Abspath"].append(merged["abspath"] or "-")
else:
rows['Found Version'].append('❌ missing')
rows['Provided By'].append('-')
rows['Found Abspath'].append('-')
rows["Found Version"].append("❌ missing")
rows["Provided By"].append("-")
rows["Found Abspath"].append("-")
return TableContext(
title="Binaries",
@@ -452,23 +492,23 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
@render_with_item_view
def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert is_superuser(request), 'Must be a superuser to view configuration settings.'
assert is_superuser(request), "Must be a superuser to view configuration settings."
key = canonical_binary_name(key)
db_binary = get_db_binaries_by_name().get(key)
merged = serialize_binary_record(key, db_binary)
if merged['is_available']:
if merged["is_available"]:
section: SectionData = {
"name": key,
"description": mark_safe(render_binary_detail_description(key, merged, db_binary)),
"fields": {
'name': key,
'binprovider': merged['binprovider'] or '-',
'abspath': merged['abspath'] or 'not found',
'version': merged['version'] or 'unknown',
'sha256': merged['sha256'],
'status': merged['status'],
"name": key,
"binprovider": merged["binprovider"] or "-",
"abspath": merged["abspath"] or "not found",
"version": merged["version"] or "unknown",
"sha256": merged["sha256"],
"status": merged["status"],
},
"help_texts": {},
}
@@ -482,11 +522,11 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
"name": key,
"description": "No persisted Binary record found",
"fields": {
'name': key,
'binprovider': merged['binprovider'] or 'not recorded',
'abspath': merged['abspath'] or 'not recorded',
'version': merged['version'] or 'N/A',
'status': merged['status'] or 'unrecorded',
"name": key,
"binprovider": merged["binprovider"] or "not recorded",
"abspath": merged["abspath"] or "not recorded",
"version": merged["version"] or "N/A",
"status": merged["status"] or "unrecorded",
},
"help_texts": {},
}
@@ -499,7 +539,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
@render_with_table_view
def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
assert is_superuser(request), 'Must be a superuser to view configuration settings.'
assert is_superuser(request), "Must be a superuser to view configuration settings."
rows = {
"Name": [],
@@ -512,26 +552,26 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
plugins = get_filesystem_plugins()
for plugin_id, plugin in plugins.items():
rows['Name'].append(ItemLink(plugin['name'], key=plugin_id))
rows['Source'].append(plugin['source'])
rows['Path'].append(format_html('<code>{}</code>', plugin['path']))
rows['Hooks'].append(', '.join(plugin['hooks']) or '(none)')
rows["Name"].append(ItemLink(plugin["name"], key=plugin_id))
rows["Source"].append(plugin["source"])
rows["Path"].append(format_html("<code>{}</code>", plugin["path"]))
rows["Hooks"].append(", ".join(plugin["hooks"]) or "(none)")
# Show config status
if plugin.get('config'):
config_properties = plugin['config'].get('properties', {})
if plugin.get("config"):
config_properties = plugin["config"].get("properties", {})
config_count = len(config_properties)
rows['Config'].append(f'{config_count} properties' if config_count > 0 else '✅ present')
rows["Config"].append(f"{config_count} properties" if config_count > 0 else "✅ present")
else:
rows['Config'].append('❌ none')
rows["Config"].append("❌ none")
if not plugins:
# Show a helpful message when no plugins found
rows['Name'].append('(no plugins found)')
rows['Source'].append('-')
rows['Path'].append(mark_safe('<code>abx_plugins/plugins/</code> or <code>data/custom_plugins/</code>'))
rows['Hooks'].append('-')
rows['Config'].append('-')
rows["Name"].append("(no plugins found)")
rows["Source"].append("-")
rows["Path"].append(mark_safe("<code>abx_plugins/plugins/</code> or <code>data/custom_plugins/</code>"))
rows["Hooks"].append("-")
rows["Config"].append("-")
return TableContext(
title="Installed plugins",
@@ -541,7 +581,7 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
@render_with_item_view
def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert is_superuser(request), 'Must be a superuser to view configuration settings.'
assert is_superuser(request), "Must be a superuser to view configuration settings."
plugins = get_filesystem_plugins()
@@ -549,65 +589,75 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
if not plugin:
return ItemContext(
slug=key,
title=f'Plugin not found: {key}',
title=f"Plugin not found: {key}",
data=[],
)
# Base fields that all plugins have
docs_url = get_plugin_docs_url(plugin['name'])
docs_url = get_plugin_docs_url(plugin["name"])
machine_admin_url = get_machine_admin_url()
fields = {
"id": plugin['id'],
"name": plugin['name'],
"source": plugin['source'],
"id": plugin["id"],
"name": plugin["name"],
"source": plugin["source"],
}
sections: list[SectionData] = [{
"name": plugin['name'],
"description": format_html(
'<code>{}</code><br/><a href="{}" target="_blank" rel="noopener noreferrer">ABX Plugin Docs</a>',
plugin['path'],
docs_url,
),
"fields": fields,
"help_texts": {},
}]
if plugin['hooks']:
sections.append({
"name": "Hooks",
"description": mark_safe(render_hook_links_html(plugin['name'], plugin['hooks'], plugin['source'])),
"fields": {},
sections: list[SectionData] = [
{
"name": plugin["name"],
"description": format_html(
'<code>{}</code><br/><a href="{}" target="_blank" rel="noopener noreferrer">ABX Plugin Docs</a>',
plugin["path"],
docs_url,
),
"fields": fields,
"help_texts": {},
})
},
]
if plugin.get('config'):
sections.append({
"name": "Plugin Metadata",
"description": mark_safe(render_plugin_metadata_html(plugin['config'])),
"fields": {},
"help_texts": {},
})
sections.append({
"name": "config.json",
"description": mark_safe(render_highlighted_json_block(plugin['config'])),
"fields": {},
"help_texts": {},
})
config_properties = plugin['config'].get('properties', {})
if config_properties:
sections.append({
"name": "Config Properties",
"description": mark_safe(render_config_properties_html(config_properties, machine_admin_url)),
if plugin["hooks"]:
sections.append(
{
"name": "Hooks",
"description": mark_safe(render_hook_links_html(plugin["name"], plugin["hooks"], plugin["source"])),
"fields": {},
"help_texts": {},
})
},
)
if plugin.get("config"):
sections.append(
{
"name": "Plugin Metadata",
"description": mark_safe(render_plugin_metadata_html(plugin["config"])),
"fields": {},
"help_texts": {},
},
)
sections.append(
{
"name": "config.json",
"description": mark_safe(render_highlighted_json_block(plugin["config"])),
"fields": {},
"help_texts": {},
},
)
config_properties = plugin["config"].get("properties", {})
if config_properties:
sections.append(
{
"name": "Config Properties",
"description": mark_safe(render_config_properties_html(config_properties, machine_admin_url)),
"fields": {},
"help_texts": {},
},
)
return ItemContext(
slug=key,
title=plugin['name'],
title=plugin["name"],
data=sections,
)
@@ -648,20 +698,20 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
all_config[config_name] = config_data
# Add top row for supervisord process manager
rows["Name"].append(ItemLink('supervisord', key='supervisord'))
rows["Name"].append(ItemLink("supervisord", key="supervisord"))
supervisor_state = supervisor.getState()
rows["State"].append(str(supervisor_state.get('statename') if isinstance(supervisor_state, dict) else ''))
rows['PID'].append(str(supervisor.getPID()))
rows["Started"].append('-')
rows["Command"].append('supervisord --configuration=tmp/supervisord.conf')
rows["State"].append(str(supervisor_state.get("statename") if isinstance(supervisor_state, dict) else ""))
rows["PID"].append(str(supervisor.getPID()))
rows["Started"].append("-")
rows["Command"].append("supervisord --configuration=tmp/supervisord.conf")
rows["Logfile"].append(
format_html(
'<a href="/admin/environment/logs/{}/">{}</a>',
'supervisord',
'logs/supervisord.log',
)
"supervisord",
"logs/supervisord.log",
),
)
rows['Exit Status'].append('0')
rows["Exit Status"].append("0")
# Add a row for each worker process managed by supervisord
process_items = supervisor.getAllProcessInfo()
@@ -678,15 +728,15 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
rows["Name"].append(ItemLink(proc_name, key=proc_name))
rows["State"].append(str(proc_data.get("statename") or ""))
rows['PID'].append(proc_description.replace('pid ', ''))
rows["PID"].append(proc_description.replace("pid ", ""))
rows["Started"].append(format_parsed_datetime(proc_start))
rows["Command"].append(str(proc_config.get("command") or ""))
rows["Logfile"].append(
format_html(
'<a href="/admin/environment/logs/{}/">{}</a>',
proc_logfile.split("/")[-1].split('.')[0],
proc_logfile.split("/")[-1].split(".")[0],
proc_logfile,
)
),
)
rows["Exit Status"].append(str(proc_data.get("exitstatus") or ""))
@@ -708,8 +758,8 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
supervisor = get_existing_supervisord_process()
if supervisor is None:
return ItemContext(
slug='none',
title='error: No running supervisord process.',
slug="none",
title="error: No running supervisord process.",
data=[],
)
@@ -721,7 +771,7 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
if isinstance(config_data, dict):
all_config.append(config_data)
if key == 'supervisord':
if key == "supervisord":
relevant_config = CONFIG_FILE.read_text()
relevant_logs = str(supervisor.readLog(0, 10_000_000))
start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0]
@@ -729,7 +779,7 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
uptime = str(timezone.now() - start_dt).split(".")[0] if start_dt else ""
supervisor_state = supervisor.getState()
proc: Dict[str, object] = {
proc: dict[str, object] = {
"name": "supervisord",
"pid": supervisor.getPID(),
"statename": str(supervisor_state.get("statename") if isinstance(supervisor_state, dict) else ""),
@@ -737,12 +787,12 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
"stop": None,
"exitstatus": "",
"stdout_logfile": "logs/supervisord.log",
"description": f'pid 000, uptime {uptime}',
"description": f"pid 000, uptime {uptime}",
}
else:
worker_data = get_worker(supervisor, key)
proc = worker_data if isinstance(worker_data, dict) else {}
relevant_config = next((config for config in all_config if config.get('name') == key), {})
relevant_config = next((config for config in all_config if config.get("name") == key), {})
log_result = supervisor.tailProcessStdoutLog(key, 0, 10_000_000)
relevant_logs = str(log_result[0] if isinstance(log_result, tuple) else log_result)
@@ -775,7 +825,6 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
assert is_superuser(request), "Must be a superuser to view configuration settings."
log_files: list[Path] = []
for logfile in sorted(CONSTANTS.LOGS_DIR.glob("*.log"), key=os.path.getmtime)[::-1]:
if isinstance(logfile, Path):
@@ -793,14 +842,14 @@ def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
st = logfile.stat()
rows["Name"].append(ItemLink("logs" + str(logfile).rsplit("/logs", 1)[-1], key=logfile.name))
rows["Last Updated"].append(format_parsed_datetime(st.st_mtime))
rows["Size"].append(f'{st.st_size//1000} kb')
rows["Size"].append(f"{st.st_size // 1000} kb")
with open(logfile, 'rb') as f:
with open(logfile, "rb") as f:
try:
f.seek(-1024, os.SEEK_END)
except OSError:
f.seek(0)
last_lines = f.read().decode('utf-8', errors='replace').split("\n")
last_lines = f.read().decode("utf-8", errors="replace").split("\n")
non_empty_lines = [line for line in last_lines if line.strip()]
rows["Most Recent Lines"].append(non_empty_lines[-1])
@@ -814,7 +863,7 @@ def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert is_superuser(request), "Must be a superuser to view configuration settings."
log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]
log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob("*.log") if key in logfile.name][0]
log_text = log_file.read_text()
log_stat = log_file.stat()
@@ -824,7 +873,7 @@ def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
"description": key,
"fields": {
"Path": str(log_file),
"Size": f"{log_stat.st_size//1000} kb",
"Size": f"{log_stat.st_size // 1000} kb",
"Last Updated": format_parsed_datetime(log_stat.st_mtime),
"Tail": "\n".join(log_text[-10_000:].split("\n")[-20:]),
"Full Log": log_text,

View File

@@ -1,10 +1,11 @@
__package__ = 'archivebox.core'
__package__ = "archivebox.core"
__order__ = 100
def register_admin(admin_site):
"""Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) with the admin site"""
from archivebox.core.admin import register_admin as do_register
do_register(admin_site)
@@ -17,11 +18,12 @@ def get_CONFIG():
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
)
return {
'SHELL_CONFIG': SHELL_CONFIG,
'STORAGE_CONFIG': STORAGE_CONFIG,
'GENERAL_CONFIG': GENERAL_CONFIG,
'SERVER_CONFIG': SERVER_CONFIG,
'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
"SHELL_CONFIG": SHELL_CONFIG,
"STORAGE_CONFIG": STORAGE_CONFIG,
"GENERAL_CONFIG": GENERAL_CONFIG,
"SERVER_CONFIG": SERVER_CONFIG,
"ARCHIVING_CONFIG": ARCHIVING_CONFIG,
"SEARCHBACKEND_CONFIG": SEARCH_BACKEND_CONFIG,
}

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.core'
__package__ = "archivebox.core"
from django.contrib.auth import get_user_model

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.core'
__package__ = "archivebox.core"
import html
import json
@@ -21,57 +21,45 @@ from django.utils.text import smart_split
from archivebox.config import DATA_DIR
from archivebox.config.common import SERVER_CONFIG
from archivebox.misc.paginators import AccelleratedPaginator
from archivebox.misc.paginators import AcceleratedPaginator
from archivebox.base_models.admin import BaseModelAdmin
from archivebox.hooks import get_plugin_icon
from archivebox.core.host_utils import build_snapshot_url
from archivebox.core.widgets import InlineTagEditorWidget
from archivebox.core.views import LIVE_PLUGIN_BASE_URL
from archivebox.machine.env_utils import env_to_shell_exports
from archivebox.core.models import ArchiveResult, Snapshot
def _stringify_env_value(value) -> str:
if value is None:
return ''
if isinstance(value, str):
return value
return json.dumps(value, separators=(',', ':'))
def _quote_shell_string(value: str) -> str:
return "'" + str(value).replace("'", "'\"'\"'") + "'"
def _get_replay_source_url(result: ArchiveResult) -> str:
process_env = getattr(getattr(result, 'process', None), 'env', None) or {}
return str(process_env.get('SOURCE_URL') or result.snapshot.url or '')
process_env = getattr(getattr(result, "process", None), "env", None) or {}
return str(process_env.get("SOURCE_URL") or result.snapshot.url or "")
def build_abx_dl_display_command(result: ArchiveResult) -> str:
source_url = _get_replay_source_url(result)
plugin_name = str(result.plugin or '').strip()
plugin_name = str(result.plugin or "").strip()
if not plugin_name and not source_url:
return 'abx-dl'
return "abx-dl"
if not source_url:
return f'abx-dl --plugins={plugin_name}'
return f'abx-dl --plugins={plugin_name} {_quote_shell_string(source_url)}'
return f"abx-dl --plugins={plugin_name}"
return f"abx-dl --plugins={plugin_name} {_quote_shell_string(source_url)}"
def build_abx_dl_replay_command(result: ArchiveResult) -> str:
display_command = build_abx_dl_display_command(result)
process = getattr(result, 'process', None)
env = getattr(process, 'env', None) or {}
env_items = ' '.join(
f'{key}={shlex.quote(_stringify_env_value(value))}'
for key, value in sorted(env.items())
if value is not None
)
process = getattr(result, "process", None)
env_items = env_to_shell_exports(getattr(process, "env", None) or {})
snapshot_dir = shlex.quote(str(result.snapshot_dir))
if env_items:
return f'cd {snapshot_dir}; env {env_items} {display_command}'
return f'cd {snapshot_dir}; {display_command}'
return f"cd {snapshot_dir}; env {env_items} {display_command}"
return f"cd {snapshot_dir}; {display_command}"
def get_plugin_admin_url(plugin_name: str) -> str:
@@ -81,50 +69,87 @@ def get_plugin_admin_url(plugin_name: str) -> str:
if plugin_dir:
builtin_root = BUILTIN_PLUGINS_DIR.resolve()
if plugin_dir.is_relative_to(builtin_root):
return f'{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/'
return f"{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/"
user_root = USER_PLUGINS_DIR.resolve()
if plugin_dir.is_relative_to(user_root):
return f'{LIVE_PLUGIN_BASE_URL}user.{quote(plugin_name)}/'
return f"{LIVE_PLUGIN_BASE_URL}user.{quote(plugin_name)}/"
return f'{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/'
return f"{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/"
def render_archiveresults_list(archiveresults_qs, limit=50):
"""Render a nice inline list view of archive results with status, plugin, output, and actions."""
results = list(archiveresults_qs.order_by('plugin').select_related('snapshot')[:limit])
result_ids = list(archiveresults_qs.order_by("plugin").values_list("pk", flat=True)[:limit])
if not result_ids:
return mark_safe('<div style="color: #64748b; font-style: italic; padding: 16px 0;">No Archive Results yet...</div>')
results_by_id = {
result.pk: result
for result in ArchiveResult.objects.filter(pk__in=result_ids).select_related("snapshot", "process", "process__machine")
}
results = [results_by_id[result_id] for result_id in result_ids if result_id in results_by_id]
if not results:
return mark_safe('<div style="color: #64748b; font-style: italic; padding: 16px 0;">No Archive Results yet...</div>')
# Status colors
status_colors = {
'succeeded': ('#166534', '#dcfce7'), # green
'failed': ('#991b1b', '#fee2e2'), # red
'queued': ('#6b7280', '#f3f4f6'), # gray
'started': ('#92400e', '#fef3c7'), # amber
'backoff': ('#92400e', '#fef3c7'),
'skipped': ('#475569', '#f1f5f9'),
'noresults': ('#475569', '#f1f5f9'),
"succeeded": ("#166534", "#dcfce7"), # green
"failed": ("#991b1b", "#fee2e2"), # red
"queued": ("#6b7280", "#f3f4f6"), # gray
"started": ("#92400e", "#fef3c7"), # amber
"backoff": ("#92400e", "#fef3c7"),
"skipped": ("#475569", "#f1f5f9"),
"noresults": ("#475569", "#f1f5f9"),
}
rows = []
for idx, result in enumerate(results):
status = result.status or 'queued'
color, bg = status_colors.get(status, ('#6b7280', '#f3f4f6'))
status = result.status or "queued"
color, bg = status_colors.get(status, ("#6b7280", "#f3f4f6"))
output_files = result.output_files or {}
if isinstance(output_files, dict):
output_file_count = len(output_files)
elif isinstance(output_files, (list, tuple, set)):
output_file_count = len(output_files)
elif isinstance(output_files, str):
try:
parsed = json.loads(output_files)
output_file_count = len(parsed) if isinstance(parsed, (dict, list, tuple, set)) else 0
except Exception:
output_file_count = 0
else:
output_file_count = 0
# Get plugin icon
icon = get_plugin_icon(result.plugin)
# Format timestamp
end_time = result.end_ts.strftime('%Y-%m-%d %H:%M:%S') if result.end_ts else '-'
end_time = result.end_ts.strftime("%Y-%m-%d %H:%M:%S") if result.end_ts else "-"
process_display = "-"
if result.process_id and result.process:
process_display = f'''
<a href="{reverse("admin:machine_process_change", args=[result.process_id])}"
style="color: #2563eb; text-decoration: none; font-family: ui-monospace, monospace; font-size: 12px;"
title="View process">{result.process.pid or "-"}</a>
'''
machine_display = "-"
if result.process_id and result.process and result.process.machine_id:
machine_display = f'''
<a href="{reverse("admin:machine_machine_change", args=[result.process.machine_id])}"
style="color: #2563eb; text-decoration: none; font-size: 12px;"
title="View machine">{result.process.machine.hostname}</a>
'''
# Truncate output for display
full_output = result.output_str or '-'
full_output = result.output_str or "-"
output_display = full_output[:60]
if len(full_output) > 60:
output_display += '...'
output_display += "..."
display_cmd = build_abx_dl_display_command(result)
replay_cmd = build_abx_dl_replay_command(result)
@@ -132,23 +157,23 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
cmd_attr = html.escape(replay_cmd, quote=True)
# Build output link - use embed_path() which checks output_files first
embed_path = result.embed_path() if hasattr(result, 'embed_path') else None
snapshot_id = str(getattr(result, 'snapshot_id', ''))
if embed_path and result.status == 'succeeded':
embed_path = result.embed_path() if hasattr(result, "embed_path") else None
snapshot_id = str(getattr(result, "snapshot_id", ""))
if embed_path and result.status == "succeeded":
output_link = build_snapshot_url(snapshot_id, embed_path)
else:
output_link = build_snapshot_url(snapshot_id, '')
output_link = build_snapshot_url(snapshot_id, "")
# Get version - try cmd_version field
version = result.cmd_version if result.cmd_version else '-'
version = result.cmd_version if result.cmd_version else "-"
# Unique ID for this row's expandable output
row_id = f'output_{idx}_{str(result.id)[:8]}'
row_id = f"output_{idx}_{str(result.id)[:8]}"
rows.append(f'''
<tr style="border-bottom: 1px solid #f1f5f9; transition: background 0.15s;" onmouseover="this.style.background='#f8fafc'" onmouseout="this.style.background='transparent'">
<td style="padding: 10px 12px; white-space: nowrap;">
<a href="{reverse('admin:core_archiveresult_change', args=[result.id])}"
<a href="{reverse("admin:core_archiveresult_change", args=[result.id])}"
style="color: #2563eb; text-decoration: none; font-family: ui-monospace, monospace; font-size: 11px;"
title="View/edit archive result">
<code>{str(result.id)[-8:]}</code>
@@ -178,9 +203,18 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
{output_display}
</span>
</td>
<td style="padding: 10px 12px; white-space: nowrap; color: #64748b; font-size: 12px; text-align: right;">
{output_file_count}
</td>
<td style="padding: 10px 12px; white-space: nowrap; color: #64748b; font-size: 12px;">
{end_time}
</td>
<td style="padding: 10px 12px; white-space: nowrap;">
{process_display}
</td>
<td style="padding: 10px 12px; white-space: nowrap;">
{machine_display}
</td>
<td style="padding: 10px 12px; white-space: nowrap; font-family: ui-monospace, monospace; font-size: 11px; color: #64748b;">
{version}
</td>
@@ -189,14 +223,14 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
<a href="{output_link}" target="_blank"
style="padding: 4px 8px; background: #f1f5f9; border-radius: 4px; color: #475569; text-decoration: none; font-size: 11px;"
title="View output">📄</a>
<a href="{reverse('admin:core_archiveresult_change', args=[result.id])}"
<a href="{reverse("admin:core_archiveresult_change", args=[result.id])}"
style="padding: 4px 8px; background: #f1f5f9; border-radius: 4px; color: #475569; text-decoration: none; font-size: 11px;"
title="Edit">✏️</a>
</div>
</td>
</tr>
<tr style="border-bottom: 1px solid #e2e8f0;">
<td colspan="8" style="padding: 0 12px 10px 12px;">
<td colspan="11" style="padding: 0 12px 10px 12px;">
<details id="{row_id}" style="margin: 0;">
<summary style="cursor: pointer; font-size: 11px; color: #94a3b8; user-select: none;">
Details &amp; Output
@@ -205,7 +239,7 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
<div style="font-size: 11px; color: #64748b; margin-bottom: 8px;">
<span style="margin-right: 16px;"><b>ID:</b> <code>{str(result.id)}</code></span>
<span style="margin-right: 16px;"><b>Version:</b> <code>{version}</code></span>
<span style="margin-right: 16px;"><b>PWD:</b> <code>{result.pwd or '-'}</code></span>
<span style="margin-right: 16px;"><b>PWD:</b> <code>{result.pwd or "-"}</code></span>
</div>
<div style="font-size: 11px; color: #64748b; margin-bottom: 8px;">
<b>Output:</b>
@@ -230,19 +264,19 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
''')
total_count = archiveresults_qs.count()
footer = ''
footer = ""
if total_count > limit:
footer = f'''
footer = f"""
<tr>
<td colspan="8" style="padding: 12px; text-align: center; color: #64748b; font-size: 13px; background: #f8fafc;">
<td colspan="11" style="padding: 12px; text-align: center; color: #64748b; font-size: 13px; background: #f8fafc;">
Showing {limit} of {total_count} results &nbsp;
<a href="/admin/core/archiveresult/?snapshot__id__exact={results[0].snapshot_id if results else ''}"
<a href="/admin/core/archiveresult/?snapshot__id__exact={results[0].snapshot_id if results else ""}"
style="color: #2563eb;">View all →</a>
</td>
</tr>
'''
"""
return mark_safe(f'''
return mark_safe(f"""
<div style="border: 1px solid #e2e8f0; border-radius: 8px; overflow: hidden; background: #fff; width: 100%;">
<table style="width: 100%; border-collapse: collapse; font-size: 14px;">
<thead>
@@ -252,86 +286,92 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; width: 32px;"></th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Plugin</th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Output</th>
<th style="padding: 10px 12px; text-align: right; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Files</th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Completed</th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Process</th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Machine</th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Version</th>
<th style="padding: 10px 8px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Actions</th>
</tr>
</thead>
<tbody>
{''.join(rows)}
{"".join(rows)}
{footer}
</tbody>
</table>
</div>
''')
""")
class ArchiveResultInline(admin.TabularInline):
name = 'Archive Results Log'
name = "Archive Results Log"
model = ArchiveResult
parent_model = Snapshot
# fk_name = 'snapshot'
extra = 0
sort_fields = ('end_ts', 'plugin', 'output_str', 'status', 'cmd_version')
readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
fields = ('start_ts', 'end_ts', *readonly_fields, 'plugin', 'cmd', 'cmd_version', 'pwd', 'status', 'output_str')
sort_fields = ("end_ts", "plugin", "output_str", "status", "cmd_version")
readonly_fields = ("id", "result_id", "completed", "command", "version")
fields = ("start_ts", "end_ts", *readonly_fields, "plugin", "cmd", "cmd_version", "pwd", "status", "output_str")
# exclude = ('id',)
ordering = ('end_ts',)
ordering = ("end_ts",)
show_change_link = True
# # classes = ['collapse']
def get_parent_object_from_request(self, request):
resolved = resolve(request.path_info)
try:
return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
return self.parent_model.objects.get(pk=resolved.kwargs["object_id"])
except (self.parent_model.DoesNotExist, ValidationError):
return None
@admin.display(
description='Completed',
ordering='end_ts',
description="Completed",
ordering="end_ts",
)
def completed(self, obj):
return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime("%Y-%m-%d %H:%M:%S"))
def result_id(self, obj):
return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), str(obj.id)[:8])
return format_html(
'<a href="{}"><code style="font-size: 10px">[{}]</code></a>',
reverse("admin:core_archiveresult_change", args=(obj.id,)),
str(obj.id)[:8],
)
def command(self, obj):
return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
return format_html("<small><code>{}</code></small>", " ".join(obj.cmd or []))
def version(self, obj):
return format_html('<small><code>{}</code></small>', obj.cmd_version or '-')
return format_html("<small><code>{}</code></small>", obj.cmd_version or "-")
def get_formset(self, request, obj=None, **kwargs):
formset = super().get_formset(request, obj, **kwargs)
snapshot = self.get_parent_object_from_request(request)
form_class = getattr(formset, 'form', None)
base_fields = getattr(form_class, 'base_fields', {})
snapshot_output_dir = str(snapshot.output_dir) if snapshot else ''
form_class = getattr(formset, "form", None)
base_fields = getattr(form_class, "base_fields", {})
snapshot_output_dir = str(snapshot.output_dir) if snapshot else ""
# import ipdb; ipdb.set_trace()
# formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget()
# default values for new entries
base_fields['status'].initial = 'succeeded'
base_fields['start_ts'].initial = timezone.now()
base_fields['end_ts'].initial = timezone.now()
base_fields['cmd_version'].initial = '-'
base_fields['pwd'].initial = snapshot_output_dir
base_fields['cmd'].initial = '["-"]'
base_fields['output_str'].initial = 'Manually recorded cmd output...'
base_fields["status"].initial = "succeeded"
base_fields["start_ts"].initial = timezone.now()
base_fields["end_ts"].initial = timezone.now()
base_fields["cmd_version"].initial = "-"
base_fields["pwd"].initial = snapshot_output_dir
base_fields["cmd"].initial = '["-"]'
base_fields["output_str"].initial = "Manually recorded cmd output..."
if obj is not None:
# hidden values for existing entries and new entries
base_fields['start_ts'].widget = base_fields['start_ts'].hidden_widget()
base_fields['end_ts'].widget = base_fields['end_ts'].hidden_widget()
base_fields['cmd'].widget = base_fields['cmd'].hidden_widget()
base_fields['pwd'].widget = base_fields['pwd'].hidden_widget()
base_fields['cmd_version'].widget = base_fields['cmd_version'].hidden_widget()
base_fields["start_ts"].widget = base_fields["start_ts"].hidden_widget()
base_fields["end_ts"].widget = base_fields["end_ts"].hidden_widget()
base_fields["cmd"].widget = base_fields["cmd"].hidden_widget()
base_fields["pwd"].widget = base_fields["pwd"].hidden_widget()
base_fields["cmd_version"].widget = base_fields["cmd_version"].hidden_widget()
return formset
def get_readonly_fields(self, request, obj=None):
if obj is not None:
return self.readonly_fields
@@ -339,62 +379,122 @@ class ArchiveResultInline(admin.TabularInline):
return []
class ArchiveResultAdmin(BaseModelAdmin):
list_display = ('details_link', 'created_at', 'snapshot_info', 'tags_inline', 'status_badge', 'plugin_with_icon', 'process_link', 'machine_link', 'cmd_str', 'output_str_display')
list_display = (
"details_link",
"zip_link",
"created_at",
"snapshot_info",
"tags_inline",
"status_badge",
"plugin_with_icon",
"process_link",
"machine_link",
"cmd_str",
"output_str_display",
)
list_display_links = None
sort_fields = ('id', 'created_at', 'plugin', 'status')
readonly_fields = ('cmd', 'cmd_version', 'pwd', 'cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon', 'process_link')
search_fields = ()
autocomplete_fields = ['snapshot']
sort_fields = ("id", "created_at", "plugin", "status")
readonly_fields = (
"admin_actions",
"cmd",
"cmd_version",
"pwd",
"cmd_str",
"snapshot_info",
"tags_str",
"created_at",
"modified_at",
"output_summary",
"plugin_with_icon",
"process_link",
)
search_fields = (
"snapshot__id",
"snapshot__url",
"snapshot__tags__name",
"snapshot__crawl_id",
"plugin",
"hook_name",
"output_str",
"output_json",
"process__cmd",
)
autocomplete_fields = ["snapshot"]
fieldsets = (
('Snapshot', {
'fields': ('snapshot', 'snapshot_info', 'tags_str'),
'classes': ('card', 'wide'),
}),
('Plugin', {
'fields': ('plugin_with_icon', 'process_link', 'status'),
'classes': ('card',),
}),
('Timing', {
'fields': ('start_ts', 'end_ts', 'created_at', 'modified_at'),
'classes': ('card',),
}),
('Command', {
'fields': ('cmd', 'cmd_str', 'cmd_version', 'pwd'),
'classes': ('card',),
}),
('Output', {
'fields': ('output_str', 'output_json', 'output_files', 'output_size', 'output_mimetypes', 'output_summary'),
'classes': ('card', 'wide'),
}),
(
"Actions",
{
"fields": ("admin_actions",),
"classes": ("card", "wide"),
},
),
(
"Snapshot",
{
"fields": ("snapshot", "snapshot_info", "tags_str"),
"classes": ("card", "wide"),
},
),
(
"Plugin",
{
"fields": ("plugin_with_icon", "process_link", "status"),
"classes": ("card",),
},
),
(
"Timing",
{
"fields": ("start_ts", "end_ts", "created_at", "modified_at"),
"classes": ("card",),
},
),
(
"Command",
{
"fields": ("cmd", "cmd_str", "cmd_version", "pwd"),
"classes": ("card",),
},
),
(
"Output",
{
"fields": ("output_str", "output_json", "output_files", "output_size", "output_mimetypes", "output_summary"),
"classes": ("card", "wide"),
},
),
)
list_filter = ('status', 'plugin', 'start_ts')
ordering = ['-start_ts']
list_filter = ("status", "plugin", "start_ts")
ordering = ["-start_ts"]
list_per_page = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
paginator = AccelleratedPaginator
paginator = AcceleratedPaginator
save_on_top = True
actions = ['delete_selected']
actions = ["delete_selected"]
class Meta:
verbose_name = 'Archive Result'
verbose_name_plural = 'Archive Results'
verbose_name = "Archive Result"
verbose_name_plural = "Archive Results"
def change_view(self, request, object_id, form_url="", extra_context=None):
self.request = request
return super().change_view(request, object_id, form_url, extra_context)
def changelist_view(self, request, extra_context=None):
self.request = request
return super().changelist_view(request, extra_context)
def get_queryset(self, request):
return (
super()
.get_queryset(request)
.select_related('snapshot', 'process')
.prefetch_related('snapshot__tags')
.annotate(snapshot_first_tag=Min('snapshot__tags__name'))
.select_related("snapshot", "process")
.prefetch_related("snapshot__tags")
.annotate(snapshot_first_tag=Min("snapshot__tags__name"))
)
def get_search_results(self, request, queryset, search_term):
@@ -402,15 +502,14 @@ class ArchiveResultAdmin(BaseModelAdmin):
return queryset, False
queryset = queryset.annotate(
snapshot_id_text=Cast('snapshot__id', output_field=TextField()),
snapshot_crawl_id_text=Cast('snapshot__crawl_id', output_field=TextField()),
output_json_text=Cast('output_json', output_field=TextField()),
cmd_text=Cast('process__cmd', output_field=TextField()),
snapshot_id_text=Cast("snapshot__id", output_field=TextField()),
snapshot_crawl_id_text=Cast("snapshot__crawl_id", output_field=TextField()),
output_json_text=Cast("output_json", output_field=TextField()),
cmd_text=Cast("process__cmd", output_field=TextField()),
)
search_bits = [
bit[1:-1] if len(bit) >= 2 and bit[0] == bit[-1] and bit[0] in {'"', "'"} else bit
for bit in smart_split(search_term)
bit[1:-1] if len(bit) >= 2 and bit[0] == bit[-1] and bit[0] in {'"', "'"} else bit for bit in smart_split(search_term)
]
search_bits = [bit.strip() for bit in search_bits if bit.strip()]
if not search_bits:
@@ -427,22 +526,44 @@ class ArchiveResultAdmin(BaseModelAdmin):
| Q(hook_name__icontains=bit)
| Q(output_str__icontains=bit)
| Q(output_json_text__icontains=bit)
| Q(cmd_text__icontains=bit)
| Q(cmd_text__icontains=bit),
)
return queryset.filter(reduce(and_, filters)).distinct(), True
@admin.display(description='Details', ordering='id')
def get_snapshot_view_url(self, result: ArchiveResult) -> str:
return build_snapshot_url(str(result.snapshot_id), request=getattr(self, "request", None))
def get_output_view_url(self, result: ArchiveResult) -> str:
output_path = result.embed_path() if hasattr(result, "embed_path") else None
if not output_path:
output_path = result.plugin or ""
return build_snapshot_url(str(result.snapshot_id), output_path, request=getattr(self, "request", None))
def get_output_files_url(self, result: ArchiveResult) -> str:
return f"{build_snapshot_url(str(result.snapshot_id), result.plugin, request=getattr(self, 'request', None))}/?files=1"
def get_output_zip_url(self, result: ArchiveResult) -> str:
return f"{self.get_output_files_url(result)}&download=zip"
@admin.display(description="Details", ordering="id")
def details_link(self, result):
return format_html(
'<a href="{}"><code>{}</code></a>',
reverse('admin:core_archiveresult_change', args=[result.id]),
reverse("admin:core_archiveresult_change", args=[result.id]),
str(result.id)[-8:],
)
@admin.display(description="Zip")
def zip_link(self, result):
return format_html(
'<a href="{}" class="archivebox-zip-button" data-loading-mode="spinner-only" onclick="return window.archiveboxHandleZipClick(this, event);" style="display:inline-flex; align-items:center; justify-content:center; gap:4px; width:48px; min-width:48px; height:24px; padding:0; box-sizing:border-box; border-radius:999px; border:1px solid #bfdbfe; background:#eff6ff; color:#1d4ed8; font-size:11px; font-weight:600; line-height:1; text-decoration:none;"><span class="archivebox-zip-spinner" aria-hidden="true"></span><span class="archivebox-zip-label">⬇ ZIP</span></a>',
self.get_output_zip_url(result),
)
@admin.display(
description='Snapshot',
ordering='snapshot__url',
description="Snapshot",
ordering="snapshot__url",
)
def snapshot_info(self, result):
snapshot_id = str(result.snapshot_id)
@@ -450,29 +571,28 @@ class ArchiveResultAdmin(BaseModelAdmin):
'<a href="{}"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
build_snapshot_url(snapshot_id, "index.html"),
snapshot_id[:8],
result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
result.snapshot.bookmarked_at.strftime("%Y-%m-%d %H:%M"),
result.snapshot.url[:128],
)
@admin.display(
description='Snapshot Tags'
description="Snapshot Tags",
)
def tags_str(self, result):
return result.snapshot.tags_str()
@admin.display(description='Tags', ordering='snapshot_first_tag')
@admin.display(description="Tags", ordering="snapshot_first_tag")
def tags_inline(self, result):
widget = InlineTagEditorWidget(snapshot_id=str(result.snapshot_id), editable=False)
tags_html = widget.render(
name=f'tags_{result.snapshot_id}',
name=f"tags_{result.snapshot_id}",
value=result.snapshot.tags.all(),
attrs={'id': f'tags_{result.snapshot_id}'},
attrs={"id": f"tags_{result.snapshot_id}"},
snapshot_id=str(result.snapshot_id),
)
return mark_safe(f'<span class="tags-inline-editor">{tags_html}</span>')
@admin.display(description='Status', ordering='status')
@admin.display(description="Status", ordering="status")
def status_badge(self, result):
status = result.status or ArchiveResult.StatusChoices.QUEUED
return format_html(
@@ -482,7 +602,7 @@ class ArchiveResultAdmin(BaseModelAdmin):
result.get_status_display() or status,
)
@admin.display(description='Plugin', ordering='plugin')
@admin.display(description="Plugin", ordering="plugin")
def plugin_with_icon(self, result):
icon = get_plugin_icon(result.plugin)
return format_html(
@@ -494,36 +614,36 @@ class ArchiveResultAdmin(BaseModelAdmin):
result.plugin,
)
@admin.display(description='Process', ordering='process__pid')
@admin.display(description="Process", ordering="process__pid")
def process_link(self, result):
if not result.process_id:
return '-'
process_label = result.process.pid if result.process and result.process.pid else '-'
return "-"
process_label = result.process.pid if result.process and result.process.pid else "-"
return format_html(
'<a href="{}"><code>{}</code></a>',
reverse('admin:machine_process_change', args=[result.process_id]),
reverse("admin:machine_process_change", args=[result.process_id]),
process_label,
)
@admin.display(description='Machine', ordering='process__machine__hostname')
@admin.display(description="Machine", ordering="process__machine__hostname")
def machine_link(self, result):
if not result.process_id or not result.process or not result.process.machine_id:
return '-'
return "-"
machine = result.process.machine
return format_html(
'<a href="{}"><code>{}</code> {}</a>',
reverse('admin:machine_machine_change', args=[machine.id]),
reverse("admin:machine_machine_change", args=[machine.id]),
str(machine.id)[:8],
machine.hostname,
)
@admin.display(description='Command')
@admin.display(description="Command")
def cmd_str(self, result):
display_cmd = build_abx_dl_display_command(result)
replay_cmd = build_abx_dl_replay_command(result)
return format_html(
'''
<div style="position: relative; width: 300px; min-width: 300px; max-width: 300px; overflow: hidden; box-sizing: border-box;">
"""
<div style="position: relative; width: 100%; max-width: 100%; overflow: hidden; box-sizing: border-box;">
<button type="button"
data-command="{}"
onclick="(function(btn){{var text=btn.dataset.command||''; if(navigator.clipboard&&navigator.clipboard.writeText){{navigator.clipboard.writeText(text);}} else {{var ta=document.createElement('textarea'); ta.value=text; document.body.appendChild(ta); ta.select(); document.execCommand('copy'); document.body.removeChild(ta);}}}})(this); return false;"
@@ -534,7 +654,7 @@ class ArchiveResultAdmin(BaseModelAdmin):
{}
</code>
</div>
''',
""",
replay_cmd,
replay_cmd,
display_cmd,
@@ -542,8 +662,8 @@ class ArchiveResultAdmin(BaseModelAdmin):
def output_display(self, result):
# Determine output link path - use embed_path() which checks output_files
embed_path = result.embed_path() if hasattr(result, 'embed_path') else None
output_path = embed_path if (result.status == 'succeeded' and embed_path) else 'index.html'
embed_path = result.embed_path() if hasattr(result, "embed_path") else None
output_path = embed_path if (result.status == "succeeded" and embed_path) else "index.html"
snapshot_id = str(result.snapshot_id)
return format_html(
'<a href="{}" class="output-link">↗️</a><pre>{}</pre>',
@@ -551,13 +671,13 @@ class ArchiveResultAdmin(BaseModelAdmin):
result.output_str,
)
@admin.display(description='Output', ordering='output_str')
@admin.display(description="Output", ordering="output_str")
def output_str_display(self, result):
output_text = str(result.output_str or '').strip()
output_text = str(result.output_str or "").strip()
if not output_text:
return '-'
return "-"
live_path = result.embed_path() if hasattr(result, 'embed_path') else None
live_path = result.embed_path() if hasattr(result, "embed_path") else None
if live_path:
return format_html(
'<a href="{}" title="{}"><code>{}</code></a>',
@@ -572,8 +692,48 @@ class ArchiveResultAdmin(BaseModelAdmin):
output_text,
)
@admin.display(description="")
def admin_actions(self, result):
return format_html(
"""
<div style="display:flex; flex-wrap:wrap; gap:12px; align-items:center;">
<a class="btn" style="display:inline-flex; align-items:center; gap:6px; padding:10px 16px; background:#f8fafc; border:1px solid #e2e8f0; border-radius:8px; color:#334155; text-decoration:none; font-size:14px; font-weight:500; transition:all 0.15s;"
href="{}"
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
📄 View Output
</a>
<a class="btn" style="display:inline-flex; align-items:center; gap:6px; padding:10px 16px; background:#f8fafc; border:1px solid #e2e8f0; border-radius:8px; color:#334155; text-decoration:none; font-size:14px; font-weight:500; transition:all 0.15s;"
href="{}"
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
📁 Output files
</a>
<a class="btn archivebox-zip-button" style="display:inline-flex; align-items:center; gap:6px; padding:10px 16px; background:#eff6ff; border:1px solid #bfdbfe; border-radius:8px; color:#1d4ed8; text-decoration:none; font-size:14px; font-weight:500; transition:all 0.15s;"
href="{}"
data-loading-label="Preparing..."
onclick="return window.archiveboxHandleZipClick(this, event);"
onmouseover="this.style.background='#dbeafe'; this.style.borderColor='#93c5fd';"
onmouseout="this.style.background='#eff6ff'; this.style.borderColor='#bfdbfe';">
<span class="archivebox-zip-spinner" aria-hidden="true"></span>
<span class="archivebox-zip-label">⬇ Download Zip</span>
</a>
<a class="btn" style="display:inline-flex; align-items:center; gap:6px; padding:10px 16px; background:#f8fafc; border:1px solid #e2e8f0; border-radius:8px; color:#334155; text-decoration:none; font-size:14px; font-weight:500; transition:all 0.15s;"
href="{}"
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
🗂 Snapshot
</a>
</div>
""",
self.get_output_view_url(result),
self.get_output_files_url(result),
self.get_output_zip_url(result),
self.get_snapshot_view_url(result),
)
def output_summary(self, result):
snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
snapshot_dir = Path(DATA_DIR) / str(result.pwd).split("data/", 1)[-1]
output_html = format_html(
'<pre style="display: inline-block">{}</pre><br/>',
result.output_str,
@@ -583,9 +743,13 @@ class ArchiveResultAdmin(BaseModelAdmin):
'<a href="{}#all">See result files ...</a><br/><pre><code>',
build_snapshot_url(snapshot_id, "index.html"),
)
embed_path = result.embed_path() if hasattr(result, 'embed_path') else ''
path_from_embed = (snapshot_dir / (embed_path or ''))
output_html += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(embed_path))
embed_path = result.embed_path() if hasattr(result, "embed_path") else ""
path_from_embed = snapshot_dir / (embed_path or "")
output_html += format_html(
'<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>',
str(snapshot_dir),
str(embed_path),
)
if os.access(path_from_embed, os.R_OK):
root_dir = str(path_from_embed)
else:
@@ -594,19 +758,22 @@ class ArchiveResultAdmin(BaseModelAdmin):
# print(root_dir, str(list(os.walk(root_dir))))
for root, dirs, files in os.walk(root_dir):
depth = root.replace(root_dir, '').count(os.sep) + 1
depth = root.replace(root_dir, "").count(os.sep) + 1
if depth > 2:
continue
indent = ' ' * 4 * (depth)
indent = " " * 4 * (depth)
output_html += format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root))
indentation_str = ' ' * 4 * (depth + 1)
indentation_str = " " * 4 * (depth + 1)
for filename in sorted(files):
is_hidden = filename.startswith('.')
output_html += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
return output_html + mark_safe('</code></pre>')
is_hidden = filename.startswith(".")
output_html += format_html(
'<span style="opacity: {}.2">{}{}</span><br/>',
int(not is_hidden),
indentation_str,
filename.strip(),
)
return output_html + mark_safe("</code></pre>")
def register_admin(admin_site):

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.core'
__package__ = "archivebox.core"
from typing import TYPE_CHECKING, Any
@@ -18,23 +18,23 @@ if TYPE_CHECKING:
class ArchiveBoxAdmin(admin.AdminSite):
site_header = 'ArchiveBox'
index_title = 'Admin Views'
site_title = 'Admin'
namespace = 'admin'
site_header = "ArchiveBox"
index_title = "Admin Views"
site_title = "Admin"
namespace = "admin"
def get_app_list(self, request: 'HttpRequest', app_label: str | None = None) -> list['AppDict']:
def get_app_list(self, request: "HttpRequest", app_label: str | None = None) -> list["AppDict"]:
if app_label is None:
return adv_get_app_list(self, request)
return adv_get_app_list(self, request, app_label)
def admin_data_index_view(self, request: 'HttpRequest', **kwargs: Any) -> 'TemplateResponse':
def admin_data_index_view(self, request: "HttpRequest", **kwargs: Any) -> "TemplateResponse":
return adv_admin_data_index_view(self, request, **kwargs)
def get_admin_data_urls(self) -> list['URLResolver | URLPattern']:
def get_admin_data_urls(self) -> list["URLResolver | URLPattern"]:
return adv_get_admin_data_urls(self)
def get_urls(self) -> list['URLResolver | URLPattern']:
def get_urls(self) -> list["URLResolver | URLPattern"]:
return self.get_admin_data_urls() + super().get_urls()
@@ -43,7 +43,6 @@ archivebox_admin = ArchiveBoxAdmin()
# TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel
############### Admin Data View sections are defined in settings.ADMIN_DATA_VIEWS #########

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.core'
__package__ = "archivebox.core"
from urllib.parse import quote
@@ -28,92 +28,107 @@ from archivebox.core.host_utils import build_snapshot_url
class TagInline(admin.TabularInline):
model = SnapshotTag
fields = ('id', 'tag')
fields = ("id", "tag")
extra = 1
max_num = 1000
autocomplete_fields = (
'tag',
)
autocomplete_fields = ("tag",)
class TagAdminForm(forms.ModelForm):
class Meta:
model = Tag
fields = '__all__'
fields = "__all__"
widgets = {
'name': forms.TextInput(attrs={
'placeholder': 'research, receipts, product-design...',
'autocomplete': 'off',
'spellcheck': 'false',
'data-tag-name-input': '1',
}),
"name": forms.TextInput(
attrs={
"placeholder": "research, receipts, product-design...",
"autocomplete": "off",
"spellcheck": "false",
"data-tag-name-input": "1",
},
),
}
def clean_name(self):
name = (self.cleaned_data.get('name') or '').strip()
name = (self.cleaned_data.get("name") or "").strip()
if not name:
raise forms.ValidationError('Tag name is required.')
raise forms.ValidationError("Tag name is required.")
return name
class TagAdmin(BaseModelAdmin):
form = TagAdminForm
change_list_template = 'admin/core/tag/change_list.html'
change_form_template = 'admin/core/tag/change_form.html'
list_display = ('name', 'num_snapshots', 'created_at', 'created_by')
list_filter = ('created_at', 'created_by')
search_fields = ('id', 'name', 'slug')
readonly_fields = ('slug', 'id', 'created_at', 'modified_at', 'snapshots')
actions = ['delete_selected']
ordering = ['name', 'id']
change_list_template = "admin/core/tag/change_list.html"
change_form_template = "admin/core/tag/change_form.html"
list_display = ("name", "num_snapshots", "created_at", "created_by")
list_filter = ("created_at", "created_by")
search_fields = ("id", "name", "slug")
readonly_fields = ("slug", "id", "created_at", "modified_at", "snapshots")
actions = ["delete_selected"]
ordering = ["name", "id"]
fieldsets = (
('Tag', {
'fields': ('name', 'slug'),
'classes': ('card',),
}),
('Metadata', {
'fields': ('id', 'created_by', 'created_at', 'modified_at'),
'classes': ('card',),
}),
('Recent Snapshots', {
'fields': ('snapshots',),
'classes': ('card', 'wide'),
}),
(
"Tag",
{
"fields": ("name", "slug"),
"classes": ("card",),
},
),
(
"Metadata",
{
"fields": ("id", "created_by", "created_at", "modified_at"),
"classes": ("card",),
},
),
(
"Recent Snapshots",
{
"fields": ("snapshots",),
"classes": ("card", "wide"),
},
),
)
add_fieldsets = (
('Tag', {
'fields': ('name',),
'classes': ('card', 'wide'),
}),
('Metadata', {
'fields': ('created_by',),
'classes': ('card',),
}),
(
"Tag",
{
"fields": ("name",),
"classes": ("card", "wide"),
},
),
(
"Metadata",
{
"fields": ("created_by",),
"classes": ("card",),
},
),
)
def get_fieldsets(self, request: HttpRequest, obj: Tag | None = None):
return self.fieldsets if obj else self.add_fieldsets
def changelist_view(self, request: HttpRequest, extra_context=None):
query = (request.GET.get('q') or '').strip()
sort = normalize_tag_sort((request.GET.get('sort') or 'created_desc').strip())
created_by = normalize_created_by_filter((request.GET.get('created_by') or '').strip())
year = normalize_created_year_filter((request.GET.get('year') or '').strip())
has_snapshots = normalize_has_snapshots_filter((request.GET.get('has_snapshots') or 'all').strip())
query = (request.GET.get("q") or "").strip()
sort = normalize_tag_sort((request.GET.get("sort") or "created_desc").strip())
created_by = normalize_created_by_filter((request.GET.get("created_by") or "").strip())
year = normalize_created_year_filter((request.GET.get("year") or "").strip())
has_snapshots = normalize_has_snapshots_filter((request.GET.get("has_snapshots") or "all").strip())
extra_context = {
**(extra_context or {}),
'initial_query': query,
'initial_sort': sort,
'initial_created_by': created_by,
'initial_year': year,
'initial_has_snapshots': has_snapshots,
'tag_sort_choices': TAG_SORT_CHOICES,
'tag_has_snapshots_choices': TAG_HAS_SNAPSHOTS_CHOICES,
'tag_created_by_choices': get_tag_creator_choices(),
'tag_year_choices': get_tag_year_choices(),
'initial_tag_cards': build_tag_cards(
"initial_query": query,
"initial_sort": sort,
"initial_created_by": created_by,
"initial_year": year,
"initial_has_snapshots": has_snapshots,
"tag_sort_choices": TAG_SORT_CHOICES,
"tag_has_snapshots_choices": TAG_HAS_SNAPSHOTS_CHOICES,
"tag_created_by_choices": get_tag_creator_choices(),
"tag_year_choices": get_tag_year_choices(),
"initial_tag_cards": build_tag_cards(
query=query,
request=request,
sort=sort,
@@ -121,62 +136,67 @@ class TagAdmin(BaseModelAdmin):
year=year,
has_snapshots=has_snapshots,
),
'tag_search_api_url': reverse('api-1:search_tags'),
'tag_create_api_url': reverse('api-1:tags_create'),
"tag_search_api_url": reverse("api-1:search_tags"),
"tag_create_api_url": reverse("api-1:tags_create"),
}
return super().changelist_view(request, extra_context=extra_context)
def render_change_form(self, request, context, add=False, change=False, form_url='', obj=None):
current_name = (request.POST.get('name') or '').strip()
def render_change_form(self, request, context, add=False, change=False, form_url="", obj=None):
current_name = (request.POST.get("name") or "").strip()
if not current_name and obj:
current_name = obj.name
similar_tag_cards = build_tag_cards(query=current_name, request=request, limit=12) if current_name else build_tag_cards(request=request, limit=12)
similar_tag_cards = (
build_tag_cards(query=current_name, request=request, limit=12) if current_name else build_tag_cards(request=request, limit=12)
)
if obj:
similar_tag_cards = [card for card in similar_tag_cards if card['id'] != obj.pk]
similar_tag_cards = [card for card in similar_tag_cards if card["id"] != obj.pk]
context.update({
'tag_search_api_url': reverse('api-1:search_tags'),
'tag_similar_cards': similar_tag_cards,
'tag_similar_query': current_name,
})
context.update(
{
"tag_search_api_url": reverse("api-1:search_tags"),
"tag_similar_cards": similar_tag_cards,
"tag_similar_query": current_name,
},
)
return super().render_change_form(request, context, add=add, change=change, form_url=form_url, obj=obj)
def response_add(self, request: HttpRequest, obj: Tag, post_url_continue=None):
if IS_POPUP_VAR in request.POST or '_continue' in request.POST or '_addanother' in request.POST:
if IS_POPUP_VAR in request.POST or "_continue" in request.POST or "_addanother" in request.POST:
return super().response_add(request, obj, post_url_continue=post_url_continue)
self.message_user(request, f'Tag "{obj.name}" saved.', level=messages.SUCCESS)
return self._redirect_to_changelist(obj.name)
def response_change(self, request: HttpRequest, obj: Tag):
if IS_POPUP_VAR in request.POST or '_continue' in request.POST or '_addanother' in request.POST or '_saveasnew' in request.POST:
if IS_POPUP_VAR in request.POST or "_continue" in request.POST or "_addanother" in request.POST or "_saveasnew" in request.POST:
return super().response_change(request, obj)
self.message_user(request, f'Tag "{obj.name}" updated.', level=messages.SUCCESS)
return self._redirect_to_changelist(obj.name)
def _redirect_to_changelist(self, query: str = '') -> HttpResponseRedirect:
changelist_url = reverse('admin:core_tag_changelist')
def _redirect_to_changelist(self, query: str = "") -> HttpResponseRedirect:
changelist_url = reverse("admin:core_tag_changelist")
if query:
changelist_url = f'{changelist_url}?q={quote(query)}'
changelist_url = f"{changelist_url}?q={quote(query)}"
return HttpResponseRedirect(changelist_url)
@admin.display(description='Snapshots')
@admin.display(description="Snapshots")
def snapshots(self, tag: Tag):
snapshots = tag.snapshot_set.select_related('crawl__created_by').order_by('-downloaded_at', '-created_at', '-pk')[:10]
snapshots = tag.snapshot_set.select_related("crawl__created_by").order_by("-downloaded_at", "-created_at", "-pk")[:10]
total_count = tag.snapshot_set.count()
if not snapshots:
return mark_safe(
f'<p style="margin:0;color:#64748b;">No snapshots use this tag yet. '
f'<a href="/admin/core/snapshot/?tags__id__exact={tag.id}">Open filtered snapshot list</a>.</p>'
f'<a href="/admin/core/snapshot/?tags__id__exact={tag.id}">Open filtered snapshot list</a>.</p>',
)
cards = []
for snapshot in snapshots:
title = (snapshot.title or '').strip() or snapshot.url
cards.append(format_html(
'''
title = (snapshot.title or "").strip() or snapshot.url
cards.append(
format_html(
"""
<a href="{}" style="display:flex;align-items:center;gap:10px;padding:10px 12px;border:1px solid #e2e8f0;border-radius:12px;background:#fff;text-decoration:none;color:#0f172a;">
<img src="{}" alt="" style="width:18px;height:18px;border-radius:4px;flex:0 0 auto;" onerror="this.style.display='none'">
<span style="min-width:0;">
@@ -184,23 +204,26 @@ class TagAdmin(BaseModelAdmin):
<code style="display:block;color:#64748b;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;">{}</code>
</span>
</a>
''',
reverse('admin:core_snapshot_change', args=[snapshot.pk]),
build_snapshot_url(str(snapshot.pk), 'favicon.ico'),
title[:120],
snapshot.url[:120],
))
""",
reverse("admin:core_snapshot_change", args=[snapshot.pk]),
build_snapshot_url(str(snapshot.pk), "favicon.ico"),
title[:120],
snapshot.url[:120],
),
)
cards.append(format_html(
'<a href="/admin/core/snapshot/?tags__id__exact={}" style="display:inline-flex;margin-top:10px;font-weight:600;">View all {} tagged snapshots</a>',
tag.id,
total_count,
))
return mark_safe('<div style="display:grid;gap:10px;">' + ''.join(cards) + '</div>')
cards.append(
format_html(
'<a href="/admin/core/snapshot/?tags__id__exact={}" style="display:inline-flex;margin-top:10px;font-weight:600;">View all {} tagged snapshots</a>',
tag.id,
total_count,
),
)
return mark_safe('<div style="display:grid;gap:10px;">' + "".join(cards) + "</div>")
@admin.display(description='Snapshots', ordering='num_snapshots')
@admin.display(description="Snapshots", ordering="num_snapshots")
def num_snapshots(self, tag: Tag):
count = getattr(tag, 'num_snapshots', tag.snapshot_set.count())
count = getattr(tag, "num_snapshots", tag.snapshot_set.count())
return format_html(
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
tag.id,

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.core'
__package__ = "archivebox.core"
from django.contrib import admin
from django.contrib.auth.admin import UserAdmin
@@ -8,87 +8,100 @@ from django.utils.safestring import mark_safe
class CustomUserAdmin(UserAdmin):
sort_fields = ['id', 'email', 'username', 'is_superuser', 'last_login', 'date_joined']
list_display = ['username', 'id', 'email', 'is_superuser', 'last_login', 'date_joined']
readonly_fields = ('snapshot_set', 'archiveresult_set', 'tag_set', 'apitoken_set', 'outboundwebhook_set')
sort_fields = ["id", "email", "username", "is_superuser", "last_login", "date_joined"]
list_display = ["username", "id", "email", "is_superuser", "last_login", "date_joined"]
readonly_fields = ("snapshot_set", "archiveresult_set", "tag_set", "apitoken_set", "outboundwebhook_set")
# Preserve Django's default user creation form and fieldsets
# This ensures passwords are properly hashed and permissions are set correctly
add_fieldsets = UserAdmin.add_fieldsets
# Extend fieldsets for change form only (not user creation)
fieldsets = [*(UserAdmin.fieldsets or ()), ('Data', {'fields': readonly_fields})]
fieldsets = [*(UserAdmin.fieldsets or ()), ("Data", {"fields": readonly_fields})]
@admin.display(description='Snapshots')
@admin.display(description="Snapshots")
def snapshot_set(self, obj):
total_count = obj.snapshot_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
snap.pk,
str(snap.id)[:8],
snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
snap.url[:64],
return mark_safe(
"<br/>".join(
format_html(
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
snap.pk,
str(snap.id)[:8],
snap.downloaded_at.strftime("%Y-%m-%d %H:%M") if snap.downloaded_at else "pending...",
snap.url[:64],
)
for snap in obj.snapshot_set.order_by("-modified_at")[:10]
)
for snap in obj.snapshot_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/snapshot/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
+ f'<br/><a href="/admin/core/snapshot/?created_by__id__exact={obj.pk}">{total_count} total records...<a>',
)
@admin.display(description='Archive Result Logs')
@admin.display(description="Archive Result Logs")
def archiveresult_set(self, obj):
total_count = obj.archiveresult_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
result.pk,
str(result.id)[:8],
result.snapshot.downloaded_at.strftime('%Y-%m-%d %H:%M') if result.snapshot.downloaded_at else 'pending...',
result.extractor,
result.snapshot.url[:64],
return mark_safe(
"<br/>".join(
format_html(
'<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
result.pk,
str(result.id)[:8],
result.snapshot.downloaded_at.strftime("%Y-%m-%d %H:%M") if result.snapshot.downloaded_at else "pending...",
result.extractor,
result.snapshot.url[:64],
)
for result in obj.archiveresult_set.order_by("-modified_at")[:10]
)
for result in obj.archiveresult_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/archiveresult/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
+ f'<br/><a href="/admin/core/archiveresult/?created_by__id__exact={obj.pk}">{total_count} total records...<a>',
)
@admin.display(description='Tags')
@admin.display(description="Tags")
def tag_set(self, obj):
total_count = obj.tag_set.count()
return mark_safe(', '.join(
format_html(
'<code><a href="/admin/core/tag/{}/change"><b>{}</b></a></code>',
tag.pk,
tag.name,
return mark_safe(
", ".join(
format_html(
'<code><a href="/admin/core/tag/{}/change"><b>{}</b></a></code>',
tag.pk,
tag.name,
)
for tag in obj.tag_set.order_by("-modified_at")[:10]
)
for tag in obj.tag_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/tag/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
+ f'<br/><a href="/admin/core/tag/?created_by__id__exact={obj.pk}">{total_count} total records...<a>',
)
@admin.display(description='API Tokens')
@admin.display(description="API Tokens")
def apitoken_set(self, obj):
total_count = obj.apitoken_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
apitoken.pk,
str(apitoken.id)[:8],
apitoken.token_redacted[:64],
apitoken.expires,
return mark_safe(
"<br/>".join(
format_html(
'<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
apitoken.pk,
str(apitoken.id)[:8],
apitoken.token_redacted[:64],
apitoken.expires,
)
for apitoken in obj.apitoken_set.order_by("-modified_at")[:10]
)
for apitoken in obj.apitoken_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/api/apitoken/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
+ f'<br/><a href="/admin/api/apitoken/?created_by__id__exact={obj.pk}">{total_count} total records...<a>',
)
@admin.display(description='API Outbound Webhooks')
@admin.display(description="API Outbound Webhooks")
def outboundwebhook_set(self, obj):
total_count = obj.outboundwebhook_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
outboundwebhook.pk,
str(outboundwebhook.id)[:8],
outboundwebhook.referenced_model,
outboundwebhook.endpoint,
return mark_safe(
"<br/>".join(
format_html(
'<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
outboundwebhook.pk,
str(outboundwebhook.id)[:8],
outboundwebhook.referenced_model,
outboundwebhook.endpoint,
)
for outboundwebhook in obj.outboundwebhook_set.order_by("-modified_at")[:10]
)
for outboundwebhook in obj.outboundwebhook_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/api/outboundwebhook/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
+ f'<br/><a href="/admin/api/outboundwebhook/?created_by__id__exact={obj.pk}">{total_count} total records...<a>',
)
def register_admin(admin_site):

View File

@@ -1,12 +1,12 @@
__package__ = 'archivebox.core'
__package__ = "archivebox.core"
from django.apps import AppConfig
import os
class CoreConfig(AppConfig):
name = 'archivebox.core'
label = 'core'
name = "archivebox.core"
label = "core"
def ready(self):
"""Register the archivebox.core.admin_site as the main django admin site"""
@@ -14,29 +14,30 @@ class CoreConfig(AppConfig):
from django.utils.autoreload import DJANGO_AUTORELOAD_ENV
from archivebox.core.admin_site import register_admin_site
register_admin_site()
# Import models to register state machines with the registry
# Skip during makemigrations to avoid premature state machine access
if 'makemigrations' not in sys.argv:
if "makemigrations" not in sys.argv:
from archivebox.core import models # noqa: F401
pidfile = os.environ.get('ARCHIVEBOX_RUNSERVER_PIDFILE')
pidfile = os.environ.get("ARCHIVEBOX_RUNSERVER_PIDFILE")
if pidfile:
should_write_pid = True
if os.environ.get('ARCHIVEBOX_AUTORELOAD') == '1':
should_write_pid = os.environ.get(DJANGO_AUTORELOAD_ENV) == 'true'
if os.environ.get("ARCHIVEBOX_AUTORELOAD") == "1":
should_write_pid = os.environ.get(DJANGO_AUTORELOAD_ENV) == "true"
if should_write_pid:
try:
with open(pidfile, 'w') as handle:
with open(pidfile, "w") as handle:
handle.write(str(os.getpid()))
except Exception:
pass
def _should_prepare_runtime() -> bool:
if os.environ.get('ARCHIVEBOX_RUNSERVER') == '1':
if os.environ.get('ARCHIVEBOX_AUTORELOAD') == '1':
return os.environ.get(DJANGO_AUTORELOAD_ENV) == 'true'
if os.environ.get("ARCHIVEBOX_RUNSERVER") == "1":
if os.environ.get("ARCHIVEBOX_AUTORELOAD") == "1":
return os.environ.get(DJANGO_AUTORELOAD_ENV) == "true"
return True
return False
@@ -44,4 +45,5 @@ class CoreConfig(AppConfig):
from archivebox.machine.models import Process, Machine
Process.cleanup_stale_running()
Process.cleanup_orphaned_workers()
Machine.current()

View File

@@ -1,9 +1,9 @@
__package__ = 'archivebox.core'
__package__ = "archivebox.core"
from django import forms
from django.utils.html import format_html
from archivebox.misc.util import URL_REGEX, find_all_urls
from archivebox.misc.util import URL_REGEX, find_all_urls, parse_filesize_to_bytes
from taggit.utils import edit_string_for_tags, parse_tags
from archivebox.base_models.admin import KeyValueWidget
from archivebox.crawls.schedule_utils import validate_schedule
@@ -13,11 +13,11 @@ from archivebox.hooks import get_plugins, discover_plugin_configs, get_plugin_ic
from archivebox.personas.models import Persona
DEPTH_CHOICES = (
('0', 'depth = 0 (archive just these URLs)'),
('1', 'depth = 1 (+ URLs one hop away)'),
('2', 'depth = 2 (+ URLs two hops away)'),
('3', 'depth = 3 (+ URLs three hops away)'),
('4', 'depth = 4 (+ URLs four hops away)'),
("0", "depth = 0 (archive just these URLs)"),
("1", "depth = 1 (+ URLs one hop away)"),
("2", "depth = 2 (+ URLs two hops away)"),
("3", "depth = 3 (+ URLs three hops away)"),
("4", "depth = 4 (+ URLs four hops away)"),
)
@@ -28,7 +28,7 @@ def get_plugin_choices():
def get_plugin_choice_label(plugin_name: str, plugin_configs: dict[str, dict]) -> str:
schema = plugin_configs.get(plugin_name, {})
description = str(schema.get('description') or '').strip()
description = str(schema.get("description") or "").strip()
if not description:
return plugin_name
icon_html = get_plugin_icon(plugin_name)
@@ -45,7 +45,7 @@ def get_plugin_choice_label(plugin_name: str, plugin_configs: dict[str, dict]) -
def get_choice_field(form: forms.Form, name: str) -> forms.ChoiceField:
field = form.fields[name]
if not isinstance(field, forms.ChoiceField):
raise TypeError(f'{name} must be a ChoiceField')
raise TypeError(f"{name} must be a ChoiceField")
return field
@@ -54,10 +54,12 @@ class AddLinkForm(forms.Form):
url = forms.CharField(
label="URLs",
strip=True,
widget=forms.Textarea(attrs={
'data-url-regex': URL_REGEX.pattern,
}),
required=True
widget=forms.Textarea(
attrs={
"data-url-regex": URL_REGEX.pattern,
},
),
required=True,
)
tag = forms.CharField(
label="Tags",
@@ -68,16 +70,41 @@ class AddLinkForm(forms.Form):
depth = forms.ChoiceField(
label="Archive depth",
choices=DEPTH_CHOICES,
initial='0',
widget=forms.RadioSelect(attrs={"class": "depth-selection"})
initial="0",
widget=forms.RadioSelect(attrs={"class": "depth-selection"}),
)
max_urls = forms.IntegerField(
label="Max URLs",
required=False,
min_value=0,
initial=0,
widget=forms.NumberInput(
attrs={
"min": 0,
"step": 1,
"placeholder": "0 = unlimited",
},
),
)
max_size = forms.CharField(
label="Max size",
required=False,
initial="0",
widget=forms.TextInput(
attrs={
"placeholder": "0 = unlimited, or e.g. 45mb / 1gb",
},
),
)
notes = forms.CharField(
label="Notes",
strip=True,
required=False,
widget=forms.TextInput(attrs={
'placeholder': 'Optional notes about this crawl',
})
widget=forms.TextInput(
attrs={
"placeholder": "Optional notes about this crawl",
},
),
)
url_filters = forms.Field(
label="URL allowlist / denylist",
@@ -128,16 +155,18 @@ class AddLinkForm(forms.Form):
label="Repeat schedule",
max_length=64,
required=False,
widget=forms.TextInput(attrs={
'placeholder': 'e.g., daily, weekly, 0 */6 * * * (every 6 hours)',
})
widget=forms.TextInput(
attrs={
"placeholder": "e.g., daily, weekly, 0 */6 * * * (every 6 hours)",
},
),
)
persona = forms.ModelChoiceField(
label="Persona (authentication profile)",
required=False,
queryset=Persona.objects.none(),
empty_label=None,
to_field_name='name',
to_field_name="name",
)
index_only = forms.BooleanField(
label="Index only dry run (add crawl but don't archive yet)",
@@ -155,8 +184,8 @@ class AddLinkForm(forms.Form):
super().__init__(*args, **kwargs)
default_persona = Persona.get_or_create_default()
self.fields['persona'].queryset = Persona.objects.order_by('name')
self.fields['persona'].initial = default_persona.name
self.fields["persona"].queryset = Persona.objects.order_by("name")
self.fields["persona"].initial = default_persona.name
# Get all plugins
all_plugins = get_plugins()
@@ -164,86 +193,136 @@ class AddLinkForm(forms.Form):
# Define plugin groups
chrome_dependent = {
'accessibility', 'chrome', 'consolelog', 'dom', 'headers',
'parse_dom_outlinks', 'pdf', 'redirects', 'responses',
'screenshot', 'seo', 'singlefile', 'ssl', 'staticfile', 'title'
"accessibility",
"chrome",
"consolelog",
"dom",
"headers",
"parse_dom_outlinks",
"pdf",
"redirects",
"responses",
"screenshot",
"seo",
"singlefile",
"ssl",
"staticfile",
"title",
}
archiving = {
'archivedotorg', 'defuddle', 'favicon', 'forumdl', 'gallerydl', 'git',
'htmltotext', 'mercury', 'papersdl', 'readability', 'trafilatura', 'wget', 'ytdlp'
"archivedotorg",
"defuddle",
"favicon",
"forumdl",
"gallerydl",
"git",
"htmltotext",
"mercury",
"papersdl",
"readability",
"trafilatura",
"wget",
"ytdlp",
}
parsing = {
'parse_html_urls', 'parse_jsonl_urls',
'parse_netscape_urls', 'parse_rss_urls', 'parse_txt_urls'
"parse_html_urls",
"parse_jsonl_urls",
"parse_netscape_urls",
"parse_rss_urls",
"parse_txt_urls",
}
search = {
'search_backend_ripgrep', 'search_backend_sonic', 'search_backend_sqlite'
"search_backend_ripgrep",
"search_backend_sonic",
"search_backend_sqlite",
}
binary = {'apt', 'brew', 'custom', 'env', 'npm', 'pip'}
extensions = {'twocaptcha', 'istilldontcareaboutcookies', 'ublock'}
binary = {"apt", "brew", "custom", "env", "npm", "pip"}
extensions = {"twocaptcha", "istilldontcareaboutcookies", "ublock"}
# Populate plugin field choices
get_choice_field(self, 'chrome_plugins').choices = [
get_choice_field(self, "chrome_plugins").choices = [
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in chrome_dependent
]
get_choice_field(self, 'archiving_plugins').choices = [
get_choice_field(self, "archiving_plugins").choices = [
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in archiving
]
get_choice_field(self, 'parsing_plugins').choices = [
get_choice_field(self, "parsing_plugins").choices = [
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in parsing
]
get_choice_field(self, 'search_plugins').choices = [
get_choice_field(self, "search_plugins").choices = [
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in search
]
get_choice_field(self, 'binary_plugins').choices = [
get_choice_field(self, "binary_plugins").choices = [
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in binary
]
get_choice_field(self, 'extension_plugins').choices = [
get_choice_field(self, "extension_plugins").choices = [
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in extensions
]
required_search_plugin = f'search_backend_{SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}'.strip()
search_choices = [choice[0] for choice in get_choice_field(self, 'search_plugins').choices]
required_search_plugin = f"search_backend_{SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}".strip()
search_choices = [choice[0] for choice in get_choice_field(self, "search_plugins").choices]
if required_search_plugin in search_choices:
get_choice_field(self, 'search_plugins').initial = [required_search_plugin]
get_choice_field(self, "search_plugins").initial = [required_search_plugin]
def clean(self):
cleaned_data = super().clean() or {}
# Combine all plugin groups into single list
all_selected_plugins = []
for field in ['chrome_plugins', 'archiving_plugins', 'parsing_plugins',
'search_plugins', 'binary_plugins', 'extension_plugins']:
for field in [
"chrome_plugins",
"archiving_plugins",
"parsing_plugins",
"search_plugins",
"binary_plugins",
"extension_plugins",
]:
selected = cleaned_data.get(field)
if isinstance(selected, list):
all_selected_plugins.extend(selected)
# Store combined list for easy access
cleaned_data['plugins'] = all_selected_plugins
cleaned_data["plugins"] = all_selected_plugins
return cleaned_data
def clean_url(self):
value = self.cleaned_data.get('url') or ''
urls = '\n'.join(find_all_urls(value))
value = self.cleaned_data.get("url") or ""
urls = "\n".join(find_all_urls(value))
if not urls:
raise forms.ValidationError('Enter at least one valid URL.')
raise forms.ValidationError("Enter at least one valid URL.")
return urls
def clean_url_filters(self):
from archivebox.crawls.models import Crawl
value = self.cleaned_data.get('url_filters') or {}
value = self.cleaned_data.get("url_filters") or {}
return {
'allowlist': '\n'.join(Crawl.split_filter_patterns(value.get('allowlist', ''))),
'denylist': '\n'.join(Crawl.split_filter_patterns(value.get('denylist', ''))),
'same_domain_only': bool(value.get('same_domain_only')),
"allowlist": "\n".join(Crawl.split_filter_patterns(value.get("allowlist", ""))),
"denylist": "\n".join(Crawl.split_filter_patterns(value.get("denylist", ""))),
"same_domain_only": bool(value.get("same_domain_only")),
}
def clean_max_urls(self):
value = self.cleaned_data.get("max_urls")
return int(value or 0)
def clean_max_size(self):
raw_value = str(self.cleaned_data.get("max_size") or "").strip()
if not raw_value:
return 0
try:
value = parse_filesize_to_bytes(raw_value)
except ValueError as err:
raise forms.ValidationError(str(err))
if value < 0:
raise forms.ValidationError("Max size must be 0 or a positive number of bytes.")
return value
def clean_schedule(self):
schedule = (self.cleaned_data.get('schedule') or '').strip()
schedule = (self.cleaned_data.get("schedule") or "").strip()
if not schedule:
return ''
return ""
try:
validate_schedule(schedule)
@@ -269,7 +348,7 @@ class TagField(forms.CharField):
return parse_tags(value)
except ValueError:
raise forms.ValidationError(
"Please provide a comma-separated list of tags."
"Please provide a comma-separated list of tags.",
)
def has_changed(self, initial, data):

View File

@@ -1,7 +1,5 @@
from __future__ import annotations
from __future__ import annotations
import re
from urllib.parse import urlparse
@@ -9,6 +7,7 @@ from archivebox.config.common import SERVER_CONFIG
_SNAPSHOT_ID_RE = re.compile(r"^[0-9a-fA-F-]{8,36}$")
_SNAPSHOT_SUBDOMAIN_RE = re.compile(r"^snap-(?P<suffix>[0-9a-fA-F]{12})$")
def split_host_port(host: str) -> tuple[str, str | None]:
@@ -71,21 +70,29 @@ def get_web_host() -> str:
return urlparse(override).netloc.lower()
return _build_listen_host("web")
def get_api_host() -> str:
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
return get_listen_host().lower()
return _build_listen_host("api")
def get_public_host() -> str:
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
return get_listen_host().lower()
return _build_listen_host("public")
def get_snapshot_subdomain(snapshot_id: str) -> str:
normalized = re.sub(r"[^0-9a-fA-F]", "", snapshot_id or "")
suffix = (normalized[-12:] if len(normalized) >= 12 else normalized).lower()
return f"snap-{suffix}"
def get_snapshot_host(snapshot_id: str) -> str:
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
return get_listen_host().lower()
return _build_listen_host(snapshot_id)
return _build_listen_host(get_snapshot_subdomain(snapshot_id))
def get_original_host(domain: str) -> str:
@@ -95,7 +102,16 @@ def get_original_host(domain: str) -> str:
def is_snapshot_subdomain(subdomain: str) -> bool:
return bool(_SNAPSHOT_ID_RE.match(subdomain or ""))
value = (subdomain or "").strip()
return bool(_SNAPSHOT_SUBDOMAIN_RE.match(value) or _SNAPSHOT_ID_RE.match(value))
def get_snapshot_lookup_key(snapshot_ref: str) -> str:
value = (snapshot_ref or "").strip().lower()
match = _SNAPSHOT_SUBDOMAIN_RE.match(value)
if match:
return match.group("suffix")
return value
def get_listen_subdomain(request_host: str) -> str:
@@ -141,22 +157,23 @@ def _build_base_url_for_host(host: str, request=None) -> str:
def get_admin_base_url(request=None) -> str:
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
return _build_base_url_for_host(get_listen_host(), request=request)
override = _normalize_base_url(SERVER_CONFIG.ADMIN_BASE_URL)
if override:
return override
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
return _build_base_url_for_host(get_listen_host(), request=request)
return _build_base_url_for_host(get_admin_host(), request=request)
def get_web_base_url(request=None) -> str:
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
return _build_base_url_for_host(get_listen_host(), request=request)
override = _normalize_base_url(SERVER_CONFIG.ARCHIVE_BASE_URL)
if override:
return override
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
return _build_base_url_for_host(get_listen_host(), request=request)
return _build_base_url_for_host(get_web_host(), request=request)
def get_api_base_url(request=None) -> str:
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
return _build_base_url_for_host(get_listen_host(), request=request)
@@ -191,6 +208,7 @@ def build_admin_url(path: str = "", request=None) -> str:
def build_web_url(path: str = "", request=None) -> str:
return _build_url(get_web_base_url(request), path)
def build_api_url(path: str = "", request=None) -> str:
return _build_url(get_api_base_url(request), path)

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox'
__package__ = "archivebox"
from django.core.management.base import BaseCommand
@@ -6,13 +6,12 @@ from archivebox.cli import main as run_cli
class Command(BaseCommand):
help = 'Run an ArchiveBox CLI subcommand (e.g. add, remove, list, etc)'
help = "Run an ArchiveBox CLI subcommand (e.g. add, remove, list, etc)"
def add_arguments(self, parser):
parser.add_argument('subcommand', type=str, help='The subcommand you want to run')
parser.add_argument('command_args', nargs='*', help='Arguments to pass to the subcommand')
parser.add_argument("subcommand", type=str, help="The subcommand you want to run")
parser.add_argument("command_args", nargs="*", help="Arguments to pass to the subcommand")
def handle(self, *args, **kwargs):
command_args = [kwargs['subcommand'], *kwargs['command_args']]
command_args = [kwargs["subcommand"], *kwargs["command_args"]]
run_cli(args=command_args)

View File

@@ -1,4 +1,4 @@
__package__ = 'archivebox.core'
__package__ = "archivebox.core"
import ipaddress
import re
@@ -16,6 +16,7 @@ from archivebox.config.common import SERVER_CONFIG
from archivebox.config import VERSION
from archivebox.config.version import get_COMMIT_HASH
from archivebox.core.host_utils import (
build_snapshot_url,
build_admin_url,
build_web_url,
get_api_host,
@@ -31,10 +32,10 @@ from archivebox.core.host_utils import (
from archivebox.core.views import SnapshotHostView, OriginalDomainHostView
def detect_timezone(request, activate: bool=True):
gmt_offset = (request.COOKIES.get('GMT_OFFSET') or '').strip()
def detect_timezone(request, activate: bool = True):
gmt_offset = (request.COOKIES.get("GMT_OFFSET") or "").strip()
tz = None
if gmt_offset.replace('-', '').isdigit():
if gmt_offset.replace("-", "").isdigit():
tz = timezone.get_fixed_timezone(int(gmt_offset))
if activate:
timezone.activate(tz)
@@ -53,11 +54,12 @@ def TimezoneMiddleware(get_response):
def CacheControlMiddleware(get_response):
snapshot_path_re = re.compile(r"^/[^/]+/\\d{8}/[^/]+/[0-9a-fA-F-]{8,36}/")
static_cache_key = (get_COMMIT_HASH() or VERSION or "dev").strip()
def middleware(request):
response = get_response(request)
if request.path.startswith('/static/'):
rel_path = request.path[len('/static/'):]
if request.path.startswith("/static/"):
rel_path = request.path[len("/static/") :]
static_path = finders.find(rel_path)
if static_path:
try:
@@ -81,10 +83,10 @@ def CacheControlMiddleware(get_response):
response.headers["Last-Modified"] = http_date(mtime)
return response
if '/archive/' in request.path or '/static/' in request.path or snapshot_path_re.match(request.path):
if not response.get('Cache-Control'):
policy = 'public' if SERVER_CONFIG.PUBLIC_SNAPSHOTS else 'private'
response['Cache-Control'] = f'{policy}, max-age=60, stale-while-revalidate=300'
if "/archive/" in request.path or "/static/" in request.path or snapshot_path_re.match(request.path):
if not response.get("Cache-Control"):
policy = "public" if SERVER_CONFIG.PUBLIC_SNAPSHOTS else "private"
response["Cache-Control"] = f"{policy}, max-age=60, stale-while-revalidate=300"
# print('Set Cache-Control header to', response['Cache-Control'])
return response
@@ -115,6 +117,10 @@ def ServerSecurityModeMiddleware(get_response):
def HostRoutingMiddleware(get_response):
snapshot_path_re = re.compile(
r"^/(?P<username>[^/]+)/(?P<date>\d{4}(?:\d{2})?(?:\d{2})?)/(?P<domain>[^/]+)/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:/(?P<path>.*))?$",
)
def middleware(request):
request_host = (request.get_host() or "").lower()
admin_host = get_admin_host()
@@ -124,6 +130,23 @@ def HostRoutingMiddleware(get_response):
listen_host = get_listen_host()
subdomain = get_listen_subdomain(request_host)
# Framework-owned assets must bypass snapshot/original-domain replay routing.
# Otherwise pages on snapshot subdomains can receive HTML for JS/CSS requests.
if request.path.startswith("/static/") or request.path in {"/favicon.ico", "/robots.txt"}:
return get_response(request)
if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING and not host_matches(request_host, admin_host):
if (
request.path == "/admin"
or request.path.startswith("/admin/")
or request.path == "/accounts"
or request.path.startswith("/accounts/")
):
target = build_admin_url(request.path, request=request)
if request.META.get("QUERY_STRING"):
target = f"{target}?{request.META['QUERY_STRING']}"
return redirect(target)
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
if host_matches(request_host, listen_host):
return get_response(request)
@@ -140,6 +163,16 @@ def HostRoutingMiddleware(get_response):
return get_response(request)
if host_matches(request_host, admin_host):
snapshot_match = snapshot_path_re.match(request.path)
if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING and snapshot_match:
snapshot_id = snapshot_match.group("snapshot_id")
replay_path = (snapshot_match.group("path") or "").strip("/")
if replay_path == "index.html":
replay_path = ""
target = build_snapshot_url(snapshot_id, replay_path, request=request)
if request.META.get("QUERY_STRING"):
target = f"{target}?{request.META['QUERY_STRING']}"
return redirect(target)
return get_response(request)
if host_matches(request_host, api_host):
@@ -160,16 +193,9 @@ def HostRoutingMiddleware(get_response):
if host_matches(request_host, web_host):
request.user = AnonymousUser()
request._cached_user = request.user
if request.path.startswith("/admin"):
target = build_admin_url(request.path, request=request)
if request.META.get("QUERY_STRING"):
target = f"{target}?{request.META['QUERY_STRING']}"
return redirect(target)
return get_response(request)
if host_matches(request_host, public_host):
request.user = AnonymousUser()
request._cached_user = request.user
return get_response(request)
if subdomain:
@@ -196,24 +222,26 @@ def HostRoutingMiddleware(get_response):
return middleware
class ReverseProxyAuthMiddleware(RemoteUserMiddleware):
header = 'HTTP_{normalized}'.format(normalized=SERVER_CONFIG.REVERSE_PROXY_USER_HEADER.replace('-', '_').upper())
header = "HTTP_{normalized}".format(normalized=SERVER_CONFIG.REVERSE_PROXY_USER_HEADER.replace("-", "_").upper())
def process_request(self, request):
if SERVER_CONFIG.REVERSE_PROXY_WHITELIST == '':
if SERVER_CONFIG.REVERSE_PROXY_WHITELIST == "":
return
ip = request.META.get('REMOTE_ADDR')
ip = request.META.get("REMOTE_ADDR")
if not isinstance(ip, str):
return
for cidr in SERVER_CONFIG.REVERSE_PROXY_WHITELIST.split(','):
for cidr in SERVER_CONFIG.REVERSE_PROXY_WHITELIST.split(","):
try:
network = ipaddress.ip_network(cidr)
except ValueError:
raise ImproperlyConfigured(
"The REVERSE_PROXY_WHITELIST config paramater is in invalid format, or "
"contains invalid CIDR. Correct format is a coma-separated list of IPv4/IPv6 CIDRs.")
"The REVERSE_PROXY_WHITELIST config parameter is in invalid format, or "
"contains invalid CIDR. Correct format is a coma-separated list of IPv4/IPv6 CIDRs.",
)
if ipaddress.ip_address(ip) in network:
return super().process_request(request)

View File

@@ -5,23 +5,21 @@ import uuid
class Migration(migrations.Migration):
initial = True
dependencies = [
]
dependencies = []
operations = [
migrations.CreateModel(
name='Snapshot',
name="Snapshot",
fields=[
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
('url', models.URLField(unique=True)),
('timestamp', models.CharField(default=None, max_length=32, null=True, unique=True)),
('title', models.CharField(default=None, max_length=128, null=True)),
('tags', models.CharField(default=None, max_length=256, null=True)),
('added', models.DateTimeField(auto_now_add=True)),
('updated', models.DateTimeField(default=None, null=True)),
("id", models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
("url", models.URLField(unique=True)),
("timestamp", models.CharField(default=None, max_length=32, null=True, unique=True)),
("title", models.CharField(default=None, max_length=128, null=True)),
("tags", models.CharField(default=None, max_length=256, null=True)),
("added", models.DateTimeField(auto_now_add=True)),
("updated", models.DateTimeField(default=None, null=True)),
],
),
]

View File

@@ -4,15 +4,14 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0001_initial'),
("core", "0001_initial"),
]
operations = [
migrations.AlterField(
model_name='snapshot',
name='timestamp',
model_name="snapshot",
name="timestamp",
field=models.CharField(default=None, max_length=32, null=True),
),
]

View File

@@ -4,35 +4,34 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0002_auto_20200625_1521'),
("core", "0002_auto_20200625_1521"),
]
operations = [
migrations.AlterField(
model_name='snapshot',
name='added',
model_name="snapshot",
name="added",
field=models.DateTimeField(auto_now_add=True, db_index=True),
),
migrations.AlterField(
model_name='snapshot',
name='tags',
model_name="snapshot",
name="tags",
field=models.CharField(db_index=True, default=None, max_length=256, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='timestamp',
model_name="snapshot",
name="timestamp",
field=models.CharField(db_index=True, default=None, max_length=32, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='title',
model_name="snapshot",
name="title",
field=models.CharField(db_index=True, default=None, max_length=128, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='updated',
model_name="snapshot",
name="updated",
field=models.DateTimeField(db_index=True, default=None, null=True),
),
]

View File

@@ -4,15 +4,14 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0003_auto_20200630_1034'),
("core", "0003_auto_20200630_1034"),
]
operations = [
migrations.AlterField(
model_name='snapshot',
name='timestamp',
model_name="snapshot",
name="timestamp",
field=models.CharField(db_index=True, default=None, max_length=32, unique=True),
preserve_default=False,
),

View File

@@ -4,25 +4,24 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0004_auto_20200713_1552'),
("core", "0004_auto_20200713_1552"),
]
operations = [
migrations.AlterField(
model_name='snapshot',
name='tags',
model_name="snapshot",
name="tags",
field=models.CharField(blank=True, db_index=True, max_length=256, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='title',
model_name="snapshot",
name="title",
field=models.CharField(blank=True, db_index=True, max_length=128, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='updated',
model_name="snapshot",
name="updated",
field=models.DateTimeField(blank=True, db_index=True, null=True),
),
]

View File

@@ -3,19 +3,18 @@
from django.db import migrations, models
from django.utils.text import slugify
def forwards_func(apps, schema_editor):
SnapshotModel = apps.get_model("core", "Snapshot")
TagModel = apps.get_model("core", "Tag")
snapshots = SnapshotModel.objects.all()
for snapshot in snapshots:
tag_set = (
set(tag.strip() for tag in (snapshot.tags_old or '').split(','))
)
tag_set = {tag.strip() for tag in (snapshot.tags_old or "").split(",")}
tag_set.discard("")
for tag in tag_set:
to_add, _ = TagModel.objects.get_or_create(name=tag, defaults={'slug': slugify(tag)})
to_add, _ = TagModel.objects.get_or_create(name=tag, defaults={"slug": slugify(tag)})
snapshot.tags.add(to_add)
@@ -30,37 +29,36 @@ def reverse_func(apps, schema_editor):
class Migration(migrations.Migration):
dependencies = [
('core', '0005_auto_20200728_0326'),
("core", "0005_auto_20200728_0326"),
]
operations = [
migrations.RenameField(
model_name='snapshot',
old_name='tags',
new_name='tags_old',
model_name="snapshot",
old_name="tags",
new_name="tags_old",
),
migrations.CreateModel(
name='Tag',
name="Tag",
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=100, unique=True, verbose_name='name')),
('slug', models.SlugField(max_length=100, unique=True, verbose_name='slug')),
("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
("name", models.CharField(max_length=100, unique=True, verbose_name="name")),
("slug", models.SlugField(max_length=100, unique=True, verbose_name="slug")),
],
options={
'verbose_name': 'Tag',
'verbose_name_plural': 'Tags',
"verbose_name": "Tag",
"verbose_name_plural": "Tags",
},
),
migrations.AddField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(to='core.Tag'),
model_name="snapshot",
name="tags",
field=models.ManyToManyField(to="core.Tag"),
),
migrations.RunPython(forwards_func, reverse_func),
migrations.RemoveField(
model_name='snapshot',
name='tags_old',
model_name="snapshot",
name="tags_old",
),
]

View File

@@ -9,13 +9,15 @@ import django.db.models.deletion
# Handle old vs new import paths
try:
from archivebox.config import CONSTANTS
ARCHIVE_DIR = CONSTANTS.ARCHIVE_DIR
except ImportError:
try:
from archivebox.config import CONFIG
ARCHIVE_DIR = Path(CONFIG.get('ARCHIVE_DIR', './archive'))
ARCHIVE_DIR = Path(CONFIG.get("ARCHIVE_DIR", "./archive"))
except ImportError:
ARCHIVE_DIR = Path('./archive')
ARCHIVE_DIR = Path("./archive")
try:
from archivebox.misc.util import to_json
@@ -29,6 +31,7 @@ try:
JSONField = models.JSONField
except AttributeError:
import jsonfield
JSONField = jsonfield.JSONField
@@ -41,7 +44,7 @@ def forwards_func(apps, schema_editor):
out_dir = Path(ARCHIVE_DIR) / snapshot.timestamp
try:
with open(out_dir / "index.json", "r") as f:
with open(out_dir / "index.json") as f:
fs_index = json.load(f)
except Exception:
continue
@@ -56,37 +59,46 @@ def forwards_func(apps, schema_editor):
snapshot=snapshot,
pwd=result["pwd"],
cmd=result.get("cmd") or [],
cmd_version=result.get("cmd_version") or 'unknown',
cmd_version=result.get("cmd_version") or "unknown",
start_ts=result["start_ts"],
end_ts=result["end_ts"],
status=result["status"],
output=result.get("output") or 'null',
output=result.get("output") or "null",
)
except Exception as e:
print(
' ! Skipping import due to missing/invalid index.json:',
" ! Skipping import due to missing/invalid index.json:",
out_dir,
e,
'(open an issue with this index.json for help)',
"(open an issue with this index.json for help)",
)
def verify_json_index_integrity(snapshot):
results = snapshot.archiveresult_set.all()
out_dir = Path(ARCHIVE_DIR) / snapshot.timestamp
with open(out_dir / "index.json", "r") as f:
with open(out_dir / "index.json") as f:
index = json.load(f)
history = index["history"]
index_results = [result for extractor in history for result in history[extractor]]
flattened_results = [result["start_ts"] for result in index_results]
missing_results = [result for result in results if result.start_ts.isoformat() not in flattened_results]
for missing in missing_results:
index["history"][missing.extractor].append({"cmd": missing.cmd, "cmd_version": missing.cmd_version, "end_ts": missing.end_ts.isoformat(),
"start_ts": missing.start_ts.isoformat(), "pwd": missing.pwd, "output": missing.output,
"schema": "ArchiveResult", "status": missing.status})
index["history"][missing.extractor].append(
{
"cmd": missing.cmd,
"cmd_version": missing.cmd_version,
"end_ts": missing.end_ts.isoformat(),
"start_ts": missing.start_ts.isoformat(),
"pwd": missing.pwd,
"output": missing.output,
"schema": "ArchiveResult",
"status": missing.status,
},
)
json_index = to_json(index)
with open(out_dir / "index.json", "w") as f:
@@ -103,25 +115,47 @@ def reverse_func(apps, schema_editor):
class Migration(migrations.Migration):
dependencies = [
('core', '0006_auto_20201012_1520'),
("core", "0006_auto_20201012_1520"),
]
operations = [
migrations.CreateModel(
name='ArchiveResult',
name="ArchiveResult",
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('cmd', JSONField()),
('pwd', models.CharField(max_length=256)),
('cmd_version', models.CharField(max_length=32)),
('status', models.CharField(choices=[('succeeded', 'succeeded'), ('failed', 'failed'), ('skipped', 'skipped')], max_length=16)),
('output', models.CharField(max_length=512)),
('start_ts', models.DateTimeField()),
('end_ts', models.DateTimeField()),
('extractor', models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('wget', 'wget'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('headers', 'headers'), ('archivedotorg', 'archivedotorg')], max_length=32)),
('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.Snapshot')),
("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
("cmd", JSONField()),
("pwd", models.CharField(max_length=256)),
("cmd_version", models.CharField(max_length=32)),
(
"status",
models.CharField(choices=[("succeeded", "succeeded"), ("failed", "failed"), ("skipped", "skipped")], max_length=16),
),
("output", models.CharField(max_length=512)),
("start_ts", models.DateTimeField()),
("end_ts", models.DateTimeField()),
(
"extractor",
models.CharField(
choices=[
("title", "title"),
("favicon", "favicon"),
("wget", "wget"),
("singlefile", "singlefile"),
("pdf", "pdf"),
("screenshot", "screenshot"),
("dom", "dom"),
("readability", "readability"),
("mercury", "mercury"),
("git", "git"),
("media", "media"),
("headers", "headers"),
("archivedotorg", "archivedotorg"),
],
max_length=32,
),
),
("snapshot", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="core.Snapshot")),
],
),
migrations.RunPython(forwards_func, reverse_func),

View File

@@ -4,15 +4,14 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0007_archiveresult'),
("core", "0007_archiveresult"),
]
operations = [
migrations.AlterField(
model_name='archiveresult',
name='cmd_version',
model_name="archiveresult",
name="cmd_version",
field=models.CharField(blank=True, default=None, max_length=32, null=True),
),
]

View File

@@ -4,15 +4,14 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0008_auto_20210105_1421'),
("core", "0008_auto_20210105_1421"),
]
operations = [
migrations.AlterField(
model_name='snapshot',
name='updated',
model_name="snapshot",
name="updated",
field=models.DateTimeField(auto_now=True, db_index=True, null=True),
),
]

View File

@@ -4,15 +4,14 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0009_auto_20210216_1038'),
("core", "0009_auto_20210216_1038"),
]
operations = [
migrations.AlterField(
model_name='archiveresult',
name='start_ts',
model_name="archiveresult",
name="start_ts",
field=models.DateTimeField(db_index=True),
),
]

View File

@@ -5,20 +5,36 @@ import uuid
class Migration(migrations.Migration):
dependencies = [
('core', '0010_auto_20210216_1055'),
("core", "0010_auto_20210216_1055"),
]
operations = [
migrations.AddField(
model_name='archiveresult',
name='uuid',
model_name="archiveresult",
name="uuid",
field=models.UUIDField(default=uuid.uuid4, editable=False),
),
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archivedotorg', 'archivedotorg')], max_length=32),
model_name="archiveresult",
name="extractor",
field=models.CharField(
choices=[
("title", "title"),
("favicon", "favicon"),
("headers", "headers"),
("singlefile", "singlefile"),
("pdf", "pdf"),
("screenshot", "screenshot"),
("dom", "dom"),
("wget", "wget"),
("readability", "readability"),
("mercury", "mercury"),
("git", "git"),
("media", "media"),
("archivedotorg", "archivedotorg"),
],
max_length=32,
),
),
]

View File

@@ -4,20 +4,19 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0011_auto_20210216_1331'),
("core", "0011_auto_20210216_1331"),
]
operations = [
migrations.AlterField(
model_name='archiveresult',
name='cmd_version',
model_name="archiveresult",
name="cmd_version",
field=models.CharField(blank=True, default=None, max_length=128, null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='output',
model_name="archiveresult",
name="output",
field=models.CharField(max_length=1024),
),
]

View File

@@ -4,15 +4,14 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0012_auto_20210216_1425'),
("core", "0012_auto_20210216_1425"),
]
operations = [
migrations.AlterField(
model_name='snapshot',
name='title',
model_name="snapshot",
name="title",
field=models.CharField(blank=True, db_index=True, max_length=256, null=True),
),
]

View File

@@ -4,15 +4,14 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0013_auto_20210218_0729'),
("core", "0013_auto_20210218_0729"),
]
operations = [
migrations.AlterField(
model_name='snapshot',
name='title',
model_name="snapshot",
name="title",
field=models.CharField(blank=True, db_index=True, max_length=1024, null=True),
),
]

View File

@@ -4,15 +4,14 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0014_auto_20210218_0729'),
("core", "0014_auto_20210218_0729"),
]
operations = [
migrations.AlterField(
model_name='snapshot',
name='title',
model_name="snapshot",
name="title",
field=models.CharField(blank=True, db_index=True, max_length=512, null=True),
),
]

View File

@@ -4,15 +4,14 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0015_auto_20210218_0730'),
("core", "0015_auto_20210218_0730"),
]
operations = [
migrations.AlterField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(blank=True, to='core.Tag'),
model_name="snapshot",
name="tags",
field=models.ManyToManyField(blank=True, to="core.Tag"),
),
]

View File

@@ -4,15 +4,14 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0016_auto_20210218_1204'),
("core", "0016_auto_20210218_1204"),
]
operations = [
migrations.AlterField(
model_name='tag',
name='slug',
field=models.SlugField(blank=True, max_length=100, unique=True, verbose_name='slug'),
model_name="tag",
name="slug",
field=models.SlugField(blank=True, max_length=100, unique=True, verbose_name="slug"),
),
]

View File

@@ -4,20 +4,19 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0017_auto_20210219_0211'),
("core", "0017_auto_20210219_0211"),
]
operations = [
migrations.AlterField(
model_name='tag',
name='name',
model_name="tag",
name="name",
field=models.CharField(max_length=100, unique=True),
),
migrations.AlterField(
model_name='tag',
name='slug',
model_name="tag",
name="slug",
field=models.SlugField(blank=True, max_length=100, unique=True),
),
]

View File

@@ -4,15 +4,14 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0018_auto_20210327_0952'),
("core", "0018_auto_20210327_0952"),
]
operations = [
migrations.AlterField(
model_name='snapshot',
name='url',
model_name="snapshot",
name="url",
field=models.URLField(db_index=True, unique=True),
),
]

View File

@@ -4,20 +4,19 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0019_auto_20210401_0654'),
("core", "0019_auto_20210401_0654"),
]
operations = [
migrations.AlterField(
model_name='archiveresult',
name='id',
field=models.AutoField(primary_key=True, serialize=False, verbose_name='ID'),
model_name="archiveresult",
name="id",
field=models.AutoField(primary_key=True, serialize=False, verbose_name="ID"),
),
migrations.AlterField(
model_name='tag',
name='id',
field=models.AutoField(primary_key=True, serialize=False, verbose_name='ID'),
model_name="tag",
name="id",
field=models.AutoField(primary_key=True, serialize=False, verbose_name="ID"),
),
]

View File

@@ -4,15 +4,31 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0020_auto_20210410_1031'),
("core", "0020_auto_20210410_1031"),
]
operations = [
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(choices=[('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('title', 'title'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archivedotorg', 'archivedotorg')], max_length=32),
model_name="archiveresult",
name="extractor",
field=models.CharField(
choices=[
("favicon", "favicon"),
("headers", "headers"),
("singlefile", "singlefile"),
("pdf", "pdf"),
("screenshot", "screenshot"),
("dom", "dom"),
("wget", "wget"),
("title", "title"),
("readability", "readability"),
("mercury", "mercury"),
("git", "git"),
("media", "media"),
("archivedotorg", "archivedotorg"),
],
max_length=32,
),
),
]

View File

@@ -4,15 +4,32 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0021_auto_20220914_0934'),
("core", "0021_auto_20220914_0934"),
]
operations = [
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(choices=[('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('title', 'title'), ('readability', 'readability'), ('mercury', 'mercury'), ('htmltotext', 'htmltotext'), ('git', 'git'), ('media', 'media'), ('archivedotorg', 'archivedotorg')], max_length=32),
model_name="archiveresult",
name="extractor",
field=models.CharField(
choices=[
("favicon", "favicon"),
("headers", "headers"),
("singlefile", "singlefile"),
("pdf", "pdf"),
("screenshot", "screenshot"),
("dom", "dom"),
("wget", "wget"),
("title", "title"),
("readability", "readability"),
("mercury", "mercury"),
("htmltotext", "htmltotext"),
("git", "git"),
("media", "media"),
("archivedotorg", "archivedotorg"),
],
max_length=32,
),
),
]

View File

@@ -16,6 +16,7 @@ def get_table_columns(table_name):
def upgrade_core_tables(apps, schema_editor):
"""Upgrade core tables from v0.7.2 or v0.8.6rc0 to v0.9.0."""
from archivebox.uuid_compat import uuid7
cursor = connection.cursor()
# Check if core_archiveresult table exists
@@ -30,11 +31,11 @@ def upgrade_core_tables(apps, schema_editor):
has_data = row_count > 0
# Detect which version we're migrating from
archiveresult_cols = get_table_columns('core_archiveresult')
has_uuid = 'uuid' in archiveresult_cols
has_abid = 'abid' in archiveresult_cols
archiveresult_cols = get_table_columns("core_archiveresult")
has_uuid = "uuid" in archiveresult_cols
has_abid = "abid" in archiveresult_cols
print(f'DEBUG: ArchiveResult row_count={row_count}, has_data={has_data}, has_uuid={has_uuid}, has_abid={has_abid}')
print(f"DEBUG: ArchiveResult row_count={row_count}, has_data={has_data}, has_uuid={has_uuid}, has_abid={has_abid}")
# ============================================================================
# PART 1: Upgrade core_archiveresult table
@@ -62,7 +63,7 @@ def upgrade_core_tables(apps, schema_editor):
if has_data:
if has_uuid and not has_abid:
# Migrating from v0.7.2+ (has uuid column)
print('Migrating ArchiveResult from v0.7.2+ schema (with uuid)...')
print("Migrating ArchiveResult from v0.7.2+ schema (with uuid)...")
cursor.execute("""
INSERT OR IGNORE INTO core_archiveresult_new (
id, uuid, snapshot_id, cmd, pwd, cmd_version,
@@ -75,7 +76,7 @@ def upgrade_core_tables(apps, schema_editor):
""")
elif has_abid and not has_uuid:
# Migrating from v0.8.6rc0 (has abid instead of uuid)
print('Migrating ArchiveResult from v0.8.6rc0 schema...')
print("Migrating ArchiveResult from v0.8.6rc0 schema...")
cursor.execute("""
INSERT OR IGNORE INTO core_archiveresult_new (
id, uuid, snapshot_id, cmd, pwd, cmd_version,
@@ -88,17 +89,34 @@ def upgrade_core_tables(apps, schema_editor):
""")
else:
# Migrating from v0.7.2 (no uuid or abid column - generate fresh UUIDs)
print('Migrating ArchiveResult from v0.7.2 schema (no uuid - generating UUIDs)...')
cursor.execute("SELECT id, snapshot_id, cmd, pwd, cmd_version, start_ts, end_ts, status, extractor, output FROM core_archiveresult")
print("Migrating ArchiveResult from v0.7.2 schema (no uuid - generating UUIDs)...")
cursor.execute(
"SELECT id, snapshot_id, cmd, pwd, cmd_version, start_ts, end_ts, status, extractor, output FROM core_archiveresult",
)
old_records = cursor.fetchall()
for record in old_records:
new_uuid = uuid7().hex
cursor.execute("""
cursor.execute(
"""
INSERT OR IGNORE INTO core_archiveresult_new (
id, uuid, snapshot_id, cmd, pwd, cmd_version,
start_ts, end_ts, status, extractor, output
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (record[0], new_uuid, record[1], record[2], record[3], record[4], record[5], record[6], record[7], record[8], record[9]))
""",
(
record[0],
new_uuid,
record[1],
record[2],
record[3],
record[4],
record[5],
record[6],
record[7],
record[8],
record[9],
),
)
cursor.execute("DROP TABLE IF EXISTS core_archiveresult;")
cursor.execute("ALTER TABLE core_archiveresult_new RENAME TO core_archiveresult;")
@@ -149,13 +167,13 @@ def upgrade_core_tables(apps, schema_editor):
if snapshot_has_data:
# Detect which version we're migrating from
snapshot_cols = get_table_columns('core_snapshot')
has_added = 'added' in snapshot_cols
has_bookmarked_at = 'bookmarked_at' in snapshot_cols
snapshot_cols = get_table_columns("core_snapshot")
has_added = "added" in snapshot_cols
has_bookmarked_at = "bookmarked_at" in snapshot_cols
if has_added and not has_bookmarked_at:
# Migrating from v0.7.2 (has added/updated fields)
print('Migrating Snapshot from v0.7.2 schema...')
print("Migrating Snapshot from v0.7.2 schema...")
# Transform added→bookmarked_at/created_at and updated→modified_at
cursor.execute("""
INSERT OR IGNORE INTO core_snapshot_new (
@@ -173,28 +191,28 @@ def upgrade_core_tables(apps, schema_editor):
""")
elif has_bookmarked_at and not has_added:
# Migrating from v0.8.6rc0 (already has bookmarked_at/created_at/modified_at)
print('Migrating Snapshot from v0.8.6rc0 schema...')
print("Migrating Snapshot from v0.8.6rc0 schema...")
# Check what fields exist
has_status = 'status' in snapshot_cols
has_retry_at = 'retry_at' in snapshot_cols
has_crawl_id = 'crawl_id' in snapshot_cols
has_status = "status" in snapshot_cols
has_retry_at = "retry_at" in snapshot_cols
has_crawl_id = "crawl_id" in snapshot_cols
# Build column list based on what exists
cols = ['id', 'url', 'timestamp', 'title', 'bookmarked_at', 'created_at', 'modified_at', 'downloaded_at']
cols = ["id", "url", "timestamp", "title", "bookmarked_at", "created_at", "modified_at", "downloaded_at"]
if has_crawl_id:
cols.append('crawl_id')
cols.append("crawl_id")
if has_status:
cols.append('status')
cols.append("status")
if has_retry_at:
cols.append('retry_at')
cols.append("retry_at")
cursor.execute(f"""
INSERT OR IGNORE INTO core_snapshot_new ({', '.join(cols)})
SELECT {', '.join(cols)}
INSERT OR IGNORE INTO core_snapshot_new ({", ".join(cols)})
SELECT {", ".join(cols)}
FROM core_snapshot;
""")
else:
print(f'Warning: Unexpected Snapshot schema - has_added={has_added}, has_bookmarked_at={has_bookmarked_at}')
print(f"Warning: Unexpected Snapshot schema - has_added={has_added}, has_bookmarked_at={has_bookmarked_at}")
cursor.execute("DROP TABLE IF EXISTS core_snapshot;")
cursor.execute("ALTER TABLE core_snapshot_new RENAME TO core_snapshot;")
@@ -237,13 +255,13 @@ def upgrade_core_tables(apps, schema_editor):
cursor.execute("PRAGMA table_info(core_tag)")
tag_id_type = None
for row in cursor.fetchall():
if row[1] == 'id': # row[1] is column name
if row[1] == "id": # row[1] is column name
tag_id_type = row[2] # row[2] is type
break
if tag_id_type and 'char' in tag_id_type.lower():
if tag_id_type and "char" in tag_id_type.lower():
# v0.8.6rc0: Tag IDs are UUIDs, need to convert to INTEGER
print('Converting Tag IDs from UUID to INTEGER...')
print("Converting Tag IDs from UUID to INTEGER...")
# Get all tags with their UUIDs
cursor.execute("SELECT id, name, slug, created_at, modified_at, created_by_id FROM core_tag ORDER BY name")
@@ -255,10 +273,13 @@ def upgrade_core_tables(apps, schema_editor):
old_id, name, slug, created_at, modified_at, created_by_id = tag
uuid_to_int_map[old_id] = i
# Insert with new INTEGER ID
cursor.execute("""
cursor.execute(
"""
INSERT OR IGNORE INTO core_tag_new (id, name, slug, created_at, modified_at, created_by_id)
VALUES (?, ?, ?, ?, ?, ?)
""", (i, name, slug, created_at, modified_at, created_by_id))
""",
(i, name, slug, created_at, modified_at, created_by_id),
)
# Update snapshot_tags to use new INTEGER IDs
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot_tags'")
@@ -273,13 +294,16 @@ def upgrade_core_tables(apps, schema_editor):
for st_id, snapshot_id, old_tag_id in snapshot_tags:
new_tag_id = uuid_to_int_map.get(old_tag_id)
if new_tag_id:
cursor.execute("""
cursor.execute(
"""
INSERT OR IGNORE INTO core_snapshot_tags (id, snapshot_id, tag_id)
VALUES (?, ?, ?)
""", (st_id, snapshot_id, new_tag_id))
""",
(st_id, snapshot_id, new_tag_id),
)
else:
# v0.7.2: Tag IDs are already INTEGER
print('Migrating Tag from v0.7.2 schema...')
print("Migrating Tag from v0.7.2 schema...")
cursor.execute("""
INSERT OR IGNORE INTO core_tag_new (id, name, slug)
SELECT id, name, slug
@@ -294,15 +318,14 @@ def upgrade_core_tables(apps, schema_editor):
cursor.execute("CREATE INDEX IF NOT EXISTS core_tag_created_by_id_idx ON core_tag(created_by_id);")
if has_data:
print('✓ Core tables upgraded to v0.9.0')
print("✓ Core tables upgraded to v0.9.0")
class Migration(migrations.Migration):
dependencies = [
('core', '0022_auto_20231023_2008'),
('crawls', '0001_initial'),
('auth', '0012_alter_user_first_name_max_length'),
("core", "0022_auto_20231023_2008"),
("crawls", "0001_initial"),
("auth", "0012_alter_user_first_name_max_length"),
]
operations = [
@@ -317,60 +340,58 @@ class Migration(migrations.Migration):
# NOTE: We do NOT remove extractor/output for ArchiveResult!
# They are still in the database and will be removed by migration 0025
# after copying their data to plugin/output_str.
# However, for Snapshot, we DO remove added/updated and ADD the new timestamp fields
# because the SQL above already transformed them.
migrations.RemoveField(model_name='snapshot', name='added'),
migrations.RemoveField(model_name='snapshot', name='updated'),
migrations.RemoveField(model_name="snapshot", name="added"),
migrations.RemoveField(model_name="snapshot", name="updated"),
migrations.AddField(
model_name='snapshot',
name='bookmarked_at',
model_name="snapshot",
name="bookmarked_at",
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='snapshot',
name='created_at',
model_name="snapshot",
name="created_at",
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='snapshot',
name='modified_at',
model_name="snapshot",
name="modified_at",
field=models.DateTimeField(auto_now=True),
),
# Declare fs_version (already created in database with DEFAULT '0.8.0')
migrations.AddField(
model_name='snapshot',
name='fs_version',
model_name="snapshot",
name="fs_version",
field=models.CharField(
max_length=10,
default='0.8.0',
help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().'
default="0.8.0",
help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().',
),
),
# SnapshotTag table already exists from v0.7.2, just declare it in state
migrations.CreateModel(
name='SnapshotTag',
name="SnapshotTag",
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('snapshot', models.ForeignKey(to='core.Snapshot', db_column='snapshot_id', on_delete=models.CASCADE)),
('tag', models.ForeignKey(to='core.Tag', db_column='tag_id', on_delete=models.CASCADE)),
("id", models.AutoField(primary_key=True, serialize=False)),
("snapshot", models.ForeignKey(to="core.Snapshot", db_column="snapshot_id", on_delete=models.CASCADE)),
("tag", models.ForeignKey(to="core.Tag", db_column="tag_id", on_delete=models.CASCADE)),
],
options={
'db_table': 'core_snapshot_tags',
'unique_together': {('snapshot', 'tag')},
"db_table": "core_snapshot_tags",
"unique_together": {("snapshot", "tag")},
},
),
# Declare that Snapshot.tags M2M already uses through=SnapshotTag (from v0.7.2)
migrations.AlterField(
model_name='snapshot',
name='tags',
model_name="snapshot",
name="tags",
field=models.ManyToManyField(
'Tag',
"Tag",
blank=True,
related_name='snapshot_set',
through='SnapshotTag',
through_fields=('snapshot', 'tag'),
related_name="snapshot_set",
through="SnapshotTag",
through_fields=("snapshot", "tag"),
),
),
],

View File

@@ -20,23 +20,27 @@ def create_default_crawl_and_assign_snapshots(apps, schema_editor):
snapshots_without_crawl = cursor.fetchone()[0]
if snapshots_without_crawl == 0:
print('✓ Fresh install or all snapshots already have crawls')
print("✓ Fresh install or all snapshots already have crawls")
return
# Get or create system user (pk=1)
cursor.execute("SELECT id FROM auth_user WHERE id = 1")
if not cursor.fetchone():
cursor.execute("""
cursor.execute(
"""
INSERT INTO auth_user (id, password, is_superuser, username, first_name, last_name, email, is_staff, is_active, date_joined)
VALUES (1, '!', 1, 'system', '', '', '', 1, 1, ?)
""", [datetime.now().isoformat()])
""",
[datetime.now().isoformat()],
)
# Create a default crawl for migrated snapshots
# At this point crawls_crawl is guaranteed to have v0.9.0 schema (crawls/0002 ran first)
crawl_id = str(uuid_lib.uuid4())
now = datetime.now().isoformat()
cursor.execute("""
cursor.execute(
"""
INSERT INTO crawls_crawl (
id, created_at, modified_at, num_uses_succeeded, num_uses_failed,
urls, max_depth, tags_str, label, notes, output_dir,
@@ -44,20 +48,21 @@ def create_default_crawl_and_assign_snapshots(apps, schema_editor):
) VALUES (?, ?, ?, 0, 0, '', 0, '', 'Migrated from v0.7.2/v0.8.6',
'Auto-created crawl for migrated snapshots', '',
'sealed', ?, 1, NULL, '{}', NULL)
""", [crawl_id, now, now, now])
""",
[crawl_id, now, now, now],
)
# Assign all snapshots without a crawl to the default crawl
cursor.execute("UPDATE core_snapshot SET crawl_id = ? WHERE crawl_id IS NULL", [crawl_id])
print(f'✓ Assigned {snapshots_without_crawl} snapshots to default crawl {crawl_id}')
print(f"✓ Assigned {snapshots_without_crawl} snapshots to default crawl {crawl_id}")
class Migration(migrations.Migration):
dependencies = [
('core', '0023_upgrade_to_0_9_0'),
('crawls', '0002_upgrade_from_0_8_6'),
('auth', '0012_alter_user_first_name_max_length'),
("core", "0023_upgrade_to_0_9_0"),
("crawls", "0002_upgrade_from_0_8_6"),
("auth", "0012_alter_user_first_name_max_length"),
]
operations = [
@@ -137,12 +142,12 @@ class Migration(migrations.Migration):
],
state_operations=[
migrations.AddField(
model_name='snapshot',
name='crawl',
model_name="snapshot",
name="crawl",
field=models.ForeignKey(
on_delete=models.deletion.CASCADE,
to='crawls.crawl',
help_text='Crawl that created this snapshot'
to="crawls.crawl",
help_text="Crawl that created this snapshot",
),
),
],

View File

@@ -17,20 +17,24 @@ def copy_old_fields_to_new(apps, schema_editor):
cursor.execute("PRAGMA table_info(core_archiveresult)")
cols = {row[1] for row in cursor.fetchall()}
if 'extractor' in cols and 'plugin' in cols:
if "extractor" in cols and "plugin" in cols:
# Copy extractor -> plugin
cursor.execute("UPDATE core_archiveresult SET plugin = COALESCE(extractor, '') WHERE plugin = '' OR plugin IS NULL")
if 'output' in cols and 'output_str' in cols:
if "output" in cols and "output_str" in cols:
# Copy output -> output_str
cursor.execute("UPDATE core_archiveresult SET output_str = COALESCE(output, '') WHERE output_str = '' OR output_str IS NULL")
# Copy timestamps to new timestamp fields if they don't have values yet
if 'start_ts' in cols and 'created_at' in cols:
cursor.execute("UPDATE core_archiveresult SET created_at = COALESCE(start_ts, CURRENT_TIMESTAMP) WHERE created_at IS NULL OR created_at = ''")
if "start_ts" in cols and "created_at" in cols:
cursor.execute(
"UPDATE core_archiveresult SET created_at = COALESCE(start_ts, CURRENT_TIMESTAMP) WHERE created_at IS NULL OR created_at = ''",
)
if 'end_ts' in cols and 'modified_at' in cols:
cursor.execute("UPDATE core_archiveresult SET modified_at = COALESCE(end_ts, start_ts, CURRENT_TIMESTAMP) WHERE modified_at IS NULL OR modified_at = ''")
if "end_ts" in cols and "modified_at" in cols:
cursor.execute(
"UPDATE core_archiveresult SET modified_at = COALESCE(end_ts, start_ts, CURRENT_TIMESTAMP) WHERE modified_at IS NULL OR modified_at = ''",
)
# NOTE: Snapshot timestamps (added→bookmarked_at, updated→modified_at) were already
# transformed by migration 0023, so we don't need to copy them here.
@@ -39,164 +43,191 @@ def copy_old_fields_to_new(apps, schema_editor):
# Debug: Check Snapshot timestamps at end of RunPython
cursor.execute("SELECT id, bookmarked_at, modified_at FROM core_snapshot LIMIT 2")
snap_after = cursor.fetchall()
print(f'DEBUG 0025: Snapshot timestamps at END of RunPython: {snap_after}')
print(f"DEBUG 0025: Snapshot timestamps at END of RunPython: {snap_after}")
class Migration(migrations.Migration):
dependencies = [
('core', '0024_assign_default_crawl'),
('crawls', '0001_initial'),
("core", "0024_assign_default_crawl"),
("crawls", "0001_initial"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.AlterModelOptions(
name='archiveresult',
options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'},
name="archiveresult",
options={"verbose_name": "Archive Result", "verbose_name_plural": "Archive Results Log"},
),
migrations.AlterModelOptions(
name='snapshot',
options={'verbose_name': 'Snapshot', 'verbose_name_plural': 'Snapshots'},
name="snapshot",
options={"verbose_name": "Snapshot", "verbose_name_plural": "Snapshots"},
),
# NOTE: RemoveField for cmd, cmd_version, pwd moved to migration 0027
# to allow data migration to Process records first
migrations.AddField(
model_name='archiveresult',
name='config',
model_name="archiveresult",
name="config",
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AddField(
model_name='archiveresult',
name='created_at',
model_name="archiveresult",
name="created_at",
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='archiveresult',
name='hook_name',
field=models.CharField(blank=True, db_index=True, default='', help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)', max_length=255),
model_name="archiveresult",
name="hook_name",
field=models.CharField(
blank=True,
db_index=True,
default="",
help_text="Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)",
max_length=255,
),
),
migrations.AddField(
model_name='archiveresult',
name='modified_at',
model_name="archiveresult",
name="modified_at",
field=models.DateTimeField(auto_now=True),
),
migrations.AddField(
model_name='archiveresult',
name='notes',
field=models.TextField(blank=True, default=''),
model_name="archiveresult",
name="notes",
field=models.TextField(blank=True, default=""),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_failed',
model_name="archiveresult",
name="num_uses_failed",
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_succeeded',
model_name="archiveresult",
name="num_uses_succeeded",
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
model_name="archiveresult",
name="output_files",
field=models.JSONField(default=dict, help_text="Dict of {relative_path: {metadata}}"),
),
migrations.AddField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
model_name="archiveresult",
name="output_json",
field=models.JSONField(blank=True, default=None, help_text="Structured metadata (headers, redirects, etc.)", null=True),
),
migrations.AddField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
model_name="archiveresult",
name="output_mimetypes",
field=models.CharField(blank=True, default="", help_text="CSV of mimetypes sorted by size", max_length=512),
),
migrations.AddField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
model_name="archiveresult",
name="output_size",
field=models.BigIntegerField(default=0, help_text="Total bytes of all output files"),
),
migrations.AddField(
model_name='archiveresult',
name='output_str',
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
model_name="archiveresult",
name="output_str",
field=models.TextField(blank=True, default="", help_text="Human-readable output summary"),
),
migrations.AddField(
model_name='archiveresult',
name='plugin',
field=models.CharField(db_index=True, default='', max_length=32),
model_name="archiveresult",
name="plugin",
field=models.CharField(db_index=True, default="", max_length=32),
),
migrations.AddField(
model_name='archiveresult',
name='retry_at',
model_name="archiveresult",
name="retry_at",
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
),
# NOTE: bookmarked_at and created_at already added by migration 0023
migrations.AddField(
model_name='snapshot',
name='config',
model_name="snapshot",
name="config",
field=models.JSONField(default=dict),
),
migrations.AddField(
model_name='snapshot',
name='current_step',
field=models.PositiveSmallIntegerField(db_index=True, default=0, help_text='Current hook step being executed (0-9). Used for sequential hook execution.'),
model_name="snapshot",
name="current_step",
field=models.PositiveSmallIntegerField(
db_index=True,
default=0,
help_text="Current hook step being executed (0-9). Used for sequential hook execution.",
),
),
migrations.AddField(
model_name='snapshot',
name='depth',
model_name="snapshot",
name="depth",
field=models.PositiveSmallIntegerField(db_index=True, default=0),
),
migrations.AddField(
model_name='snapshot',
name='downloaded_at',
model_name="snapshot",
name="downloaded_at",
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
),
# NOTE: fs_version already added by migration 0023 with default='0.8.0'
# NOTE: modified_at already added by migration 0023
migrations.AddField(
model_name='snapshot',
name='notes',
field=models.TextField(blank=True, default=''),
model_name="snapshot",
name="notes",
field=models.TextField(blank=True, default=""),
),
migrations.AddField(
model_name='snapshot',
name='num_uses_failed',
model_name="snapshot",
name="num_uses_failed",
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='snapshot',
name='num_uses_succeeded',
model_name="snapshot",
name="num_uses_succeeded",
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='snapshot',
name='parent_snapshot',
field=models.ForeignKey(blank=True, help_text='Parent snapshot that discovered this URL (for recursive crawling)', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='child_snapshots', to='core.snapshot'),
model_name="snapshot",
name="parent_snapshot",
field=models.ForeignKey(
blank=True,
help_text="Parent snapshot that discovered this URL (for recursive crawling)",
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="child_snapshots",
to="core.snapshot",
),
),
migrations.AddField(
model_name='snapshot',
name='retry_at',
model_name="snapshot",
name="retry_at",
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
),
migrations.AddField(
model_name='snapshot',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('sealed', 'Sealed')], db_index=True, default='queued', max_length=15),
model_name="snapshot",
name="status",
field=models.CharField(
choices=[("queued", "Queued"), ("started", "Started"), ("sealed", "Sealed")],
db_index=True,
default="queued",
max_length=15,
),
),
migrations.AddField(
model_name='tag',
name='created_at',
model_name="tag",
name="created_at",
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, null=True),
),
migrations.AddField(
model_name='tag',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
model_name="tag",
name="created_by",
field=models.ForeignKey(
default=archivebox.base_models.models.get_or_create_system_user_pk,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="tag_set",
to=settings.AUTH_USER_MODEL,
),
),
migrations.AddField(
model_name='tag',
name='modified_at',
model_name="tag",
name="modified_at",
field=models.DateTimeField(auto_now=True),
),
# Copy data from old field names to new field names after AddField operations
@@ -206,75 +237,93 @@ class Migration(migrations.Migration):
),
# Now remove the old ArchiveResult fields after data has been copied
migrations.RemoveField(
model_name='archiveresult',
name='extractor',
model_name="archiveresult",
name="extractor",
),
migrations.RemoveField(
model_name='archiveresult',
name='output',
model_name="archiveresult",
name="output",
),
# NOTE: Snapshot's added/updated were already removed by migration 0023
migrations.AlterField(
model_name='archiveresult',
name='end_ts',
model_name="archiveresult",
name="end_ts",
field=models.DateTimeField(blank=True, default=None, null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='id',
model_name="archiveresult",
name="id",
field=models.AutoField(editable=False, primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='archiveresult',
name='start_ts',
model_name="archiveresult",
name="start_ts",
field=models.DateTimeField(blank=True, default=None, null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
model_name="archiveresult",
name="status",
field=models.CharField(
choices=[
("queued", "Queued"),
("started", "Started"),
("backoff", "Waiting to retry"),
("succeeded", "Succeeded"),
("failed", "Failed"),
("skipped", "Skipped"),
],
db_index=True,
default="queued",
max_length=15,
),
),
migrations.AlterField(
model_name='archiveresult',
name='uuid',
model_name="archiveresult",
name="uuid",
field=models.UUIDField(blank=True, db_index=True, default=uuid7, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='crawl',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
model_name="snapshot",
name="crawl",
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name="snapshot_set", to="crawls.crawl"),
),
migrations.AlterField(
model_name='snapshot',
name='id',
model_name="snapshot",
name="id",
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
model_name="snapshot",
name="tags",
field=models.ManyToManyField(
blank=True,
related_name="snapshot_set",
through="core.SnapshotTag",
through_fields=("snapshot", "tag"),
to="core.tag",
),
),
migrations.AlterField(
model_name='snapshot',
name='timestamp',
model_name="snapshot",
name="timestamp",
field=models.CharField(db_index=True, editable=False, max_length=32, unique=True),
),
migrations.AlterField(
model_name='snapshot',
name='url',
model_name="snapshot",
name="url",
field=models.URLField(db_index=True),
),
migrations.AlterField(
model_name='tag',
name='slug',
model_name="tag",
name="slug",
field=models.SlugField(editable=False, max_length=100, unique=True),
),
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=('url', 'crawl'), name='unique_url_per_crawl'),
model_name="snapshot",
constraint=models.UniqueConstraint(fields=("url", "crawl"), name="unique_url_per_crawl"),
),
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
model_name="snapshot",
constraint=models.UniqueConstraint(fields=("timestamp",), name="unique_timestamp"),
),
]

View File

@@ -5,24 +5,30 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0025_alter_archiveresult_options_alter_snapshot_options_and_more'),
('machine', '0007_add_process_type_and_parent'),
("core", "0025_alter_archiveresult_options_alter_snapshot_options_and_more"),
("machine", "0007_add_process_type_and_parent"),
]
operations = [
migrations.RemoveField(
model_name='archiveresult',
name='num_uses_failed',
model_name="archiveresult",
name="num_uses_failed",
),
migrations.RemoveField(
model_name='archiveresult',
name='num_uses_succeeded',
model_name="archiveresult",
name="num_uses_succeeded",
),
migrations.AddField(
model_name='archiveresult',
name='process',
field=models.OneToOneField(blank=True, help_text='Process execution details for this archive result', null=True, on_delete=django.db.models.deletion.PROTECT, related_name='archiveresult', to='machine.process'),
model_name="archiveresult",
name="process",
field=models.OneToOneField(
blank=True,
help_text="Process execution details for this archive result",
null=True,
on_delete=django.db.models.deletion.PROTECT,
related_name="archiveresult",
to="machine.process",
),
),
]

View File

@@ -25,7 +25,7 @@ def parse_cmd_field(cmd_raw):
return []
# Try to parse as JSON first
if cmd_raw.startswith('['):
if cmd_raw.startswith("["):
try:
parsed = json.loads(cmd_raw)
if isinstance(parsed, list):
@@ -45,7 +45,7 @@ def get_or_create_current_machine(cursor):
# Simple machine detection - get hostname as guid
hostname = socket.gethostname()
guid = f'host_{hostname}' # Simple but stable identifier
guid = f"host_{hostname}" # Simple but stable identifier
# Check if machine exists
cursor.execute("SELECT id FROM machine_machine WHERE guid = ?", [guid])
@@ -64,9 +64,10 @@ def get_or_create_current_machine(cursor):
machine_cols = {row[1] for row in cursor.fetchall()}
# Build INSERT statement based on available columns
if 'config' in machine_cols:
if "config" in machine_cols:
# 0.9.x schema with config column
cursor.execute("""
cursor.execute(
"""
INSERT INTO machine_machine (
id, created_at, modified_at, guid, hostname,
hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
@@ -74,10 +75,13 @@ def get_or_create_current_machine(cursor):
stats, config, num_uses_failed, num_uses_succeeded
) VALUES (?, ?, ?, ?, ?, 0, 0, '', '', '',
'', '', '', '', '', '{}', '{}', 0, 0)
""", [machine_id, now, now, guid, hostname])
""",
[machine_id, now, now, guid, hostname],
)
else:
# 0.8.x schema without config column
cursor.execute("""
cursor.execute(
"""
INSERT INTO machine_machine (
id, created_at, modified_at, guid, hostname,
hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
@@ -85,7 +89,9 @@ def get_or_create_current_machine(cursor):
stats, num_uses_failed, num_uses_succeeded
) VALUES (?, ?, ?, ?, ?, 0, 0, '', '', '',
'', '', '', '', '', '{}', 0, 0)
""", [machine_id, now, now, guid, hostname])
""",
[machine_id, now, now, guid, hostname],
)
return machine_id
@@ -108,15 +114,18 @@ def get_or_create_binary(cursor, machine_id, name, abspath, version):
# If abspath is just a name without slashes, it's not a full path
# Store it in both fields for simplicity
if '/' not in abspath:
if "/" not in abspath:
# Not a full path - store as-is
pass
# Check if binary exists with same machine, name, abspath, version
cursor.execute("""
cursor.execute(
"""
SELECT id FROM machine_binary
WHERE machine_id = ? AND name = ? AND abspath = ? AND version = ?
""", [machine_id, name, abspath, version])
""",
[machine_id, name, abspath, version],
)
row = cursor.fetchone()
if row:
@@ -134,9 +143,10 @@ def get_or_create_binary(cursor, machine_id, name, abspath, version):
# Use only columns that exist in current schema
# 0.8.x schema: id, created_at, modified_at, machine_id, name, binprovider, abspath, version, sha256, num_uses_failed, num_uses_succeeded
# 0.9.x schema adds: binproviders, overrides, status, retry_at, output_dir
if 'binproviders' in binary_cols:
if "binproviders" in binary_cols:
# 0.9.x schema
cursor.execute("""
cursor.execute(
"""
INSERT INTO machine_binary (
id, created_at, modified_at, machine_id,
name, binproviders, overrides, binprovider, abspath, version, sha256,
@@ -144,16 +154,21 @@ def get_or_create_binary(cursor, machine_id, name, abspath, version):
num_uses_failed, num_uses_succeeded
) VALUES (?, ?, ?, ?, ?, 'env', '{}', 'env', ?, ?, '',
'succeeded', NULL, '', 0, 0)
""", [binary_id, now, now, machine_id, name, abspath, version])
""",
[binary_id, now, now, machine_id, name, abspath, version],
)
else:
# 0.8.x schema (simpler)
cursor.execute("""
cursor.execute(
"""
INSERT INTO machine_binary (
id, created_at, modified_at, machine_id,
name, binprovider, abspath, version, sha256,
num_uses_failed, num_uses_succeeded
) VALUES (?, ?, ?, ?, ?, 'env', ?, ?, '', 0, 0)
""", [binary_id, now, now, machine_id, name, abspath, version])
""",
[binary_id, now, now, machine_id, name, abspath, version],
)
return binary_id
@@ -169,15 +184,15 @@ def map_status(old_status):
(process_status, exit_code) tuple
"""
status_map = {
'queued': ('queued', None),
'started': ('running', None),
'backoff': ('queued', None),
'succeeded': ('exited', 0),
'failed': ('exited', 1),
'skipped': ('exited', None), # Skipped = exited without error
"queued": ("queued", None),
"started": ("running", None),
"backoff": ("queued", None),
"succeeded": ("exited", 0),
"failed": ("exited", 1),
"skipped": ("exited", None), # Skipped = exited without error
}
return status_map.get(old_status, ('queued', None))
return status_map.get(old_status, ("queued", None))
def create_process(cursor, machine_id, pwd, cmd, status, exit_code, started_at, ended_at, binary_id):
@@ -197,9 +212,10 @@ def create_process(cursor, machine_id, pwd, cmd, status, exit_code, started_at,
cmd_json = json.dumps(cmd)
# Set retry_at to now for queued processes, NULL otherwise
retry_at = now if status == 'queued' else None
retry_at = now if status == "queued" else None
cursor.execute("""
cursor.execute(
"""
INSERT INTO machine_process (
id, created_at, modified_at, machine_id, parent_id, process_type,
pwd, cmd, env, timeout,
@@ -213,14 +229,22 @@ def create_process(cursor, machine_id, pwd, cmd, status, exit_code, started_at,
?, ?,
?, NULL, NULL,
?, ?)
""", [
process_id, now, now, machine_id,
pwd, cmd_json,
exit_code,
started_at, ended_at,
binary_id,
status, retry_at
])
""",
[
process_id,
now,
now,
machine_id,
pwd,
cmd_json,
exit_code,
started_at,
ended_at,
binary_id,
status,
retry_at,
],
)
return process_id
@@ -250,16 +274,18 @@ def copy_archiveresult_data_to_process(apps, schema_editor):
cursor.execute("PRAGMA table_info(core_archiveresult)")
cols = {row[1] for row in cursor.fetchall()}
print(f'DEBUG 0027: Columns found: {sorted(cols)}')
print(f'DEBUG 0027: Has cmd={("cmd" in cols)}, pwd={("pwd" in cols)}, cmd_version={("cmd_version" in cols)}, process_id={("process_id" in cols)}')
print(f"DEBUG 0027: Columns found: {sorted(cols)}")
print(
f"DEBUG 0027: Has cmd={('cmd' in cols)}, pwd={('pwd' in cols)}, cmd_version={('cmd_version' in cols)}, process_id={('process_id' in cols)}",
)
if 'cmd' not in cols or 'pwd' not in cols or 'cmd_version' not in cols:
print('✓ Fresh install or fields already removed - skipping data copy')
if "cmd" not in cols or "pwd" not in cols or "cmd_version" not in cols:
print("✓ Fresh install or fields already removed - skipping data copy")
return
# Check if process_id field exists (should exist from 0026)
if 'process_id' not in cols:
print('✗ ERROR: process_id field not found. Migration 0026 must run first.')
if "process_id" not in cols:
print("✗ ERROR: process_id field not found. Migration 0026 must run first.")
return
# Get or create Machine.current()
@@ -278,10 +304,10 @@ def copy_archiveresult_data_to_process(apps, schema_editor):
results = cursor.fetchall()
if not results:
print('✓ No ArchiveResults need Process migration')
print("✓ No ArchiveResults need Process migration")
return
print(f'Migrating {len(results)} ArchiveResults to Process records...')
print(f"Migrating {len(results)} ArchiveResults to Process records...")
migrated_count = 0
skipped_count = 0
@@ -291,42 +317,46 @@ def copy_archiveresult_data_to_process(apps, schema_editor):
ar_id, snapshot_id, plugin, cmd_raw, pwd, cmd_version, status, start_ts, end_ts, created_at = row
if i == 0:
print(f'DEBUG 0027: First row: ar_id={ar_id}, plugin={plugin}, cmd={cmd_raw[:50] if cmd_raw else None}, status={status}')
print(f"DEBUG 0027: First row: ar_id={ar_id}, plugin={plugin}, cmd={cmd_raw[:50] if cmd_raw else None}, status={status}")
try:
# Parse cmd field
cmd_array = parse_cmd_field(cmd_raw)
if i == 0:
print(f'DEBUG 0027: Parsed cmd: {cmd_array}')
print(f"DEBUG 0027: Parsed cmd: {cmd_array}")
# Extract binary info from cmd[0] if available
binary_id = None
if cmd_array and cmd_array[0]:
binary_name = Path(cmd_array[0]).name or plugin # Fallback to plugin name
binary_abspath = cmd_array[0]
binary_version = cmd_version or ''
binary_version = cmd_version or ""
# Get or create Binary record
binary_id = get_or_create_binary(
cursor, machine_id, binary_name, binary_abspath, binary_version
cursor,
machine_id,
binary_name,
binary_abspath,
binary_version,
)
if i == 0:
print(f'DEBUG 0027: Created Binary: id={binary_id}, name={binary_name}')
print(f"DEBUG 0027: Created Binary: id={binary_id}, name={binary_name}")
# Map status
process_status, exit_code = map_status(status)
# Set timestamps
started_at = start_ts or created_at
ended_at = end_ts if process_status == 'exited' else None
ended_at = end_ts if process_status == "exited" else None
# Create Process record
process_id = create_process(
cursor=cursor,
machine_id=machine_id,
pwd=pwd or '',
pwd=pwd or "",
cmd=cmd_array,
status=process_status,
exit_code=exit_code,
@@ -336,34 +366,34 @@ def copy_archiveresult_data_to_process(apps, schema_editor):
)
if i == 0:
print(f'DEBUG 0027: Created Process: id={process_id}')
print(f"DEBUG 0027: Created Process: id={process_id}")
# Link ArchiveResult to Process
cursor.execute(
"UPDATE core_archiveresult SET process_id = ? WHERE id = ?",
[process_id, ar_id]
[process_id, ar_id],
)
migrated_count += 1
if i == 0:
print('DEBUG 0027: Linked ArchiveResult to Process')
print("DEBUG 0027: Linked ArchiveResult to Process")
except Exception as e:
print(f'✗ Error migrating ArchiveResult {ar_id}: {e}')
print(f"✗ Error migrating ArchiveResult {ar_id}: {e}")
import traceback
traceback.print_exc()
error_count += 1
continue
print(f'✓ Migration complete: {migrated_count} migrated, {skipped_count} skipped, {error_count} errors')
print(f"✓ Migration complete: {migrated_count} migrated, {skipped_count} skipped, {error_count} errors")
class Migration(migrations.Migration):
dependencies = [
('core', '0026_add_process_to_archiveresult'),
('machine', '0007_add_process_type_and_parent'),
("core", "0026_add_process_to_archiveresult"),
("machine", "0007_add_process_type_and_parent"),
]
operations = [
@@ -372,18 +402,17 @@ class Migration(migrations.Migration):
copy_archiveresult_data_to_process,
reverse_code=migrations.RunPython.noop,
),
# Now safe to remove old fields (moved from 0025)
migrations.RemoveField(
model_name='archiveresult',
name='cmd',
model_name="archiveresult",
name="cmd",
),
migrations.RemoveField(
model_name='archiveresult',
name='cmd_version',
model_name="archiveresult",
name="cmd_version",
),
migrations.RemoveField(
model_name='archiveresult',
name='pwd',
model_name="archiveresult",
name="pwd",
),
]

Some files were not shown because too many files have changed in this diff Show More