From 07dc880d0b09ad2dd0aa28d85e94269621c972c7 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 26 Feb 2026 01:23:00 -0800 Subject: [PATCH] Harden AddView config overrides to admin-only --- archivebox/core/tests.py | 50 ++++++++++++++++++++++++++++++++++++++++ archivebox/core/views.py | 17 +++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/archivebox/core/tests.py b/archivebox/core/tests.py index 11edb2ab..56060ae6 100644 --- a/archivebox/core/tests.py +++ b/archivebox/core/tests.py @@ -2,6 +2,7 @@ import os import django +from unittest.mock import patch # Set up Django before importing any Django-dependent modules os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings') @@ -13,6 +14,7 @@ from django.urls import reverse from archivebox.crawls.models import Crawl, CrawlSchedule from archivebox.core.models import Tag +from archivebox.config.common import SERVER_CONFIG class AddViewTests(TestCase): @@ -220,6 +222,54 @@ class AddViewTests(TestCase): # For now, we'll skip this test or mark it as TODO pass + def test_add_public_anonymous_custom_config_is_silently_stripped(self): + """Anonymous users cannot override crawl config, even with PUBLIC_ADD_VIEW enabled.""" + self.client.logout() + + with patch.object(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True): + response = self.client.post(self.add_url, { + 'url': 'https://example.com', + 'depth': '0', + 'config': '{"YTDLP_ARGS_EXTRA":["--exec","id > /tmp/pwned"]}', + }) + + self.assertEqual(response.status_code, 302) + crawl = Crawl.objects.order_by('-created_at').first() + self.assertNotIn('YTDLP_ARGS_EXTRA', crawl.config) + + def test_add_authenticated_non_admin_custom_config_is_silently_stripped(self): + """Authenticated non-admin users cannot override crawl config.""" + response = self.client.post(self.add_url, { + 'url': 'https://example.com', + 'depth': '0', + 'config': '{"YTDLP_ARGS_EXTRA":["--exec","id > /tmp/pwned"]}', + }) + + self.assertEqual(response.status_code, 302) + crawl = Crawl.objects.order_by('-created_at').first() + self.assertNotIn('YTDLP_ARGS_EXTRA', crawl.config) + + def test_add_staff_admin_custom_config_is_allowed(self): + """Admin users can override crawl config.""" + self.client.logout() + admin_user = User.objects.create_user( + username='adminuser', + password='adminpass123', + email='admin@example.com', + is_staff=True, + ) + self.client.login(username='adminuser', password='adminpass123') + + response = self.client.post(self.add_url, { + 'url': 'https://example.com', + 'depth': '0', + 'config': '{"YTDLP_ARGS_EXTRA":["--exec","echo hello"]}', + }) + + self.assertEqual(response.status_code, 302) + crawl = Crawl.objects.order_by('-created_at').first() + self.assertEqual(crawl.config.get('YTDLP_ARGS_EXTRA'), ['--exec', 'echo hello']) + def test_add_empty_urls_fails(self): """Test that submitting without URLs fails validation.""" response = self.client.post(self.add_url, { diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 42ec421c..7225cd8e 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -856,6 +856,21 @@ class AddView(UserPassesTestMixin, FormView): def test_func(self): return SERVER_CONFIG.PUBLIC_ADD_VIEW or self.request.user.is_authenticated + def _can_override_crawl_config(self) -> bool: + user = self.request.user + return bool(user.is_authenticated and (user.is_superuser or user.is_staff)) + + def _get_custom_config_overrides(self, form: AddLinkForm) -> dict: + custom_config = form.cleaned_data.get("config") or {} + + if not isinstance(custom_config, dict): + return {} + + if not self._can_override_crawl_config(): + return {} + + return custom_config + def get_context_data(self, **kwargs): from archivebox.core.models import Tag @@ -884,7 +899,7 @@ class AddView(UserPassesTestMixin, FormView): update = form.cleaned_data.get("update", False) index_only = form.cleaned_data.get("index_only", False) notes = form.cleaned_data.get("notes", "") - custom_config = form.cleaned_data.get("config") or {} + custom_config = self._get_custom_config_overrides(form) from archivebox.config.permissions import HOSTNAME