mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
Add configurable server security modes
This commit is contained in:
@@ -55,6 +55,8 @@ def _build_listen_host(subdomain: str | None) -> str:
|
||||
|
||||
|
||||
def get_admin_host() -> str:
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
return get_listen_host().lower()
|
||||
override = _normalize_base_url(SERVER_CONFIG.ADMIN_BASE_URL)
|
||||
if override:
|
||||
return urlparse(override).netloc.lower()
|
||||
@@ -62,23 +64,33 @@ def get_admin_host() -> str:
|
||||
|
||||
|
||||
def get_web_host() -> str:
    """Return the host (netloc) used for the archive web UI.

    Without subdomain routing this is the lowercased listen host. With
    subdomain routing, a configured ``ARCHIVE_BASE_URL`` takes precedence
    (its lowercased netloc is returned); otherwise the host is built from
    the ``"web"`` subdomain.
    """
    if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
        configured_base = _normalize_base_url(SERVER_CONFIG.ARCHIVE_BASE_URL)
        if not configured_base:
            return _build_listen_host("web")
        return urlparse(configured_base).netloc.lower()

    # Single-host mode: everything is served from the listen host.
    return get_listen_host().lower()
|
||||
|
||||
def get_api_host() -> str:
    """Return the host used for the API.

    Uses the ``"api"`` subdomain when subdomain routing is enabled,
    otherwise falls back to the lowercased listen host.
    """
    if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
        return _build_listen_host("api")
    return get_listen_host().lower()
|
||||
|
||||
def get_public_host() -> str:
    """Return the host used for the public index.

    Uses the ``"public"`` subdomain when subdomain routing is enabled,
    otherwise falls back to the lowercased listen host.
    """
    if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
        return _build_listen_host("public")
    return get_listen_host().lower()
|
||||
|
||||
|
||||
def get_snapshot_host(snapshot_id: str) -> str:
    """Return the host on which the given snapshot is served.

    With subdomain routing the snapshot id is used as the subdomain;
    without it, every snapshot shares the lowercased listen host.
    """
    if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
        return _build_listen_host(snapshot_id)
    return get_listen_host().lower()
|
||||
|
||||
|
||||
def get_original_host(domain: str) -> str:
    """Return the host on which original-domain replay for ``domain`` is served.

    With subdomain routing the original domain is used as the subdomain;
    without it, the lowercased listen host is returned.
    """
    if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
        return _build_listen_host(domain)
    return get_listen_host().lower()
|
||||
|
||||
|
||||
@@ -87,6 +99,8 @@ def is_snapshot_subdomain(subdomain: str) -> bool:
|
||||
|
||||
|
||||
def get_listen_subdomain(request_host: str) -> str:
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
return ""
|
||||
req_host, req_port = split_host_port(request_host)
|
||||
listen_host, listen_port = get_listen_parts()
|
||||
if not listen_host:
|
||||
@@ -127,6 +141,8 @@ def _build_base_url_for_host(host: str, request=None) -> str:
|
||||
|
||||
|
||||
def get_admin_base_url(request=None) -> str:
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
return _build_base_url_for_host(get_listen_host(), request=request)
|
||||
override = _normalize_base_url(SERVER_CONFIG.ADMIN_BASE_URL)
|
||||
if override:
|
||||
return override
|
||||
@@ -134,12 +150,16 @@ def get_admin_base_url(request=None) -> str:
|
||||
|
||||
|
||||
def get_web_base_url(request=None) -> str:
    """Return the base URL of the archive web UI.

    Without subdomain routing the base URL is built from the listen host.
    With subdomain routing, a configured ``ARCHIVE_BASE_URL`` is returned
    (after normalization) when set; otherwise the URL is built from
    ``get_web_host()``. ``request`` is forwarded to the URL builder.
    """
    if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
        configured_base = _normalize_base_url(SERVER_CONFIG.ARCHIVE_BASE_URL)
        if configured_base:
            return configured_base
        target_host = get_web_host()
    else:
        target_host = get_listen_host()

    return _build_base_url_for_host(target_host, request=request)
|
||||
|
||||
def get_api_base_url(request=None) -> str:
    """Return the base URL of the API.

    Built from ``get_api_host()`` when subdomain routing is enabled and
    from the listen host otherwise. ``request`` is forwarded to the URL
    builder.
    """
    if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
        target_host = get_api_host()
    else:
        target_host = get_listen_host()
    return _build_base_url_for_host(target_host, request=request)
|
||||
|
||||
|
||||
@@ -149,10 +169,14 @@ def get_archive_base_url(request=None) -> str:
|
||||
|
||||
|
||||
def get_snapshot_base_url(snapshot_id: str, request=None) -> str:
    """Return the base URL under which a snapshot is replayed.

    With subdomain routing the snapshot gets its own host; without it the
    snapshot lives under ``/snapshot/<snapshot_id>`` on the web base URL.
    """
    if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
        snapshot_host = get_snapshot_host(snapshot_id)
        return _build_base_url_for_host(snapshot_host, request=request)

    # Single-host mode: path-based replay on the web base URL.
    web_base = get_web_base_url(request=request)
    return _build_url(web_base, f"/snapshot/{snapshot_id}")
|
||||
|
||||
|
||||
def get_original_base_url(domain: str, request=None) -> str:
    """Return the base URL under which an original domain is replayed.

    With subdomain routing the domain gets its own host; without it the
    replay lives under ``/original/<domain>`` on the web base URL.
    """
    if SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
        original_host = get_original_host(domain)
        return _build_base_url_for_host(original_host, request=request)

    # Single-host mode: path-based replay on the web base URL.
    web_base = get_web_base_url(request=request)
    return _build_url(web_base, f"/original/{domain}")
|
||||
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ from django.core.exceptions import ImproperlyConfigured
|
||||
from django.shortcuts import redirect
|
||||
from django.contrib.staticfiles import finders
|
||||
from django.utils.http import http_date
|
||||
from django.http import HttpResponseNotModified
|
||||
from django.http import HttpResponseForbidden, HttpResponseNotModified
|
||||
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
from archivebox.config import VERSION
|
||||
@@ -26,6 +26,7 @@ from archivebox.core.host_utils import (
|
||||
get_web_host,
|
||||
host_matches,
|
||||
is_snapshot_subdomain,
|
||||
split_host_port,
|
||||
)
|
||||
from archivebox.core.views import SnapshotHostView, OriginalDomainHostView
|
||||
|
||||
@@ -90,6 +91,29 @@ def CacheControlMiddleware(get_response):
|
||||
return middleware
|
||||
|
||||
|
||||
def ServerSecurityModeMiddleware(get_response):
    """Build a middleware that locks down requests when the control plane is off.

    While ``SERVER_CONFIG.CONTROL_PLANE_ENABLED`` is truthy the request is
    passed through untouched. Otherwise the request's user is replaced with
    an ``AnonymousUser`` and the request is answered with
    ``HttpResponseForbidden`` if it uses a method other than GET/HEAD/OPTIONS
    or targets one of the blocked path prefixes.
    """
    read_only_methods = {"GET", "HEAD", "OPTIONS"}
    control_plane_prefixes = ("/admin", "/accounts", "/api", "/add", "/web")
    denial_message = "ArchiveBox is running with the control plane disabled in this security mode."

    def _targets_control_plane(path):
        # Match the prefix itself or anything below it ("/api", "/api/..."),
        # but not unrelated paths that merely share the leading characters.
        return any(
            path == prefix or path.startswith(f"{prefix}/")
            for prefix in control_plane_prefixes
        )

    def middleware(request):
        if SERVER_CONFIG.CONTROL_PLANE_ENABLED:
            return get_response(request)

        # Discard whatever user earlier middleware attached to the request.
        anonymous = AnonymousUser()
        request.user = anonymous
        request._cached_user = anonymous

        if request.method.upper() not in read_only_methods or _targets_control_plane(request.path):
            return HttpResponseForbidden(denial_message)

        return get_response(request)

    return middleware
|
||||
|
||||
|
||||
def HostRoutingMiddleware(get_response):
|
||||
def middleware(request):
|
||||
request_host = (request.get_host() or "").lower()
|
||||
@@ -100,6 +124,21 @@ def HostRoutingMiddleware(get_response):
|
||||
listen_host = get_listen_host()
|
||||
subdomain = get_listen_subdomain(request_host)
|
||||
|
||||
if not SERVER_CONFIG.USES_SUBDOMAIN_ROUTING:
|
||||
if host_matches(request_host, listen_host):
|
||||
return get_response(request)
|
||||
|
||||
req_host, req_port = split_host_port(request_host)
|
||||
listen_host_only, listen_port = split_host_port(listen_host)
|
||||
if req_host.endswith(f".{listen_host_only}"):
|
||||
if not listen_port or not req_port or listen_port == req_port:
|
||||
target = build_web_url(request.path, request=request)
|
||||
if request.META.get("QUERY_STRING"):
|
||||
target = f"{target}?{request.META['QUERY_STRING']}"
|
||||
return redirect(target)
|
||||
|
||||
return get_response(request)
|
||||
|
||||
if host_matches(request_host, admin_host):
|
||||
return get_response(request)
|
||||
|
||||
|
||||
@@ -86,6 +86,7 @@ MIDDLEWARE = [
|
||||
"django.middleware.csrf.CsrfViewMiddleware",
|
||||
"django.contrib.auth.middleware.AuthenticationMiddleware",
|
||||
"archivebox.core.middleware.ReverseProxyAuthMiddleware",
|
||||
"archivebox.core.middleware.ServerSecurityModeMiddleware",
|
||||
"archivebox.core.middleware.HostRoutingMiddleware",
|
||||
"django.contrib.messages.middleware.MessageMiddleware",
|
||||
"archivebox.core.middleware.CacheControlMiddleware",
|
||||
|
||||
@@ -9,7 +9,7 @@ from django.http import HttpRequest
|
||||
from archivebox.misc.serve_static import serve_static
|
||||
|
||||
from archivebox.core.admin_site import archivebox_admin
|
||||
from archivebox.core.views import HomepageView, SnapshotView, SnapshotPathView, PublicIndexView, AddView, WebAddView, HealthCheckView, live_progress_view
|
||||
from archivebox.core.views import HomepageView, SnapshotView, SnapshotPathView, SnapshotReplayView, OriginalDomainReplayView, PublicIndexView, AddView, WebAddView, HealthCheckView, live_progress_view
|
||||
|
||||
|
||||
# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
|
||||
@@ -33,6 +33,8 @@ urlpatterns = [
|
||||
|
||||
path('archive/', RedirectView.as_view(url='/')),
|
||||
path('archive/<path:path>', SnapshotView.as_view(), name='Snapshot'),
|
||||
re_path(r'^snapshot\/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:\/(?P<path>.*))?$', SnapshotReplayView.as_view(), name='snapshot-replay'),
|
||||
re_path(r'^original\/(?P<domain>[^/]+)(?:\/(?P<path>.*))?$', OriginalDomainReplayView.as_view(), name='original-replay'),
|
||||
re_path(r'^web/(?P<url>(?!\d{4}(?:\d{2})?(?:\d{2})?(?:/|$)).+)$', WebAddView.as_view(), name='web-add'),
|
||||
re_path(r'^(?P<username>[^/]+)/(?P<date>\d{4}(?:\d{2})?(?:\d{2})?)/(?P<url>https?://.*)$', SnapshotPathView.as_view(), name='snapshot-path-url'),
|
||||
re_path(r'^(?P<username>[^/]+)/(?P<date>\d{4}(?:\d{2})?(?:\d{2})?)/(?P<domain>[^/]+)(?:/(?P<snapshot_id>[0-9a-fA-F-]{8,36})(?:/(?P<path>.*))?)?$', SnapshotPathView.as_view(), name='snapshot-path'),
|
||||
|
||||
@@ -52,15 +52,21 @@ def _files_index_target(snapshot: Snapshot, archivefile: str | None) -> str:
|
||||
return target
|
||||
|
||||
|
||||
def _admin_login_redirect_or_forbidden(request: HttpRequest):
    """Return the response for a request that needs a login it does not have.

    When the control plane is enabled, redirect to the admin login page with
    the current path as the ``next`` target. When it is disabled, return an
    ``HttpResponseForbidden`` instead.

    The path is percent-encoded before being placed in the query string:
    ``request.path`` may contain characters such as ``&``, ``#``, ``?`` or
    ``=`` which would otherwise truncate or corrupt the ``next`` value.
    """
    if not SERVER_CONFIG.CONTROL_PLANE_ENABLED:
        return HttpResponseForbidden("ArchiveBox is running with the control plane disabled in this security mode.")

    from urllib.parse import quote

    # Keep "/" readable; escape everything else that is special in a query string.
    next_target = quote(request.path, safe="/")
    return redirect(f'/admin/login/?next={next_target}')
|
||||
|
||||
|
||||
class HomepageView(View):
|
||||
def get(self, request):
|
||||
if request.user.is_authenticated:
|
||||
if request.user.is_authenticated and SERVER_CONFIG.CONTROL_PLANE_ENABLED:
|
||||
return redirect('/admin/core/snapshot/')
|
||||
|
||||
if SERVER_CONFIG.PUBLIC_INDEX:
|
||||
return redirect('/public')
|
||||
|
||||
return redirect(f'/admin/login/?next={request.path}')
|
||||
return _admin_login_redirect_or_forbidden(request)
|
||||
|
||||
|
||||
class SnapshotView(View):
|
||||
@@ -277,7 +283,7 @@ class SnapshotView(View):
|
||||
|
||||
def get(self, request, path):
|
||||
if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
|
||||
return redirect(f'/admin/login/?next={request.path}')
|
||||
return _admin_login_redirect_or_forbidden(request)
|
||||
|
||||
snapshot = None
|
||||
|
||||
@@ -308,7 +314,7 @@ class SnapshotView(View):
|
||||
if request.GET.get('files'):
|
||||
target_path = _files_index_target(snapshot, archivefile)
|
||||
response = serve_static_with_byterange_support(
|
||||
request, target_path, document_root=snapshot.output_dir, show_indexes=True,
|
||||
request, target_path, document_root=snapshot.output_dir, show_indexes=True, is_archive_replay=True,
|
||||
)
|
||||
elif archivefile == 'index.html':
|
||||
# if they requested snapshot index, serve live rendered template instead of static html
|
||||
@@ -474,7 +480,7 @@ class SnapshotPathView(View):
|
||||
|
||||
def get(self, request, username: str, date: str | None = None, domain: str | None = None, snapshot_id: str | None = None, path: str = "", url: str | None = None):
|
||||
if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
|
||||
return redirect(f'/admin/login/?next={request.path}')
|
||||
return _admin_login_redirect_or_forbidden(request)
|
||||
|
||||
if username == 'system':
|
||||
return redirect(request.path.replace('/system/', '/web/', 1))
|
||||
@@ -573,14 +579,14 @@ class SnapshotPathView(View):
|
||||
if request.GET.get('files'):
|
||||
target_path = _files_index_target(snapshot, archivefile)
|
||||
return serve_static_with_byterange_support(
|
||||
request, target_path, document_root=snapshot.output_dir, show_indexes=True,
|
||||
request, target_path, document_root=snapshot.output_dir, show_indexes=True, is_archive_replay=True,
|
||||
)
|
||||
|
||||
if archivefile == "index.html":
|
||||
return SnapshotView.render_live_index(request, snapshot)
|
||||
|
||||
return serve_static_with_byterange_support(
|
||||
request, archivefile, document_root=snapshot.output_dir, show_indexes=True,
|
||||
request, archivefile, document_root=snapshot.output_dir, show_indexes=True, is_archive_replay=True,
|
||||
)
|
||||
|
||||
|
||||
@@ -670,6 +676,7 @@ def _serve_responses_path(request, responses_root: Path, rel_path: str, show_ind
|
||||
candidate,
|
||||
document_root=str(responses_root),
|
||||
show_indexes=show_indexes,
|
||||
is_archive_replay=True,
|
||||
)
|
||||
except Http404:
|
||||
pass
|
||||
@@ -682,18 +689,85 @@ def _serve_responses_path(request, responses_root: Path, rel_path: str, show_ind
|
||||
rel_dir,
|
||||
document_root=str(responses_root),
|
||||
show_indexes=True,
|
||||
is_archive_replay=True,
|
||||
)
|
||||
except Http404:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _serve_snapshot_replay(request: HttpRequest, snapshot: Snapshot, path: str = ""):
    """Serve ``path`` from a snapshot's output dir, falling back to its responses dir.

    An empty or directory-style path (ending in ``/``) becomes the directory
    itself when ``?files`` is set and ``index.html`` inside it otherwise.
    The path is then checked with ``_safe_archive_relpath``. If the file is
    not found under ``snapshot.output_dir``, the lookup is retried under
    ``responses/<host>`` inside the output dir.

    Raises:
        Http404: if the path is rejected or nothing could be served.
    """
    want_listing = bool(request.GET.get("files"))

    requested = path or ""
    if not requested or requested.endswith("/"):
        requested = requested.rstrip("/") if want_listing else f"{requested}index.html"

    safe_path = _safe_archive_relpath(requested)
    if safe_path is None:
        raise Http404

    try:
        return serve_static_with_byterange_support(
            request,
            safe_path,
            document_root=snapshot.output_dir,
            show_indexes=want_listing,
            is_archive_replay=True,
        )
    except Http404:
        # Not in the snapshot dir itself; try the responses dir below.
        pass

    host = urlparse(snapshot.url).hostname or snapshot.domain
    responses_root = Path(snapshot.output_dir) / "responses" / host
    if not responses_root.exists():
        raise Http404

    fallback = _serve_responses_path(request, responses_root, safe_path, want_listing)
    if fallback is None:
        raise Http404
    return fallback
|
||||
|
||||
|
||||
def _serve_original_domain_replay(request: HttpRequest, domain: str, path: str = ""):
    """Serve ``path`` for an original ``domain`` from its latest responses.

    An empty or directory-style path (ending in ``/``) gets ``index.html``
    appended, then the path is checked with ``_safe_archive_relpath``. The
    lowercased domain is looked up via ``_latest_response_match``; when the
    last path component has no ``.``, ``<path>/index.html`` and then
    ``<path>.html`` are tried as well. If no match can be served, the path
    is tried against ``_latest_responses_root(domain)``.

    Raises:
        Http404: if the path is rejected or nothing could be served.
    """
    requested = path or ""
    if not requested or requested.endswith("/"):
        requested = f"{requested}index.html"

    safe_path = _safe_archive_relpath(requested)
    if safe_path is None:
        raise Http404

    want_listing = bool(request.GET.get("files"))
    domain = domain.lower()

    match = _latest_response_match(domain, safe_path)
    if not match and "." not in Path(safe_path).name:
        # Extension-less path: try it as a directory, then as an .html page.
        for alternative in (f"{safe_path.rstrip('/')}/index.html", f"{safe_path}.html"):
            match = _latest_response_match(domain, alternative)
            if match:
                break

    if match:
        matched_root, matched_rel = match
        served = _serve_responses_path(request, matched_root, str(matched_rel), want_listing)
        if served is not None:
            return served

    # No servable match: fall back to the latest responses root for the domain.
    latest_root = _latest_responses_root(domain)
    if latest_root:
        served = _serve_responses_path(request, latest_root, safe_path, want_listing)
        if served is not None:
            return served

    raise Http404
|
||||
|
||||
|
||||
class SnapshotHostView(View):
|
||||
"""Serve snapshot directory contents on <snapshot_id>.<listen_host>/<path>."""
|
||||
|
||||
def get(self, request, snapshot_id: str, path: str = ""):
|
||||
if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
|
||||
return HttpResponseForbidden("Public snapshots are disabled.")
|
||||
return _admin_login_redirect_or_forbidden(request)
|
||||
snapshot = None
|
||||
if snapshot_id:
|
||||
try:
|
||||
@@ -708,37 +782,30 @@ class SnapshotHostView(View):
|
||||
|
||||
if not snapshot:
|
||||
raise Http404
|
||||
return _serve_snapshot_replay(request, snapshot, path)
|
||||
|
||||
rel_path = path or ""
|
||||
show_indexes = bool(request.GET.get("files"))
|
||||
if not rel_path or rel_path.endswith("/"):
|
||||
if show_indexes:
|
||||
rel_path = rel_path.rstrip("/")
|
||||
else:
|
||||
rel_path = f"{rel_path}index.html"
|
||||
rel_path = _safe_archive_relpath(rel_path)
|
||||
if rel_path is None:
|
||||
raise Http404
|
||||
|
||||
class SnapshotReplayView(View):
|
||||
"""Serve snapshot directory contents on a one-domain replay path."""
|
||||
|
||||
def get(self, request, snapshot_id: str, path: str = ""):
|
||||
if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
|
||||
return _admin_login_redirect_or_forbidden(request)
|
||||
|
||||
try:
|
||||
return serve_static_with_byterange_support(
|
||||
request,
|
||||
rel_path,
|
||||
document_root=snapshot.output_dir,
|
||||
show_indexes=show_indexes,
|
||||
)
|
||||
except Http404:
|
||||
pass
|
||||
snapshot = Snapshot.objects.get(pk=snapshot_id)
|
||||
except Snapshot.DoesNotExist:
|
||||
try:
|
||||
snapshot = Snapshot.objects.get(id__startswith=snapshot_id)
|
||||
except Snapshot.DoesNotExist:
|
||||
raise Http404
|
||||
except Snapshot.MultipleObjectsReturned:
|
||||
snapshot = Snapshot.objects.filter(id__startswith=snapshot_id).first()
|
||||
|
||||
# Fallback to responses/<domain>/<path>
|
||||
host = urlparse(snapshot.url).hostname or snapshot.domain
|
||||
responses_root = Path(snapshot.output_dir) / "responses" / host
|
||||
if responses_root.exists():
|
||||
response = _serve_responses_path(request, responses_root, rel_path, show_indexes)
|
||||
if response is not None:
|
||||
return response
|
||||
if snapshot is None:
|
||||
raise Http404
|
||||
|
||||
raise Http404
|
||||
return _serve_snapshot_replay(request, snapshot, path)
|
||||
|
||||
|
||||
class OriginalDomainHostView(View):
|
||||
@@ -746,38 +813,17 @@ class OriginalDomainHostView(View):
|
||||
|
||||
def get(self, request, domain: str, path: str = ""):
|
||||
if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
|
||||
return HttpResponseForbidden("Public snapshots are disabled.")
|
||||
rel_path = path or ""
|
||||
if not rel_path or rel_path.endswith("/"):
|
||||
rel_path = f"{rel_path}index.html"
|
||||
rel_path = _safe_archive_relpath(rel_path)
|
||||
if rel_path is None:
|
||||
raise Http404
|
||||
return _admin_login_redirect_or_forbidden(request)
|
||||
return _serve_original_domain_replay(request, domain, path)
|
||||
|
||||
domain = domain.lower()
|
||||
match = _latest_response_match(domain, rel_path)
|
||||
if not match and "." not in Path(rel_path).name:
|
||||
index_path = f"{rel_path.rstrip('/')}/index.html"
|
||||
match = _latest_response_match(domain, index_path)
|
||||
if not match and "." not in Path(rel_path).name:
|
||||
html_path = f"{rel_path}.html"
|
||||
match = _latest_response_match(domain, html_path)
|
||||
|
||||
show_indexes = bool(request.GET.get("files"))
|
||||
if match:
|
||||
responses_root, rel_to_root = match
|
||||
response = _serve_responses_path(request, responses_root, str(rel_to_root), show_indexes)
|
||||
if response is not None:
|
||||
return response
|
||||
class OriginalDomainReplayView(View):
|
||||
"""Serve original-domain replay content on a one-domain replay path."""
|
||||
|
||||
# If no direct match, try serving directory index from latest responses root
|
||||
responses_root = _latest_responses_root(domain)
|
||||
if responses_root:
|
||||
response = _serve_responses_path(request, responses_root, rel_path, show_indexes)
|
||||
if response is not None:
|
||||
return response
|
||||
|
||||
raise Http404
|
||||
def get(self, request, domain: str, path: str = ""):
|
||||
if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
|
||||
return _admin_login_redirect_or_forbidden(request)
|
||||
return _serve_original_domain_replay(request, domain, path)
|
||||
|
||||
|
||||
class PublicIndexView(ListView):
|
||||
@@ -834,7 +880,7 @@ class PublicIndexView(ListView):
|
||||
response = super().get(*args, **kwargs)
|
||||
return response
|
||||
else:
|
||||
return redirect(f'/admin/login/?next={self.request.path}')
|
||||
return _admin_login_redirect_or_forbidden(self.request)
|
||||
|
||||
@method_decorator(csrf_exempt, name='dispatch')
|
||||
class AddView(UserPassesTestMixin, FormView):
|
||||
|
||||
Reference in New Issue
Block a user