mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
955 lines
35 KiB
Python
955 lines
35 KiB
Python
import html
|
|
import json
|
|
import re
|
|
import os
|
|
import stat
|
|
import asyncio
|
|
import posixpath
|
|
import mimetypes
|
|
import importlib
|
|
import queue
|
|
import threading
|
|
import time
|
|
import zipfile
|
|
from datetime import datetime
|
|
from collections.abc import Callable
|
|
from pathlib import Path
|
|
from urllib.parse import urlencode
|
|
|
|
from django.contrib.staticfiles import finders
|
|
from django.template import TemplateDoesNotExist, loader
|
|
from django.views import static
|
|
from django.http import StreamingHttpResponse, Http404, HttpResponse, HttpResponseNotModified
|
|
from django.utils._os import safe_join
|
|
from django.utils.http import http_date
|
|
from django.utils.translation import gettext as _
|
|
from archivebox.config.common import SERVER_CONFIG
|
|
from archivebox.misc.logging_util import printable_filesize
|
|
|
|
|
|
# Per-file cache of parsed hashes.json maps: {hashes_path: (mtime, {rel_path: hash})}.
# The stored mtime invalidates the cached map whenever hashes.json changes on disk.
_HASHES_CACHE: dict[Path, tuple[float, dict[str, str]]] = {}
|
|
|
|
|
|
def _load_hash_map(snapshot_dir: Path) -> dict[str, str] | None:
    """Load the {relative_path: hash} map from <snapshot_dir>/hashes/hashes.json.

    Returns None if the file is missing, unreadable, or not valid JSON.
    Parsed maps are cached in _HASHES_CACHE and invalidated by mtime.
    """
    hashes_path = snapshot_dir / "hashes" / "hashes.json"
    # EAFP: a single stat() both detects a missing file and yields the mtime,
    # replacing the original exists()+stat() pair (two syscalls + TOCTOU race).
    try:
        mtime = hashes_path.stat().st_mtime
    except OSError:
        return None

    cached = _HASHES_CACHE.get(hashes_path)
    if cached and cached[0] == mtime:
        return cached[1]

    try:
        data = json.loads(hashes_path.read_text(encoding="utf-8"))
    except Exception:
        # Best-effort: a corrupt or unreadable hashes.json just disables ETags.
        return None

    file_map = {
        str(entry.get("path")): entry.get("hash")
        for entry in data.get("files", [])
        if entry.get("path")
    }
    _HASHES_CACHE[hashes_path] = (mtime, file_map)
    return file_map
|
|
|
|
|
|
def _hash_for_path(document_root: Path, rel_path: str) -> str | None:
    """Return the recorded content hash for *rel_path* under *document_root*, if any."""
    hash_map = _load_hash_map(document_root)
    return hash_map.get(rel_path) if hash_map else None
|
|
|
|
|
|
def _cache_policy() -> str:
    """Cache-Control scope: 'public' only when snapshots are world-readable."""
    if SERVER_CONFIG.PUBLIC_SNAPSHOTS:
        return "public"
    return "private"
|
|
|
|
|
|
def _format_direntry_timestamp(stat_result: os.stat_result) -> str:
|
|
timestamp = getattr(stat_result, "st_birthtime", None) or stat_result.st_mtime
|
|
return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M")
|
|
|
|
|
|
def _safe_zip_stem(name: str) -> str:
|
|
safe_name = re.sub(r"[^A-Za-z0-9._-]+", "-", name).strip("._-")
|
|
return safe_name or "archivebox"
|
|
|
|
|
|
class _StreamingQueueWriter:
|
|
"""Expose a write-only file-like object so zipfile can stream into a queue."""
|
|
|
|
def __init__(self, output_queue: queue.Queue[bytes | BaseException | object]) -> None:
|
|
self.output_queue = output_queue
|
|
self.position = 0
|
|
|
|
def write(self, data: bytes) -> int:
|
|
if data:
|
|
self.output_queue.put(data)
|
|
self.position += len(data)
|
|
return len(data)
|
|
|
|
def tell(self) -> int:
|
|
return self.position
|
|
|
|
def flush(self) -> None:
|
|
return None
|
|
|
|
def close(self) -> None:
|
|
return None
|
|
|
|
def writable(self) -> bool:
|
|
return True
|
|
|
|
def seekable(self) -> bool:
|
|
return False
|
|
|
|
|
|
def _iter_visible_files(root: Path):
|
|
"""Yield non-hidden files in a stable order so ZIP output is deterministic."""
|
|
|
|
for current_root, dirnames, filenames in os.walk(root):
|
|
dirnames[:] = sorted(dirname for dirname in dirnames if not dirname.startswith("."))
|
|
for filename in sorted(name for name in filenames if not name.startswith(".")):
|
|
yield Path(current_root) / filename
|
|
|
|
|
|
def _build_directory_zip_response(
    fullpath: Path,
    path: str,
    *,
    is_archive_replay: bool,
    use_async_stream: bool,
) -> StreamingHttpResponse:
    """Stream a ZIP of the directory at *fullpath* without buffering it in memory.

    A background thread zips files into a bounded queue while the response
    iterator drains it, so huge snapshot directories start downloading
    immediately instead of waiting for the whole archive to be built.
    """
    root_name = _safe_zip_stem(fullpath.name or Path(path).name or "archivebox")
    sentinel = object()  # end-of-stream marker pushed by the producer thread
    # Bounded queue provides backpressure: the zipping thread blocks once the
    # client falls 8 chunks behind, instead of buffering the entire archive.
    output_queue: queue.Queue[bytes | BaseException | object] = queue.Queue(maxsize=8)
    initial_chunk_target = 64 * 1024  # try to coalesce >=64KB for the first yield...
    initial_chunk_wait = 0.05  # ...but never delay the first bytes more than 50ms

    def build_zip() -> None:
        # zipfile wants a write-only file object. Feed those bytes straight into
        # a queue so the response can stream them out as soon as they are ready.
        writer = _StreamingQueueWriter(output_queue)
        try:
            with zipfile.ZipFile(writer, mode="w", compression=zipfile.ZIP_DEFLATED, compresslevel=6) as zip_file:
                for entry in _iter_visible_files(fullpath):
                    rel_parts = entry.relative_to(fullpath).parts
                    # Prefix every member with the sanitized root dir name so the
                    # archive extracts into a single folder.
                    arcname = Path(root_name, *rel_parts).as_posix()
                    zip_file.write(entry, arcname)
        except BaseException as err:
            # Hand the failure to the consumer so it surfaces in the response.
            output_queue.put(err)
        finally:
            output_queue.put(sentinel)

    threading.Thread(target=build_zip, name=f"zip-stream-{root_name}", daemon=True).start()

    def iter_zip_chunks():
        # Emit a meaningful first chunk quickly so browsers show the download
        # immediately instead of waiting on dozens of tiny ZIP header writes.
        first_chunk = bytearray()
        initial_deadline = time.monotonic() + initial_chunk_wait

        while True:
            # While still assembling the first chunk, poll with the remaining
            # deadline budget; afterwards block indefinitely (timeout=None).
            timeout = max(initial_deadline - time.monotonic(), 0) if len(first_chunk) < initial_chunk_target else None
            try:
                chunk = output_queue.get(timeout=timeout) if timeout is not None else output_queue.get()
            except queue.Empty:
                if first_chunk:
                    # Deadline hit: flush whatever has been buffered so far.
                    yield bytes(first_chunk)
                    first_chunk.clear()
                    continue
                # Nothing buffered yet: wait for the producer without a timeout.
                chunk = output_queue.get()

            if chunk is sentinel:
                if first_chunk:
                    yield bytes(first_chunk)
                break
            if isinstance(chunk, BaseException):
                # Re-raise the producer thread's failure in the response iterator.
                raise chunk
            if len(first_chunk) < initial_chunk_target:
                first_chunk.extend(chunk)
                if len(first_chunk) >= initial_chunk_target or time.monotonic() >= initial_deadline:
                    yield bytes(first_chunk)
                    first_chunk.clear()
                continue
            yield chunk

    async def stream_zip_async():
        # Django ASGI buffers sync StreamingHttpResponse iterators by consuming
        # them into a list. Drive the same sync iterator from a worker thread so
        # Daphne can send each chunk as it arrives instead of buffering the ZIP.
        iterator = iter(iter_zip_chunks())
        while True:
            chunk = await asyncio.to_thread(next, iterator, None)
            if chunk is None:
                break
            yield chunk

    response = StreamingHttpResponse(
        stream_zip_async() if use_async_stream else iter_zip_chunks(),
        content_type="application/zip",
    )
    response.headers["Content-Disposition"] = f'attachment; filename="{root_name}.zip"'
    response.headers["Cache-Control"] = f"{_cache_policy()}, max-age=60, stale-while-revalidate=300"
    response.headers["Last-Modified"] = http_date(fullpath.stat().st_mtime)
    # Tell nginx not to buffer this response, otherwise chunks queue up server-side.
    response.headers["X-Accel-Buffering"] = "no"
    return _apply_archive_replay_headers(
        response,
        fullpath=fullpath,
        content_type="application/zip",
        is_archive_replay=is_archive_replay,
    )
|
|
|
|
|
|
def _render_directory_index(request, path: str, fullpath: Path) -> HttpResponse:
    """Render a browsable listing for the directory at *fullpath*.

    Falls back to Django's built-in directory index view when the ArchiveBox
    directory_index template is not installed.
    """
    try:
        template = loader.select_template(
            [
                "static/directory_index.html",
                "static/directory_index",
            ],
        )
    except TemplateDoesNotExist:
        return static.directory_index(path, fullpath)

    # Directories first, then files; each group alphabetized case-insensitively.
    children = sorted(
        (child for child in fullpath.iterdir() if not child.name.startswith(".")),
        key=lambda child: (not child.is_dir(), child.name.lower()),
    )

    file_list = []
    entries = []
    for child in children:
        rel_url = str(child.relative_to(fullpath))
        is_dir = child.is_dir()
        if is_dir:
            rel_url += "/"
        file_list.append(rel_url)

        child_stat = child.stat()
        entries.append(
            {
                "name": rel_url,
                "url": rel_url,
                "is_dir": is_dir,
                "size": "—" if is_dir else printable_filesize(child_stat.st_size),
                "timestamp": _format_direntry_timestamp(child_stat),
            },
        )

    # Build a link to this same URL with ?download=zip (other params preserved).
    zip_query = request.GET.copy()
    zip_query["download"] = "zip"
    zip_url = f"{request.path}?{zip_query.urlencode()}" if zip_query else request.path

    context = {
        "directory": f"{path}/",
        "file_list": file_list,
        "entries": entries,
        "zip_url": zip_url,
    }
    return HttpResponse(template.render(context))
|
|
|
|
|
|
# Ensure common web types are mapped consistently across platforms.
# (OS mime databases differ between Linux/macOS/Windows; registering these
# explicitly keeps Content-Type headers for archived files stable everywhere.)
mimetypes.add_type("text/html", ".html")
mimetypes.add_type("text/html", ".htm")
mimetypes.add_type("text/css", ".css")
mimetypes.add_type("application/javascript", ".js")
mimetypes.add_type("application/json", ".json")
mimetypes.add_type("application/x-ndjson", ".jsonl")
mimetypes.add_type("text/markdown", ".md")
mimetypes.add_type("text/yaml", ".yml")
mimetypes.add_type("text/yaml", ".yaml")
mimetypes.add_type("text/csv", ".csv")
mimetypes.add_type("text/tab-separated-values", ".tsv")
mimetypes.add_type("application/xml", ".xml")
mimetypes.add_type("image/svg+xml", ".svg")
|
|
|
|
# The third-party `markdown` package is optional: prefer it for rendering when
# installed, otherwise _render_markdown_fallback below does a minimal job.
try:
    _markdown = getattr(importlib.import_module("markdown"), "markdown")
except ImportError:
    _markdown: Callable[..., str] | None = None
|
|
|
|
# Inline markdown link [text](url); allows one level of nested parens in the URL.
MARKDOWN_INLINE_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)\s]+(?:\([^)]*\)[^)\s]*)*)\)")
# Inline markdown image ![alt](src).
MARKDOWN_INLINE_IMAGE_RE = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")
MARKDOWN_BOLD_RE = re.compile(r"\*\*([^*]+)\*\*")
# Single-asterisk emphasis; lookarounds keep it from matching inside **bold**.
MARKDOWN_ITALIC_RE = re.compile(r"(?<!\*)\*([^*]+)\*(?!\*)")
# Any opening HTML tag — used to detect content that already contains raw HTML.
HTML_TAG_RE = re.compile(r"<[A-Za-z][^>]*>")
HTML_BODY_RE = re.compile(r"<body[^>]*>(.*)</body>", flags=re.IGNORECASE | re.DOTALL)
# Document types that can execute active content when replayed; these get the
# sandboxing CSP in _apply_archive_replay_headers.
RISKY_REPLAY_MIMETYPES = {
    "text/html",
    "application/xhtml+xml",
    "image/svg+xml",
}
RISKY_REPLAY_EXTENSIONS = {".html", ".htm", ".xhtml", ".svg", ".svgz"}
# Lowercase sniffing markers for extensionless archived responses.
RISKY_REPLAY_MARKERS = (
    "<!doctype html",
    "<html",
    "<svg",
)
|
|
|
|
|
|
def _extract_markdown_candidate(text: str) -> str:
    """Strip an HTML <body> wrapper and outer <p> tags to expose possible markdown."""
    candidate = text
    body_match = HTML_BODY_RE.search(candidate)
    if body_match is not None:
        candidate = body_match.group(1)
    # Drop a single wrapping <p>...</p> that some extractors put around content.
    candidate = re.sub(r"^\s*<p[^>]*>", "", candidate, flags=re.IGNORECASE)
    candidate = re.sub(r"</p>\s*$", "", candidate, flags=re.IGNORECASE)
    return candidate.strip()
|
|
|
|
|
|
def _looks_like_markdown(text: str) -> bool:
    """Heuristic: does *text* contain enough markdown syntax to render as markdown?

    Full HTML documents are rejected outright; otherwise common markdown
    constructs are counted and at least six hits are required, which keeps
    false positives rare on ordinary prose.
    """
    lowered = text.lower()
    if "<html" in lowered and "<head" in lowered and "</body>" in lowered:
        return False

    score = len(re.findall(r"^\s{0,3}#{1,6}\s+\S", text, flags=re.MULTILINE))  # ATX headings
    score += len(re.findall(r"^\s*[-*+]\s+\S", text, flags=re.MULTILINE))      # bullet list items
    score += len(re.findall(r"^\s*\d+\.\s+\S", text, flags=re.MULTILINE))      # numbered list items
    score += text.count("[TOC]")
    score += len(MARKDOWN_INLINE_LINK_RE.findall(text))                        # inline links
    score += text.count("\n---") + text.count("\n***")                         # horizontal rules
    return score >= 6
|
|
|
|
|
|
def _render_text_preview_document(text: str, title: str) -> str:
    """Wrap plain text in a minimal dark-themed HTML page for iframe previews.

    Both *text* and *title* are HTML-escaped, so arbitrary archived content is
    safe to inline; the <pre> preserves whitespace while wrapping long lines.
    """
    escaped_title = html.escape(title)
    escaped_text = html.escape(text)
    # NOTE: doubled braces ({{ }}) below are literal CSS braces inside the f-string.
    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{escaped_title}</title>
<style>
:root {{
color-scheme: dark;
}}
html, body {{
margin: 0;
padding: 0;
background: #111;
color: #f3f3f3;
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
}}
.archivebox-text-preview-header {{
position: sticky;
top: 0;
z-index: 1;
padding: 10px 14px;
font-size: 12px;
line-height: 1.4;
color: #bbb;
background: rgba(17, 17, 17, 0.96);
border-bottom: 1px solid rgba(255, 255, 255, 0.08);
backdrop-filter: blur(8px);
}}
.archivebox-text-preview {{
margin: 0;
padding: 14px;
white-space: pre-wrap;
word-break: break-word;
tab-size: 2;
line-height: 1.45;
font-size: 13px;
}}
</style>
</head>
<body>
<div class="archivebox-text-preview-header">{escaped_title}</div>
<pre class="archivebox-text-preview">{escaped_text}</pre>
</body>
</html>"""
|
|
|
|
|
|
def _render_image_preview_document(image_url: str, title: str) -> str:
    """Wrap an image URL in a minimal HTML page that scales the image to fit.

    *image_url* and *title* are HTML-escaped (with quote escaping for the URL
    since it lands inside an attribute), so archived values are safe to embed.
    """
    escaped_title = html.escape(title)
    escaped_url = html.escape(image_url, quote=True)
    # NOTE: doubled braces ({{ }}) below are literal CSS braces inside the f-string.
    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{escaped_title}</title>
<style>
:root {{
color-scheme: dark;
}}
html, body {{
margin: 0;
padding: 0;
width: 100%;
min-height: 100%;
background: #fff;
}}
body {{
overflow: auto;
}}
.archivebox-image-preview {{
width: 100%;
min-width: 100%;
min-height: 100vh;
display: flex;
flex-direction: column;
align-items: center;
justify-content: flex-start;
box-sizing: border-box;
}}
.archivebox-image-preview img {{
display: block;
width: auto;
max-width: 100%;
height: auto;
margin: 0 auto;
}}
</style>
</head>
<body>
<div class="archivebox-image-preview">
<img src="{escaped_url}" alt="{escaped_title}">
</div>
</body>
</html>"""
|
|
|
|
|
|
def _render_markdown_fallback(text: str) -> str:
    """Convert markdown *text* to an HTML fragment.

    Uses the optional `markdown` package when it is installed and the text
    contains no raw HTML tags; otherwise falls back to a small line-based
    renderer that handles headings, lists, blockquotes, code fences,
    horizontal rules, [TOC], and basic inline spans.
    """
    if _markdown is not None and not HTML_TAG_RE.search(text):
        try:
            return _markdown(
                text,
                extensions=["extra", "toc", "sane_lists"],
                output_format="html",
            )
        except Exception:
            pass  # fall back to the minimal renderer below

    lines = text.splitlines()
    headings = []  # (level, text, slug) — collected up-front so [TOC] can link ahead

    def slugify(value: str) -> str:
        # Anchor ids: keep alphanumerics, collapse everything else to dashes.
        slug = re.sub(r"[^A-Za-z0-9]+", "-", value).strip("-")
        return slug or "section"

    # First pass: collect all headings for the table of contents.
    for raw_line in lines:
        heading_match = re.match(r"^\s{0,3}(#{1,6})\s+(.*)$", raw_line)
        if heading_match:
            level = len(heading_match.group(1))
            content = heading_match.group(2).strip()
            headings.append((level, content, slugify(content)))

    html_lines = []
    # Renderer state: inside a fenced code block / <ul> / <ol> / <blockquote>.
    in_code = False
    in_ul = False
    in_ol = False
    in_blockquote = False

    def render_inline(markup: str) -> str:
        # Inline spans: images before links (image syntax is a superset of link
        # syntax), then bold before italic so ** is not eaten as two * markers.
        content = MARKDOWN_INLINE_IMAGE_RE.sub(r'<img alt="\1" src="\2">', markup)
        content = MARKDOWN_INLINE_LINK_RE.sub(r'<a href="\2">\1</a>', content)
        content = MARKDOWN_BOLD_RE.sub(r"<strong>\1</strong>", content)
        content = MARKDOWN_ITALIC_RE.sub(r"<em>\1</em>", content)
        return content

    def close_lists():
        # Close any open list container before starting a different block type.
        nonlocal in_ul, in_ol
        if in_ul:
            html_lines.append("</ul>")
            in_ul = False
        if in_ol:
            html_lines.append("</ol>")
            in_ol = False

    # Second pass: emit HTML line by line.
    for raw_line in lines:
        line = raw_line.rstrip("\n")
        stripped = line.strip()

        # ``` fences toggle verbatim code mode.
        if stripped.startswith("```"):
            if in_code:
                html_lines.append("</code></pre>")
                in_code = False
            else:
                close_lists()
                if in_blockquote:
                    html_lines.append("</blockquote>")
                    in_blockquote = False
                html_lines.append("<pre><code>")
                in_code = True
            continue

        if in_code:
            # Inside a fence: escape verbatim, no inline formatting.
            html_lines.append(html.escape(line))
            continue

        if not stripped:
            # A blank line terminates any open block containers.
            close_lists()
            if in_blockquote:
                html_lines.append("</blockquote>")
                in_blockquote = False
            html_lines.append("<br/>")
            continue

        # Headings, optionally preceded by raw HTML tags (which are kept as-is).
        heading_match = re.match(r"^\s*((?:<[^>]+>\s*)*)(#{1,6})\s+(.*)$", line)
        if heading_match:
            close_lists()
            if in_blockquote:
                html_lines.append("</blockquote>")
                in_blockquote = False
            leading_tags = heading_match.group(1).strip()
            level = len(heading_match.group(2))
            content = heading_match.group(3).strip()
            if leading_tags:
                html_lines.append(leading_tags)
            html_lines.append(f'<h{level} id="{slugify(content)}">{render_inline(content)}</h{level}>')
            continue

        if stripped in ("---", "***"):
            close_lists()
            html_lines.append("<hr/>")
            continue

        if stripped.startswith("> "):
            if not in_blockquote:
                close_lists()
                html_lines.append("<blockquote>")
                in_blockquote = True
            content = stripped[2:]
            html_lines.append(render_inline(content))
            continue
        else:
            # Any non-quoted line ends an open blockquote.
            if in_blockquote:
                html_lines.append("</blockquote>")
                in_blockquote = False

        ul_match = re.match(r"^\s*[-*+]\s+(.*)$", line)
        if ul_match:
            if in_ol:
                html_lines.append("</ol>")
                in_ol = False
            if not in_ul:
                html_lines.append("<ul>")
                in_ul = True
            html_lines.append(f"<li>{render_inline(ul_match.group(1))}</li>")
            continue

        ol_match = re.match(r"^\s*\d+\.\s+(.*)$", line)
        if ol_match:
            if in_ul:
                html_lines.append("</ul>")
                in_ul = False
            if not in_ol:
                html_lines.append("<ol>")
                in_ol = True
            html_lines.append(f"<li>{render_inline(ol_match.group(1))}</li>")
            continue

        close_lists()

        # Inline conversions (leave raw HTML intact)
        if stripped == "[TOC]":
            toc_items = []
            for level, title, slug in headings:
                toc_items.append(
                    f'<li class="toc-level-{level}"><a href="#{slug}">{title}</a></li>',
                )
            html_lines.append(
                '<nav class="toc"><ul>' + "".join(toc_items) + "</ul></nav>",
            )
            continue

        html_lines.append(f"<p>{render_inline(line)}</p>")

    # Close anything left open at end of input.
    close_lists()
    if in_blockquote:
        html_lines.append("</blockquote>")
    if in_code:
        html_lines.append("</code></pre>")

    return "\n".join(html_lines)
|
|
|
|
|
|
def _render_markdown_document(markdown_text: str) -> str:
    """Render *markdown_text* into a complete standalone HTML document.

    Wraps the fragment produced by _render_markdown_fallback with a minimal
    responsive stylesheet suitable for direct display or iframe embedding.
    """
    body = _render_markdown_fallback(markdown_text)
    wrapped = (
        '<!doctype html><html><head><meta charset="utf-8">'
        '<meta name="viewport" content="width=device-width,initial-scale=1">'
        "<style>body{max-width:900px;margin:24px auto;padding:0 16px;"
        "font-family:system-ui,-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif;"
        "line-height:1.55;} img{max-width:100%;} pre{background:#f6f6f6;padding:12px;overflow:auto;}"
        ".toc ul{list-style:none;padding-left:0;} .toc li{margin:4px 0;}</style>"
        "</head><body>"
        f"{body}"
        "</body></html>"
    )
    return wrapped
|
|
|
|
|
|
def _content_type_base(content_type: str) -> str:
|
|
return (content_type or "").split(";", 1)[0].strip().lower()
|
|
|
|
|
|
def _is_risky_replay_document(fullpath: Path, content_type: str) -> bool:
    """Decide whether an archived file could carry active content when replayed.

    Checks the file extension and declared content type first, then sniffs the
    first 4KB of the file for HTML/SVG markers since archived responses often
    have neither a useful extension nor a trustworthy content type.
    """
    if fullpath.suffix.lower() in RISKY_REPLAY_EXTENSIONS:
        return True
    if _content_type_base(content_type) in RISKY_REPLAY_MIMETYPES:
        return True

    # Unknown archived response paths often have no extension. Sniff a small prefix
    # so one-domain no-JS mode still catches HTML/SVG documents.
    try:
        prefix = fullpath.read_bytes()[:4096].decode("utf-8", errors="ignore").lower()
    except Exception:
        return False
    for marker in RISKY_REPLAY_MARKERS:
        if marker in prefix:
            return True
    return False
|
|
|
|
|
|
def _apply_archive_replay_headers(response: HttpResponse, *, fullpath: Path, content_type: str, is_archive_replay: bool) -> HttpResponse:
    """Attach security headers to responses that replay archived content.

    Non-replay responses pass through untouched. Risky document types
    (HTML/SVG) additionally receive a sandboxing CSP that blocks scripts,
    forms, and network access when the server is configured to neuter them.
    """
    if not is_archive_replay:
        return response

    response.headers.setdefault("X-Content-Type-Options", "nosniff")
    response.headers.setdefault("X-ArchiveBox-Security-Mode", SERVER_CONFIG.SERVER_SECURITY_MODE)

    if SERVER_CONFIG.SHOULD_NEUTER_RISKY_REPLAY and _is_risky_replay_document(fullpath, content_type):
        response.headers["Content-Security-Policy"] = (
            "sandbox; "
            "default-src 'self' data: blob:; "
            "script-src 'none'; "
            "object-src 'none'; "
            "base-uri 'none'; "
            "form-action 'none'; "
            "connect-src 'none'; "
            "worker-src 'none'; "
            "frame-ancestors 'self'; "
            "style-src 'self' 'unsafe-inline' data: blob:; "
            "img-src 'self' data: blob:; "
            "media-src 'self' data: blob:; "
            "font-src 'self' data: blob:;"
        )
        response.headers.setdefault("Referrer-Policy", "no-referrer")

    return response
|
|
|
|
|
|
def _apply_common_file_headers(response, *, statobj, etag, encoding=None, inline_filename=None):
    """Set the caching/validation headers shared by every file response.

    Responses with a content-hash ETag get an immutable, year-long cache
    policy; everything else gets a short-lived revalidating one. Optionally
    also marks the response as inline and records the transfer encoding.
    """
    response.headers["Last-Modified"] = http_date(statobj.st_mtime)
    if etag:
        response.headers["ETag"] = etag
        response.headers["Cache-Control"] = f"{_cache_policy()}, max-age=31536000, immutable"
    else:
        response.headers["Cache-Control"] = f"{_cache_policy()}, max-age=60, stale-while-revalidate=300"
    if inline_filename is not None:
        response.headers["Content-Disposition"] = f'inline; filename="{inline_filename}"'
    if encoding:
        response.headers["Content-Encoding"] = encoding
    return response


def serve_static_with_byterange_support(request, path, document_root=None, show_indexes=False, is_archive_replay: bool = False):
    """
    Overrides Django's built-in django.views.static.serve function to support byte range requests.
    This allows you to do things like seek into the middle of a huge mp4 or WACZ without downloading the whole file.
    https://github.com/satchamo/django/commit/2ce75c5c4bee2a858c0214d136bfcd351fcde11d
    """
    assert document_root
    path = posixpath.normpath(path).lstrip("/")
    # safe_join refuses paths that escape document_root (e.g. via ../).
    fullpath = Path(safe_join(document_root, path))

    if os.access(fullpath, os.R_OK) and fullpath.is_dir():
        if request.GET.get("download") == "zip" and show_indexes:
            return _build_directory_zip_response(
                fullpath,
                path,
                is_archive_replay=is_archive_replay,
                # ASGI requests carry a `scope` attr; use the async streamer there.
                use_async_stream=hasattr(request, "scope"),
            )
        if show_indexes:
            response = _render_directory_index(request, path, fullpath)
            return _apply_archive_replay_headers(response, fullpath=fullpath, content_type="text/html", is_archive_replay=is_archive_replay)
        raise Http404(_("Directory indexes are not allowed here."))
    if not os.access(fullpath, os.R_OK):
        raise Http404(_("“%(path)s” does not exist") % {"path": fullpath})

    statobj = fullpath.stat()
    document_root = Path(document_root) if document_root else None
    rel_path = path

    # Prefer a content-hash ETag from the snapshot's hashes.json when available.
    etag = None
    if document_root:
        file_hash = _hash_for_path(document_root, rel_path)
        if file_hash:
            etag = f'"{file_hash}"'

    # Honor If-None-Match against the hash ETag, comparing both quoted and
    # unquoted forms so bare client values still match.
    if etag:
        inm = request.META.get("HTTP_IF_NONE_MATCH")
        if inm:
            inm_list = [item.strip() for item in inm.split(",")]
            if etag in inm_list or etag.strip('"') in [i.strip('"') for i in inm_list]:
                not_modified = HttpResponseNotModified()
                not_modified.headers["ETag"] = etag
                not_modified.headers["Cache-Control"] = f"{_cache_policy()}, max-age=31536000, immutable"
                not_modified.headers["Last-Modified"] = http_date(statobj.st_mtime)
                return _apply_archive_replay_headers(not_modified, fullpath=fullpath, content_type="", is_archive_replay=is_archive_replay)

    content_type, encoding = mimetypes.guess_type(str(fullpath))
    content_type = content_type or "application/octet-stream"
    # Add charset for text-like types (best guess), but don't override the type.
    is_text_like = content_type.startswith("text/") or content_type in {
        "application/json",
        "application/javascript",
        "application/xml",
        "application/x-ndjson",
        "image/svg+xml",
    }
    if is_text_like and "charset=" not in content_type:
        content_type = f"{content_type}; charset=utf-8"

    # ?preview=... asks for an HTML wrapper suitable for embedding in an iframe.
    preview_as_text_html = (
        bool(request.GET.get("preview"))
        and is_text_like
        and not content_type.startswith("text/html")
        and not content_type.startswith("image/svg+xml")
    )
    preview_as_image_html = (
        bool(request.GET.get("preview")) and content_type.startswith("image/") and not content_type.startswith("image/svg+xml")
    )

    # Respect the If-Modified-Since header for non-markdown responses.
    if not (content_type.startswith("text/plain") or content_type.startswith("text/html")):
        if not static.was_modified_since(request.META.get("HTTP_IF_MODIFIED_SINCE"), statobj.st_mtime):
            return _apply_archive_replay_headers(
                HttpResponseNotModified(),
                fullpath=fullpath,
                content_type=content_type,
                is_archive_replay=is_archive_replay,
            )

    # Wrap text-like outputs in HTML when explicitly requested for iframe previewing.
    if preview_as_text_html:
        try:
            max_preview_size = 10 * 1024 * 1024  # don't slurp files >10MB into memory
            if statobj.st_size <= max_preview_size:
                decoded = fullpath.read_text(encoding="utf-8", errors="replace")
                wrapped = _render_text_preview_document(decoded, fullpath.name)
                response = HttpResponse(wrapped, content_type="text/html; charset=utf-8")
                _apply_common_file_headers(response, statobj=statobj, etag=etag, encoding=encoding, inline_filename=fullpath.name)
                return _apply_archive_replay_headers(
                    response,
                    fullpath=fullpath,
                    content_type="text/html; charset=utf-8",
                    is_archive_replay=is_archive_replay,
                )
        except Exception:
            pass  # best-effort: fall through to serving the raw file

    # Same idea for images: wrap in a page that scales the image to fit.
    if preview_as_image_html:
        try:
            preview_query = request.GET.copy()
            preview_query.pop("preview", None)
            raw_image_url = request.path
            if preview_query:
                raw_image_url = f"{raw_image_url}?{urlencode(list(preview_query.lists()), doseq=True)}"
            wrapped = _render_image_preview_document(raw_image_url, fullpath.name)
            response = HttpResponse(wrapped, content_type="text/html; charset=utf-8")
            _apply_common_file_headers(response, statobj=statobj, etag=etag, encoding=encoding, inline_filename=fullpath.name)
            return _apply_archive_replay_headers(
                response,
                fullpath=fullpath,
                content_type="text/html; charset=utf-8",
                is_archive_replay=is_archive_replay,
            )
        except Exception:
            pass  # best-effort: fall through to serving the raw file

    # Heuristic fix: some archived HTML outputs (e.g. mercury content.html)
    # are stored with HTML-escaped markup or markdown sources. If so, render sensibly.
    if content_type.startswith("text/plain") or content_type.startswith("text/html"):
        try:
            max_unescape_size = 10 * 1024 * 1024  # 10MB cap to avoid heavy memory use
            if statobj.st_size <= max_unescape_size:
                raw = fullpath.read_bytes()
                decoded = raw.decode("utf-8", errors="replace")
                # BUGFIX: compare *entity-escaped* angle brackets against real tags.
                # Counting literal "<"/">" here reduced the condition to
                # count(">") > count("<"), which is never true for escaped
                # documents and made html.unescape() effectively unreachable.
                escaped_count = decoded.count("&lt;") + decoded.count("&gt;")
                tag_count = decoded.count("<")
                looks_escaped = bool(escaped_count) and escaped_count > tag_count * 2
                if looks_escaped:
                    decoded = html.unescape(decoded)
                markdown_candidate = _extract_markdown_candidate(decoded)
                if _looks_like_markdown(markdown_candidate):
                    wrapped = _render_markdown_document(markdown_candidate)
                    response = HttpResponse(wrapped, content_type="text/html; charset=utf-8")
                    _apply_common_file_headers(response, statobj=statobj, etag=etag, encoding=encoding, inline_filename=fullpath.name)
                    return _apply_archive_replay_headers(
                        response,
                        fullpath=fullpath,
                        content_type="text/html; charset=utf-8",
                        is_archive_replay=is_archive_replay,
                    )
                if looks_escaped:
                    # Escaped but not markdown: serve the unescaped text as-is.
                    response = HttpResponse(decoded, content_type=content_type)
                    _apply_common_file_headers(response, statobj=statobj, etag=etag, encoding=encoding, inline_filename=fullpath.name)
                    return _apply_archive_replay_headers(
                        response,
                        fullpath=fullpath,
                        content_type=content_type,
                        is_archive_replay=is_archive_replay,
                    )
        except Exception:
            pass  # best-effort: fall back to byte-for-byte serving below

    # setup response object
    ranged_file = RangedFileReader(open(fullpath, "rb"))
    response = StreamingHttpResponse(ranged_file, content_type=content_type)
    _apply_common_file_headers(
        response,
        statobj=statobj,
        etag=etag,
        encoding=encoding,
        inline_filename=fullpath.name if is_text_like else None,
    )
    if content_type.startswith("image/"):
        # Images are immutable snapshot artifacts: cache aggressively.
        response.headers["Cache-Control"] = "public, max-age=604800, immutable"

    # handle byte-range requests by serving chunk of file
    if stat.S_ISREG(statobj.st_mode):
        size = statobj.st_size
        response["Content-Length"] = size
        response["Accept-Ranges"] = "bytes"
        response["X-Django-Ranges-Supported"] = "1"
        # Respect the Range header.
        if "HTTP_RANGE" in request.META:
            try:
                ranges = parse_range_header(request.META["HTTP_RANGE"], size)
            except ValueError:
                ranges = None
            # only handle syntactically valid headers, that are simple (no
            # multipart byteranges)
            if ranges is not None and len(ranges) == 1:
                start, stop = ranges[0]
                if stop > size:
                    # requested range not satisfiable
                    return HttpResponse(status=416)
                ranged_file.start = start
                ranged_file.stop = stop
                response["Content-Range"] = "bytes %d-%d/%d" % (start, stop - 1, size)
                response["Content-Length"] = stop - start
                response.status_code = 206
    return _apply_archive_replay_headers(response, fullpath=fullpath, content_type=content_type, is_archive_replay=is_archive_replay)
|
|
|
|
|
|
def serve_static(request, path, **kwargs):
    """
    Serve static files below a given point in the directory structure or
    from locations inferred from the staticfiles finders.

    To use, put a URL pattern such as::

        from django.contrib.staticfiles import views

        path('<path:path>', views.serve)

    in your URLconf.

    It uses the django.views.static.serve() view to serve the found files.
    """
    normalized_path = posixpath.normpath(path).lstrip("/")
    absolute_path = finders.find(normalized_path)
    if not absolute_path:
        # Distinguish directory requests from genuinely missing files.
        if path.endswith("/") or not path:
            raise Http404("Directory indexes are not allowed here.")
        raise Http404("'%s' could not be found" % path)
    document_root, filename = os.path.split(absolute_path)
    return serve_static_with_byterange_support(request, filename, document_root=document_root, **kwargs)
|
|
|
|
|
|
def parse_range_header(header, resource_size):
    """
    Parses a range header into a list of two-tuples (start, stop) where `start`
    is the starting byte of the range (inclusive) and `stop` is the ending byte
    position of the range (exclusive).
    Returns None if the value of the header is not syntactically valid.
    https://github.com/satchamo/django/commit/2ce75c5c4bee2a858c0214d136bfcd351fcde11d
    """
    if not header or "=" not in header:
        return None

    units, _eq, spec = header.partition("=")
    if units.strip().lower() != "bytes":
        return None

    ranges = []
    for piece in spec.split(","):
        piece = piece.strip()
        if "-" not in piece:
            return None

        if piece.startswith("-"):
            # suffix-byte-range-spec: this form specifies the last N bytes of an
            # entity-body (clamped to the start of the resource)
            start = max(resource_size + int(piece), 0)
            stop = resource_size
        else:
            # byte-range-spec: first-byte-pos "-" [last-byte-pos]
            first, _dash, last = piece.partition("-")
            start = int(first)
            # the +1 is here since we want the stopping point to be exclusive, whereas in
            # the HTTP spec, the last-byte-pos is inclusive
            stop = int(last) + 1 if last else resource_size
            if start >= stop:
                return None

        ranges.append((start, stop))

    return ranges
|
|
|
|
|
|
class RangedFileReader:
    """
    Wraps a file like object with an iterator that runs over part (or all) of
    the file defined by start and stop. Blocks of block_size will be returned
    from the starting position, up to, but not including the stop point.
    https://github.com/satchamo/django/commit/2ce75c5c4bee2a858c0214d136bfcd351fcde11d
    """

    # Default read granularity in bytes; overridable per-instance via __init__.
    block_size = 8192

    def __init__(self, file_like, start=0, stop=float("inf"), block_size=None):
        self.f = file_like
        self.block_size = block_size or RangedFileReader.block_size
        self.start = start  # inclusive byte offset where iteration begins
        self.stop = stop  # exclusive byte offset where iteration ends

    def __iter__(self):
        """Yield successive blocks of the underlying file between start and stop."""
        self.f.seek(self.start)
        position = self.start
        while position < self.stop:
            data = self.f.read(min(self.block_size, self.stop - position))
            if not data:
                break

            yield data
            # BUGFIX: advance by the bytes actually read, not by block_size.
            # A short read (pipe, socket, interrupted syscall) would otherwise
            # overcount the position and under-serve the advertised range.
            position += len(data)