move serve_static and shell_welcome_message into misc

This commit is contained in:
Nick Sweeting
2024-10-14 17:35:28 -07:00
parent 5ac941cf2e
commit 9a04ed7c76
5 changed files with 6 additions and 5 deletions

View File

@@ -0,0 +1,169 @@
import os
import stat
import posixpath
import mimetypes
from pathlib import Path
from django.contrib.staticfiles import finders
from django.views import static
from django.http import StreamingHttpResponse, Http404, HttpResponse, HttpResponseNotModified
from django.utils._os import safe_join
from django.utils.http import http_date
from django.utils.translation import gettext as _
def serve_static_with_byterange_support(request, path, document_root=None, show_indexes=False):
    """
    Overrides Django's built-in django.views.static.serve function to support byte range requests.
    This allows you to do things like seek into the middle of a huge mp4 or WACZ without downloading the whole file.
    https://github.com/satchamo/django/commit/2ce75c5c4bee2a858c0214d136bfcd351fcde11d

    Args:
        request: the incoming request; HTTP_IF_MODIFIED_SINCE and HTTP_RANGE are read from request.META.
        path: path of the file to serve, relative to document_root.
        document_root: directory that path is resolved against (required).
        show_indexes: when True, a readable directory is answered with a directory index.

    Returns:
        - a directory index response for a readable directory when show_indexes is set,
        - HttpResponseNotModified when the file is unchanged since If-Modified-Since,
        - HttpResponse(status=416) when a single requested range starts at or beyond the end of the file,
        - otherwise a StreamingHttpResponse (status 206 for a single valid byte range, else the full file).

    Raises:
        Http404: if the path is unreadable/missing, or is a directory and show_indexes is False.
    """
    assert document_root
    path = posixpath.normpath(path).lstrip("/")
    fullpath = Path(safe_join(document_root, path))
    if os.access(fullpath, os.R_OK) and fullpath.is_dir():
        if show_indexes:
            return static.directory_index(path, fullpath)
        raise Http404(_("Directory indexes are not allowed here."))
    if not os.access(fullpath, os.R_OK):
        raise Http404(_("“%(path)s” does not exist") % {"path": fullpath})

    # Respect the If-Modified-Since header.
    statobj = fullpath.stat()
    if not static.was_modified_since(request.META.get("HTTP_IF_MODIFIED_SINCE"), statobj.st_mtime):
        return HttpResponseNotModified()

    content_type, encoding = mimetypes.guess_type(str(fullpath))
    content_type = content_type or "application/octet-stream"

    # Byte ranges are only offered for regular files.
    is_regular_file = stat.S_ISREG(statobj.st_mode)
    size = statobj.st_size

    # Validate the Range header BEFORE opening the file, so that the 416 early
    # return below cannot leave an open file handle behind.
    byte_range = None
    if is_regular_file and "HTTP_RANGE" in request.META:
        try:
            ranges = parse_range_header(request.META["HTTP_RANGE"], size)
        except ValueError:
            ranges = None
        # only handle syntactically valid headers, that are simple (no
        # multipart byteranges); anything else falls back to the full file
        if ranges is not None and len(ranges) == 1:
            start, stop = ranges[0]
            if start >= size:
                # requested range not satisfiable: it starts past the end of the file
                unsatisfiable = HttpResponse(status=416)
                unsatisfiable["Content-Range"] = "bytes */%d" % size
                return unsatisfiable
            # a last-byte-pos beyond the end of the file means "up to the end",
            # so clamp it instead of rejecting the request
            byte_range = (start, min(stop, size))

    # set up the response object
    ranged_file = RangedFileReader(open(fullpath, "rb"))
    response = StreamingHttpResponse(ranged_file, content_type=content_type)
    response.headers["Last-Modified"] = http_date(statobj.st_mtime)

    if is_regular_file:
        response["Content-Length"] = size
        response["Accept-Ranges"] = "bytes"
        response["X-Django-Ranges-Supported"] = "1"
        # handle byte-range requests by serving only the requested chunk of the file
        if byte_range is not None:
            start, stop = byte_range
            ranged_file.start = start
            ranged_file.stop = stop
            # stop is exclusive internally, while Content-Range uses an inclusive last byte
            response["Content-Range"] = "bytes %d-%d/%d" % (start, stop - 1, size)
            response["Content-Length"] = stop - start
            response.status_code = 206

    if encoding:
        response.headers["Content-Encoding"] = encoding
    return response
def serve_static(request, path, **kwargs):
    """
    Locate a static file through the staticfiles finders and serve it with
    byte-range support.

    The requested path is normalized and looked up with finders.find(); when a
    file is found, its directory is used as the document root and its basename
    as the path handed to serve_static_with_byterange_support(), together with
    any extra keyword arguments.

    Raises:
        Http404: if the finders return nothing for the normalized path.
    """
    lookup_path = posixpath.normpath(path).lstrip("/")
    located = finders.find(lookup_path)

    if located:
        root_dir, filename = os.path.split(located)
        return serve_static_with_byterange_support(request, filename, document_root=root_dir, **kwargs)

    # Nothing found: an empty path or a trailing slash is reported as a directory request.
    if path == "" or path.endswith("/"):
        raise Http404("Directory indexes are not allowed here.")
    raise Http404("'%s' could not be found" % path)
def parse_range_header(header, resource_size):
    """
    Parses a range header into a list of two-tuples (start, stop) where `start`
    is the starting byte of the range (inclusive) and `stop` is the ending byte
    position of the range (exclusive).

    Args:
        header: raw value of the Range header, e.g. "bytes=0-499" or "bytes=-500".
        resource_size: total size of the resource in bytes; used to resolve
            suffix ranges ("-N") and open-ended ranges ("N-").

    Returns:
        A list of (start, stop) tuples, one per comma-separated range, or None
        if the header is not syntactically valid: missing "=", a unit other than
        "bytes", a range without "-", a non-numeric position, or an empty
        range (start >= stop).

    https://github.com/satchamo/django/commit/2ce75c5c4bee2a858c0214d136bfcd351fcde11d
    """
    if not header or "=" not in header:
        return None

    units, range_ = header.split("=", 1)
    if units.strip().lower() != "bytes":
        return None

    ranges = []
    for val in range_.split(","):
        val = val.strip()
        if "-" not in val:
            return None
        try:
            if val.startswith("-"):
                # suffix-byte-range-spec: this form specifies the last N bytes of an
                # entity-body; int(val) is negative here, and asking for more bytes
                # than exist just means "the whole resource"
                start = max(resource_size + int(val), 0)
                stop = resource_size
            else:
                # byte-range-spec: first-byte-pos "-" [last-byte-pos]
                first, last = val.split("-", 1)
                start = int(first)
                # the +1 is here since we want the stopping point to be exclusive, whereas in
                # the HTTP spec, the last-byte-pos is inclusive
                stop = int(last) + 1 if last else resource_size
        except ValueError:
            # non-numeric position (e.g. "a-b", "-", "1-2-3"): report it as an
            # invalid header rather than leaking ValueError to the caller
            return None
        if start >= stop:
            return None
        ranges.append((start, stop))
    return ranges
class RangedFileReader:
    """
    Wraps a file like object with an iterator that runs over part (or all) of
    the file defined by start and stop. Blocks of at most block_size bytes are
    yielded from the starting position, up to, but not including the stop point.

    `start` and `stop` are plain attributes and may be reassigned after
    construction, before iteration begins.

    https://github.com/satchamo/django/commit/2ce75c5c4bee2a858c0214d136bfcd351fcde11d
    """

    # default number of bytes requested per read when none is passed to __init__
    block_size = 8192

    def __init__(self, file_like, start=0, stop=float("inf"), block_size=None):
        """
        Args:
            file_like: object providing seek() and read(n).
            start: offset of the first byte to yield (inclusive).
            stop: offset at which to stop (exclusive); defaults to reading until EOF.
            block_size: maximum bytes per yielded chunk; falls back to the class default.
        """
        self.f = file_like
        self.block_size = block_size or RangedFileReader.block_size
        self.start = start
        self.stop = stop

    def __iter__(self):
        """Seek to `start` and yield chunks until `stop` or end of file is reached."""
        self.f.seek(self.start)
        position = self.start
        while position < self.stop:
            data = self.f.read(min(self.block_size, self.stop - position))
            if not data:
                break
            yield data
            # advance by what was actually read: read() may return fewer bytes
            # than requested, and assuming a full block would skip past data
            position += len(data)

    def close(self):
        """
        Close the wrapped file, if it supports closing.

        Exposing close() lets whatever consumes this reader (for example a
        streaming HTTP response) release the file handle once it is finished.
        """
        closer = getattr(self.f, "close", None)
        if closer is not None:
            closer()

View File

@@ -0,0 +1,60 @@
__package__ = 'archivebox.core'
from rich.console import Console
# helpful imports that make the shell easier to work with out-of-the-box:
import re # noqa
import os # noqa
import sys # noqa
import json # noqa
import psutil # noqa
import django # noqa
import pydantic # noqa
import requests # noqa
import subprocess # noqa
import archivebox # noqa
import abx # noqa
from benedict import benedict # noqa
from django.utils import timezone # noqa
from datetime import datetime, timedelta # noqa
from django.conf import settings # noqa
from archivebox import CONSTANTS # noqa
from ..main import * # noqa
from ..cli import CLI_SUBCOMMANDS
# Convenience names exposed to the interactive shell session.
CONFIG = settings.FLAT_CONFIG
CLI_COMMAND_NAMES = ", ".join(CLI_SUBCOMMANDS.keys())

if __name__ == '__main__':
    # load the rich extension for ipython for pretty printing
    # https://rich.readthedocs.io/en/stable/introduction.html#ipython-extension
    get_ipython().run_line_magic('load_ext', 'rich')  # type: ignore # noqa

    console = Console()

    def prnt(*args, **kwargs):
        """Print via rich, cropping with an ... ellipsis, for helptext that doesn't matter that much."""
        console.print(*args, overflow='ellipsis', soft_wrap=True, **kwargs)

    # print the welcome message
    prnt('[green]import re, os, sys, psutil, subprocess, requests, json, pydantic, benedict, django, abx[/]')
    prnt('[yellow4]# ArchiveBox Imports[/]')
    prnt('[yellow4]import archivebox[/]')
    prnt('[yellow4]from archivebox.main import {}[/]'.format(CLI_COMMAND_NAMES))
    prnt()

    # only show the ASCII logo when the terminal is wide enough for it
    if console.width >= 80:
        from archivebox.misc.logging import rainbow
        prnt(rainbow(archivebox.ASCII_LOGO))

    prnt('[i] :heavy_dollar_sign: Welcome to the ArchiveBox Shell!')
    prnt(' [deep_sky_blue4]Docs:[/deep_sky_blue4] [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage[/link]')
    prnt(' [link=https://docs.archivebox.io/en/latest/modules.html]https://docs.archivebox.io/en/latest/modules.html[/link]')
    prnt()
    prnt(' :grey_question: [violet]Hint[/] [i]Here are some examples to get started:[/]')
    prnt(' add[blink][deep_sky_blue4]?[/deep_sky_blue4][/blink] [grey53]# add ? after anything to get help[/]')
    prnt(' add("https://example.com/some/new/url") [grey53]# call CLI methods from the shell[/]')
    prnt(' snap = Snapshot.objects.filter(url__contains="https://example.com").last() [grey53]# query for individual snapshots[/]')
    prnt(' archivebox.plugins_extractor.wget.apps.WGET_EXTRACTOR.extract(snap.id) [grey53]# call an extractor directly[/]')
    prnt(' snap.archiveresult_set.all() [grey53]# see extractor results[/]')
    prnt(' bool(re.compile(CONFIG.URL_DENYLIST).search("https://example.com/abc.exe")) [grey53]# test out a config change[/]')