multipart-parser correctness (closes #1227);

makes it possible to login from the webbrowser dillo;

* unlike every other browser, dillo does NOT send a trailing "\r\n"
   after the terminating "--"; turns out that dillo got this right
   and every other browser didn't, fun

* dillo announces the boundary in quotes, which is spec-optional

the multipart-parser is now 2% slower
This commit is contained in:
ed
2026-01-21 03:19:32 +00:00
parent 78f6855f08
commit b4df8fa23c

View File

@@ -961,11 +961,13 @@ class _Unrecv(object):
self.s = s
self.log = log
self.buf: bytes = b""
self.nb = 0
def recv(self, nbytes: int, spins: int = 1) -> bytes:
if self.buf:
ret = self.buf[:nbytes]
self.buf = self.buf[nbytes:]
self.nb += len(ret)
return ret
while True:
@@ -985,6 +987,7 @@ class _Unrecv(object):
if not ret:
raise UnrecvEOF("client stopped sending data")
self.nb += len(ret)
return ret
def recv_ex(self, nbytes: int, raise_on_trunc: bool = True) -> bytes:
@@ -1012,6 +1015,7 @@ class _Unrecv(object):
def unrecv(self, buf: bytes) -> None:
self.buf = buf + self.buf
self.nb -= len(buf)
# !rm.yes>
@@ -1024,6 +1028,7 @@ class _LUnrecv(object):
self.s = s
self.log = log
self.buf = b""
self.nb = 0
def recv(self, nbytes: int, spins: int) -> bytes:
if self.buf:
@@ -1031,6 +1036,7 @@ class _LUnrecv(object):
self.buf = self.buf[nbytes:]
t = "\033[0;7mur:pop:\033[0;1;32m {}\n\033[0;7mur:rem:\033[0;1;35m {}\033[0m"
print(t.format(ret, self.buf))
self.nb += len(ret)
return ret
ret = self.s.recv(nbytes)
@@ -1039,6 +1045,7 @@ class _LUnrecv(object):
if not ret:
raise UnrecvEOF("client stopped sending data")
self.nb += len(ret)
return ret
def recv_ex(self, nbytes: int, raise_on_trunc: bool = True) -> bytes:
@@ -1067,6 +1074,7 @@ class _LUnrecv(object):
def unrecv(self, buf: bytes) -> None:
self.buf = buf + self.buf
self.nb -= len(buf)
t = "\033[0;7mur:push\033[0;1;31m {}\n\033[0;7mur:rem:\033[0;1;35m {}\033[0m"
print(t.format(buf, self.buf))
@@ -1807,6 +1815,11 @@ class MultipartParser(object):
self.log = log_func
self.args = args
self.headers = http_headers
try:
self.clen = int(http_headers["content-length"])
sr.nb = 0
except:
self.clen = 0
self.re_ctype = RE_CTYPE
self.re_cdisp = RE_CDISP
@@ -1962,7 +1975,10 @@ class MultipartParser(object):
if tail == b"--":
# EOF indicated by this immediately after final boundary
tail = self.sr.recv_ex(2, False)
if self.clen == self.sr.nb:
tail = b"\r\n" # dillo doesn't terminate with trailing \r\n
else:
tail = self.sr.recv_ex(2, False)
run = False
if tail != b"\r\n":
@@ -1980,6 +1996,8 @@ class MultipartParser(object):
def parse(self) -> None:
boundary = get_boundary(self.headers)
if boundary.startswith('"') and boundary.endswith('"'):
boundary = boundary[1:-1] # dillo uses quotes
self.log("boundary=%r" % (boundary,))
# spec says there might be junk before the first boundary,