multipart-parser correctness (closes #1227);

makes it possible to log in from the Dillo web browser;

* unlike every other browser, dillo does NOT send a trailing "\r\n"
   after the terminating "--"; turns out that dillo got this right
   and every other browser didn't, fun

* dillo announces the boundary in quotes, which the spec permits (quoting the boundary parameter is optional)

the multipart-parser is now 2% slower
This commit is contained in:
ed
2026-01-21 03:19:32 +00:00
parent 78f6855f08
commit b4df8fa23c

View File

@@ -961,11 +961,13 @@ class _Unrecv(object):
self.s = s self.s = s
self.log = log self.log = log
self.buf: bytes = b"" self.buf: bytes = b""
self.nb = 0
def recv(self, nbytes: int, spins: int = 1) -> bytes: def recv(self, nbytes: int, spins: int = 1) -> bytes:
if self.buf: if self.buf:
ret = self.buf[:nbytes] ret = self.buf[:nbytes]
self.buf = self.buf[nbytes:] self.buf = self.buf[nbytes:]
self.nb += len(ret)
return ret return ret
while True: while True:
@@ -985,6 +987,7 @@ class _Unrecv(object):
if not ret: if not ret:
raise UnrecvEOF("client stopped sending data") raise UnrecvEOF("client stopped sending data")
self.nb += len(ret)
return ret return ret
def recv_ex(self, nbytes: int, raise_on_trunc: bool = True) -> bytes: def recv_ex(self, nbytes: int, raise_on_trunc: bool = True) -> bytes:
@@ -1012,6 +1015,7 @@ class _Unrecv(object):
def unrecv(self, buf: bytes) -> None: def unrecv(self, buf: bytes) -> None:
self.buf = buf + self.buf self.buf = buf + self.buf
self.nb -= len(buf)
# !rm.yes> # !rm.yes>
@@ -1024,6 +1028,7 @@ class _LUnrecv(object):
self.s = s self.s = s
self.log = log self.log = log
self.buf = b"" self.buf = b""
self.nb = 0
def recv(self, nbytes: int, spins: int) -> bytes: def recv(self, nbytes: int, spins: int) -> bytes:
if self.buf: if self.buf:
@@ -1031,6 +1036,7 @@ class _LUnrecv(object):
self.buf = self.buf[nbytes:] self.buf = self.buf[nbytes:]
t = "\033[0;7mur:pop:\033[0;1;32m {}\n\033[0;7mur:rem:\033[0;1;35m {}\033[0m" t = "\033[0;7mur:pop:\033[0;1;32m {}\n\033[0;7mur:rem:\033[0;1;35m {}\033[0m"
print(t.format(ret, self.buf)) print(t.format(ret, self.buf))
self.nb += len(ret)
return ret return ret
ret = self.s.recv(nbytes) ret = self.s.recv(nbytes)
@@ -1039,6 +1045,7 @@ class _LUnrecv(object):
if not ret: if not ret:
raise UnrecvEOF("client stopped sending data") raise UnrecvEOF("client stopped sending data")
self.nb += len(ret)
return ret return ret
def recv_ex(self, nbytes: int, raise_on_trunc: bool = True) -> bytes: def recv_ex(self, nbytes: int, raise_on_trunc: bool = True) -> bytes:
@@ -1067,6 +1074,7 @@ class _LUnrecv(object):
def unrecv(self, buf: bytes) -> None: def unrecv(self, buf: bytes) -> None:
self.buf = buf + self.buf self.buf = buf + self.buf
self.nb -= len(buf)
t = "\033[0;7mur:push\033[0;1;31m {}\n\033[0;7mur:rem:\033[0;1;35m {}\033[0m" t = "\033[0;7mur:push\033[0;1;31m {}\n\033[0;7mur:rem:\033[0;1;35m {}\033[0m"
print(t.format(buf, self.buf)) print(t.format(buf, self.buf))
@@ -1807,6 +1815,11 @@ class MultipartParser(object):
self.log = log_func self.log = log_func
self.args = args self.args = args
self.headers = http_headers self.headers = http_headers
try:
self.clen = int(http_headers["content-length"])
sr.nb = 0
except:
self.clen = 0
self.re_ctype = RE_CTYPE self.re_ctype = RE_CTYPE
self.re_cdisp = RE_CDISP self.re_cdisp = RE_CDISP
@@ -1962,7 +1975,10 @@ class MultipartParser(object):
if tail == b"--": if tail == b"--":
# EOF indicated by this immediately after final boundary # EOF indicated by this immediately after final boundary
tail = self.sr.recv_ex(2, False) if self.clen == self.sr.nb:
tail = b"\r\n" # dillo doesn't terminate with trailing \r\n
else:
tail = self.sr.recv_ex(2, False)
run = False run = False
if tail != b"\r\n": if tail != b"\r\n":
@@ -1980,6 +1996,8 @@ class MultipartParser(object):
def parse(self) -> None: def parse(self) -> None:
boundary = get_boundary(self.headers) boundary = get_boundary(self.headers)
if boundary.startswith('"') and boundary.endswith('"'):
boundary = boundary[1:-1] # dillo uses quotes
self.log("boundary=%r" % (boundary,)) self.log("boundary=%r" % (boundary,))
# spec says there might be junk before the first boundary, # spec says there might be junk before the first boundary,