Avoid spurious copying during dissection by using memoryview

This commit is contained in:
Tony Garnock-Jones 2022-07-12 16:47:16 +02:00
parent d22d0d7120
commit 8032e68214
1 changed file with 12 additions and 7 deletions

View File

@ -7,11 +7,16 @@ def dissect_items(body, position=0):
(count, i) = decode_varint(body) (count, i) = decode_varint(body)
chunk = body[i:i+count] chunk = body[i:i+count]
body = body[i+count:] body = body[i+count:]
result.append(dissect(chunk, position + i)) result.append(_dissect(chunk, position + i))
position = position + i+count position = position + i+count
return result return result
def dissect(bs, position=0): def dissect(bs):
if not isinstance(bs, memoryview):
bs = memoryview(bs)
return _dissect(bs, 0)
def _dissect(bs, position):
tag = bs[0] tag = bs[0]
body = bs[1:] body = bs[1:]
info = None info = None
@ -23,23 +28,23 @@ def dissect(bs, position=0):
elif tag in [0xA7, 0xA8, 0xA9, 0xBF]: elif tag in [0xA7, 0xA8, 0xA9, 0xBF]:
kids = dissect_items(body, position+1) kids = dissect_items(body, position+1)
elif tag == 0xAB: elif tag == 0xAB:
kids = [dissect(body, position+1)] kids = [_dissect(body, position+1)]
elif tag == 0xA2: elif tag == 0xA2:
if len(body) == 4: info = struct.unpack('>f', body)[0] if len(body) == 4: info = struct.unpack('>f', body)[0]
if len(body) == 8: info = struct.unpack('>d', body)[0] if len(body) == 8: info = struct.unpack('>d', body)[0]
elif tag == 0xA3: elif tag == 0xA3:
info = decode_int(body) info = decode_int(body)
elif tag == 0xA4: elif tag == 0xA4:
info = body[:-1].decode('utf-8') info = bytes(body[:-1]).decode('utf-8')
elif tag == 0xA6: elif tag == 0xA6:
info = body.decode('utf-8') info = bytes(body).decode('utf-8')
if kids is not None: if kids is not None:
return ((position, len(bs), tag), kids) return ((position, len(bs), tag), kids)
elif info is not None: elif info is not None:
return ((position, len(bs), tag), body, info) return ((position, len(bs), tag), bytes(body), info)
else: else:
return ((position, len(bs), tag), body) return ((position, len(bs), tag), bytes(body))
if __name__ == '__main__': if __name__ == '__main__':
import sys import sys