"""Dissect a tagged, length-prefixed binary value into a positioned tree.

Every node of the result starts with a header ``(position, length, tag)``:
``position`` is the byte offset of the node within the original input,
``length`` is the number of bytes the node occupies (tag byte included) and
``tag`` is the node's first byte.  See :func:`dissect` for the node shapes.

The module uses a package-relative import, so to use it as a command-line
tool it has to be run as a module of its package (``python -m ...``), with the
binary input supplied on standard input.
"""
# Provenance: added by commit d22d0d7120b1096425b0490fa46a77727abc18e2
# ("dissect.py", Tony Garnock-Jones, 2022-07-10) as
# implementations/python/preserves/dissect.py.

import struct
from .binary import decode_int, decode_varint

# Tags whose body is treated as a flat run of length-prefixed items.
_SEQUENCE_TAGS = (0xA7, 0xA8, 0xA9, 0xBF)


def dissect_items(body, position=0):
    """Dissect a run of length-prefixed items.

    Each item is a varint byte count followed by that many bytes; the bytes
    are handed to :func:`dissect`.

    ``decode_varint`` is used as returning ``(count, i)`` where ``i`` is the
    index in ``body`` at which the item's bytes begin (presumably the size of
    the varint prefix -- confirm against ``.binary``).

    :param body: bytes holding zero or more length-prefixed items.
    :param position: offset of ``body[0]`` within the original input.
    :returns: list with one :func:`dissect` result per item, in input order.
    :raises IndexError: if an item has a zero byte count (it has no tag byte).
    """
    result = []
    while body:
        count, i = decode_varint(body)
        end = i + count
        # The item's own bytes start after its length prefix, hence the
        # ``position + i`` reported for the child node.
        result.append(dissect(body[i:end], position + i))
        body = body[end:]
        position += end
    return result


def dissect(bs, position=0):
    """Dissect a single tagged value.

    The first byte of ``bs`` is the tag; the remaining bytes are the body,
    interpreted according to the tag:

    * ``0xAA``: body is a run of items, returned grouped into consecutive
      pairs (a trailing unpaired item ends up in a one-element group).
    * ``0xA7``, ``0xA8``, ``0xA9``, ``0xBF``: body is a run of items.
    * ``0xAB``: body is exactly one nested value.
    * ``0xA2``: body is a big-endian float (4 bytes) or double (8 bytes);
      any other body length is left undecoded.
    * ``0xA3``: body is decoded with ``decode_int``.
    * ``0xA4``: body minus its final byte is decoded as UTF-8.
    * ``0xA6``: body is decoded as UTF-8.
    * anything else: body is returned undecoded.

    :param bs: bytes of one complete value, tag byte first.
    :param position: offset of ``bs[0]`` within the original input.
    :returns: ``((position, len(bs), tag), kids)`` for compound values,
        ``((position, len(bs), tag), body, info)`` when the body was decoded
        to a Python value, otherwise ``((position, len(bs), tag), body)``.
    :raises IndexError: if ``bs`` is empty.
    """
    if not bs:
        # Same exception type that ``bs[0]`` would raise, but with enough
        # context to locate the problem in the input.
        raise IndexError(
            'cannot dissect an empty value at position %d' % position)

    tag = bs[0]
    body = bs[1:]
    info = None
    kids = None

    if tag == 0xAA:
        items = dissect_items(body, position + 1)
        kids = [items[i:i + 2] for i in range(0, len(items), 2)]
    elif tag in _SEQUENCE_TAGS:
        kids = dissect_items(body, position + 1)
    elif tag == 0xAB:
        kids = [dissect(body, position + 1)]
    elif tag == 0xA2:
        if len(body) == 4:
            info = struct.unpack('>f', body)[0]
        elif len(body) == 8:
            info = struct.unpack('>d', body)[0]
    elif tag == 0xA3:
        info = decode_int(body)
    elif tag == 0xA4:
        info = body[:-1].decode('utf-8')
    elif tag == 0xA6:
        info = body.decode('utf-8')

    header = (position, len(bs), tag)
    if kids is not None:
        return (header, kids)
    if info is not None:
        return (header, body, info)
    return (header, body)


if __name__ == '__main__':
    import sys
    from pprint import pprint

    # Read the whole binary input from stdin and pretty-print its dissection.
    pprint(dissect(sys.stdin.buffer.read()))