408 lines
14 KiB
Python
408 lines
14 KiB
Python
import numbers
|
|
import struct
|
|
import base64
|
|
import math
|
|
|
|
from .values import *
|
|
from .error import *
|
|
from .compat import basestring_, unichr_
|
|
from .binary import Decoder
|
|
|
|
class TextCodec(object):
    """Marker base class shared by the text-syntax `Parser` and `Formatter`.

    It adds no state or behaviour of its own.
    """
|
|
|
|
# Recognises the numeric literals handled by Parser.read_raw_symbol_or_number.
# Groups used by the parser:
#   1 - signed integer part
#   2 - the whole optional tail (None for a plain integer)
#   3 - fraction and/or exponent text, without the precision suffix
#   7 - optional single-precision suffix ('f' or 'F'; '' when absent)
# Digits are spelled [0-9] rather than \d: on text patterns \d also matches
# non-ASCII Unicode decimal digits, which would make such tokens parse as
# numbers instead of symbols.
# NOTE(review): `re` is not imported explicitly among the visible imports;
# presumably it arrives through one of the star imports - confirm.
NUMBER_RE = re.compile(r'^([-+]?[0-9]+)(((\.[0-9]+([eE][-+]?[0-9]+)?)|([eE][-+]?[0-9]+))([fF]?))?$')
|
|
|
|
class Parser(TextCodec):
    """Cursor-based parser for the text syntax.

    The parser reads characters from `input_buffer` starting at `index`.
    Whenever it needs a character that is not there it raises ShortPacket;
    `try_next` (and iteration) turn that into "no complete value yet" and
    rewind, so that more text can be supplied with `extend`.
    """

    def __init__(self, input_buffer=u'', include_annotations=False, parse_embedded=lambda x: x):
        """Create a parser.

        input_buffer: initial text to parse.
        include_annotations: when true, every parsed value is wrapped in
            Annotated and leading annotations are attached to it.
        parse_embedded: callable applied to the value following `#!`; when
            None, encountering `#!` raises DecodeError.
        """
        super(Parser, self).__init__()
        self.input_buffer = input_buffer
        self.index = 0
        self.include_annotations = include_annotations
        self.parse_embedded = parse_embedded

    def extend(self, text):
        """Discard the already-consumed prefix of the buffer and append `text`."""
        self.input_buffer = self.input_buffer[self.index:] + text
        self.index = 0

    def _atend(self):
        """Return True when the cursor is at or past the end of the buffer."""
        return self.index >= len(self.input_buffer)

    def peek(self):
        """Return the character under the cursor without consuming it.

        Raises ShortPacket when the buffer is exhausted.
        """
        if self._atend():
            raise ShortPacket('Short input buffer')
        return self.input_buffer[self.index]

    def skip(self):
        """Advance the cursor by one character (no bounds check)."""
        self.index += 1

    def nextchar(self):
        """Consume and return the character under the cursor.

        Raises ShortPacket when the buffer is exhausted.
        """
        c = self.peek()
        self.skip()
        return c

    def skip_whitespace(self):
        """Advance past whitespace; commas are treated as whitespace too."""
        while not self._atend():
            c = self.peek()
            if not (c.isspace() or c == ','):
                break
            self.skip()

    def gather_annotations(self):
        """Collect consecutive annotations and return them as a list.

        A `;` introduces a comment running to the end of the line; an `@`
        introduces an annotation that is itself a parsed value. Stops at the
        first character that starts neither (that character is not consumed).
        """
        vs = []
        while True:
            self.skip_whitespace()
            c = self.peek()
            if c == ';':
                self.skip()
                vs.append(self.comment_line())
            elif c == '@':
                self.skip()
                vs.append(self.next())
            else:
                return vs

    def comment_line(self):
        """Read up to the next CR or LF (consumed) and return the wrapped text."""
        s = []
        while True:
            c = self.nextchar()
            if c == '\r' or c == '\n':
                return self.wrap(u''.join(s))
            s.append(c)

    def read_stringlike(self, terminator, hexescape, hexescaper):
        """Read characters up to an unescaped `terminator` and return the text.

        terminator: closing delimiter character (consumed, not returned).
        hexescape: escape letter that introduces a hex escape.
        hexescaper: callable receiving the accumulator list; it reads the hex
            digits itself and appends the decoded character.

        Raises DecodeError on an unknown backslash escape.
        """
        acc = []
        while True:
            c = self.nextchar()
            if c == terminator:
                return u''.join(acc)
            if c == '\\':
                c = self.nextchar()
                if c == hexescape:
                    hexescaper(acc)
                elif c == terminator or c == '\\' or c == '/':
                    acc.append(c)
                elif c == 'b':
                    acc.append(u'\x08')
                elif c == 'f':
                    acc.append(u'\x0c')
                elif c == 'n':
                    acc.append(u'\x0a')
                elif c == 'r':
                    acc.append(u'\x0d')
                elif c == 't':
                    acc.append(u'\x09')
                else:
                    raise DecodeError('Invalid escape code')
            else:
                acc.append(c)

    def hexnum(self, count):
        """Consume exactly `count` hex digits and return their integer value.

        Raises DecodeError if a consumed character is not in 0-9/a-f/A-F.
        """
        v = 0
        for _ in range(count):
            c = self.nextchar().lower()
            if '0' <= c <= '9':
                v = v << 4 | (ord(c) - ord('0'))
            elif 'a' <= c <= 'f':
                v = v << 4 | (ord(c) - ord('a') + 10)
            else:
                raise DecodeError('Bad hex escape')
        return v

    def read_string(self, delimiter):
        """Read a string body terminated by `delimiter`, decoding `\\uXXXX` escapes.

        A high surrogate escape must be followed immediately by a `\\u` low
        surrogate escape; the pair is combined into one code point. Raises
        DecodeError when the second half is missing or out of range.
        """
        def u16_escape(acc):
            n1 = self.hexnum(4)
            if 0xd800 <= n1 <= 0xdbff:
                # `or` short-circuits, so the second character is only
                # consumed when the first one was the expected backslash.
                if self.nextchar() != '\\' or self.nextchar() != 'u':
                    raise DecodeError('Missing second half of surrogate pair')
                n2 = self.hexnum(4)
                if 0xdc00 <= n2 <= 0xdfff:
                    n = ((n1 - 0xd800) << 10) + (n2 - 0xdc00) + 0x10000
                    acc.append(unichr_(n))
                else:
                    raise DecodeError('Bad second half of surrogate pair')
            else:
                acc.append(unichr_(n1))
        return self.read_stringlike(delimiter, 'u', u16_escape)

    def read_literal_binary(self):
        """Read a `"`-terminated binary literal with `\\xHH` escapes; return bytes.

        The text is encoded as latin-1, so each character maps to one byte.
        """
        s = self.read_stringlike('"', 'x', lambda acc: acc.append(unichr_(self.hexnum(2))))
        return s.encode('latin-1')

    def read_hex_binary(self):
        """Read whitespace-separated hex byte pairs up to a closing `"`; return bytes."""
        acc = bytearray()
        while True:
            self.skip_whitespace()
            if self.peek() == '"':
                self.skip()
                return bytes(acc)
            acc.append(self.hexnum(2))

    def read_base64_binary(self):
        """Read base64 text up to a closing `]` and return the decoded bytes.

        The URL-safe characters `-` and `_` are mapped to `+` and `/`.
        Padding in the input is dropped and `====` is appended before
        decoding, so input padding is optional.
        """
        acc = []
        while True:
            self.skip_whitespace()
            c = self.nextchar()
            if c == ']':
                acc.append(u'====')
                return base64.b64decode(u''.join(acc))
            if c == '-': c = '+'
            if c == '_': c = '/'
            if c == '=': continue
            acc.append(c)

    def read_hex_float(self, bytecount):
        """Read a quoted hex byte string of `bytecount` bytes as a float.

        Four bytes are handed to Float.from_bytes; eight bytes are unpacked
        as a big-endian double. Raises DecodeError when the opening quote is
        missing, the byte count does not match, or `bytecount` is neither 4
        nor 8.
        """
        if self.nextchar() != '"':
            raise DecodeError('Missing open-double-quote in hex-encoded floating-point number')
        bs = self.read_hex_binary()
        if len(bs) != bytecount:
            raise DecodeError('Incorrect number of bytes in hex-encoded floating-point number')
        if bytecount == 4: return Float.from_bytes(bs)
        if bytecount == 8: return struct.unpack('>d', bs)[0]
        raise DecodeError('Unsupported byte count in hex-encoded floating-point number')

    def upto(self, delimiter):
        """Parse values until `delimiter` (consumed) and return them as a tuple."""
        vs = []
        while True:
            self.skip_whitespace()
            if self.peek() == delimiter:
                self.skip()
                return tuple(vs)
            vs.append(self.next())

    def read_dictionary(self):
        """Parse `key: value` pairs up to a closing `}`.

        Returns ImmutableDict.from_kvs over the flat key/value list. Raises
        DecodeError when a key is not followed by `:`.
        """
        acc = []
        while True:
            self.skip_whitespace()
            if self.peek() == '}':
                self.skip()
                return ImmutableDict.from_kvs(acc)
            acc.append(self.next())
            self.skip_whitespace()
            if self.nextchar() != ':':
                raise DecodeError('Missing expected key/value separator')
            acc.append(self.next())

    def read_raw_symbol_or_number(self, acc):
        """Finish reading a bare token and classify it.

        acc: list of characters already consumed for this token.

        The token ends at whitespace, at a delimiter character, or at the end
        of the buffer. Text matching NUMBER_RE becomes an int (no fraction or
        exponent), a float (no `f` suffix) or a Float (with `f` suffix);
        anything else becomes a Symbol.
        """
        while not self._atend():
            c = self.peek()
            if c.isspace() or c in '(){}[]<>";,@#:|':
                break
            self.skip()
            acc.append(c)
        acc = u''.join(acc)
        m = NUMBER_RE.match(acc)
        if m:
            if m[2] is None:
                return int(m[1])
            elif m[7] == '':
                return float(m[1] + m[3])
            else:
                return Float(float(m[1] + m[3]))
        else:
            return Symbol(acc)

    def wrap(self, v):
        """Return `Annotated(v)` when annotations are requested, else `v` itself."""
        return Annotated(v) if self.include_annotations else v

    def _read_hash_form(self):
        """Parse the form that follows a `#` which has already been consumed.

        Raises DecodeError for an unknown `#` or `#x` form, for annotations
        or a non-bytes value after `#=`, and for `#!` when no
        `parse_embedded` function was supplied.
        """
        c = self.nextchar()
        if c == 'f': return self.wrap(False)
        if c == 't': return self.wrap(True)
        if c == '{': return self.wrap(frozenset(self.upto('}')))
        if c == '"': return self.wrap(self.read_literal_binary())
        if c == 'x':
            c = self.nextchar()
            if c == '"': return self.wrap(self.read_hex_binary())
            if c == 'f': return self.wrap(self.read_hex_float(4))
            if c == 'd': return self.wrap(self.read_hex_float(8))
            raise DecodeError('Invalid #x syntax')
        if c == '[': return self.wrap(self.read_base64_binary())
        if c == '=':
            # Annotations are switched on temporarily so that any annotation
            # attached to the byte string can be detected and rejected. The
            # flag is restored in `finally`: the nested parse can raise
            # (notably ShortPacket, which try_next swallows and retries), and
            # the parser must not be left stuck in annotation mode.
            old_ann = self.include_annotations
            self.include_annotations = True
            try:
                bs_val = self.next()
            finally:
                self.include_annotations = old_ann
            if len(bs_val.annotations) > 0:
                raise DecodeError('Annotations not permitted after #=')
            bs_val = bs_val.item
            if not isinstance(bs_val, bytes):
                raise DecodeError('ByteString must follow #=')
            return self.wrap(Decoder(bs_val, include_annotations = self.include_annotations).next())
        if c == '!':
            if self.parse_embedded is None:
                raise DecodeError('No parse_embedded function supplied')
            return self.wrap(Embedded(self.parse_embedded(self.next())))
        raise DecodeError('Invalid # syntax')

    def next(self):
        """Parse and return the next complete value from the buffer.

        Raises ShortPacket when the buffer ends before the value is complete
        and DecodeError on malformed input.
        """
        self.skip_whitespace()
        c = self.peek()
        if c == '"':
            self.skip()
            return self.wrap(self.read_string('"'))
        if c == '|':
            self.skip()
            return self.wrap(Symbol(self.read_string('|')))
        if c in ';@':
            annotations = self.gather_annotations()
            v = self.next()
            if self.include_annotations:
                # Outer annotations come before those already on the value.
                v.annotations = annotations + v.annotations
            return v
        if c == ':':
            raise DecodeError('Unexpected key/value separator between items')
        if c == '#':
            self.skip()
            return self._read_hash_form()
        if c == '<':
            self.skip()
            vs = self.upto('>')
            if len(vs) == 0:
                raise DecodeError('Missing record label')
            return self.wrap(Record(vs[0], vs[1:]))
        if c == '[':
            self.skip()
            return self.wrap(self.upto(']'))
        if c == '{':
            self.skip()
            return self.wrap(self.read_dictionary())
        if c in '>]}':
            raise DecodeError('Unexpected ' + c)
        self.skip()
        return self.wrap(self.read_raw_symbol_or_number([c]))

    def try_next(self):
        """Like `next`, but return None and rewind if the input is incomplete."""
        start = self.index
        try:
            return self.next()
        except ShortPacket:
            self.index = start
            return None

    def __iter__(self):
        """The parser is its own iterator."""
        return self

    def __next__(self):
        """Yield the next value; stop when `try_next` reports no complete value."""
        v = self.try_next()
        if v is None:
            raise StopIteration
        return v
|
|
|
|
def parse(bs, **kwargs):
    """Parse the text `bs` and return the first value it contains.

    Extra keyword arguments are forwarded to the `Parser` constructor.
    """
    parser = Parser(input_buffer=bs, **kwargs)
    return parser.next()
|
|
|
|
def parse_with_annotations(bs, **kwargs):
    """Parse the text `bs` with annotations enabled and return the first value.

    Extra keyword arguments are forwarded to the `Parser` constructor;
    `include_annotations` is always passed as True.
    """
    parser = Parser(input_buffer=bs, include_annotations=True, **kwargs)
    return parser.next()
|
|
|
|
class Formatter(TextCodec):
    """Builds the text form of values as a list of string chunks.

    Call `append` for each value to write, then `contents` to obtain the
    accumulated text.
    """

    # Characters that write_stringlike_char emits as backslash escapes.
    _CHAR_ESCAPES = {
        '\\': '\\\\',
        '\x08': '\\b',
        '\x0c': '\\f',
        '\x0a': '\\n',
        '\x0d': '\\r',
        '\x09': '\\t',
    }

    def __init__(self,
                 format_embedded=lambda x: x,
                 indent=None,
                 with_commas=False,
                 trailing_comma=False):
        """Create a formatter.

        format_embedded: callable used by `format_embedded`; when None,
            calling `format_embedded` raises EncodeError.
        indent: number of spaces added per nesting level; None (or a
            non-positive number) disables line breaks and indentation.
        with_commas: write `,` between the items of multi-item sequences.
        trailing_comma: write `,` after the last item of multi-item sequences.
        """
        super(Formatter, self).__init__()
        self._format_embedded = format_embedded
        self.with_commas = with_commas
        self.trailing_comma = trailing_comma
        self.indent_delta = indent if indent is not None else 0
        self.indent_distance = 0
        self.chunks = []

    def format_embedded(self, v):
        """Apply the configured embedded-value formatter to `v`.

        Raises EncodeError when no formatter function was supplied.
        """
        formatter = self._format_embedded
        if formatter is None:
            raise EncodeError('No format_embedded function supplied')
        return formatter(v)

    def contents(self):
        """Return everything written so far as a single string."""
        return u''.join(self.chunks)

    def is_indenting(self):
        """Return True when a positive indent step is configured."""
        return self.indent_delta > 0

    def write_indent(self):
        """When indenting, start a new line padded to the current depth."""
        if not self.is_indenting():
            return
        self.chunks.append('\n' + ' ' * self.indent_distance)

    def write_indent_space(self):
        """Separate two items: a fresh indented line, or one space when not indenting."""
        if not self.is_indenting():
            self.chunks.append(' ')
            return
        self.write_indent()

    def write_stringlike_char(self, c):
        """Write `c`, escaping backslash, \\b, \\f, \\n, \\r and \\t.

        The delimiter itself is not handled here; callers escape it.
        """
        self.chunks.append(self._CHAR_ESCAPES.get(c, c))

    def write_seq(self, opener, closer, vs, appender):
        """Write the items of `vs` between `opener` and `closer`.

        appender: callable invoked once per item to write it.

        Empty and single-item sequences are written inline. Longer sequences
        are indented one level (when indenting), with optional commas
        between items and an optional trailing comma.
        """
        items = list(vs)
        self.chunks.append(opener)
        if len(items) == 1:
            appender(items[0])
        elif items:
            self.indent_distance += self.indent_delta
            self.write_indent()
            appender(items[0])
            for item in items[1:]:
                if self.with_commas:
                    self.chunks.append(',')
                self.write_indent_space()
                appender(item)
            self.indent_distance -= self.indent_delta
            if self.trailing_comma:
                self.chunks.append(',')
            self.write_indent()
        self.chunks.append(closer)

    def append(self, v):
        """Write the text form of `v`.

        `v` is first passed through `preserve`. Values providing
        `__preserve_write_text__` write themselves; otherwise booleans,
        floats, other numbers, bytes, strings, lists/tuples, sets/frozensets
        and dicts are handled here, and any other iterable is written as a
        sequence. Non-iterable values are passed to `cannot_format`.
        """
        v = preserve(v)
        out = self.chunks
        if hasattr(v, '__preserve_write_text__'):
            v.__preserve_write_text__(self)
        elif v is False:
            out.append('#f')
        elif v is True:
            out.append('#t')
        elif isinstance(v, float):
            if math.isnan(v) or math.isinf(v):
                # NaN and infinities are written as hex-encoded
                # big-endian doubles rather than via repr().
                out.append('#xd"%s"' % (struct.pack('>d', v).hex(),))
            else:
                out.append(repr(v))
        elif isinstance(v, numbers.Number):
            out.append('%d' % (v,))
        elif isinstance(v, bytes):
            encoded = base64.b64encode(v).decode('ascii')
            out.append('#[%s]' % (encoded,))
        elif isinstance(v, basestring_):
            out.append('"')
            for ch in v:
                if ch == '"':
                    out.append('\\"')
                else:
                    self.write_stringlike_char(ch)
            out.append('"')
        elif isinstance(v, (list, tuple)):
            self.write_seq('[', ']', v, self.append)
        elif isinstance(v, (set, frozenset)):
            self.write_seq('#{', '}', v, self.append)
        elif isinstance(v, dict):
            def write_entry(entry):
                self.append(entry[0])
                self.chunks.append(': ')
                self.append(entry[1])
            self.write_seq('{', '}', v.items(), write_entry)
        else:
            try:
                iterator = iter(v)
            except TypeError:
                iterator = None
            if iterator is not None:
                self.write_seq('[', ']', iterator, self.append)
            else:
                self.cannot_format(v)

    def cannot_format(self, v):
        """Raise TypeError reporting that `v` cannot be formatted."""
        raise TypeError('Cannot preserves-format: ' + repr(v))
|
|
|
|
def stringify(v, **kwargs):
    """Return the text form of `v`.

    Keyword arguments are forwarded to the `Formatter` constructor.
    """
    formatter = Formatter(**kwargs)
    formatter.append(v)
    return formatter.contents()
|