"""The [preserves.values][] module implements the core representations of Preserves [`Value`s](https://preserves.dev/preserves.html#semantics) as Python values. """ import re import sys import struct import math from .error import DecodeError def preserve(v): """Converts `v` to a representation of a Preserves `Value` by (repeatedly) setting ```python v = v.__preserve__() ``` while `v` has a `__preserve__` method. Parsed [Schema][preserves.schema] values are able to render themselves to their serialized representations this way. """ while hasattr(v, '__preserve__'): v = v.__preserve__() return v def float_to_int(v): return struct.unpack('>Q', struct.pack('>d', v))[0] def cmp_floats(a, b): """Implements the `totalOrder` predicate defined in section 5.10 of [IEEE Std 754-2008](https://dx.doi.org/10.1109/IEEESTD.2008.4610935). """ a = float_to_int(a) b = float_to_int(b) if a & 0x8000000000000000: a = a ^ 0x7fffffffffffffff if b & 0x8000000000000000: b = b ^ 0x7fffffffffffffff return a - b class Float(object): """Wrapper for treating a Python double-precision floating-point value as a single-precision (32-bit) float, from Preserves' perspective. (Python lacks native single-precision floating point support.) ```python >>> Float(3.45) Float(3.45) >>> import preserves >>> preserves.stringify(Float(3.45)) '3.45f' >>> preserves.stringify(3.45) '3.45' >>> preserves.parse('3.45f') Float(3.45) >>> preserves.parse('3.45') 3.45 >>> preserves.encode(Float(3.45)) b'\\x87\\x04@\\\\\\xcc\\xcd' >>> preserves.encode(3.45) b'\\x87\\x08@\\x0b\\x99\\x99\\x99\\x99\\x99\\x9a' ``` Attributes: value (float): the double-precision representation of intended single-precision value """ def __init__(self, value): self.value = value def __eq__(self, other): other = _unwrap(other) if other.__class__ is self.__class__: return cmp_floats(self.value, other.value) == 0 def __lt__(self, other): other = _unwrap(other) if other.__class__ is self.__class__: return cmp_floats(self.value, other.value) < 0 def __ne__(self, other): return not self.__eq__(other) def __hash__(self): return hash(self.value) def __repr__(self): return 'Float(' + repr(self.value) + ')' def to_bytes(self): """Converts this 32-bit single-precision floating point value to its binary32 format, taking care to preserve the quiet/signalling bit-pattern of NaN values, unlike its `struct.pack('>f', ...)` equivalent. ```python >>> Float.from_bytes(b'\\x7f\\x80\\x00{') Float(nan) >>> Float.from_bytes(b'\\x7f\\x80\\x00{').to_bytes() b'\\x7f\\x80\\x00{' >>> struct.unpack('>f', b'\\x7f\\x80\\x00{')[0] nan >>> Float(struct.unpack('>f', b'\\x7f\\x80\\x00{')[0]).to_bytes() b'\\x7f\\xc0\\x00{' >>> struct.pack('>f', struct.unpack('>f', b'\\x7f\\x80\\x00{')[0]) b'\\x7f\\xc0\\x00{' ``` (Note well the difference between `7f80007b` and `7fc0007b`!) """ if math.isnan(self.value) or math.isinf(self.value): dbs = struct.pack('>d', self.value) vd = struct.unpack('>Q', dbs)[0] sign = vd >> 63 payload = (vd >> 29) & 0x007fffff vf = (sign << 31) | 0x7f800000 | payload return struct.pack('>I', vf) else: return struct.pack('>f', self.value) def __preserve_write_binary__(self, encoder): encoder.buffer.append(0x87) encoder.buffer.append(4) encoder.buffer.extend(self.to_bytes()) def __preserve_write_text__(self, formatter): if math.isnan(self.value) or math.isinf(self.value): formatter.chunks.append('#xf"' + self.to_bytes().hex() + '"') else: formatter.chunks.append(repr(self.value) + 'f') @staticmethod def from_bytes(bs): """Converts a 4-byte-long byte string to a 32-bit single-precision floating point value wrapped in a [Float][preserves.values.Float] instance. Takes care to preserve the quiet/signalling bit-pattern of NaN values, unlike its `struct.unpack('>f', ...)` equivalent. ```python >>> Float.from_bytes(b'\\x7f\\x80\\x00{') Float(nan) >>> Float.from_bytes(b'\\x7f\\x80\\x00{').to_bytes() b'\\x7f\\x80\\x00{' >>> struct.unpack('>f', b'\\x7f\\x80\\x00{')[0] nan >>> Float(struct.unpack('>f', b'\\x7f\\x80\\x00{')[0]).to_bytes() b'\\x7f\\xc0\\x00{' >>> struct.pack('>f', struct.unpack('>f', b'\\x7f\\x80\\x00{')[0]) b'\\x7f\\xc0\\x00{' ``` (Note well the difference between `7f80007b` and `7fc0007b`!) """ vf = struct.unpack('>I', bs)[0] if (vf & 0x7f800000) == 0x7f800000: # NaN or inf. Preserve quiet/signalling bit by manually expanding to double-precision. sign = vf >> 31 payload = vf & 0x007fffff dbs = struct.pack('>Q', (sign << 63) | 0x7ff0000000000000 | (payload << 29)) return Float(struct.unpack('>d', dbs)[0]) else: return Float(struct.unpack('>f', bs)[0]) # FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. RAW_SYMBOL_RE = re.compile(r'^[-a-zA-Z0-9~!$%^&*?_=+/.]+$') def _eq(a, b): from .compare import eq return eq(a, b) class Symbol(object): """Representation of Preserves `Symbol`s. ```python >>> Symbol('xyz') #xyz >>> Symbol('xyz').name 'xyz' >>> import preserves >>> preserves.stringify(Symbol('xyz')) 'xyz' >>> preserves.stringify(Symbol('hello world')) '|hello world|' >>> preserves.parse('xyz') #xyz >>> preserves.parse('|hello world|') #hello world ``` Attributes: name (str | Symbol): The symbol's text label. If an existing [Symbol][preserves.values.Symbol] is passed in, the existing Symbol's `name` is used as the `name` for the new Symbol. """ def __init__(self, name): self.name = name.name if isinstance(name, Symbol) else name def __eq__(self, other): other = _unwrap(other) return isinstance(other, Symbol) and self.name == other.name def __ne__(self, other): return not self.__eq__(other) def __lt__(self, other): return self.name < other.name def __le__(self, other): return self.name <= other.name def __gt__(self, other): return self.name > other.name def __ge__(self, other): return self.name >= other.name def __hash__(self): return hash(self.name) def __repr__(self): return '#' + self.name def __preserve_write_binary__(self, encoder): bs = self.name.encode('utf-8') encoder.buffer.append(0xb3) encoder.varint(len(bs)) encoder.buffer.extend(bs) def __preserve_write_text__(self, formatter): if RAW_SYMBOL_RE.match(self.name): formatter.chunks.append(self.name) else: formatter.chunks.append('|') for c in self.name: if c == '|': formatter.chunks.append('\\|') else: formatter.write_stringlike_char(c) formatter.chunks.append('|') class Record(object): """Representation of Preserves `Record`s, which are a pair of a *label* `Value` and a sequence of *field* `Value`s. ```python >>> r = Record(Symbol('label'), ['field1', ['field2item1', 'field2item2']]) >>> r #label('field1', ['field2item1', 'field2item2']) >>> r.key #label >>> r.fields ('field1', ['field2item1', 'field2item2']) >>> import preserves >>> preserves.stringify(r) '