Skip to content

Machine-oriented binary syntax

The preserves.binary module implements the Preserves machine-oriented binary syntax.

The main entry points are functions encode, canonicalize, decode, and decode_with_annotations.

>>> encode(Record(Symbol('hi'), []))
b'\xb4\xb3\x02hi\x84'
>>> decode(b'\xb4\xb3\x02hi\x84')
#hi()

Decoder(packet=b'', include_annotations=False, decode_embedded=lambda x: x)

Bases: BinaryCodec

Implementation of a decoder for the machine-oriented binary Preserves syntax.

Parameters:

Name Type Description Default
packet bytes

initial contents of the input buffer; may subsequently be extended by calling extend.

b''
include_annotations bool

if True, wrap each value and subvalue in an Annotated object.

False
decode_embedded

function accepting a Value and returning a possibly-decoded form of that value suitable for placing into an Embedded object.

lambda x: x

Normal usage is to supply a buffer, and keep calling next until a ShortPacket exception is raised:

>>> d = Decoder(b'\xb0\x01{\xb1\x05hello\x85\xb3\x01x\xb5\x84')
>>> d.next()
123
>>> d.next()
'hello'
>>> d.next()
()
>>> d.next()
Traceback (most recent call last):
  ...
preserves.error.ShortPacket: Short packet

Alternatively, keep calling try_next until it yields None, which is not in the domain of Preserves Values:

>>> d = Decoder(b'\xb0\x01{\xb1\x05hello\x85\xb3\x01x\xb5\x84')
>>> d.try_next()
123
>>> d.try_next()
'hello'
>>> d.try_next()
()
>>> d.try_next()

For convenience, Decoder implements the iterator interface, backing it with try_next, so you can simply iterate over all complete values in an input:

>>> d = Decoder(b'\xb0\x01{\xb1\x05hello\x85\xb3\x01x\xb5\x84')
>>> list(d)
[123, 'hello', ()]
>>> for v in Decoder(b'\xb0\x01{\xb1\x05hello\x85\xb3\x01x\xb5\x84'):
...     print(repr(v))
123
'hello'
()

Supply include_annotations=True to read annotations alongside the annotated values:

>>> d = Decoder(b'\xb0\x01{\xb1\x05hello\x85\xb3\x01x\xb5\x84', include_annotations=True)
>>> list(d)
[123, 'hello', @#x ()]

If you are incrementally reading from, say, a socket, you can use extend to add new input as it becomes available:

>>> d = Decoder(b'\xb0\x01{\xb1\x05he')
>>> d.try_next()
123
>>> d.try_next() # returns None because the input is incomplete
>>> d.extend(b'llo')
>>> d.try_next()
'hello'
>>> d.try_next()

Attributes:

Name Type Description
packet bytes

buffered input waiting to be processed

index int

read position within packet

Source code in preserves/binary.py
127
128
129
130
131
132
def __init__(self, packet=b'', include_annotations=False, decode_embedded=lambda x: x):
    # Let the base codec initialise itself before any decoder state is attached.
    super(Decoder, self).__init__()
    # Decoding options, stored verbatim for later use while reading values.
    self.include_annotations = include_annotations
    self.decode_embedded = decode_embedded
    # Input buffer plus the read cursor into it; reading starts at the first byte.
    self.packet = packet
    self.index = 0

extend(data)

Appends data to the remaining bytes in self.packet, trimming already-processed bytes from the front of self.packet and resetting self.index to zero.

Source code in preserves/binary.py
134
135
136
137
138
def extend(self, data):
    """Adds `data` to the end of the input that has not been consumed yet.

    Bytes before `self.index` have already been processed, so they are dropped from
    `self.packet`; `self.index` is then reset to zero to point at the start of the new
    buffer."""
    unread = self.packet[self.index:]
    self.packet = unread + data
    self.index = 0

next()

Reads the next complete Value from the internal buffer, raising ShortPacket if too few bytes are available, or DecodeError if the input is invalid somehow.

Source code in preserves/binary.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
def next(self):
    """Decodes and returns the next complete `Value` from the internal buffer.

    Raises [ShortPacket][preserves.error.ShortPacket] when too few bytes are buffered for
    a complete value, and [DecodeError][preserves.error.DecodeError] when the input is
    invalid somehow.

    """
    tag = self.nextbyte()

    # Booleans carry no payload.
    if tag == 0x80:
        return self.wrap(False)
    if tag == 0x81:
        return self.wrap(True)

    # An end-of-stream marker is rejected when a value is expected.
    if tag == 0x84:
        raise DecodeError('Unexpected end-of-stream marker')

    # Annotation: the annotation is read first, then the value it is attached to.
    if tag == 0x85:
        annotation = self.next()
        annotated = self.next()
        return self.unshift_annotation(annotation, annotated)

    # Embedded value: the following value is passed through the decode_embedded hook.
    if tag == 0x86:
        if self.decode_embedded is None:
            raise DecodeError('No decode_embedded function supplied')
        inner = self.next()
        return self.wrap(Embedded(self.decode_embedded(inner)))

    # Floating point: one size byte (4 or 8), then that many bytes of payload.
    if tag == 0x87:
        size = self.nextbyte()
        if size == 4:
            return self.wrap(Float.from_bytes(self.nextbytes(4)))
        if size == 8:
            (double,) = struct.unpack('>d', self.nextbytes(8))
            return self.wrap(double)
        raise DecodeError('Invalid IEEE754 size')

    # Atoms whose payload size is announced by a leading varint.
    if tag == 0xb0:
        width = self.varint()
        return self.wrap(self.nextint(width))
    if tag == 0xb1:
        length = self.varint()
        return self.wrap(self.nextbytes(length).decode('utf-8'))
    if tag == 0xb2:
        length = self.varint()
        return self.wrap(self.nextbytes(length))
    if tag == 0xb3:
        length = self.varint()
        return self.wrap(Symbol(self.nextbytes(length).decode('utf-8')))

    # Compound values, each built from the list produced by nextvalues().
    if tag == 0xb4:
        items = self.nextvalues()
        if not items:
            # A record needs at least one element: the first one is its label.
            raise DecodeError('Too few elements in encoded record')
        return self.wrap(Record(items[0], items[1:]))
    if tag == 0xb5:
        return self.wrap(tuple(self.nextvalues()))
    if tag == 0xb6:
        return self.wrap(frozenset(self.nextvalues()))
    if tag == 0xb7:
        return self.wrap(ImmutableDict.from_kvs(self.nextvalues()))

    raise DecodeError('Invalid tag: ' + hex(tag))

try_next()

Like next, but returns None instead of raising ShortPacket.

Source code in preserves/binary.py
225
226
227
228
229
230
231
232
233
def try_next(self):
    """Like [next][preserves.binary.Decoder.next], but returns `None` instead of raising
    [ShortPacket][preserves.error.ShortPacket].

    When the input is too short, the read position is restored to where it was on entry."""
    checkpoint = self.index
    try:
        value = self.next()
    except ShortPacket:
        # Roll back to where this attempt started.
        self.index = checkpoint
        return None
    return value

Encoder(encode_embedded=lambda x: x, canonicalize=False, include_annotations=None)

Bases: BinaryCodec

Implementation of an encoder for the machine-oriented binary Preserves syntax.

>>> e = Encoder()
>>> e.append(123)
>>> e.append('hello')
>>> e.append(annotate([], Symbol('x')))
>>> e.contents()
b'\xb0\x01{\xb1\x05hello\x85\xb3\x01x\xb5\x84'

Parameters:

Name Type Description Default
encode_embedded

function accepting an Embedded.embeddedValue and returning a Value for serialization.

lambda x: x
canonicalize bool

if True, ensures the serialized data are in canonical form. This is slightly more work than producing potentially-non-canonical output.

False
include_annotations bool | None

if None, includes annotations in the output only when canonicalize is False, because canonical serialization of values demands omission of annotations. If explicitly True or False, however, annotations will be included resp. excluded no matter the canonicalize setting. This can be used to get canonical ordering (canonicalize=True) and annotations (include_annotations=True).

None

Attributes:

Name Type Description
buffer bytearray

accumulator for the output of the encoder

Source code in preserves/binary.py
295
296
297
298
299
300
301
302
303
304
305
306
def __init__(self,
             encode_embedded=lambda x: x,
             canonicalize=False,
             include_annotations=None):
    super(Encoder, self).__init__()
    # Output accumulates here; it starts out empty.
    self.buffer = bytearray()
    self._encode_embedded = encode_embedded
    self._canonicalize = canonicalize
    # An explicit True/False is honoured as given; None means "include annotations
    # exactly when not canonicalizing".
    self.include_annotations = (
        (not canonicalize) if include_annotations is None else include_annotations
    )

append(v)

Extend self.buffer with an encoding of v.

Source code in preserves/binary.py
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
def append(self, v):
    """Extend `self.buffer` with an encoding of `v`.

    `v` is first passed through `preserve`; the result is then dispatched on its type.
    Values that are neither of a recognised type nor iterable are handed to
    `self.cannot_encode`."""
    v = preserve(v)

    # Objects providing their own binary writer take priority over every built-in case.
    if hasattr(v, '__preserve_write_binary__'):
        v.__preserve_write_binary__(self)
        return

    # Booleans are matched by identity, ahead of the numeric checks below.
    if v is False:
        self.buffer.append(0x80)
        return
    if v is True:
        self.buffer.append(0x81)
        return

    # Floats are written as tag 0x87, a size byte of 8, then a big-endian double.
    if isinstance(v, float):
        self.buffer.extend((0x87, 8))
        self.buffer.extend(struct.pack('>d', v))
        return

    if isinstance(v, numbers.Number):
        self.encodeint(v)
        return

    # bytes is tested before the string type so byte strings get the 0xb2 tag.
    if isinstance(v, bytes):
        self.encodebytes(0xb2, v)
        return
    if isinstance(v, basestring_):
        self.encodebytes(0xb1, v.encode('utf-8'))
        return

    # Lists and tuples share the sequence tag.
    if isinstance(v, (list, tuple)):
        self.encodevalues(0xb5, v)
        return
    if isinstance(v, (set, frozenset)):
        self.encodeset(v)
        return
    if isinstance(v, dict):
        self.encodedict(v)
        return

    # Last resort: any other iterable is written with the sequence tag as well.
    try:
        items = iter(v)
    except TypeError:
        items = None
    if items is None:
        self.cannot_encode(v)
        return
    self.encodevalues(0xb5, items)

contents()

Returns a bytes constructed from the contents of self.buffer.

Source code in preserves/binary.py
317
318
319
def contents(self):
    """Returns a `bytes` copy of everything accumulated so far in `self.buffer`."""
    snapshot = bytes(self.buffer)
    return snapshot

reset()

Clears self.buffer to a fresh empty bytearray.

Source code in preserves/binary.py
308
309
310
def reset(self):
    """Discards accumulated output by rebinding `self.buffer` to a new, empty `bytearray`."""
    fresh = bytearray()
    self.buffer = fresh

canonicalize(v, **kwargs)

As encode, but sets canonicalize=True in the Encoder constructor.

Source code in preserves/binary.py
433
434
435
436
437
438
def canonicalize(v, **kwargs):
    """Encodes `v` exactly as [encode][preserves.binary.encode] does, except that
    `canonicalize=True` is passed to the [Encoder][preserves.binary.Encoder] constructor.
    Any other `kwargs` are forwarded unchanged.

    """
    canonical_bytes = encode(v, canonicalize=True, **kwargs)
    return canonical_bytes

decode(bs, **kwargs)

Returns the first complete encoded value from bs, passing kwargs through to the Decoder constructor. Raises exceptions as per next.

Parameters:

Name Type Description Default
bs bytes

encoded data to decode

required
Source code in preserves/binary.py
244
245
246
247
248
249
250
251
252
253
def decode(bs, **kwargs):
    """Returns the first complete encoded value found in `bs`. Any `kwargs` are passed
    through to the [Decoder][preserves.binary.Decoder] constructor. Raises exceptions as
    per [next][preserves.binary.Decoder.next].

    Args:
        bs (bytes): encoded data to decode

    """
    decoder = Decoder(packet=bs, **kwargs)
    return decoder.next()

decode_with_annotations(bs, **kwargs)

Like decode, but supplying include_annotations=True to the Decoder constructor.

Source code in preserves/binary.py
255
256
257
258
def decode_with_annotations(bs, **kwargs):
    """Works like [decode][preserves.binary.decode], except that `include_annotations=True`
    is supplied to the [Decoder][preserves.binary.Decoder] constructor."""
    annotating_decoder = Decoder(packet=bs, include_annotations=True, **kwargs)
    return annotating_decoder.next()

encode(v, **kwargs)

Encode a single Value v to a byte string. Any supplied kwargs are passed on to the underlying Encoder constructor.

Source code in preserves/binary.py
426
427
428
429
430
431
def encode(v, **kwargs):
    """Serializes the single `Value` `v` and returns the result as a byte string. Any
    `kwargs` given are forwarded to the [Encoder][preserves.binary.Encoder] constructor."""
    encoder = Encoder(**kwargs)
    encoder.append(v)
    return encoder.contents()

Last update: March 16, 2023
Created: March 16, 2023