Docs for preserves.binary; better encoding annotations with canonicalization
This commit is contained in:
parent
99258be1ef
commit
7cbd1a2813
|
@ -3,7 +3,11 @@ theme:
|
|||
name: material
|
||||
plugins:
|
||||
- search
|
||||
- mkdocstrings
|
||||
- mkdocstrings:
|
||||
handlers:
|
||||
python:
|
||||
options:
|
||||
merge_init_into_class: true
|
||||
- macros:
|
||||
include_dir: ../../_includes
|
||||
markdown_extensions:
|
||||
|
|
|
@ -23,14 +23,108 @@ from .error import *
|
|||
from .compat import basestring_, ord_
|
||||
|
||||
class BinaryCodec(object):
|
||||
"""TODO"""
|
||||
pass
|
||||
|
||||
class Decoder(BinaryCodec):
|
||||
"""TODO"""
|
||||
"""Implementation of a decoder for the machine-oriented binary Preserves syntax.
|
||||
|
||||
Args:
|
||||
packet (bytes):
|
||||
initial contents of the input buffer; may subsequently be extended by calling
|
||||
[extend][preserves.binary.Decoder.extend].
|
||||
|
||||
include_annotations (bool):
|
||||
if `True`, wrap each value and subvalue in an
|
||||
[Annotated][preserves.values.Annotated] object.
|
||||
|
||||
decode_embedded:
|
||||
function accepting a `Value` and returning a possibly-decoded form of that value
|
||||
suitable for placing into an [Embedded][preserves.values.Embedded] object.
|
||||
|
||||
Normal usage is to supply a buffer, and keep calling [next][preserves.binary.Decoder.next]
|
||||
until a [ShortPacket][preserves.error.ShortPacket] exception is raised:
|
||||
|
||||
```python
|
||||
>>> d = Decoder(b'\\xa0{\\xb1\\x05hello\\x85\\xb3\\x01x\\xb5\\x84')
|
||||
>>> d.next()
|
||||
123
|
||||
>>> d.next()
|
||||
'hello'
|
||||
>>> d.next()
|
||||
()
|
||||
>>> d.next()
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
preserves.error.ShortPacket: Short packet
|
||||
|
||||
```
|
||||
|
||||
Alternatively, keep calling [try_next][preserves.binary.Decoder.try_next] until it yields
|
||||
`None`, which is not in the domain of Preserves `Value`s:
|
||||
|
||||
```python
|
||||
>>> d = Decoder(b'\\xa0{\\xb1\\x05hello\\x85\\xb3\\x01x\\xb5\\x84')
|
||||
>>> d.try_next()
|
||||
123
|
||||
>>> d.try_next()
|
||||
'hello'
|
||||
>>> d.try_next()
|
||||
()
|
||||
>>> d.try_next()
|
||||
|
||||
```
|
||||
|
||||
For convenience, [Decoder][preserves.binary.Decoder] implements the iterator interface,
|
||||
backing it with [try_next][preserves.binary.Decoder.try_next], so you can simply iterate
|
||||
over all complete values in an input:
|
||||
|
||||
```python
|
||||
>>> d = Decoder(b'\\xa0{\\xb1\\x05hello\\x85\\xb3\\x01x\\xb5\\x84')
|
||||
>>> list(d)
|
||||
[123, 'hello', ()]
|
||||
|
||||
```
|
||||
|
||||
```python
|
||||
>>> for v in Decoder(b'\\xa0{\\xb1\\x05hello\\x85\\xb3\\x01x\\xb5\\x84'):
|
||||
... print(repr(v))
|
||||
123
|
||||
'hello'
|
||||
()
|
||||
|
||||
```
|
||||
|
||||
Supply `include_annotations=True` to read annotations alongside the annotated values:
|
||||
|
||||
```python
|
||||
>>> d = Decoder(b'\\xa0{\\xb1\\x05hello\\x85\\xb3\\x01x\\xb5\\x84', include_annotations=True)
|
||||
>>> list(d)
|
||||
[123, 'hello', @#x ()]
|
||||
|
||||
```
|
||||
|
||||
If you are incrementally reading from, say, a socket, you can use
|
||||
[extend][preserves.binary.Decoder.extend] to add new input as it becomes available:
|
||||
|
||||
```python
|
||||
>>> d = Decoder(b'\\xa0{\\xb1\\x05he')
|
||||
>>> d.try_next()
|
||||
123
|
||||
>>> d.try_next() # returns None because the input is incomplete
|
||||
>>> d.extend(b'llo')
|
||||
>>> d.try_next()
|
||||
'hello'
|
||||
>>> d.try_next()
|
||||
|
||||
```
|
||||
|
||||
Attributes:
|
||||
packet (bytes): buffered input waiting to be processed
|
||||
index (int): read position within `packet`
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, packet=b'', include_annotations=False, decode_embedded=lambda x: x):
|
||||
"""TODO"""
|
||||
super(Decoder, self).__init__()
|
||||
self.packet = packet
|
||||
self.index = 0
|
||||
|
@ -38,7 +132,8 @@ class Decoder(BinaryCodec):
|
|||
self.decode_embedded = decode_embedded
|
||||
|
||||
def extend(self, data):
|
||||
"""TODO"""
|
||||
"""Appends `data` to the remaining bytes in `self.packet`, trimming already-processed
|
||||
bytes from the front of `self.packet` and resetting `self.index` to zero."""
|
||||
self.packet = self.packet[self.index:] + data
|
||||
self.index = 0
|
||||
|
||||
|
@ -92,7 +187,11 @@ class Decoder(BinaryCodec):
|
|||
return v
|
||||
|
||||
def next(self):
|
||||
"""TODO"""
|
||||
"""Reads the next complete `Value` from the internal buffer, raising
|
||||
[ShortPacket][preserves.error.ShortPacket] if too few bytes are available, or
|
||||
[DecodeError][preserves.error.DecodeError] if the input is invalid somehow.
|
||||
|
||||
"""
|
||||
tag = self.nextbyte()
|
||||
if tag == 0x80: return self.wrap(False)
|
||||
if tag == 0x81: return self.wrap(True)
|
||||
|
@ -123,7 +222,8 @@ class Decoder(BinaryCodec):
|
|||
raise DecodeError('Invalid tag: ' + hex(tag))
|
||||
|
||||
def try_next(self):
|
||||
"""TODO"""
|
||||
"""Like [next][preserves.binary.Decoder.next], but returns `None` instead of raising
|
||||
[ShortPacket][preserves.error.ShortPacket]."""
|
||||
start = self.index
|
||||
try:
|
||||
return self.next()
|
||||
|
@ -132,7 +232,6 @@ class Decoder(BinaryCodec):
|
|||
return None
|
||||
|
||||
def __iter__(self):
|
||||
"""TODO"""
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
|
@ -142,26 +241,71 @@ class Decoder(BinaryCodec):
|
|||
return v
|
||||
|
||||
def decode(bs, **kwargs):
|
||||
"""TODO"""
|
||||
"""Decodes and returns the first complete value encoded in `bs`, passing `kwargs` through to the
|
||||
[Decoder][preserves.binary.Decoder] constructor. Raises exceptions as per
|
||||
[next][preserves.binary.Decoder.next].
|
||||
|
||||
Args:
|
||||
bs (bytes): encoded data to decode
|
||||
|
||||
"""
|
||||
return Decoder(packet=bs, **kwargs).next()
|
||||
|
||||
def decode_with_annotations(bs, **kwargs):
|
||||
"""TODO"""
|
||||
"""Like [decode][preserves.binary.decode], but supplying `include_annotations=True` to the
|
||||
[Decoder][preserves.binary.Decoder] constructor."""
|
||||
return Decoder(packet=bs, include_annotations=True, **kwargs).next()
|
||||
|
||||
class Encoder(BinaryCodec):
|
||||
"""Implementation of an encoder for the machine-oriented binary Preserves syntax.
|
||||
|
||||
```python
|
||||
>>> e = Encoder()
|
||||
>>> e.append(123)
|
||||
>>> e.append('hello')
|
||||
>>> e.append(annotate([], Symbol('x')))
|
||||
>>> e.contents()
|
||||
b'\\xa0{\\xb1\\x05hello\\x85\\xb3\\x01x\\xb5\\x84'
|
||||
|
||||
```
|
||||
|
||||
Args:
|
||||
encode_embedded:
|
||||
function accepting an [Embedded][preserves.values.Embedded].embeddedValue and
|
||||
returning a `Value` for serialization.
|
||||
|
||||
canonicalize (bool):
|
||||
if `True`, ensures the serialized data are in [canonical
|
||||
form](https://preserves.dev/canonical-binary.html). This is slightly more work than
|
||||
producing potentially-non-canonical output.
|
||||
|
||||
include_annotations (bool | None):
|
||||
if `None`, includes annotations in the output only when `canonicalize` is `False`,
|
||||
because [canonical serialization of values demands omission of
|
||||
annotations](https://preserves.dev/canonical-binary.html). If explicitly `True` or
|
||||
`False`, however, annotations will be included or excluded, respectively, no matter the
|
||||
`canonicalize` setting. This can be used to get canonical ordering
|
||||
(`canonicalize=True`) *and* annotations (`include_annotations=True`).
|
||||
|
||||
Attributes:
|
||||
buffer (bytearray): accumulator for the output of the encoder
|
||||
|
||||
"""
|
||||
def __init__(self, encode_embedded=lambda x: x, canonicalize=False):
|
||||
"""TODO"""
|
||||
def __init__(self,
|
||||
encode_embedded=lambda x: x,
|
||||
canonicalize=False,
|
||||
include_annotations=None):
|
||||
super(Encoder, self).__init__()
|
||||
self.buffer = bytearray()
|
||||
self._encode_embedded = encode_embedded
|
||||
self._canonicalize = canonicalize
|
||||
if include_annotations is None:
|
||||
self.include_annotations = not self._canonicalize
|
||||
else:
|
||||
self.include_annotations = include_annotations
|
||||
|
||||
def reset(self):
|
||||
"""TODO"""
|
||||
"""Clears `self.buffer` to a fresh empty `bytearray`."""
|
||||
self.buffer = bytearray()
|
||||
|
||||
def encode_embedded(self, v):
|
||||
|
@ -170,7 +314,7 @@ class Encoder(BinaryCodec):
|
|||
return self._encode_embedded(v)
|
||||
|
||||
def contents(self):
|
||||
"""TODO"""
|
||||
"""Returns a `bytes` constructed from the contents of `self.buffer`."""
|
||||
return bytes(self.buffer)
|
||||
|
||||
def varint(self, v):
|
||||
|
@ -208,7 +352,7 @@ class Encoder(BinaryCodec):
|
|||
if not self._canonicalize:
|
||||
self.encodevalues(6, v)
|
||||
else:
|
||||
c = Canonicalizer(self._encode_embedded)
|
||||
c = Canonicalizer(self._encode_embedded, self.include_annotations)
|
||||
for i in v: c.entry([i])
|
||||
c.emit_entries(self, 6)
|
||||
|
||||
|
@ -216,12 +360,12 @@ class Encoder(BinaryCodec):
|
|||
if not self._canonicalize:
|
||||
self.encodevalues(7, list(dict_kvs(v)))
|
||||
else:
|
||||
c = Canonicalizer(self._encode_embedded)
|
||||
c = Canonicalizer(self._encode_embedded, self.include_annotations)
|
||||
for (kk, vv) in v.items(): c.entry([kk, vv])
|
||||
c.emit_entries(self, 7)
|
||||
|
||||
def append(self, v):
|
||||
"""TODO"""
|
||||
"""Extend `self.buffer` with an encoding of `v`."""
|
||||
v = preserve(v)
|
||||
if hasattr(v, '__preserve_write_binary__'):
|
||||
v.__preserve_write_binary__(self)
|
||||
|
@ -265,8 +409,8 @@ class Encoder(BinaryCodec):
|
|||
raise TypeError('Cannot preserves-encode: ' + repr(v))
|
||||
|
||||
class Canonicalizer:
|
||||
def __init__(self, encode_embedded):
|
||||
self.encoder = Encoder(encode_embedded, canonicalize=True)
|
||||
def __init__(self, encode_embedded, include_annotations):
|
||||
self.encoder = Encoder(encode_embedded, canonicalize=True, include_annotations=include_annotations)
|
||||
self.entries = []
|
||||
|
||||
def entry(self, pieces):
|
||||
|
@ -281,10 +425,8 @@ class Canonicalizer:
|
|||
outer_encoder.buffer.append(0x84)
|
||||
|
||||
def encode(v, **kwargs):
|
||||
"""Encode a single `Value` v to a byte string. Any kwargs are passed on to the underlying
|
||||
[Encoder][preserves.binary.Encoder] constructor.
|
||||
|
||||
"""
|
||||
"""Encode a single `Value` `v` to a byte string. Any supplied `kwargs` are passed on to the
|
||||
underlying [Encoder][preserves.binary.Encoder] constructor."""
|
||||
e = Encoder(**kwargs)
|
||||
e.append(v)
|
||||
return e.contents()
|
||||
|
|
|
@ -305,7 +305,8 @@ class Formatter(TextCodec):
|
|||
format_embedded=lambda x: x,
|
||||
indent=None,
|
||||
with_commas=False,
|
||||
trailing_comma=False):
|
||||
trailing_comma=False,
|
||||
include_annotations=True):
|
||||
"""TODO"""
|
||||
super(Formatter, self).__init__()
|
||||
self.indent_delta = 0 if indent is None else indent
|
||||
|
@ -314,6 +315,7 @@ class Formatter(TextCodec):
|
|||
self.trailing_comma = trailing_comma
|
||||
self.chunks = []
|
||||
self._format_embedded = format_embedded
|
||||
self.include_annotations = include_annotations
|
||||
|
||||
def format_embedded(self, v):
|
||||
if self._format_embedded is None:
|
||||
|
|
|
@ -290,16 +290,18 @@ class Annotated(object):
|
|||
self.item = item
|
||||
|
||||
def __preserve_write_binary__(self, encoder):
|
||||
for a in self.annotations:
|
||||
encoder.buffer.append(0x85)
|
||||
encoder.append(a)
|
||||
if encoder.include_annotations:
|
||||
for a in self.annotations:
|
||||
encoder.buffer.append(0x85)
|
||||
encoder.append(a)
|
||||
encoder.append(self.item)
|
||||
|
||||
def __preserve_write_text__(self, formatter):
|
||||
for a in self.annotations:
|
||||
formatter.chunks.append('@')
|
||||
formatter.append(a)
|
||||
formatter.chunks.append(' ')
|
||||
if formatter.include_annotations:
|
||||
for a in self.annotations:
|
||||
formatter.chunks.append('@')
|
||||
formatter.append(a)
|
||||
formatter.chunks.append(' ')
|
||||
formatter.append(self.item)
|
||||
|
||||
def strip(self, depth=inf):
|
||||
|
|
|
@ -261,7 +261,7 @@ def install_test(d, variant, tName, binaryForm, annotatedTextForm):
|
|||
def test_back(self): self.assertPreservesEqual(self.DS(binaryForm), back)
|
||||
def test_back_ann(self): self.assertPreservesEqual(self.D(self.E(annotatedTextForm)), annotatedTextForm)
|
||||
def test_encode(self): self.assertPreservesEqual(self.E(forward), binaryForm)
|
||||
def test_encode_canonical(self): self.assertPreservesEqual(self.EC(annotatedTextForm), binaryForm)
|
||||
def test_encode_nondet(self): self.assertPreservesEqual(self.ENONDET(annotatedTextForm), binaryForm)
|
||||
def test_encode_ann(self): self.assertPreservesEqual(self.E(annotatedTextForm), binaryForm)
|
||||
add_method(d, tName, test_match_expected)
|
||||
add_method(d, tName, test_roundtrip)
|
||||
|
@ -271,7 +271,7 @@ def install_test(d, variant, tName, binaryForm, annotatedTextForm):
|
|||
if variant in ['normal']:
|
||||
add_method(d, tName, test_encode)
|
||||
if variant in ['nondeterministic']:
|
||||
add_method(d, tName, test_encode_canonical)
|
||||
add_method(d, tName, test_encode_nondet)
|
||||
if variant in ['normal', 'nondeterministic']:
|
||||
add_method(d, tName, test_encode_ann)
|
||||
|
||||
|
@ -323,8 +323,8 @@ class CommonTestSuite(PreservesTestCase):
|
|||
def E(self, v):
|
||||
return encode(v, encode_embedded=lambda x: x)
|
||||
|
||||
def EC(self, v):
|
||||
return encode(v, encode_embedded=lambda x: x, canonicalize=True)
|
||||
def ENONDET(self, v):
|
||||
return encode(v, encode_embedded=lambda x: x, canonicalize=True, include_annotations=True)
|
||||
|
||||
class RecordTests(PreservesTestCase):
|
||||
def test_getters(self):
|
||||
|
|
Loading…
Reference in New Issue