From 031575ad826d2db716e88d9b087376b193c1cfe1 Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones <tonyg@leastfixedpoint.com>
Date: Thu, 16 Mar 2023 17:51:19 +0100
Subject: [PATCH] Python preserves doctest runner, and mkdocs documentation
 stubs

---
 _includes/value-grammar.md                    | 17 +++++++
 _includes/what-is-preserves.md                | 12 +++++
 implementations/python/.envrc                 |  3 +-
 implementations/python/.gitignore             |  1 +
 implementations/python/docs/api.md            |  3 ++
 implementations/python/docs/binary.md         |  3 ++
 implementations/python/docs/compare.md        |  3 ++
 implementations/python/docs/error.md          |  3 ++
 implementations/python/docs/fold.md           |  3 ++
 implementations/python/docs/index.md          | 30 +++++++++++++
 implementations/python/docs/merge.md          |  3 ++
 implementations/python/docs/path.md           |  3 ++
 implementations/python/docs/schema.md         |  3 ++
 implementations/python/docs/text.md           |  3 ++
 implementations/python/docs/values.md         |  3 ++
 implementations/python/mkdocs.yml             | 16 +++++++
 implementations/python/preserves/__init__.py  | 34 ++++++++++++++
 implementations/python/preserves/binary.py    | 45 ++++++++++++++++++-
 implementations/python/preserves/compare.py   |  8 ++++
 implementations/python/preserves/error.py     | 16 +++++--
 implementations/python/preserves/fold.py      |  3 ++
 implementations/python/preserves/merge.py     |  4 ++
 implementations/python/preserves/path.py      |  7 +++
 implementations/python/preserves/schema.py    | 25 +++++++++--
 implementations/python/preserves/text.py      | 20 ++++++++-
 implementations/python/preserves/values.py    | 28 ++++++++++++
 implementations/python/tests/test_doctests.py | 17 +++++++
 preserves.md                                  | 31 +------------
 28 files changed, 308 insertions(+), 39 deletions(-)
 create mode 100644 _includes/value-grammar.md
 create mode 100644 _includes/what-is-preserves.md
 create mode 100644 implementations/python/docs/api.md
 create mode 100644 implementations/python/docs/binary.md
 create mode 100644 implementations/python/docs/compare.md
 create mode 100644 implementations/python/docs/error.md
 create mode 100644 implementations/python/docs/fold.md
 create mode 100644 implementations/python/docs/index.md
 create mode 100644 implementations/python/docs/merge.md
 create mode 100644 implementations/python/docs/path.md
 create mode 100644 implementations/python/docs/schema.md
 create mode 100644 implementations/python/docs/text.md
 create mode 100644 implementations/python/docs/values.md
 create mode 100644 implementations/python/mkdocs.yml
 create mode 100644 implementations/python/tests/test_doctests.py

diff --git a/_includes/value-grammar.md b/_includes/value-grammar.md
new file mode 100644
index 0000000..c42f773
--- /dev/null
+++ b/_includes/value-grammar.md
@@ -0,0 +1,17 @@
+                          Value = Atom
+                                | Compound
+                                | Embedded
+
+                           Atom = Boolean
+                                | Float
+                                | Double
+                                | SignedInteger
+                                | String
+                                | ByteString
+                                | Symbol
+
+                       Compound = Record
+                                | Sequence
+                                | Set
+                                | Dictionary
+
diff --git a/_includes/what-is-preserves.md b/_includes/what-is-preserves.md
new file mode 100644
index 0000000..3c8d1bc
--- /dev/null
+++ b/_includes/what-is-preserves.md
@@ -0,0 +1,12 @@
+*Preserves* is a data model, with associated serialization formats.
+
+It supports *records* with user-defined *labels*, embedded
+*references*, and the usual suite of atomic and compound data types,
+including *binary* data as a distinct type from text strings. Its
+*annotations* allow separation of data from metadata such as [comments](https://preserves.dev/conventions.html#comments),
+trace information, and provenance information.
+
+Preserves departs from many other data languages in defining how to
+*compare* two values. Comparison is based on the data model, not on
+syntax or on data structures of any particular implementation
+language.
diff --git a/implementations/python/.envrc b/implementations/python/.envrc
index 9ae9c9c..9150c90 100644
--- a/implementations/python/.envrc
+++ b/implementations/python/.envrc
@@ -2,7 +2,8 @@ if ! [ -d .venv ]
 then
     python -m venv .venv
     . .venv/bin/activate
-    pip install -U coverage setuptools setuptools_scm wheel
+    pip install -U coverage setuptools setuptools_scm wheel \
+        mkdocs 'mkdocstrings[python]' mkdocs-material mkdocs-macros-plugin
     pip install -e .
 else
     . .venv/bin/activate
diff --git a/implementations/python/.gitignore b/implementations/python/.gitignore
index b214146..63ac3c7 100644
--- a/implementations/python/.gitignore
+++ b/implementations/python/.gitignore
@@ -4,3 +4,4 @@ htmlcov/
 build/
 dist/
 *.egg-info/
+/.venv/
diff --git a/implementations/python/docs/api.md b/implementations/python/docs/api.md
new file mode 100644
index 0000000..b855a9e
--- /dev/null
+++ b/implementations/python/docs/api.md
@@ -0,0 +1,3 @@
+# The top-level preserves package
+
+::: preserves
diff --git a/implementations/python/docs/binary.md b/implementations/python/docs/binary.md
new file mode 100644
index 0000000..e70482b
--- /dev/null
+++ b/implementations/python/docs/binary.md
@@ -0,0 +1,3 @@
+# Machine-oriented binary syntax
+
+::: preserves.binary
diff --git a/implementations/python/docs/compare.md b/implementations/python/docs/compare.md
new file mode 100644
index 0000000..720b1d9
--- /dev/null
+++ b/implementations/python/docs/compare.md
@@ -0,0 +1,3 @@
+# Comparing Values
+
+::: preserves.compare
diff --git a/implementations/python/docs/error.md b/implementations/python/docs/error.md
new file mode 100644
index 0000000..5731c34
--- /dev/null
+++ b/implementations/python/docs/error.md
@@ -0,0 +1,3 @@
+# Codec errors
+
+::: preserves.error
diff --git a/implementations/python/docs/fold.md b/implementations/python/docs/fold.md
new file mode 100644
index 0000000..3534854
--- /dev/null
+++ b/implementations/python/docs/fold.md
@@ -0,0 +1,3 @@
+# Traversing values
+
+::: preserves.fold
diff --git a/implementations/python/docs/index.md b/implementations/python/docs/index.md
new file mode 100644
index 0000000..fa643c7
--- /dev/null
+++ b/implementations/python/docs/index.md
@@ -0,0 +1,30 @@
+# Overview
+
+This package implements [Preserves](https://preserves.dev/) for Python 3.x. It provides the
+core [semantics][] as well as both the [human-readable text
+syntax](https://preserves.dev/preserves-text.html) (a superset of JSON) and [machine-oriented
+binary format](https://preserves.dev/preserves-binary.html) (including canonicalization) for
+Preserves. It also implements [Preserves Schema](https://preserves.dev/preserves-schema.html)
+and [Preserves Path](https://preserves.dev/preserves-path.html).
+
+ - Main package API: [preserves](/api)
+
+## What is Preserves?
+
+{% include "what-is-preserves.md" %}
+
+## Mapping between Preserves values and Python values
+
+Preserves `Value`s are categorized in the following way:
+
+{% include "value-grammar.md" %}
+
+Python's strings, byte strings, integers, booleans, and double-precision floats stand directly
+for their Preserves counterparts. Small wrapper classes for `Float` and `Symbol` complete the
+suite of atomic types.
+
+Python's lists and tuples correspond to Preserves `Sequence`s, and dicts and sets to
+`Dictionary` and `Set` values, respectively. Preserves `Record`s are represented by a `Record`
+class. Finally, embedded values are represented by a small `Embedded` wrapper class.
+
+[semantics]: https://preserves.dev/preserves.html#semantics
diff --git a/implementations/python/docs/merge.md b/implementations/python/docs/merge.md
new file mode 100644
index 0000000..629109a
--- /dev/null
+++ b/implementations/python/docs/merge.md
@@ -0,0 +1,3 @@
+# Merging values
+
+::: preserves.merge
diff --git a/implementations/python/docs/path.md b/implementations/python/docs/path.md
new file mode 100644
index 0000000..ec40d98
--- /dev/null
+++ b/implementations/python/docs/path.md
@@ -0,0 +1,3 @@
+# Preserves Path
+
+::: preserves.path
diff --git a/implementations/python/docs/schema.md b/implementations/python/docs/schema.md
new file mode 100644
index 0000000..4b9f11f
--- /dev/null
+++ b/implementations/python/docs/schema.md
@@ -0,0 +1,3 @@
+# Preserves Schema
+
+::: preserves.schema
diff --git a/implementations/python/docs/text.md b/implementations/python/docs/text.md
new file mode 100644
index 0000000..bd505d8
--- /dev/null
+++ b/implementations/python/docs/text.md
@@ -0,0 +1,3 @@
+# Human-readable text syntax
+
+::: preserves.text
diff --git a/implementations/python/docs/values.md b/implementations/python/docs/values.md
new file mode 100644
index 0000000..ef2837e
--- /dev/null
+++ b/implementations/python/docs/values.md
@@ -0,0 +1,3 @@
+# Representations of Values
+
+::: preserves.values
diff --git a/implementations/python/mkdocs.yml b/implementations/python/mkdocs.yml
new file mode 100644
index 0000000..d0fa910
--- /dev/null
+++ b/implementations/python/mkdocs.yml
@@ -0,0 +1,16 @@
+site_name: Preserves
+theme:
+  name: material
+plugins:
+  - search
+  - mkdocstrings
+  - macros:
+      include_dir: ../../_includes
+markdown_extensions:
+  - admonition
+  - pymdownx.highlight
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  - pymdownx.superfences
+watch:
+  - preserves
diff --git a/implementations/python/preserves/__init__.py b/implementations/python/preserves/__init__.py
index 9e2b391..83500b3 100644
--- a/implementations/python/preserves/__init__.py
+++ b/implementations/python/preserves/__init__.py
@@ -1,4 +1,31 @@
+'''
+```
+import preserves
+```
+
+The main package re-exports a subset of the exports of its constituent modules:
+
+(TODO: improve the presentation of this list)
+
+- From [`values`](/values): `Float, Symbol, Record, ImmutableDict, Embedded, preserve, Annotated, is_annotated, strip_annotations, annotate`
+
+- From [`compare`](/compare): `cmp`
+
+- From [`error`](/error): `DecodeError, EncodeError, ShortPacket`
+
+- From [`binary`](/binary): `Decoder, Encoder, decode, decode_with_annotations, encode, canonicalize`
+
+- From [`text`](/text): `Parser, Formatter, parse, parse_with_annotations, stringify`
+
+- From [`merge`](/merge): `merge`
+
+- and submodules [`fold`](/fold) and [`compare`](/compare).
+
+In addition, it provides a few utility aliases for common tasks:
+'''
+
 from .values import Float, Symbol, Record, ImmutableDict, Embedded, preserve
+
 from .values import Annotated, is_annotated, strip_annotations, annotate
 
 from .compare import cmp
@@ -13,4 +40,11 @@ from .merge import merge
 from . import fold, compare
 
 loads = parse
+'''
+This alias for `parse` provides a familiar pythonesque name for converting a string to a Preserves `Value`.
+'''
+
 dumps = stringify
+'''
+This alias for `stringify` provides a familiar pythonesque name for converting a Preserves `Value` to a string.
+'''
diff --git a/implementations/python/preserves/binary.py b/implementations/python/preserves/binary.py
index 8b246f4..7f2904c 100644
--- a/implementations/python/preserves/binary.py
+++ b/implementations/python/preserves/binary.py
@@ -1,3 +1,20 @@
+"""The [preserves.binary][] module implements the [Preserves machine-oriented binary
+syntax](https://preserves.dev/preserves-binary.html).
+
+The main entry points are functions [encode][preserves.binary.encode],
+[canonicalize][preserves.binary.canonicalize], [decode][preserves.binary.decode], and
+[decode_with_annotations][preserves.binary.decode_with_annotations].
+
+```python
+>>> encode(Record(Symbol('hi'), []))
+b'\\xb4\\xb3\\x02hi\\x84'
+>>> decode(b'\\xb4\\xb3\\x02hi\\x84')
+#hi()
+
+```
+
+"""
+
 import numbers
 import struct
 
@@ -5,10 +22,15 @@ from .values import *
 from .error import *
 from .compat import basestring_, ord_
 
-class BinaryCodec(object): pass
+class BinaryCodec(object):
+    """TODO"""
+    pass
 
 class Decoder(BinaryCodec):
+    """TODO"""
+
     def __init__(self, packet=b'', include_annotations=False, decode_embedded=lambda x: x):
+        """TODO"""
         super(Decoder, self).__init__()
         self.packet = packet
         self.index = 0
@@ -16,6 +38,7 @@ class Decoder(BinaryCodec):
         self.decode_embedded = decode_embedded
 
     def extend(self, data):
+        """TODO"""
         self.packet = self.packet[self.index:] + data
         self.index = 0
 
@@ -69,6 +92,7 @@ class Decoder(BinaryCodec):
         return v
 
     def next(self):
+        """TODO"""
         tag = self.nextbyte()
         if tag == 0x80: return self.wrap(False)
         if tag == 0x81: return self.wrap(True)
@@ -99,6 +123,7 @@ class Decoder(BinaryCodec):
         raise DecodeError('Invalid tag: ' + hex(tag))
 
     def try_next(self):
+        """TODO"""
         start = self.index
         try:
             return self.next()
@@ -107,6 +132,7 @@ class Decoder(BinaryCodec):
             return None
 
     def __iter__(self):
+        """TODO"""
         return self
 
     def __next__(self):
@@ -116,19 +142,26 @@ class Decoder(BinaryCodec):
         return v
 
 def decode(bs, **kwargs):
+    """Decode and return the first `Value` in byte string `bs`. Any kwargs are passed on to the [Decoder][preserves.binary.Decoder] constructor."""
     return Decoder(packet=bs, **kwargs).next()
 
 def decode_with_annotations(bs, **kwargs):
+    """As [decode][preserves.binary.decode], but sets `include_annotations=True` in the [Decoder][preserves.binary.Decoder] constructor."""
     return Decoder(packet=bs, include_annotations=True, **kwargs).next()
 
 class Encoder(BinaryCodec):
+    """Implementation of an encoder for the machine-oriented binary Preserves syntax.
+
+    """
     def __init__(self, encode_embedded=lambda x: x, canonicalize=False):
+        """TODO"""
         super(Encoder, self).__init__()
         self.buffer = bytearray()
         self._encode_embedded = encode_embedded
         self._canonicalize = canonicalize
 
     def reset(self):
+        """TODO"""
         self.buffer = bytearray()
 
     def encode_embedded(self, v):
@@ -137,6 +170,7 @@ class Encoder(BinaryCodec):
         return self._encode_embedded(v)
 
     def contents(self):
+        """TODO"""
         return bytes(self.buffer)
 
     def varint(self, v):
@@ -187,6 +221,7 @@ class Encoder(BinaryCodec):
             c.emit_entries(self, 7)
 
     def append(self, v):
+        """TODO"""
         v = preserve(v)
         if hasattr(v, '__preserve_write_binary__'):
             v.__preserve_write_binary__(self)
@@ -246,9 +281,17 @@ class Canonicalizer:
         outer_encoder.buffer.append(0x84)
 
 def encode(v, **kwargs):
+    """Encode a single `Value` v to a byte string. Any kwargs are passed on to the underlying
+    [Encoder][preserves.binary.Encoder] constructor.
+
+    """
     e = Encoder(**kwargs)
     e.append(v)
     return e.contents()
 
 def canonicalize(v, **kwargs):
+    """As [encode][preserves.binary.encode], but sets `canonicalize=True` in the
+    [Encoder][preserves.binary.Encoder] constructor.
+
+    """
     return encode(v, canonicalize=True, **kwargs)
diff --git a/implementations/python/preserves/compare.py b/implementations/python/preserves/compare.py
index 1572426..b8cc685 100644
--- a/implementations/python/preserves/compare.py
+++ b/implementations/python/preserves/compare.py
@@ -1,3 +1,5 @@
+"""TODO"""
+
 import numbers
 from enum import Enum
 from functools import cmp_to_key
@@ -50,15 +52,19 @@ def type_number(v):
         return TypeNumber.SEQUENCE
 
 def cmp(a, b):
+    """TODO"""
     return _cmp(preserve(a), preserve(b))
 
 def lt(a, b):
+    """Returns `True` iff [cmp][preserves.compare.cmp]`(a, b)` is negative."""
     return cmp(a, b) < 0
 
 def le(a, b):
+    """Returns `True` iff [cmp][preserves.compare.cmp]`(a, b)` is negative or zero."""
     return cmp(a, b) <= 0
 
 def eq(a, b):
+    """TODO"""
     return _eq(preserve(a), preserve(b))
 
 key = cmp_to_key(cmp)
@@ -66,9 +72,11 @@ _key = key
 
 _sorted = sorted
 def sorted(iterable, *, key=lambda x: x, reverse=False):
+    """TODO"""
     return _sorted(iterable, key=lambda x: _key(key(x)), reverse=reverse)
 
 def sorted_items(d):
+    """TODO"""
     return sorted(d.items(), key=_item_key)
 
 def _eq_sequences(aa, bb):
diff --git a/implementations/python/preserves/error.py b/implementations/python/preserves/error.py
index d0aee8a..39122c5 100644
--- a/implementations/python/preserves/error.py
+++ b/implementations/python/preserves/error.py
@@ -1,3 +1,13 @@
-class DecodeError(ValueError): pass
-class EncodeError(ValueError): pass
-class ShortPacket(DecodeError): pass
+"""TODO"""
+
+class DecodeError(ValueError):
+    """TODO"""
+    pass
+
+class EncodeError(ValueError):
+    """TODO"""
+    pass
+
+class ShortPacket(DecodeError):
+    """TODO"""
+    pass
diff --git a/implementations/python/preserves/fold.py b/implementations/python/preserves/fold.py
index 90b1e43..0cdb57b 100644
--- a/implementations/python/preserves/fold.py
+++ b/implementations/python/preserves/fold.py
@@ -1,6 +1,9 @@
+"""TODO"""
+
 from .values import ImmutableDict, dict_kvs, Embedded, Record
 
 def map_embeddeds(f, v):
+    """TODO"""
     def walk(v):
         if isinstance(v, Embedded):
             return f(v.embeddedValue)
diff --git a/implementations/python/preserves/merge.py b/implementations/python/preserves/merge.py
index c04fac1..072a649 100644
--- a/implementations/python/preserves/merge.py
+++ b/implementations/python/preserves/merge.py
@@ -1,9 +1,12 @@
+"""TODO"""
+
 from .values import ImmutableDict, dict_kvs, Embedded, Record
 
 def merge_embedded_id(a, b):
     return a if a is b else None
 
 def merge(v0, *vs, merge_embedded=None):
+    """TODO"""
     v = v0
     for vN in vs:
         v = merge2(v, vN, merge_embedded=merge_embedded)
@@ -17,6 +20,7 @@ def merge_seq(aa, bb, merge_embedded=None):
     return [merge2(a, b, merge_embedded=merge_embedded) for (a, b) in zip(aa, bb)]
 
 def merge2(a, b, merge_embedded=None):
+    """TODO"""
     if a == b:
         return a
     if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)):
diff --git a/implementations/python/preserves/path.py b/implementations/python/preserves/path.py
index bf05a6c..4b0ad15 100644
--- a/implementations/python/preserves/path.py
+++ b/implementations/python/preserves/path.py
@@ -1,3 +1,5 @@
+"""TODO (document __main__ behaviour)"""
+
 from . import *
 from .schema import load_schema_file, extend
 from .values import _unwrap
@@ -6,11 +8,16 @@ import pathlib
 import re
 
 syntax = load_schema_file(pathlib.Path(__file__).parent / 'path.prb').path
+"""TODO"""
 
 Selector = syntax.Selector
+"""TODO"""
+
 Predicate = syntax.Predicate
+"""TODO"""
 
 def parse(s):
+    """TODO"""
     return parse_selector(Parser(s))
 
 def parse_selector(tokens):
diff --git a/implementations/python/preserves/schema.py b/implementations/python/preserves/schema.py
index f887b15..04233fa 100644
--- a/implementations/python/preserves/schema.py
+++ b/implementations/python/preserves/schema.py
@@ -1,7 +1,8 @@
-#
-# This is an implementation of [Preserves Schema](https://preserves.dev/preserves-schema.html)
-# for Python 3.
-#
+"""This is an implementation of [Preserves Schema](https://preserves.dev/preserves-schema.html)
+for Python 3.
+
+TODO
+"""
 
 from . import *
 import pathlib
@@ -38,6 +39,7 @@ def sequenceish(x):
     return isinstance(x, tuple) or isinstance(x, list)
 
 class SchemaDecodeFailed(ValueError):
+    """TODO"""
     def __init__(self, cls, p, v, failures=None):
         super().__init__()
         self.cls = cls
@@ -81,6 +83,8 @@ class ExplanationBuilder:
         return '\n' + ' ' * self.indentLevel + self._node(failure) + ''.join(nested)
 
 class SchemaObject:
+    """TODO"""
+
     ROOTNS = None
     SCHEMA = None
     MODULE_PATH = None
@@ -89,10 +93,12 @@ class SchemaObject:
 
     @classmethod
     def decode(cls, v):
+        """TODO"""
         raise NotImplementedError('Subclass responsibility')
 
     @classmethod
     def try_decode(cls, v):
+        """TODO"""
         try:
             return cls.decode(v)
         except SchemaDecodeFailed:
@@ -176,6 +182,7 @@ class SchemaObject:
         raise ValueError(f'Bad schema {p}')
 
     def __preserve__(self):
+        """TODO"""
         raise NotImplementedError('Subclass responsibility')
 
     def __repr__(self):
@@ -192,6 +199,8 @@ class SchemaObject:
         raise NotImplementedError('Subclass responsibility')
 
 class Enumeration(SchemaObject):
+    """TODO"""
+
     VARIANTS = None
 
     def __init__(self):
@@ -239,6 +248,8 @@ def safehasattr(o, k):
     return hasattr(o, safeattrname(k))
 
 class Definition(SchemaObject):
+    """TODO"""
+
     EMPTY = False
     SIMPLE = False
     FIELD_NAMES = []
@@ -345,6 +356,7 @@ class escape:
         return self.escaped
 
 def encode(p, v):
+    """TODO"""
     if hasattr(v, '__escape_schema__'):
         return preserve(v.__escape_schema__())
     if p == ANY:
@@ -431,6 +443,7 @@ def definition_not_found(module_path, name):
     raise KeyError('Definition not found: ' + module_path_str(module_path + (name,)))
 
 class Namespace:
+    """TODO"""
     def __init__(self, prefix):
         self._prefix = prefix
 
@@ -453,6 +466,7 @@ class Namespace:
         return repr(self._items())
 
 class Compiler:
+    """TODO"""
     def __init__(self):
         self.root = Namespace(())
 
@@ -487,12 +501,14 @@ class Compiler:
             ns[n] = c
 
 def load_schema_file(filename):
+    """TODO"""
     c = Compiler()
     c.load(filename)
     return c.root
 
 # a decorator
 def extend(cls):
+    """TODO"""
     def extender(f):
         setattr(cls, f.__name__, f)
         return f
@@ -500,6 +516,7 @@ def extend(cls):
 
 __metaschema_filename = pathlib.Path(__file__).parent / 'schema.prb'
 meta = load_schema_file(__metaschema_filename).schema
+"""TODO"""
 
 if __name__ == '__main__':
     with open(__metaschema_filename, 'rb') as f:
diff --git a/implementations/python/preserves/text.py b/implementations/python/preserves/text.py
index 321f8b3..9924636 100644
--- a/implementations/python/preserves/text.py
+++ b/implementations/python/preserves/text.py
@@ -1,3 +1,5 @@
+"""TODO"""
+
 import numbers
 import struct
 import base64
@@ -8,12 +10,17 @@ from .error import *
 from .compat import basestring_, unichr_
 from .binary import Decoder
 
-class TextCodec(object): pass
+class TextCodec(object):
+    """TODO"""
+    pass
 
 NUMBER_RE = re.compile(r'^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$')
 
 class Parser(TextCodec):
+    """TODO"""
+
     def __init__(self, input_buffer=u'', include_annotations=False, parse_embedded=lambda x: x):
+        """TODO"""
         super(Parser, self).__init__()
         self.input_buffer = input_buffer
         self.index = 0
@@ -21,6 +28,7 @@ class Parser(TextCodec):
         self.parse_embedded = parse_embedded
 
     def extend(self, text):
+        """TODO"""
         self.input_buffer = self.input_buffer[self.index:] + text
         self.index = 0
 
@@ -200,6 +208,7 @@ class Parser(TextCodec):
         return Annotated(v) if self.include_annotations else v
 
     def next(self):
+        """TODO"""
         self.skip_whitespace()
         c = self.peek()
         if c == '"':
@@ -264,6 +273,7 @@ class Parser(TextCodec):
         return self.wrap(self.read_raw_symbol_or_number([c]))
 
     def try_next(self):
+        """TODO"""
         start = self.index
         try:
             return self.next()
@@ -272,6 +282,7 @@ class Parser(TextCodec):
             return None
 
     def __iter__(self):
+        """TODO"""
         return self
 
     def __next__(self):
@@ -281,17 +292,21 @@ class Parser(TextCodec):
         return v
 
 def parse(bs, **kwargs):
+    """Read and return the next `Value` from the text `bs`. Any kwargs are passed on to the [Parser][preserves.text.Parser] constructor."""
     return Parser(input_buffer=bs, **kwargs).next()
 
 def parse_with_annotations(bs, **kwargs):
+    """As [parse][preserves.text.parse], but sets `include_annotations=True` in the [Parser][preserves.text.Parser] constructor."""
     return Parser(input_buffer=bs, include_annotations=True, **kwargs).next()
 
 class Formatter(TextCodec):
+    """TODO"""
     def __init__(self,
                  format_embedded=lambda x: x,
                  indent=None,
                  with_commas=False,
                  trailing_comma=False):
+        """TODO"""
         super(Formatter, self).__init__()
         self.indent_delta = 0 if indent is None else indent
         self.indent_distance = 0
@@ -306,6 +321,7 @@ class Formatter(TextCodec):
         return self._format_embedded(v)
 
     def contents(self):
+        """TODO"""
         return u''.join(self.chunks)
 
     def is_indenting(self):
@@ -352,6 +368,7 @@ class Formatter(TextCodec):
         self.chunks.append(closer)
 
     def append(self, v):
+        """TODO"""
         v = preserve(v)
         if hasattr(v, '__preserve_write_text__'):
             v.__preserve_write_text__(self)
@@ -402,6 +419,7 @@ class Formatter(TextCodec):
         raise TypeError('Cannot preserves-format: ' + repr(v))
 
 def stringify(v, **kwargs):
+    """Format a single `Value` `v` as a string. Any kwargs are passed on to the [Formatter][preserves.text.Formatter] constructor."""
     e = Formatter(**kwargs)
     e.append(v)
     return e.contents()
diff --git a/implementations/python/preserves/values.py b/implementations/python/preserves/values.py
index cdb2fc0..d3e7225 100644
--- a/implementations/python/preserves/values.py
+++ b/implementations/python/preserves/values.py
@@ -1,3 +1,5 @@
+"""TODO"""
+
 import re
 import sys
 import struct
@@ -6,6 +8,7 @@ import math
 from .error import DecodeError
 
 def preserve(v):
+    """Repeatedly replaces `v` with `v.__preserve__()` for as long as it has such a method, then returns the result."""
     while hasattr(v, '__preserve__'):
         v = v.__preserve__()
     return v
@@ -14,6 +17,7 @@ def float_to_int(v):
     return struct.unpack('>Q', struct.pack('>d', v))[0]
 
 def cmp_floats(a, b):
+    """TODO"""
     a = float_to_int(a)
     b = float_to_int(b)
     if a & 0x8000000000000000: a = a ^ 0x7fffffffffffffff
@@ -21,7 +25,9 @@ def cmp_floats(a, b):
     return a - b
 
 class Float(object):
+    """TODO"""
     def __init__(self, value):
+        """TODO"""
         self.value = value
 
     def __eq__(self, other):
@@ -66,6 +72,7 @@ class Float(object):
 
     @staticmethod
     def from_bytes(bs):
+        """TODO"""
         vf = struct.unpack('>I', bs)[0]
         if (vf & 0x7f800000) == 0x7f800000:
             # NaN or inf. Preserve quiet/signalling bit by manually expanding to double-precision.
@@ -80,7 +87,9 @@ class Float(object):
 RAW_SYMBOL_RE = re.compile(r'^[-a-zA-Z0-9~!$%^&*?_=+/.]+$')
 
 class Symbol(object):
+    """TODO"""
     def __init__(self, name):
+        """TODO"""
         self.name = name.name if isinstance(name, Symbol) else name
 
     def __eq__(self, other):
@@ -125,7 +134,9 @@ class Symbol(object):
             formatter.chunks.append('|')
 
 class Record(object):
+    """TODO"""
     def __init__(self, key, fields):
+        """TODO"""
         self.key = key
         self.fields = tuple(fields)
         self.__hash = None
@@ -165,10 +176,12 @@ class Record(object):
 
     @staticmethod
     def makeConstructor(labelSymbolText, fieldNames):
+        """TODO"""
         return Record.makeBasicConstructor(Symbol(labelSymbolText), fieldNames)
 
     @staticmethod
     def makeBasicConstructor(label, fieldNames):
+        """TODO"""
         if type(fieldNames) == str:
             fieldNames = fieldNames.split()
         arity = len(fieldNames)
@@ -196,7 +209,9 @@ class Record(object):
         return ctor
 
 class RecordConstructorInfo(object):
+    """TODO"""
     def __init__(self, key, arity):
+        """TODO"""
         self.key = key
         self.arity = arity
 
@@ -218,7 +233,9 @@ class RecordConstructorInfo(object):
 
 # Blub blub blub
 class ImmutableDict(dict):
+    """TODO"""
     def __init__(self, *args, **kwargs):
+        """TODO"""
         if hasattr(self, '__hash'): raise TypeError('Immutable')
         super(ImmutableDict, self).__init__(*args, **kwargs)
         self.__hash = None
@@ -241,6 +258,7 @@ class ImmutableDict(dict):
 
     @staticmethod
     def from_kvs(kvs):
+        """TODO"""
         i = iter(kvs)
         result = ImmutableDict()
         result_proxy = super(ImmutableDict, result)
@@ -257,6 +275,7 @@ class ImmutableDict(dict):
         return result
 
 def dict_kvs(d):
+    """Generator yielding each key of `d` immediately followed by its value: `k1, v1, k2, v2, ...`."""
     for k in d:
         yield k
         yield d[k]
@@ -264,7 +283,9 @@ def dict_kvs(d):
 inf = float('inf')
 
 class Annotated(object):
+    """TODO"""
     def __init__(self, item):
+        """TODO"""
         self.annotations = []
         self.item = item
 
@@ -282,9 +303,11 @@ class Annotated(object):
         formatter.append(self.item)
 
     def strip(self, depth=inf):
+        """TODO"""
         return strip_annotations(self, depth)
 
     def peel(self):
+        """TODO"""
         return strip_annotations(self, 1)
 
     def __eq__(self, other):
@@ -300,9 +323,11 @@ class Annotated(object):
         return ' '.join(list('@' + repr(a) for a in self.annotations) + [repr(self.item)])
 
 def is_annotated(v):
+    """Returns `True` iff `v` is an instance of [Annotated][preserves.values.Annotated]."""
     return isinstance(v, Annotated)
 
 def strip_annotations(v, depth=inf):
+    """TODO"""
     if depth == 0: return v
     if not is_annotated(v): return v
 
@@ -329,6 +354,7 @@ def strip_annotations(v, depth=inf):
         return v
 
 def annotate(v, *anns):
+    """TODO"""
     if not is_annotated(v):
         v = Annotated(v)
     for a in anns:
@@ -342,7 +368,9 @@ def _unwrap(x):
         return x
 
 class Embedded:
+    """TODO"""
     def __init__(self, value):
+        """TODO"""
         self.embeddedValue = value
 
     def __eq__(self, other):
diff --git a/implementations/python/tests/test_doctests.py b/implementations/python/tests/test_doctests.py
new file mode 100644
index 0000000..a29fd4c
--- /dev/null
+++ b/implementations/python/tests/test_doctests.py
@@ -0,0 +1,17 @@
+import doctest
+import pkgutil
+import importlib.util
+
+import preserves
+
+def load_tests(loader, tests, ignore):
+    """`unittest` load_tests hook: collect the doctests of `preserves` and each of its submodules."""
+    def add_doctests(spec):
+        mod = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(mod)  # spec.loader, not the deprecated module.__loader__
+        tests.addTests(doctest.DocTestSuite(mod))
+    root = preserves.__spec__
+    add_doctests(root)
+    for info in pkgutil.walk_packages(root.submodule_search_locations, root.name + '.'):
+        add_doctests(info.module_finder.find_spec(info.name))
+    return tests
diff --git a/preserves.md b/preserves.md
index 02c89e9..98f9e08 100644
--- a/preserves.md
+++ b/preserves.md
@@ -6,19 +6,7 @@ title: "Preserves: an Expressive Data Language"
 Tony Garnock-Jones <tonyg@leastfixedpoint.com>  
 {{ site.version_date }}. Version {{ site.version }}.
 
-*Preserves* is a data model, with associated serialization formats.
-
-It supports *records* with user-defined *labels*, embedded *references*,
-and the usual suite of atomic and compound data types, including
-*binary* data as a distinct type from text strings. Its *annotations*
-allow separation of data from metadata such as
-[comments](conventions.html#comments), trace information, and provenance
-information.
-
-Preserves departs from many other data languages in defining how to
-*compare* two values. Comparison is based on the data model, not on
-syntax or on data structures of any particular implementation
-language.
+{% include what-is-preserves.md %}
 
 This document defines the core semantics and data model of Preserves and
 presents a handful of examples. Two other core documents define
@@ -38,22 +26,7 @@ element of that set.
 data. Every `Value` is finite and non-cyclic. Embedded values, called
 `Embedded`s, are a third, special-case category.
 
-                          Value = Atom
-                                | Compound
-                                | Embedded
-
-                           Atom = Boolean
-                                | Float
-                                | Double
-                                | SignedInteger
-                                | String
-                                | ByteString
-                                | Symbol
-
-                       Compound = Record
-                                | Sequence
-                                | Set
-                                | Dictionary
+{% include value-grammar.md %}
 
 **Total order.**<a name="total-order"></a> As we go, we will
 incrementally specify a total order over `Value`s. Two values of the