Test against upstream samples

PEG: tweak commas and symbols
Add raw floats and doubles to parser
2023-12-25 10:08:10 +02:00 · 2023-12-25 10:08:10 +02:00 · 2023-12-25 10:08:10 +02:00 · 2023-12-25 10:08:10 +02:00 · 2023-12-25 10:08:03 +02:00 · 2023-12-24 21:24:02 +02:00
16 changed files with 852 additions and 701 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 /nim.cfg
+*.dot
 *.html
 *.run
+*.svg
--- a/preserves.nimble
+++ b/preserves.nimble
@@ -1,6 +1,6 @@
 # Package

-version = "20231102"
+version = "20231220"
 author        = "Emery Hemingway"
 description   = "data model and serialization format"
 license       = "Unlicense"
--- a/src/Tupfile
+++ b/src/Tupfile
@@ -1,3 +1,2 @@
 include_rules
 : preserves.nim |> !nim_run |>
-: preserves.nim |> !nim_check |>
--- a/src/preserves.nim
+++ b/src/preserves.nim
@@ -1,12 +1,14 @@
 # SPDX-FileCopyrightText: ☭ Emery Hemingway
 # SPDX-License-Identifier: Unlicense

-import std/[base64, endians, hashes, options, sets, sequtils, streams, strutils, tables, typetraits]
+import std/[options, sets, sequtils, strutils, tables, typetraits]
 import ./preserves/private/macros

 from std/algorithm import sort
 from std/json import escapeJson, escapeJsonUnquoted
-import ./preserves/private/dot
+import ./preserves/private/[encoding, decoding, dot, parsing, texts, values]
+
+export encoding, decoding, parsing, texts, values

 when defined(tracePreserves):
  when defined(posix):
@@ -16,194 +18,6 @@ when defined(tracePreserves):
 else:
  template trace(args: varargs[untyped]) = discard

-type
-  PreserveKind* = enum
-    pkBoolean, pkFloat, pkDouble, pkSignedInteger, pkString, pkByteString, pkSymbol,
-        pkRecord, pkSequence, pkSet, pkDictionary, pkEmbedded
-
-const
-  atomKinds* = {pkBoolean, pkFloat, pkDouble, pkSignedInteger, pkString, pkByteString, pkSymbol}
-  compoundKinds* = {pkRecord, pkSequence, pkSet, pkDictionary}
-
-type Symbol* = distinct string
-proc `<`*(x, y: Symbol): bool {.borrow.}
-proc `==`*(x, y: Symbol): bool {.borrow.}
-proc hash*(s: Symbol): Hash {.borrow.}
-proc len*(s: Symbol): int {.borrow.}
-
-proc `$`*(s: Symbol): string =
-  let sym = string s
-  if sym.len > 0 and sym[0] in {'A'..'z'} and not sym.anyIt(char(it) in { '\x00'..'\x19', '"', '\\', '|' }):
-    result = sym
-  else:
-    result = newStringOfCap(sym.len shl 1)
-    result.add('|')
-    for c in sym:
-      case c
-      of '\\':
-        result.add("\\\\")
-      of '/':
-        result.add("\\/")
-      of '\x08':
-        result.add("\\b")
-      of '\x0c':
-        result.add("\\f")
-      of '\x0a':
-        result.add("\\n")
-      of '\x0d':
-        result.add("\\r")
-      of '\x09':
-        result.add("\\t")
-      of '|':
-        result.add("\\|")
-      else:
-        result.add(c)
-    result.add('|')
-
-type
-  Preserve*[E] = object
-    embedded*: bool
-      ## Flag to mark embedded Preserves
-    case kind*: PreserveKind
-    of pkBoolean:
-      bool*: bool
-    of pkFloat:
-      float*: float32
-    of pkDouble:
-      double*: float64
-    of pkSignedInteger:
-      int*: BiggestInt
-    of pkString:
-      string*: string
-    of pkByteString:
-      bytes*: seq[byte]
-    of pkSymbol:
-      symbol*: Symbol
-    of pkRecord:
-      record*: seq[Preserve[E]] # label is last
-    of pkSequence:
-      sequence*: seq[Preserve[E]]
-    of pkSet:
-      set*: seq[Preserve[E]]
-        # TODO: HashSet
-    of pkDictionary:
-      dict*: seq[DictEntry[E]]
-        # TODO: Tables
-    of pkEmbedded:
-      embed*: E
-
-  DictEntry[E] = tuple[key: Preserve[E], val: Preserve[E]]
-
-func `==`*[A, B](x: Preserve[A]; y: Preserve[B]): bool =
-  ## Check `x` and `y` for equivalence.
-  if x.kind == y.kind and x.embedded == y.embedded:
-    case x.kind
-    of pkBoolean:
-      result = x.bool == y.bool
-    of pkFloat:
-      result = x.float == y.float
-    of pkDouble:
-      result = x.double == y.double
-    of pkSignedInteger:
-      result = x.int == y.int
-    of pkString:
-      result = x.string == y.string
-    of pkByteString:
-      result = x.bytes == y.bytes
-    of pkSymbol:
-      result = x.symbol == y.symbol
-    of pkRecord:
-      result = x.record.len == y.record.len
-      for i in 0..x.record.high:
-        if not result: break
-        result = result and (x.record[i] == y.record[i])
-    of pkSequence:
-      for i, val in x.sequence:
-        if y.sequence[i] != val: return false
-      result = true
-    of pkSet:
-      result = x.set.len == y.set.len
-      for i in 0..x.set.high:
-        if not result: break
-        result = result and (x.set[i] == y.set[i])
-    of pkDictionary:
-      result = x.dict.len == y.dict.len
-      for i in 0..x.dict.high:
-        if not result: break
-        result = result and
-          (x.dict[i].key == y.dict[i].key) and
-          (x.dict[i].val == y.dict[i].val)
-    of pkEmbedded:
-      when A is B:
-        when A is void:
-          result = true
-        else:
-          result = x.embed == y.embed
-
-proc `<`(x, y: string | seq[byte]): bool =
-  for i in 0 .. min(x.high, y.high):
-    if x[i] < y[i]: return true
-    if x[i] != y[i]: return false
-  x.len < y.len
-
-proc `<`*[A, B](x: Preserve[A]; y: Preserve[B]): bool =
-  ## Preserves have a total order over values. Check if `x` is ordered before `y`.
-  if x.embedded != y.embedded:
-    result = y.embedded
-  elif x.kind != y.kind:
-    result = x.kind < y.kind
-  else:
-    case x.kind
-    of pkBoolean:
-      result = (not x.bool) and y.bool
-    of pkFloat:
-      result = x.float < y.float
-    of pkDouble:
-      result = x.double < y.double
-    of pkSignedInteger:
-      result = x.int < y.int
-    of pkString:
-      result = x.string < y.string
-    of pkByteString:
-      result = x.bytes < y.bytes
-    of pkSymbol:
-      result = x.symbol < y.symbol
-    of pkRecord:
-      if x.record[x.record.high] < y.record[y.record.high]: return true
-      for i in 0..<min(x.record.high, y.record.high):
-        if x.record[i] < y.record[i]: return true
-        if x.record[i] == y.record[i]: return false
-      result = x.record.len < y.record.len
-    of pkSequence:
-      for i in 0..min(x.sequence.high, y.sequence.high):
-        if x.sequence[i] < y.sequence[i]: return true
-        if x.sequence[i] != y.sequence[i]: return false
-      result = x.sequence.len < y.sequence.len
-    of pkSet:
-      for i in 0..min(x.set.high, y.set.high):
-        if x.set[i] < y.set[i]: return true
-        if x.set[i] != y.set[i]: return false
-      result = x.set.len < y.set.len
-    of pkDictionary:
-      for i in 0..min(x.dict.high, y.dict.high):
-        if x.dict[i].key < y.dict[i].key: return true
-        if x.dict[i].key == y.dict[i].key:
-          if x.dict[i].val < y.dict[i].val: return true
-          if x.dict[i].val != y.dict[i].val: return false
-      result = x.dict.len < y.dict.len
-    of pkEmbedded:
-      when (not A is void) and (A is B):
-        result = x.embed < y.embed
-
-func cmp*[E](x, y: Preserve[E]): int =
-  ## Compare by Preserves total ordering.
-  if x == y: 0
-  elif x < y: -1
-  else: 1
-
-proc sort*[E](pr: var Preserve[E]) = sort(pr.sequence, cmp)
-  ## Sort a Preserves array by total ordering.
-
 proc sortDict[E](pr: var Preserve[E]) =
  sort(pr.dict) do (x, y: DictEntry[E]) -> int:
    cmp(x.key, y.key)
@@ -223,64 +37,6 @@ proc cannonicalize*[E](pr: var Preserve[E]) =
  else:
    discard

-proc hash*[E](pr: Preserve[E]): Hash =
-  ## Produce a `Hash` of `pr` for use with a `HashSet` or `Table`.
-  var h = hash(pr.kind.int) !& hash(pr.embedded)
-  case pr.kind
-  of pkBoolean:
-    h = h !& hash(pr.bool)
-  of pkFloat:
-    h = h !& hash(pr.float)
-  of pkDouble:
-    h = h !& hash(pr.double)
-  of pkSignedInteger:
-    h = h !& hash(pr.int)
-  of pkString:
-    h = h !& hash(pr.string)
-  of pkByteString:
-    h = h !& hash(pr.bytes)
-  of pkSymbol:
-    h = h !& hash(string pr.symbol)
-  of pkRecord:
-    for val in pr.record:
-      h = h !& hash(val)
-  of pkSequence:
-    for val in pr.sequence:
-      h = h !& hash(val)
-  of pkSet:
-    for val in pr.set.items:
-      h = h !& hash(val)
-  of pkDictionary:
-    for (key, val) in pr.dict.items:
-      h = h !& hash(key) !& hash(val)
-  of pkEmbedded:
-    when E is void:
-      h = h !& hash(pr.embed)
-    else:
-      if pr.embed.isNil:
-        h = h !& hash(false)
-      else:
-        h = h !& hash(pr.embed)
-  !$h
-
-proc `[]`*(pr: Preserve; i: int): Preserve =
-  ## Select an indexed value from ``pr``.
-  ## Only valid for records and sequences.
-  case pr.kind
-  of pkRecord: pr.record[i]
-  of pkSequence: pr.sequence[i]
-  else:
-    raise newException(ValueError, "Preserves value is not indexable")
-
-proc `[]=`*(pr: var Preserve; i: Natural; val: Preserve) =
-  ## Assign an indexed value into ``pr``.
-  ## Only valid for records and sequences.
-  case pr.kind
-  of pkRecord: pr.record[i] = val
-  of pkSequence: pr.sequence[i] = val
-  else:
-    raise newException(ValueError, "`Preserves value is not indexable")
-
 proc getOrDefault(pr: Preserve; key: Preserve): Preserve =
  ## Retrieves the value of `pr[key]` if `pr` is a dictionary containing `key`
  ## or returns the `#f` Preserves value.
@@ -302,21 +58,6 @@ proc pop*(pr: var Preserve; key: Preserve; val: var Preserve): bool =
        delete(pr.dict, i, i)
        return true

-proc incl*(pr: var Preserve; key: Preserve) =
-  ## Include `key` in the Preserves set `pr`.
-  for i in 0..pr.set.high:
-    if key < pr.set[i]:
-      insert(pr.set, [key], i)
-      return
-  pr.set.add(key)
-
-proc excl*(pr: var Preserve; key: Preserve) =
-  ## Exclude `key` from the Preserves set `pr`.
-  for i in 0..pr.set.high:
-    if pr.set[i] == key:
-      delete(pr.set, i..i)
-      break
-
 proc `[]`*(pr, key: Preserve): Preserve {.deprecated: "use step instead".} =
  ## Select a value by `key` from `pr`.
  ## Works for sequences, records, and dictionaries.
@@ -362,17 +103,6 @@ func step*[E](pr: Preserve[E]; key: Symbol): Option[Preserve[E]] =
        result = some(v)
        break

-proc `[]=`*(pr: var Preserve; key, val: Preserve) =
-  ## Insert `val` by `key` in the Preserves dictionary `pr`.
-  for i in 0..pr.dict.high:
-    if key < pr.dict[i].key:
-      insert(pr.dict, [(key, val, )], i)
-      return
-    elif key == pr.dict[i].key:
-      pr.dict[i].val = val
-      return
-  pr.dict.add((key, val, ))
-
 proc mget*(pr: var Preserve; key: Preserve): var Preserve =
  ## Select a value by `key` from the Preserves dictionary `pr`.
  if pr.isDictionary:
@@ -551,264 +281,6 @@ proc unembed*[E](pr: Preserve[E]): E =
    raise newException(ValueError, "not an embedded value")
  pr.embed

-proc writeVarint(s: Stream; n: Natural) =
-  var n = n
-  while n > 0x7f:
-    s.write(uint8 n or 0x80)
-    n = n shr 7
-  s.write(uint8 n and 0x7f)
-
-proc readVarint(s: Stream): uint =
-  var
-    shift = 0
-    c = uint s.readUint8
-  while (c and 0x80) == 0x80:
-    result = result or ((c and 0x7f) shl shift)
-    inc(shift, 7)
-    c = uint s.readUint8
-  result = result or (c shl shift)
-
-proc write*[E](str: Stream; pr: Preserve[E]) =
-  ## Write the binary-encoding of a Preserves value to a stream.
-  if pr.embedded: str.write(0x86'u8)
-  case pr.kind:
-  of pkBoolean:
-    case pr.bool
-    of false: str.write(0x80'u8)
-    of true: str.write(0x81'u8)
-  of pkFloat:
-    str.write("\x87\x04")
-    when system.cpuEndian == bigEndian:
-      str.write(pr.float)
-    else:
-      var be: float32
-      swapEndian32(be.addr, pr.float.unsafeAddr)
-      str.write(be)
-  of pkDouble:
-    str.write("\x87\x08")
-    when system.cpuEndian == bigEndian:
-      str.write(pr.double)
-    else:
-      var be: float64
-      swapEndian64(be.addr, pr.double.unsafeAddr)
-      str.write(be)
-  of pkSignedInteger:
-    if pr.int == 0:
-      str.write("\xb0\x00")
-    else:
-      var bitCount = 1'u8
-      if pr.int < 0:
-        while ((not pr.int) shr bitCount) != 0:
-          inc(bitCount)
-      else:
-        while (pr.int shr bitCount) != 0:
-          inc(bitCount)
-      var byteCount = (bitCount + 8) div 8
-      str.write(0xb0'u8)
-      str.writeVarint(byteCount)
-      proc write(n: uint8; i: BiggestInt) =
-        if n > 1:
-          write(n.pred, i shr 8)
-        str.write(i.uint8)
-      write(byteCount, pr.int)
-  of pkString:
-    str.write(0xb1'u8)
-    str.writeVarint(pr.string.len)
-    str.write(pr.string)
-  of pkByteString:
-    str.write(0xb2'u8)
-    str.writeVarint(pr.bytes.len)
-    str.write(cast[string](pr.bytes))
-  of pkSymbol:
-    str.write(0xb3'u8)
-    str.writeVarint(pr.symbol.len)
-    str.write(string pr.symbol)
-  of pkRecord:
-    assert(pr.record.len > 0)
-    str.write(0xb4'u8)
-    str.write(pr.record[pr.record.high])
-    for i in 0..<pr.record.high:
-      str.write(pr.record[i])
-    str.write(0x84'u8)
-  of pkSequence:
-    str.write(0xb5'u8)
-    for e in pr.sequence:
-      str.write(e)
-    str.write(0x84'u8)
-  of pkSet:
-    str.write(0xb6'u8)
-    for val in pr.set.items:
-      str.write(val)
-    str.write(0x84'u8)
-  of pkDictionary:
-    str.write(0xb7'u8)
-    for (key, value) in pr.dict.items:
-      str.write(key)
-      str.write(value)
-    str.write(0x84'u8)
-  of pkEmbedded:
-    str.write(0x86'u8)
-    str.write(pr.embed.toPreserve)
-
-proc encode*[E](pr: Preserve[E]): seq[byte] =
-  ## Return the binary-encoding of a Preserves value.
-  let s = newStringStream()
-  s.write pr
-  result = cast[seq[byte]](move s.data)
-
-proc decodePreserves*(s: Stream; E = void): Preserve[E] =
-  ## Decode a Preserves value from a binary-encoded stream.
-  if s.atEnd: raise newException(IOError, "End of Preserves stream")
-  const endMarker = 0x84
-  let tag = s.readUint8()
-  case tag
-  of 0x80: result = Preserve[E](kind: pkBoolean, bool: false)
-  of 0x81: result = Preserve[E](kind: pkBoolean, bool: true)
-  of 0x85:
-    discard decodePreserves(s, E)
-    while s.peekUint8() == 0x85:
-      discard s.readUint8()
-      discard decodePreserves(s, E)
-  of 0x86:
-    result = decodePreserves(s, E)
-    result.embedded = true
-  of 0x87:
-    let n = s.readUint8()
-    case n
-    of 4:
-      when system.cpuEndian == bigEndian:
-        result = Preserve[E](kind: pkFloat, float: s.readFloat32())
-      else:
-        result = Preserve[E](kind: pkFloat)
-        var be = s.readFloat32()
-        swapEndian32(result.float.addr, be.addr)
-    of 8:
-      when system.cpuEndian == bigEndian:
-        result = Preserve[E](kind: pkDouble, double: s.readFloat64())
-      else:
-        result = Preserve[E](kind: pkDouble)
-        var be = s.readFloat64()
-        swapEndian64(result.double.addr, be.addr)
-    else:
-      raise newException(IOError, "unhandled IEEE754 value of " & $n & " bytes")
-  of 0xb1:
-    var data = newString(s.readVarint())
-    if data.len > 0:
-      let n = s.readData(unsafeAddr data[0], data.len)
-      if n != data.len:
-        raise newException(IOError, "short read")
-    result = Preserve[E](kind: pkString, string: data)
-  of 0xb2:
-    var data = newSeq[byte](s.readVarint())
-    if data.len > 0:
-      let n = s.readData(addr data[0], data.len)
-      if n != data.len:
-        raise newException(IOError, "short read")
-    result = Preserve[E](kind: pkByteString, bytes: data)
-  of 0xb3:
-    var data = newString(s.readVarint())
-    if data.len > 0:
-      let n = s.readData(addr data[0], data.len)
-      if n != data.len:
-        raise newException(IOError, "short read")
-    result = Preserve[E](kind: pkSymbol, symbol: Symbol data)
-  of 0xb4:
-    result = Preserve[E](kind: pkRecord)
-    var label = decodePreserves(s, E)
-    while s.peekUint8() != endMarker:
-      result.record.add decodePreserves(s, E)
-    result.record.add(move label)
-    discard s.readUint8()
-  of 0xb5:
-    result = Preserve[E](kind: pkSequence)
-    while s.peekUint8() != endMarker:
-      result.sequence.add decodePreserves(s, E)
-    discard s.readUint8()
-  of 0xb6:
-    result = Preserve[E](kind: pkSet)
-    while s.peekUint8() != endMarker:
-      incl(result, decodePreserves(s, E))
-    discard s.readUint8()
-  of 0xb7:
-    result = Preserve[E](kind: pkDictionary)
-    while s.peekUint8() != endMarker:
-      result[decodePreserves(s, E)] = decodePreserves(s, E)
-    discard s.readUint8()
-  of 0xb0:
-    var len = s.readVarint()
-    result = Preserve[E](kind: pkSignedInteger)
-    if len > 0:
-      if (s.peekUint8() and 0x80) == 0x80:
-        result.int = BiggestInt -1
-      while len > 0:
-        result.int = (result.int shl 8) + s.readUint8().BiggestInt
-        dec(len)
-
-  of endMarker:
-    raise newException(ValueError, "invalid Preserves stream")
-  else:
-    raise newException(ValueError, "invalid Preserves tag byte 0x" & tag.toHex(2))
-
-proc decodePreserves*(s: string; E = void): Preserve[E] =
-  ## Decode a string of binary-encoded Preserves.
-  decodePreserves(s.newStringStream, E)
-
-proc decodePreserves*(s: seq[byte]; E = void): Preserve[E] =
-  ## Decode a byte-string of binary-encoded Preserves.
-  decodePreserves(cast[string](s), E)
-
-type BufferedDecoder* = object
-  ## Type for buffering binary Preserves before decoding.
-  stream: StringStream
-  appendPosition, decodePosition, maxSize: int
-
-proc newBufferedDecoder*(maxSize = 4096): BufferedDecoder =
-  ## Create a new `newBufferedDecoder`.
-  runnableExamples:
-    var
-      buf = newBufferedDecoder()
-      bin = encode(parsePreserves("<foobar>"))
-    buf.feed(bin[0..2])
-    buf.feed(bin[3..bin.high])
-    var (success, pr) = decode(buf)
-    assert success
-    assert $pr == "<foobar>"
-  BufferedDecoder(
-      stream: newStringStream(newStringOfCap(maxSize)),
-      maxSize: maxSize,
-    )
-
-proc feed*(dec: var BufferedDecoder; buf: pointer; len: int) =
-  assert len > 0
-  if dec.maxSize > 0 and dec.maxSize < (dec.appendPosition + len):
-    raise newException(IOError, "BufferedDecoder at maximum buffer size")
-  dec.stream.setPosition(dec.appendPosition)
-  dec.stream.writeData(buf, len)
-  inc(dec.appendPosition, len)
-  assert dec.appendPosition == dec.stream.getPosition()
-
-proc feed*[T: byte|char](dec: var BufferedDecoder; data: openarray[T]) =
-  if data.len > 0:
-    dec.feed(unsafeAddr data[0], data.len)
-
-proc decode*(dec: var BufferedDecoder; E = void): (bool, Preserve[E]) =
-  ## Decode from `dec`. If decoding fails the internal position of the
-  ## decoder does not advance.
-  if dec.appendPosition > 0:
-    assert(dec.decodePosition < dec.appendPosition)
-    dec.stream.setPosition(dec.decodePosition)
-    try:
-      result[1] = decodePreserves(dec.stream, E)
-      result[0] = true
-      dec.decodePosition = dec.stream.getPosition()
-      if dec.decodePosition == dec.appendPosition:
-        dec.stream.setPosition(0)
-        dec.stream.data.setLen(0)
-        dec.appendPosition = 0
-        dec.decodePosition = 0
-    except IOError:
-      discard
-
 template preservesRecord*(label: string) {.pragma.}
  ## Serialize this object or tuple as a record.
  ## See ``toPreserve``.
@@ -1415,131 +887,3 @@ proc getOrDefault*[T, V](pr: Preserve[T]; key: string; default: V): V =
        if fromPreserve(result, v): return
        else: break
  default
-
-type TextMode* = enum textPreserves, textJson
-
-proc writeText*[E](stream: Stream; pr: Preserve[E]; mode = textPreserves) =
-  ## Encode Preserves to a `Stream` as text.
-  if pr.embedded: write(stream, "#!")
-  case pr.kind:
-  of pkBoolean:
-    case pr.bool
-    of false: write(stream, "#f")
-    of true: write(stream, "#t")
-  of pkFloat:
-    write(stream, $pr.float)
-    write(stream, 'f')
-  of pkDouble:
-    write(stream, $pr.double)
-  of pkSignedInteger:
-    write(stream, $pr.int)
-  of pkString:
-    write(stream, escapeJson(pr.string))
-  of pkByteString:
-    if pr.bytes.allIt(char(it) in {' '..'!', '#'..'~'}):
-      write(stream, "#\"")
-      write(stream, cast[string](pr.bytes))
-      write(stream, '"')
-    else:
-      if pr.bytes.len > 64:
-        write(stream, "#[") #]#
-        write(stream, base64.encode(pr.bytes))
-        write(stream, ']')
-      else:
-        const alphabet = "0123456789abcdef"
-        write(stream, "#x\"")
-        for b in pr.bytes:
-          write(stream, alphabet[int(b shr 4)])
-          write(stream, alphabet[int(b and 0xf)])
-        write(stream, '"')
-  of pkSymbol:
-    let sym = pr.symbol.string
-    if sym.len > 0 and sym[0] in {'A'..'z'} and not sym.anyIt(char(it) in { '\x00'..'\x19', '"', '\\', '|' }):
-      write(stream, sym)
-    else:
-      write(stream, '|')
-      for c in sym:
-        case c
-        of '\\':
-          write(stream, "\\\\")
-        of '/':
-          write(stream, "\\/")
-        of '\x08':
-          write(stream, "\\b")
-        of '\x0c':
-          write(stream, "\\f")
-        of '\x0a':
-          write(stream, "\\n")
-        of '\x0d':
-          write(stream, "\\r")
-        of '\x09':
-          write(stream, "\\t")
-        of '|':
-          write(stream, "\\|")
-        else:
-          write(stream, c)
-      write(stream, '|')
-  of pkRecord:
-    assert(pr.record.len > 0)
-    write(stream, '<')
-    writeText(stream, pr.record[pr.record.high], mode)
-    for i in 0..<pr.record.high:
-      write(stream, ' ')
-      writeText(stream, pr.record[i], mode)
-    write(stream, '>')
-  of pkSequence:
-    write(stream, '[')
-    var insertSeperator: bool
-    case mode
-    of textPreserves:
-      for val in pr.sequence:
-        if insertSeperator: write(stream, ' ')
-        else: insertSeperator = true
-        writeText(stream, val, mode)
-    of textJson:
-      for val in pr.sequence:
-        if insertSeperator: write(stream, ',')
-        else: insertSeperator = true
-        writeText(stream, val, mode)
-    write(stream, ']')
-  of pkSet:
-    write(stream, "#{")
-    var insertSeperator: bool
-    for val in pr.set.items:
-      if insertSeperator: write(stream, ' ')
-      else: insertSeperator = true
-      writeText(stream, val, mode)
-    write(stream, '}')
-  of pkDictionary:
-    write(stream, '{')
-    var insertSeperator: bool
-    case mode
-    of textPreserves:
-      for (key, value) in pr.dict.items:
-        if insertSeperator: write(stream, ' ')
-        else: insertSeperator = true
-        writeText(stream, key, mode)
-        write(stream, ": ")
-        writeText(stream, value, mode)
-    of textJson:
-      for (key, value) in pr.dict.items:
-        if insertSeperator: write(stream, ',')
-        else: insertSeperator = true
-        writeText(stream, key, mode)
-        write(stream, ':')
-        writeText(stream, value, mode)
-    write(stream, '}')
-  of pkEmbedded:
-    write(stream, "#!")
-    when compiles($pr.embed) and not E is void:
-      write(stream, $pr.embed)
-    else:
-      write(stream, "…")
-
-proc `$`*[E](pr: Preserve[E]): string =
-  ## Generate the textual representation of ``pr``.
-  var stream = newStringStream()
-  writeText(stream, pr, textPreserves)
-  result = move stream.data
-
-include ./preserves/private/parse
--- a/src/preserves/Tupfile
+++ b/src/preserves/Tupfile
@@ -1,4 +1,7 @@
 include_rules
 NIM_FLAGS += --path:$(TUP_CWD)/..
-: foreach preserves_schema_nim.nim schemac.nim schemaparse.nim |> !nim_bin |> $(BIN_DIR)/%B | $(BIN_DIR)/<%B>
-: foreach *hooks.nim |> !nim_check |>
+: foreach preserves_schema_nim.nim schemaparse.nim |> !nim_bin |> $(BIN_DIR)/%B | $(BIN_DIR)/<%B>
+
+DOT_FILES = ../../Document.dot ../../Schema.dot
+: schemac.nim |> !nim_bin |> $(BIN_DIR)/%B | $(DOT_FILES) $(BIN_DIR)/<%B>
+: foreach $(DOT_FILES) |> dot -Tsvg -LO %f > %o |> ../../%B-Grammer-Graph.svg
--- a/src/preserves/pegs.nim
+++ b/src/preserves/pegs.nim
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2021 ☭ Emery Hemingway
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
 # SPDX-License-Identifier: Unlicense

 ## NPEG rules for Preserves.
@@ -9,58 +9,59 @@ when defined(nimHasUsed): {.used.}

 grammar "Preserves":

+  ws <- *(' ' | '\t' | '\r' | '\n' )
+  commas <- *(ws * ',') * ws
+
  Document <- Value * ws * !1

  Value <-
      (ws * (Record | Collection | Atom | Embedded | Compact)) |
      (ws * Annotation) |
-      (ws * ';' * @'\n' * Value)
+      (ws * '#' * @'\n' * Value)

  Collection <- Sequence | Dictionary | Set

-  Atom <- Boolean | Float | Double | SignedInteger | String | ByteString | Symbol
+  Atom <- Boolean | Float | Double | FloatRaw | DoubleRaw | SignedInteger | String | ByteString | Symbol

-  Record <- '<' * Value * *Value * ws * '>'
+  Record <- '<' * +Value * ws * '>'

-  Sequence <- '[' * ws * *(Value * ws) * ']'
+  Sequence <- '[' * *(commas * Value) * commas * ']'

-  Dictionary <- '{' * ws * *(Value * ws * ':' * ws * Value * ws) * '}'
+  Dictionary <- '{' * *(commas * Value * ws * ':' * Value) * commas * '}'

-  Set <- "#{" * ws * *(Value * ws) * '}'
+  Set <- "#{" * *(commas * Value) * commas * '}'

  Boolean <- "#f" | "#t"

-  Float <- >flt * 'f'
-  Double <- flt
-  SignedInteger <- int
-
  nat <- '0' | (Digit-'0') * *Digit
  int <- ?'-' * nat
  frac <- '.' * +Digit
  exp <- 'e' * ?('-'|'+') * +Digit
  flt <- int * ((frac * exp) | frac | exp)

+  Float <- >flt * 'f'
+  Double <- flt
+
+  SignedInteger <- int
+
  char <- unescaped | '|' | (escape * (escaped | '"' | ('u' * Xdigit[4])))
  String <- '"' * >(*char) * '"'

  ByteString <- charByteString | hexByteString | b64ByteString
  charByteString <- "#\"" * >(*binchar) * '"'
-  hexByteString <- "#x\"" * ws * >(*(Xdigit[2] * ws)) * '"'
-  b64ByteString <- "#[" * ws * >(*(base64char * ws)) * ']'
+  hexByteString <- "#x\"" * >(*(ws * Xdigit[2])) * ws * '"'
+  base64char <- {'A'..'Z', 'a'..'z', '0'..'9', '+', '/', '-', '_', '='}
+  b64ByteString <- "#[" * >(*(ws * base64char)) * ws * ']'

  binchar <- binunescaped | (escape * (escaped | '"' | ('x' * Xdigit[2])))
  binunescaped <- {' '..'!', '#'..'[', ']'..'~'}
-  base64char <- {'A'..'Z', 'a'..'z', '0'..'9', '+', '/', '-', '_', '='}

-  Symbol <- >(symstart * *symcont) | ('|' * >(*symchar) * '|')
-
-  symstart <- Alpha | sympunct | symustart
-  symcont <- Alpha | sympunct | symustart | symucont | Digit | '-'
-  sympunct <- {'~', '!', '$', '%', '^', '&', '*', '?', '_', '=', '+', '/', '.'}
-  symchar <- unescaped | '"' | (escape * (escaped | '|' | ('u' * Xdigit)))
-  symustart <- utf8.any - {0..127}
-  symucont <- utf8.any - {0..127}
-    # TODO: exclude some unicode ranges
+  symchar <- (utf8.any - { 0..127, '\\', '|' }) | (escape * (escaped | ('u' * Xdigit[4]))) | "\\|"
+  QuotedSymbol <- '|' * >(*symchar) * '|'
+  sympunct <- {'~', '!', '$', '%', '^', '&', '*', '?', '_', '=', '+', '-', '/', '.'}
+  symuchar <- utf8.any - { 0..127 }
+  SymbolOrNumber <- >(+(Alpha | Digit | sympunct | symuchar))
+  Symbol <- QuotedSymbol | SymbolOrNumber

  Embedded <- "#!" * Value

@@ -73,4 +74,5 @@ grammar "Preserves":
  escaped <- {'\\', '/', 'b', 'f', 'n', 'r', 't'}
  escape <- '\\'

-  ws <- *(' ' | '\t' | '\r' | '\n' | ',')
+  FloatRaw <- "#xf\"" * >((ws * Xdigit[2])[4]) * ws * '"'
+  DoubleRaw <- "#xd\"" * >((ws * Xdigit[2])[8]) * ws * '"'
--- a/src/preserves/private/decoding.nim
+++ b/src/preserves/private/decoding.nim
@@ -0,0 +1,166 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[endians, streams, strutils]
+import ./values
+
+proc readVarint(s: Stream): uint =
+  var
+    shift = 0
+    c = uint s.readUint8
+  while (c and 0x80) == 0x80:
+    result = result or ((c and 0x7f) shl shift)
+    inc(shift, 7)
+    c = uint s.readUint8
+  result = result or (c shl shift)
+
+proc decodePreserves*(s: Stream; E = void): Preserve[E] =
+  ## Decode a Preserves value from a binary-encoded stream.
+  if s.atEnd: raise newException(IOError, "End of Preserves stream")
+  const endMarker = 0x84
+  let tag = s.readUint8()
+  case tag
+  of 0x80: result = Preserve[E](kind: pkBoolean, bool: false)
+  of 0x81: result = Preserve[E](kind: pkBoolean, bool: true)
+  of 0x85:
+    discard decodePreserves(s, E)
+    result = decodePreserves(s, E)
+  of 0x86:
+    result = decodePreserves(s, E)
+    result.embedded = true
+  of 0x87:
+    let n = s.readUint8()
+    case n
+    of 4:
+      when system.cpuEndian == bigEndian:
+        result = Preserve[E](kind: pkFloat, float: s.readFloat32())
+      else:
+        result = Preserve[E](kind: pkFloat)
+        var be = s.readFloat32()
+        swapEndian32(result.float.addr, be.addr)
+    of 8:
+      when system.cpuEndian == bigEndian:
+        result = Preserve[E](kind: pkDouble, double: s.readFloat64())
+      else:
+        result = Preserve[E](kind: pkDouble)
+        var be = s.readFloat64()
+        swapEndian64(result.double.addr, be.addr)
+    else:
+      raise newException(IOError, "unhandled IEEE754 value of " & $n & " bytes")
+  of 0xb1:
+    var data = newString(s.readVarint())
+    if data.len > 0:
+      let n = s.readData(unsafeAddr data[0], data.len)
+      if n != data.len:
+        raise newException(IOError, "short read")
+    result = Preserve[E](kind: pkString, string: data)
+  of 0xb2:
+    var data = newSeq[byte](s.readVarint())
+    if data.len > 0:
+      let n = s.readData(addr data[0], data.len)
+      if n != data.len:
+        raise newException(IOError, "short read")
+    result = Preserve[E](kind: pkByteString, bytes: data)
+  of 0xb3:
+    var data = newString(s.readVarint())
+    if data.len > 0:
+      let n = s.readData(addr data[0], data.len)
+      if n != data.len:
+        raise newException(IOError, "short read")
+    result = Preserve[E](kind: pkSymbol, symbol: Symbol data)
+  of 0xb4:
+    result = Preserve[E](kind: pkRecord)
+    var label = decodePreserves(s, E)
+    while s.peekUint8() != endMarker:
+      result.record.add decodePreserves(s, E)
+    result.record.add(move label)
+    discard s.readUint8()
+  of 0xb5:
+    result = Preserve[E](kind: pkSequence)
+    while s.peekUint8() != endMarker:
+      result.sequence.add decodePreserves(s, E)
+    discard s.readUint8()
+  of 0xb6:
+    result = Preserve[E](kind: pkSet)
+    while s.peekUint8() != endMarker:
+      incl(result, decodePreserves(s, E))
+    discard s.readUint8()
+  of 0xb7:
+    result = Preserve[E](kind: pkDictionary)
+    while s.peekUint8() != endMarker:
+      result[decodePreserves(s, E)] = decodePreserves(s, E)
+    discard s.readUint8()
+  of 0xb0:
+    var len = s.readVarint()
+    result = Preserve[E](kind: pkSignedInteger)
+    if len > 0:
+      if (s.peekUint8() and 0x80) == 0x80:
+        result.int = BiggestInt -1
+      while len > 0:
+        result.int = (result.int shl 8) + s.readUint8().BiggestInt
+        dec(len)
+
+  of endMarker:
+    raise newException(ValueError, "invalid Preserves stream")
+  else:
+    raise newException(ValueError, "invalid Preserves tag byte 0x" & tag.toHex(2))
+
+proc decodePreserves*(s: string; E = void): Preserve[E] =
+  ## Decode a string of binary-encoded Preserves.
+  decodePreserves(s.newStringStream, E)
+
+proc decodePreserves*(s: seq[byte]; E = void): Preserve[E] =
+  ## Decode a byte-string of binary-encoded Preserves.
+  decodePreserves(cast[string](s), E)
+
+type BufferedDecoder* = object
+  ## Type for buffering binary Preserves before decoding.
+  stream: StringStream
+  appendPosition, decodePosition, maxSize: int
+
+proc newBufferedDecoder*(maxSize = 4096): BufferedDecoder =
+  ## Create a new `BufferedDecoder`.
+  runnableExamples:
+    var
+      buf = newBufferedDecoder()
+      bin = encode(parsePreserves("<foobar>"))
+    buf.feed(bin[0..2])
+    buf.feed(bin[3..bin.high])
+    var (success, pr) = decode(buf)
+    assert success
+    assert $pr == "<foobar>"
+  BufferedDecoder(
+      stream: newStringStream(newStringOfCap(maxSize)),
+      maxSize: maxSize,
+    )
+
+proc feed*(dec: var BufferedDecoder; buf: pointer; len: int) =
+  assert len > 0
+  if dec.maxSize > 0 and dec.maxSize < (dec.appendPosition + len):
+    raise newException(IOError, "BufferedDecoder at maximum buffer size")
+  dec.stream.setPosition(dec.appendPosition)
+  dec.stream.writeData(buf, len)
+  inc(dec.appendPosition, len)
+  assert dec.appendPosition == dec.stream.getPosition()
+
+proc feed*[T: byte|char](dec: var BufferedDecoder; data: openarray[T]) =
+  if data.len > 0:
+    dec.feed(unsafeAddr data[0], data.len)
+
+proc decode*(dec: var BufferedDecoder; E = void): (bool, Preserve[E]) =
+  ## Decode from `dec`. If decoding fails the internal position of the
+  ## decoder does not advance.
+  if dec.appendPosition > 0:
+    assert(dec.decodePosition < dec.appendPosition)
+    dec.stream.setPosition(dec.decodePosition)
+    try:
+      result[1] = decodePreserves(dec.stream, E)
+      result[0] = true
+      dec.decodePosition = dec.stream.getPosition()
+      if dec.decodePosition == dec.appendPosition:
+        dec.stream.setPosition(0)
+        dec.stream.data.setLen(0)
+        dec.appendPosition = 0
+        dec.decodePosition = 0
+    except IOError:
+      discard
--- a/src/preserves/private/encoding.nim
+++ b/src/preserves/private/encoding.nim
@ -0,0 +1,100 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[endians, options, sets, sequtils, streams, tables, typetraits]
+import ./values
+
+proc writeVarint(s: Stream; n: Natural) =
+  ## Write `n` to `s` as a sequence of seven-bit groups, least
+  ## significant group first. Every byte except the last has its
+  ## high bit set.
+  var rest = int(n)
+  while rest > 0x7f:
+    s.write(uint8((rest and 0x7f) or 0x80))
+    rest = rest shr 7
+  s.write(uint8(rest and 0x7f))
+
+proc write*[E](str: Stream; pr: Preserve[E]) =
+  ## Write the binary-encoding of a Preserves value to a stream.
+  ## Compound values are written by calling this proc on each member.
+  # a set `embedded` flag is emitted as a 0x86 byte ahead of the value
+  if pr.embedded: str.write(0x86'u8)
+  case pr.kind:
+  of pkBoolean:
+    case pr.bool
+    of false: str.write(0x80'u8)
+    of true: str.write(0x81'u8)
+  of pkFloat:
+    # two-byte header followed by the four float bytes in big-endian order
+    str.write("\x87\x04")
+    when system.cpuEndian == bigEndian:
+      str.write(pr.float)
+    else:
+      var be: float32
+      swapEndian32(be.addr, pr.float.unsafeAddr)
+      str.write(be)
+  of pkDouble:
+    # two-byte header followed by the eight double bytes in big-endian order
+    str.write("\x87\x08")
+    when system.cpuEndian == bigEndian:
+      str.write(pr.double)
+    else:
+      var be: float64
+      swapEndian64(be.addr, pr.double.unsafeAddr)
+      str.write(be)
+  of pkSignedInteger:
+    if pr.int == 0:
+      str.write("\xb0\x00")
+    else:
+      # count how many low bits must be kept before only sign bits remain;
+      # negative values are complemented so the same shift test applies
+      var bitCount = 1'u8
+      if pr.int < 0:
+        while ((not pr.int) shr bitCount) != 0:
+          inc(bitCount)
+      else:
+        while (pr.int shr bitCount) != 0:
+          inc(bitCount)
+      # bitCount plus one sign bit, rounded up to whole bytes
+      var byteCount = (bitCount + 8) div 8
+      str.write(0xb0'u8)
+      str.writeVarint(byteCount)
+      # local helper: recurses before writing so that the most
+      # significant byte reaches the stream first
+      proc write(n: uint8; i: BiggestInt) =
+        if n > 1:
+          write(n.pred, i shr 8)
+        str.write(i.uint8)
+      write(byteCount, pr.int)
+  of pkString:
+    # tag byte, varint length, then the string bytes
+    str.write(0xb1'u8)
+    str.writeVarint(pr.string.len)
+    str.write(pr.string)
+  of pkByteString:
+    str.write(0xb2'u8)
+    str.writeVarint(pr.bytes.len)
+    str.write(cast[string](pr.bytes))
+  of pkSymbol:
+    str.write(0xb3'u8)
+    str.writeVarint(pr.symbol.len)
+    str.write(string pr.symbol)
+  of pkRecord:
+    assert(pr.record.len > 0)
+    str.write(0xb4'u8)
+    # the last element of `record` is written first, then the others in order
+    str.write(pr.record[pr.record.high])
+    for i in 0..<pr.record.high:
+      str.write(pr.record[i])
+    str.write(0x84'u8)
+  of pkSequence:
+    # members are written between a 0xb5 opener and a 0x84 terminator
+    str.write(0xb5'u8)
+    for e in pr.sequence:
+      str.write(e)
+    str.write(0x84'u8)
+  of pkSet:
+    # members are written in their stored order
+    str.write(0xb6'u8)
+    for val in pr.set.items:
+      str.write(val)
+    str.write(0x84'u8)
+  of pkDictionary:
+    # each entry is written as its key followed by its value
+    str.write(0xb7'u8)
+    for (key, value) in pr.dict.items:
+      str.write(key)
+      str.write(value)
+    str.write(0x84'u8)
+  of pkEmbedded:
+    # NOTE(review): `toPreserve` is not defined in this module's visible
+    # imports; presumably resolved where the generic is instantiated.
+    str.write(0x86'u8)
+    str.write(pr.embed.toPreserve)
+
+proc encode*[E](pr: Preserve[E]): seq[byte] =
+  ## Binary-encode `pr` and return the bytes as a new sequence.
+  let buffer = newStringStream()
+  write(buffer, pr)
+  # hand the stream's string over instead of copying it
+  cast[seq[byte]](move buffer.data)
--- a/src/preserves/private/parsing.nim
+++ b/src/preserves/private/parsing.nim
@ -1,16 +1,12 @@
-# SPDX-FileCopyrightText: 2021 ☭ Emery Hemingway
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
 # SPDX-License-Identifier: Unlicense

-# this module is included in ../../preserves.nim
-
-import std/[parseutils, unicode]
-
-when isMainModule:
-  import std/strutils
-  from std/sequtils import insert
+import std/[base64, parseutils, strutils, unicode]
+from std/sequtils import insert

 import npeg
 import ../pegs
+import ./decoding, ./values

 type
  Value = Preserve[void]
@ -78,6 +74,14 @@ template unescape(buf: var seq[byte]; capture: string) =
      add(buf, byte capture[i])
    inc(i)

+proc pushHexNibble[T](result: var T; c: char) =
+  ## Shift `result` left by four bits and put the value of the hex
+  ## digit `c` into the freed low bits. A character that is not a hex
+  ## digit contributes zero.
+  let digit =
+    if c in {'0'..'9'}: ord(c) - ord('0')
+    elif c in {'a'..'f'}: ord(c) - ord('a') + 10
+    elif c in {'A'..'F'}: ord(c) - ord('A') + 10
+    else: 0
+  result = (result shl 4) or T(digit)
+
 proc parsePreserves*(text: string): Preserve[void] =
  ## Parse a text-encoded Preserves `string` to a `Preserve` value.
  runnableExamples:
@ -142,6 +146,16 @@ proc parsePreserves*(text: string): Preserve[void] =
      let i = stack.high
      discard parseBiggestFloat($0, stack[i].value.double)

+    Preserves.FloatRaw <- Preserves.FloatRaw:
+      var reg: uint32
+      for c in $1: pushHexNibble(reg, c)
+      pushStack Value(kind: pkFloat, float: cast[float32](reg))
+
+    Preserves.DoubleRaw <- Preserves.DoubleRaw:
+      var reg: uint64
+      for c in $1: pushHexNibble(reg, c)
+      pushStack Value(kind: pkDouble, double: cast[float64](reg))
+
    Preserves.SignedInteger <- Preserves.SignedInteger:
      pushStack Value(kind: pkSignedInteger, int: parseInt($0))

--- a/src/preserves/private/texts.nim
+++ b/src/preserves/private/texts.nim
@ -0,0 +1,160 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[base64, json, options, sets, sequtils, streams, strutils, tables, typetraits]
+import ./values
+
+proc `$`*(s: Symbol): string =
+  ## Render a symbol as Preserves text.
+  ## The symbol is written bare only when it starts with an ASCII letter,
+  ## `^` or `_` and consists solely of characters from `bareChars` below;
+  ## any other symbol (including the empty one) is written between `|`
+  ## delimiters with backslash escapes.
+  const
+    # the previous `'A'..'z'` range also admitted `[`, `\`, `]` and the
+    # backtick, which cannot begin a bare symbol
+    bareStart = {'A'..'Z', 'a'..'z', '^', '_'}
+    # whitelist instead of a reject list so that whitespace and
+    # delimiters such as `<`, `[`, `:` or `#` always force quoting;
+    # bytes >= 0x80 are kept bare as before (UTF-8 encoded text)
+    bareChars = {'A'..'Z', 'a'..'z', '0'..'9',
+      '~', '!', '$', '%', '^', '&', '*', '?', '_', '=', '+', '-', '/', '.',
+      '\x80'..'\xff'}
+  let sym = string s
+  if sym.len > 0 and sym[0] in bareStart and sym.allIt(it in bareChars):
+    result = sym
+  else:
+    result = newStringOfCap(sym.len shl 1)
+    result.add('|')
+    for c in sym:
+      case c
+      of '\\':
+        result.add("\\\\")
+      of '/':
+        result.add("\\/")
+      of '\x08':
+        result.add("\\b")
+      of '\x0c':
+        result.add("\\f")
+      of '\x0a':
+        result.add("\\n")
+      of '\x0d':
+        result.add("\\r")
+      of '\x09':
+        result.add("\\t")
+      of '|':
+        result.add("\\|")
+      else:
+        result.add(c)
+    result.add('|')
+
+type
+  TextMode* = enum ## Selects the flavour of text produced by `writeText`.
+    textPreserves,
+    textJson
+
+proc writeText*[E](stream: Stream; pr: Preserve[E]; mode = textPreserves) =
+  ## Encode Preserves to a `Stream` as text.
+  ## `mode` selects the separators used inside sequences and
+  ## dictionaries: spaces for `textPreserves`, commas for `textJson`.
+  ## Members of compound values are written recursively with the
+  ## same `mode`.
+  if pr.embedded: write(stream, "#!")
+  case pr.kind:
+  of pkBoolean:
+    case pr.bool
+    of false: write(stream, "#f")
+    of true: write(stream, "#t")
+  of pkFloat:
+    # single-precision values carry an `f` suffix
+    write(stream, $pr.float)
+    write(stream, 'f')
+  of pkDouble:
+    write(stream, $pr.double)
+  of pkSignedInteger:
+    write(stream, $pr.int)
+  of pkString:
+    write(stream, escapeJson(pr.string))
+  of pkByteString:
+    # Printable ASCII is written verbatim as #"…". The double quote and
+    # the backslash are excluded because the literal form treats them
+    # as terminator and escape introducer; such bytes fall through to
+    # the hex or base64 forms instead.
+    if pr.bytes.allIt(char(it) in {' '..'!', '#'..'[', ']'..'~'}):
+      write(stream, "#\"")
+      write(stream, cast[string](pr.bytes))
+      write(stream, '"')
+    else:
+      if pr.bytes.len > 64:
+        write(stream, "#[") #]#
+        write(stream, base64.encode(pr.bytes))
+        write(stream, ']')
+      else:
+        const alphabet = "0123456789abcdef"
+        write(stream, "#x\"")
+        for b in pr.bytes:
+          write(stream, alphabet[int(b shr 4)])
+          write(stream, alphabet[int(b and 0xf)])
+        write(stream, '"')
+  of pkSymbol:
+    # share the quoting rules with `$` for `Symbol` rather than
+    # keeping a second copy of them here
+    write(stream, $pr.symbol)
+  of pkRecord:
+    assert(pr.record.len > 0)
+    write(stream, '<')
+    # the label is stored last but written first
+    writeText(stream, pr.record[pr.record.high], mode)
+    for i in 0..<pr.record.high:
+      write(stream, ' ')
+      writeText(stream, pr.record[i], mode)
+    write(stream, '>')
+  of pkSequence:
+    write(stream, '[')
+    var insertSeperator: bool
+    case mode
+    of textPreserves:
+      for val in pr.sequence:
+        if insertSeperator: write(stream, ' ')
+        else: insertSeperator = true
+        writeText(stream, val, mode)
+    of textJson:
+      for val in pr.sequence:
+        if insertSeperator: write(stream, ',')
+        else: insertSeperator = true
+        writeText(stream, val, mode)
+    write(stream, ']')
+  of pkSet:
+    write(stream, "#{")
+    var insertSeperator: bool
+    for val in pr.set.items:
+      if insertSeperator: write(stream, ' ')
+      else: insertSeperator = true
+      writeText(stream, val, mode)
+    write(stream, '}')
+  of pkDictionary:
+    write(stream, '{')
+    var insertSeperator: bool
+    case mode
+    of textPreserves:
+      for (key, value) in pr.dict.items:
+        if insertSeperator: write(stream, ' ')
+        else: insertSeperator = true
+        writeText(stream, key, mode)
+        write(stream, ": ")
+        writeText(stream, value, mode)
+    of textJson:
+      for (key, value) in pr.dict.items:
+        if insertSeperator: write(stream, ',')
+        else: insertSeperator = true
+        writeText(stream, key, mode)
+        write(stream, ':')
+        writeText(stream, value, mode)
+    write(stream, '}')
+  of pkEmbedded:
+    write(stream, "#!")
+    # fall back to a placeholder when the embedded type cannot be stringified
+    when compiles($pr.embed) and not E is void:
+      write(stream, $pr.embed)
+    else:
+      write(stream, "…")
+
+proc `$`*[E](pr: Preserve[E]): string =
+  ## Return ``pr`` rendered as text in `textPreserves` mode.
+  let buffer = newStringStream()
+  buffer.writeText(pr, textPreserves)
+  move buffer.data
--- a/src/preserves/private/values.nim
+++ b/src/preserves/private/values.nim
@ -0,0 +1,247 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[hashes, options, sets, sequtils, tables]
+
+type
+  PreserveKind* = enum ## Discriminator for the variants of `Preserve`.
+    pkBoolean,
+    pkFloat,
+    pkDouble,
+    pkSignedInteger,
+    pkString,
+    pkByteString,
+    pkSymbol,
+    pkRecord,
+    pkSequence,
+    pkSet,
+    pkDictionary,
+    pkEmbedded
+
+const
+  atomKinds* = {pkBoolean..pkSymbol}
+    ## The kinds from `pkBoolean` through `pkSymbol`.
+  compoundKinds* = {pkRecord..pkDictionary}
+    ## The kinds from `pkRecord` through `pkDictionary`.
+
+# A string-backed type that is kept distinct from `string`.
+type Symbol* = distinct string
+# Comparison, hashing and length are borrowed from the underlying string.
+proc `<`*(x, y: Symbol): bool {.borrow.}
+proc `==`*(x, y: Symbol): bool {.borrow.}
+proc hash*(s: Symbol): Hash {.borrow.}
+proc len*(s: Symbol): int {.borrow.}
+
+type
+  Preserve*[E] = object
+    ## A Preserves value. `kind` selects which one of the payload
+    ## fields below is present; `E` is the type carried by `pkEmbedded`.
+    embedded*: bool
+      ## Flag to mark embedded Preserves
+    case kind*: PreserveKind
+    of pkBoolean:
+      bool*: bool
+    of pkFloat:
+      float*: float32
+    of pkDouble:
+      double*: float64
+    of pkSignedInteger:
+      int*: BiggestInt
+    of pkString:
+      string*: string
+    of pkByteString:
+      bytes*: seq[byte]
+    of pkSymbol:
+      symbol*: Symbol
+    of pkRecord:
+      record*: seq[Preserve[E]] # label is last
+    of pkSequence:
+      sequence*: seq[Preserve[E]]
+    of pkSet:
+      set*: seq[Preserve[E]]
+        # TODO: HashSet
+    of pkDictionary:
+      dict*: seq[DictEntry[E]]
+        # TODO: Tables
+    of pkEmbedded:
+      embed*: E
+
+  DictEntry*[E] = tuple[key: Preserve[E], val: Preserve[E]]
+    ## One key/value pair of a `pkDictionary` value.
+
+func `==`*[A, B](x: Preserve[A]; y: Preserve[B]): bool =
+  ## Check `x` and `y` for equivalence.
+  ## Values are equal when their `kind` and `embedded` flag agree and
+  ## their payloads are equal; compound values must have the same
+  ## number of members and pairwise equal members. Embedded payloads
+  ## are only compared when `A` and `B` are the same type.
+  if x.kind == y.kind and x.embedded == y.embedded:
+    case x.kind
+    of pkBoolean:
+      result = x.bool == y.bool
+    of pkFloat:
+      result = x.float == y.float
+    of pkDouble:
+      result = x.double == y.double
+    of pkSignedInteger:
+      result = x.int == y.int
+    of pkString:
+      result = x.string == y.string
+    of pkByteString:
+      result = x.bytes == y.bytes
+    of pkSymbol:
+      result = x.symbol == y.symbol
+    of pkRecord:
+      result = x.record.len == y.record.len
+      for i in 0..x.record.high:
+        if not result: break
+        result = result and (x.record[i] == y.record[i])
+    of pkSequence:
+      # Compare lengths first: without this a sequence compared equal
+      # to any longer sequence it is a prefix of, and a longer `x`
+      # indexed past the end of `y`.
+      result = x.sequence.len == y.sequence.len
+      for i in 0..x.sequence.high:
+        if not result: break
+        result = result and (x.sequence[i] == y.sequence[i])
+    of pkSet:
+      result = x.set.len == y.set.len
+      for i in 0..x.set.high:
+        if not result: break
+        result = result and (x.set[i] == y.set[i])
+    of pkDictionary:
+      result = x.dict.len == y.dict.len
+      for i in 0..x.dict.high:
+        if not result: break
+        result = result and
+          (x.dict[i].key == y.dict[i].key) and
+          (x.dict[i].val == y.dict[i].val)
+    of pkEmbedded:
+      when A is B:
+        when A is void:
+          result = true
+        else:
+          result = x.embed == y.embed
+
+proc `<`(x, y: string | seq[byte]): bool =
+  ## Lexicographic "less than": the first differing element decides,
+  ## and when one operand is a prefix of the other the shorter one
+  ## is ordered first.
+  let shared = min(x.len, y.len)
+  var i = 0
+  while i < shared:
+    if x[i] != y[i]:
+      return x[i] < y[i]
+    inc i
+  result = x.len < y.len
+
+proc `<`*[A, B](x: Preserve[A]; y: Preserve[B]): bool =
+  ## Preserves have a total order over values. Check if `x` is ordered before `y`.
+  ## Values are ordered first by the `embedded` flag, then by `kind`,
+  ## and finally by payload. Compound payloads are compared member by
+  ## member: the first pair of unequal members decides, and when one
+  ## value is a prefix of the other the shorter one is ordered first.
+  if x.embedded != y.embedded:
+    result = y.embedded
+  elif x.kind != y.kind:
+    result = x.kind < y.kind
+  else:
+    case x.kind
+    of pkBoolean:
+      result = (not x.bool) and y.bool
+    of pkFloat:
+      result = x.float < y.float
+    of pkDouble:
+      result = x.double < y.double
+    of pkSignedInteger:
+      result = x.int < y.int
+    of pkString:
+      result = x.string < y.string
+    of pkByteString:
+      result = x.bytes < y.bytes
+    of pkSymbol:
+      result = x.symbol < y.symbol
+    of pkRecord:
+      # The label is stored last and is compared before the fields.
+      # Differing labels decide the order in either direction;
+      # previously a greater label fell through to the field loop.
+      if x.record[x.record.high] != y.record[y.record.high]:
+        return x.record[x.record.high] < y.record[y.record.high]
+      for i in 0..<min(x.record.high, y.record.high):
+        # only an unequal pair may decide; the old code returned
+        # false as soon as it met an EQUAL pair
+        if x.record[i] != y.record[i]:
+          return x.record[i] < y.record[i]
+      result = x.record.len < y.record.len
+    of pkSequence:
+      for i in 0..min(x.sequence.high, y.sequence.high):
+        if x.sequence[i] < y.sequence[i]: return true
+        if x.sequence[i] != y.sequence[i]: return false
+      result = x.sequence.len < y.sequence.len
+    of pkSet:
+      for i in 0..min(x.set.high, y.set.high):
+        if x.set[i] < y.set[i]: return true
+        if x.set[i] != y.set[i]: return false
+      result = x.set.len < y.set.len
+    of pkDictionary:
+      for i in 0..min(x.dict.high, y.dict.high):
+        # a differing key decides immediately; previously a greater
+        # key was skipped and later entries could still yield `true`
+        if x.dict[i].key != y.dict[i].key:
+          return x.dict[i].key < y.dict[i].key
+        if x.dict[i].val != y.dict[i].val:
+          return x.dict[i].val < y.dict[i].val
+      result = x.dict.len < y.dict.len
+    of pkEmbedded:
+      # NOTE(review): `not A is void` may bind as `(not A) is void`;
+      # confirm this guard means "A is not void" as presumably intended.
+      when (not A is void) and (A is B):
+        result = x.embed < y.embed
+
+func cmp*[E](x, y: Preserve[E]): int =
+  ## Three-way comparison by Preserves total ordering: `0` when the
+  ## values are equal, `-1` when `x` is ordered before `y`, else `1`.
+  if x == y:
+    return 0
+  if x < y:
+    return -1
+  return 1
+
+proc sort*[E](pr: var Preserve[E]) =
+  ## Sort the members of a Preserves sequence in place, ordered by `cmp`.
+  sort(pr.sequence, cmp)
+
+proc hash*[E](pr: Preserve[E]): Hash =
+  ## Produce a `Hash` of `pr` for use with a `HashSet` or `Table`.
+  ## The hash mixes the kind, the `embedded` flag and the payload;
+  ## compound values mix in the hash of every member in stored order.
+  var h = hash(pr.kind.int) !& hash(pr.embedded)
+  case pr.kind
+  of pkBoolean:
+    h = h !& hash(pr.bool)
+  of pkFloat:
+    h = h !& hash(pr.float)
+  of pkDouble:
+    h = h !& hash(pr.double)
+  of pkSignedInteger:
+    h = h !& hash(pr.int)
+  of pkString:
+    h = h !& hash(pr.string)
+  of pkByteString:
+    h = h !& hash(pr.bytes)
+  of pkSymbol:
+    h = h !& hash(string pr.symbol)
+  of pkRecord:
+    for val in pr.record:
+      h = h !& hash(val)
+  of pkSequence:
+    for val in pr.sequence:
+      h = h !& hash(val)
+  of pkSet:
+    for val in pr.set.items:
+      h = h !& hash(val)
+  of pkDictionary:
+    for (key, val) in pr.dict.items:
+      h = h !& hash(key) !& hash(val)
+  of pkEmbedded:
+    # NOTE(review): the `void` branch hashes `pr.embed` while the other
+    # branch requires `isNil` on `E` — confirm this condition is not
+    # inverted and that `E` is always a nilable type when not `void`.
+    when E is void:
+      h = h !& hash(pr.embed)
+    else:
+      if pr.embed.isNil:
+        # a nil embedded value contributes a fixed hash
+        h = h !& hash(false)
+      else:
+        h = h !& hash(pr.embed)
+  # finish the incremental hash
+  !$h
+
+proc `[]`*(pr: Preserve; i: int): Preserve =
+  ## Return the member at index ``i`` of a record or sequence.
+  ## Raises `ValueError` for any other kind of value.
+  if pr.kind == pkRecord:
+    result = pr.record[i]
+  elif pr.kind == pkSequence:
+    result = pr.sequence[i]
+  else:
+    raise newException(ValueError, "Preserves value is not indexable")
+
+proc `[]=`*(pr: var Preserve; i: Natural; val: Preserve) =
+  ## Assign an indexed value into ``pr``.
+  ## Only valid for records and sequences; any other kind raises
+  ## `ValueError`.
+  case pr.kind
+  of pkRecord: pr.record[i] = val
+  of pkSequence: pr.sequence[i] = val
+  else:
+    # message now matches the one raised by `[]` (a stray backtick
+    # used to lead the text)
+    raise newException(ValueError, "Preserves value is not indexable")
+
+proc `[]=`*(pr: var Preserve; key, val: Preserve) =
+  ## Store `val` under `key` in the Preserves dictionary `pr`.
+  ## An entry with an equal key has its value replaced; otherwise the
+  ## new entry is inserted ahead of the first entry whose key is
+  ## ordered after `key`, or appended when there is none.
+  var i = 0
+  while i < pr.dict.len:
+    if key < pr.dict[i].key:
+      insert(pr.dict, [(key, val, )], i)
+      return
+    if key == pr.dict[i].key:
+      pr.dict[i].val = val
+      return
+    inc i
+  pr.dict.add((key, val, ))
+
+proc incl*(pr: var Preserve; key: Preserve) =
+  ## Include `key` in the Preserves set `pr`.
+  ## The member is inserted ahead of the first member ordered after it,
+  ## or appended when there is none. Including a value that is already
+  ## a member leaves the set unchanged.
+  for i in 0..pr.set.high:
+    if key < pr.set[i]:
+      insert(pr.set, [key], i)
+      return
+    elif key == pr.set[i]:
+      # already present: previously a duplicate was inserted further on
+      return
+  pr.set.add(key)
+
+proc excl*(pr: var Preserve; key: Preserve) =
+  ## Remove the first member equal to `key` from the Preserves set `pr`.
+  ## Nothing happens when no member is equal to `key`.
+  var i = 0
+  while i < pr.set.len:
+    if pr.set[i] == key:
+      delete(pr.set, i..i)
+      return
+    inc i
--- a/src/preserves/schemac.nim
+++ b/src/preserves/schemac.nim
@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2022 ☭ Emery Hemingway
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
 # SPDX-License-Identifier: Unlicense

 import std/[hashes, options, os, parseopt, streams, strutils, tables]
@ -23,7 +23,7 @@ when isMainModule:
        write(outStream, schema.toPreserve)

      of "bundle":
-        var bundle: Bundle
+        let bundle = Bundle()
        if not dirExists inputPath:
          quit "not a directory of schemas: " & inputPath
        else:
--- a/src/preserves/schemac.nim.cfg
+++ b/src/preserves/schemac.nim.cfg
@ -0,0 +1 @@
+d:npegDotDir:"../.."
--- a/src/preserves/schemaparse.nim
+++ b/src/preserves/schemaparse.nim
@ -136,7 +136,9 @@ const parser = peg("Schema", p: ParseState):
      initRecord(toSymbol"lit", parsePreserves $1)]
    pushStack n

-  AndPattern <- ?('&' * S) * NamedPattern * +(S * '&' * S * NamedPattern)
+  AndPattern <- ?('&' * S) * NamedPattern * +(S * '&' * S * NamedPattern):
+    var node = initRecord(toSymbol("and"), toPreserve takeStackAt())
+    pushStack node

  Pattern <- SimplePattern | CompoundPattern

@ -239,7 +241,7 @@ const parser = peg("Schema", p: ParseState):
    var node = initRecord(toSymbol"tuplePrefix", toPreserve fields, tail)
    pushStack node

-  DictionaryPattern <- '{' * S * *(>Value * S * ':' * S * NamedSimplePattern * S) * '}':
+  DictionaryPattern <- '{' * S * *(>Value * S * ':' * S * NamedSimplePattern * ?',' * S) * '}':
    var dict = initDictionary(void)
    for i in countDown(pred capture.len, 1):
      let key = toSymbol capture[i].s
@ -259,7 +261,7 @@ const parser = peg("Schema", p: ParseState):

  id <- Alpha * *Alnum

-  Comment <- ';' * @'\n'
+  Comment <- '#' * @'\n'

  S <- *(Space | Comment)

--- a/tests/Tupfile
+++ b/tests/Tupfile
@ -1,2 +1,3 @@
 include_rules
+NIM_FLAGS_test_samples += -d:upstreamTestfile="$(TUP_CWD)/../../preserves/tests/samples.pr"
 : foreach t*.nim |> !nim_run |> | ../<test>
--- a/tests/test_samples.nim
+++ b/tests/test_samples.nim
@ -0,0 +1,110 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/unittest
+import preserves
+
+type Value = Preserve[void]
+
+const upstreamTestfile {.strdefine.} = ""
+
+proc strip(pr: Preserve): Preserve =
+  ## Return `pr` as it is.
+  result = pr
+
+proc encodeBinary(pr: Value): Value =
+  ## Binary-encode `pr`, wrap the bytes in a `Value` and record the
+  ## outcome as a test checkpoint.
+  result = toPreserve(encode(pr))
+  let note = "encoded binary: " & $result
+  checkpoint(note)
+
+proc looseEncodeBinary(pr: Value): Value =
+  ## Binary-encode `pr`, wrap the bytes in a `Value` and record the
+  ## outcome as a test checkpoint.
+  result = toPreserve(encode(pr))
+  let note = "loose encoded binary: " & $result
+  checkpoint(note)
+
+proc annotatedBinary(pr: Value): Value =
+  ## Binary-encode `pr`, wrap the bytes in a `Value` and record the
+  ## outcome as a test checkpoint.
+  result = toPreserve(encode(pr))
+  let note = "annotated binary: " & $result
+  checkpoint(note)
+
+proc decodeBinary(pr: Value): Value =
+  ## Decode the byte-string payload of `pr` as binary Preserves.
+  decodePreserves(pr.bytes)
+
+proc encodeText(pr: Value): Value =
+  ## Render `pr` as text, wrap the text in a `Value` and record it as
+  ## a test checkpoint.
+  result = toPreserve($pr)
+  let note = "encoded text: " & result.string
+  checkpoint(note)
+
+proc decodeText(pr: Value): Value =
+  ## Parse the string payload of `pr` as text Preserves and record the
+  ## input value as a test checkpoint.
+  result = parsePreserves(pr.string)
+  let note = "decoded text " & $pr
+  checkpoint(note)
+
+# The samples are only exercised when a path was supplied at compile
+# time through `-d:upstreamTestfile=…`.
+if upstreamTestfile != "":
+  let samples = readFile(upstreamTestfile).parsePreserves(void)
+  assert samples.isRecord("TestCases")
+
+  # Inputs of the current test case. They are declared outside the loops
+  # and only overwritten when a case matches the check being run.
+  var binary, annotatedValue, stripped, text, bytes: Value
+
+  # `n` selects which check is applied; one suite is produced per `n`.
+  for n in { 1..8, 20..22, 30..32 }:
+    suite $n:
+      for name, testcase in samples[0]:
+        assert testcase.isRecord
+        assert testcase.label.isSymbol
+        # whether check `n` applies to a test case with this label
+        var testMatched: bool
+        case testcase.label.symbol.string
+        of "Test":
+          testMatched = (n in { 1..8 })
+          if testMatched:
+            binary = testcase[0]
+            annotatedValue = testcase[1]
+            stripped = strip(annotatedValue)
+        of "NondeterministicTest":
+          testMatched = (n in { 1..7 })
+          if testMatched:
+            binary = testcase[0]
+            annotatedValue = testcase[1]
+            stripped = strip(annotatedValue)
+        of "ParseError":
+          testMatched = (n in { 20 })
+          if testMatched: text = testcase[0]
+        of "ParseShort":
+          testMatched = (n in { 21 })
+          if testMatched: text = testcase[0]
+        of "ParseEOF":
+          testMatched = (n in { 22 })
+          if testMatched: text = testcase[0]
+        of "DecodeError":
+          testMatched = (n in { 30 })
+          if testMatched: bytes = testcase[0]
+        of "DecodeShort":
+          testMatched = (n in { 31 })
+          if testMatched: bytes = testcase[0]
+        of "DecodeEOF":
+          testMatched = (n in { 32 })
+          if testMatched: bytes = testcase[0]
+        else:
+          # any other label is treated as a failure
+          assert false
+
+        if testMatched:
+          test $name:
+            checkpoint $testcase
+            case n
+            of 1: check decodeBinary(encodeBinary(annotatedValue)) == stripped
+            of 2: check strip(decodeBinary(binary)) == stripped
+            of 3:
+              # check decodeBinary(binary) == annotatedValue
+              discard
+            of 4:
+              # check decodeBinary(annotatedBinary(annotatedValue)) == annotatedValue
+              discard
+            of 5: check decodeText(encodeText(stripped)) == stripped
+            of 6: check decodeText(encodeText(annotatedValue)) == annotatedValue
+            of 7:
+              # check annotatedBinary(annotatedValue) == binary
+              discard
+            of 8:
+              # check looseEncodeBinary(annotatedValue) == binary
+              discard
+            of 20, 21, 22:
+              # TODO: be specific about which error is raised
+              expect ValueError, IOError:
+                discard decodeText(text)
+            of 30, 31, 32:
+              # malformed or truncated binary input must raise
+              expect ValueError, IOError:
+                discard decodeBinary(bytes)
+            else:
+              assert false
Author	SHA1	Message	Date
Emery Hemingway	37043a03bf	Test against upstream samples	2023-12-25 10:08:10 +02:00
Emery Hemingway	558a1a862a	PEG: tweak commas and symbols	2023-12-25 10:08:10 +02:00
Emery Hemingway	dbe9f3566f	Add raw floats and doubles to parser	2023-12-25 10:08:10 +02:00
Emery Hemingway	82631b1a01	Stop running nim doc	2023-12-25 10:08:10 +02:00
Emery Hemingway	adadcc181a	Move some of preserves implementation to preserves/private	2023-12-25 10:08:03 +02:00
Emery Hemingway	126365d164	Remove superfluous PEG whitspace rules	2023-12-24 21:24:02 +02:00
Emery Hemingway	5267b7dc13	Generate graphs for PEGs	2023-12-24 21:24:02 +02:00
Emery Hemingway	c1eb0a513a	schemac: Bundle is now a ref obj	2023-12-24 21:24:02 +02:00
Emery Hemingway	40f20a3ca8	schema: extract And clauses	2023-12-24 21:24:02 +02:00
Emery Hemingway	218ca7a669	schemaparse: parse them commas	2023-12-24 21:24:02 +02:00
Emery Hemingway	c83b78aef7	Replace comment marker ; with #	2023-12-24 21:24:02 +02:00
Emery Hemingway	27a5ace1ea	Fix decoding of annotations	2023-12-24 21:12:30 +02:00