Replace Nimble with an SBOM

pegs: refer to npeg syntax in the documentation
sugar: preserve most Nim atomics with %
2024-06-01 12:16:26 +03:00 · 2024-05-25 10:23:33 +03:00 · 2024-05-23 15:58:40 +03:00 · 2024-05-22 22:53:45 +03:00 · 2024-05-22 20:50:48 +03:00 · 2024-05-22 19:49:07 +03:00
47 changed files with 4241 additions and 1722 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,9 +1,5 @@
-tests/test_integers
-tests/test_parser
-tests/test_rfc8259
-tests/test_schemas
-preserves_encode
-preserves_decode
-preserves_from_json
-preserves_to_json
-preserves_schema_nim
+/nim.cfg
+*.dot
+*.html
+*.run
+*.svg
--- a/.gitmodules
+++ b/.gitmodules
@ -1,3 +0,0 @@
-[submodule "upstream"]
-	path = upstream
-	url = https://gitlab.com/preserves/preserves.git
--- a/README.md
+++ b/README.md
@ -1,4 +1,14 @@
-Nim implementation of the [Preserves data language](https://preserves.gitlab.io/preserves/preserves.html).
+# Preserves
+
+Nim implementation of the [Preserves data language](https://preserves.dev/).
+
+If you don't know why you need Preserves, see the [Syndicate library](https://git.syndicate-lang.org/ehmry/syndicate-nim).
+
+## Library
+
+To parse or produce Preserves one should write a [schema](https://preserves.dev/preserves-schema.html) and generate a Nim module using the [preserves_schema_nim](./src/preserves/preserves_schema_nim.nim) utility. This module will contain Nim types corresponding to schema definitions. The `toPreserves` and `fromPreserves` routines will convert Nim types to and from Preserves. The `decodePreserves`, `parsePreserves`, `encode`, and `$` routines will convert `Preserve` objects to and from binary and textual encoding.
+
+To debug the `toPreserves` and `fromPreserves` routines compile with `-d:tracePreserves`.

 ## Utilities
 * preserves_schema_nim
@ -7,7 +17,5 @@ Nim implementation of the [Preserves data language](https://preserves.gitlab.io/
 * preserves_from_json
 * preserves_to_json

-## Installation
-`preserves_encode` is a multi-call binary that implements `preserves_encode`,
-`preserves_decode`, `preserves_from_json`, and `preserves_to_json`, so the
-appropriate symlinks should be created during packaging.
+### Installation
+`preserves_encode` is a multi-call binary that implements `preserves_encode`, `preserves_decode`, `preserves_from_json`, and `preserves_to_json`, so the appropriate symlinks should be created during packaging.
--- a/2
+++ b/2
@ -0,0 +1,2 @@
+include_rules
+: sbom.json |> !sbom-to-nix |> | ./<lock>
--- a/Tuprules.tup
+++ b/Tuprules.tup
@ -0,0 +1,2 @@
+include depends.tup
+NIM_GROUPS += $(TUP_CWD)/<lock>
--- a/default.nix
+++ b/default.nix
@ -0,0 +1,26 @@
+{
+  pkgs ? import <nixpkgs> { },
+}:
+let
+  inherit (pkgs) lib buildNimPackage nim;
+in
+buildNimPackage {
+  pname = "preserves-nim";
+  version = "unstable";
+
+  lockFile = ./lock.json;
+
+  src = if lib.inNixShell then null else lib.cleanSource ./.;
+
+  nimFlags = [ "--path:${nim.passthru.nim}/nim" ];
+  # Path to the compiler/ast library.
+
+  postInstall = ''
+    pushd $out/bin
+    for link in preserves_decode preserves_from_json preserves_to_json;
+      do ln -s  preserves_encode $link
+    done
+    mv preserves_schemac preserves-schemac
+    popd
+  '';
+}
--- a/depends.tup
+++ b/depends.tup
@ -0,0 +1,2 @@
+NIM_FLAGS += --path:$(TUP_CWD)/../nim
+NIM_FLAGS += --path:$(TUP_CWD)/../npeg/src
--- a/lock.json
+++ b/lock.json
@ -0,0 +1,28 @@
+{
+  "depends": [
+    {
+      "method": "fetchzip",
+      "packages": [
+        "bigints"
+      ],
+      "path": "/nix/store/jvrm392g8adfsgf36prgwkbyd7vh5jsw-source",
+      "ref": "20231006",
+      "rev": "86ea14d31eea9275e1408ca34e6bfe9c99989a96",
+      "sha256": "15pcpmnk1bnw3k8769rjzcpg00nahyrypwbxs88jnwr4aczp99j4",
+      "srcDir": "src",
+      "url": "https://github.com/ehmry/nim-bigints/archive/86ea14d31eea9275e1408ca34e6bfe9c99989a96.tar.gz"
+    },
+    {
+      "method": "fetchzip",
+      "packages": [
+        "npeg"
+      ],
+      "path": "/nix/store/ffkxmjmigfs7zhhiiqm0iw2c34smyciy-source",
+      "ref": "1.2.1",
+      "rev": "26d62fdc40feb84c6533956dc11d5ee9ea9b6c09",
+      "sha256": "0xpzifjkfp49w76qmaylan8q181bs45anmp46l4bwr3lkrr7bpwh",
+      "srcDir": "src",
+      "url": "https://github.com/zevv/npeg/archive/26d62fdc40feb84c6533956dc11d5ee9ea9b6c09.tar.gz"
+    }
+  ]
+}
--- a/preserves.nimble
+++ b/preserves.nimble
@ -1,14 +1,61 @@
-# Package
+# Emulate Nimble from CycloneDX data at sbom.json.

-version       = "1.0.0"
-author        = "Emery Hemingway"
-description   = "data model and serialization format"
-license       = "Unlicense"
-srcDir        = "src"
+import std/json

-bin           = @["preserves/preserves_schema_nim", "preserves/private/preserves_encode"]
+proc lookupComponent(sbom: JsonNode; bomRef: string): JsonNode =
+  ## First entry of `sbom`'s "components" whose "bom-ref" equals `bomRef`, else JNull.
+  result = newJNull()
+  for candidate in sbom{"components"}.getElems:
+    if candidate{"bom-ref"}.getStr == bomRef: return candidate

+let
+  sbom = "sbom.json".readFile.parseJson
+  comp = sbom{"metadata", "component"}
+  bomRef = comp{"bom-ref"}.getStr

-# Dependencies
+version = comp{"version"}.getStr
+author = comp{"authors"}[0]{"name"}.getStr
+description = comp{"description"}.getStr
+license = comp{"licenses"}[0]{"license", "id"}.getStr

-requires "nim >= 1.4.8", "compiler >= 1.4.8", "bigints", "npeg"
+for prop in comp{"properties"}.getElems.items:  # copy "nim:*" SBOM properties into the package settings
+  let (key, val) = (prop{"name"}.getStr, prop{"value"}.getStr)
+  case key
+  of "nim:skipDirs":  # exact-match keys carry no trailing colon
+    add(skipDirs, val)
+  of "nim:skipFiles":
+    add(skipFiles, val)
+  of "nim:skipExt":
+    add(skipExt, val)
+  of "nim:installDirs":
+    add(installDirs, val)
+  of "nim:installFiles":
+    add(installFiles, val)
+  of "nim:installExt":
+    add(installExt, val)
+  of "nim:binDir":
+    add(binDir, val)
+  of "nim:srcDir":
+    add(srcDir, val)
+  of "nim:backend":
+    add(backend, val)
+  else:
+    if key.startsWith "nim:bin:":  # the text after the "nim:bin:" prefix indexes namedBin
+      namedBin[key[8..key.high]] = val
+
+for depend in sbom{"dependencies"}.getElems:  # getElems yields nothing when the array is absent
+  if depend{"ref"}.getStr == bomRef:
+    for depRef in depend{"dependsOn"}.getElems:
+      let dep = sbom.lookupComponent(depRef.getStr)
+      var spec = dep{"name"}.getStr
+      for extRef in dep{"externalReferences"}.getElems:  # a component may omit this array
+        if extRef{"type"}.getStr == "vcs":
+          spec = extRef{"url"}.getStr  # the first "vcs" URL replaces the bare name
+          break
+      let ver = dep{"version"}.getStr
+      if ver != "":
+        if ver.allCharsInSet {'0'..'9', '.'}: spec.add " == "  # digits and dots only
+        else: spec.add '#'  # anything else is appended after '#'
+        spec.add ver
+      requires spec
+    break
--- a/sbom.json
+++ b/sbom.json
@ -0,0 +1,162 @@
+{
+  "bomFormat": "CycloneDX",
+  "specVersion": "1.6",
+  "metadata": {
+    "component": {
+      "type": "application",
+      "bom-ref": "pkg:nim/preserves",
+      "name": "preserves",
+      "description": "data model and serialization format",
+      "version": "20240523",
+      "authors": [
+        {
+          "name": "Emery Hemingway"
+        }
+      ],
+      "licenses": [
+        {
+          "license": {
+            "id": "Unlicense"
+          }
+        }
+      ],
+      "properties": [
+        {
+          "name": "nim:skipExt",
+          "value": "nim"
+        },
+        {
+          "name": "nim:bin:preserves/private/preserves_encode",
+          "value": "preserves/private/preserves_encode"
+        },
+        {
+          "name": "nim:bin:preserves/preserves_schema_nim",
+          "value": "preserves/preserves_schema_nim"
+        },
+        {
+          "name": "nim:bin:preserves/preserves_schemac",
+          "value": "preserves/preserves_schemac"
+        },
+        {
+          "name": "nim:srcDir",
+          "value": "src"
+        },
+        {
+          "name": "nim:backend",
+          "value": "c"
+        }
+      ]
+    }
+  },
+  "components": [
+    {
+      "type": "library",
+      "bom-ref": "pkg:nim/npeg",
+      "name": "npeg",
+      "version": "1.2.2",
+      "externalReferences": [
+        {
+          "url": "https://github.com/zevv/npeg/archive/ec0cc6e64ea4c62d2aa382b176a4838474238f8d.tar.gz",
+          "type": "source-distribution"
+        },
+        {
+          "url": "https://github.com/zevv/npeg.git",
+          "type": "vcs"
+        }
+      ],
+      "properties": [
+        {
+          "name": "nix:fod:method",
+          "value": "fetchzip"
+        },
+        {
+          "name": "nix:fod:path",
+          "value": "/nix/store/xpn694ibgipj8xak3j4bky6b3k0vp7hh-source"
+        },
+        {
+          "name": "nix:fod:rev",
+          "value": "ec0cc6e64ea4c62d2aa382b176a4838474238f8d"
+        },
+        {
+          "name": "nix:fod:sha256",
+          "value": "1fi9ls3xl20bmv1ikillxywl96i9al6zmmxrbffx448gbrxs86kg"
+        },
+        {
+          "name": "nix:fod:url",
+          "value": "https://github.com/zevv/npeg/archive/ec0cc6e64ea4c62d2aa382b176a4838474238f8d.tar.gz"
+        },
+        {
+          "name": "nix:fod:ref",
+          "value": "1.2.2"
+        },
+        {
+          "name": "nix:fod:srcDir",
+          "value": "src"
+        }
+      ]
+    },
+    {
+      "type": "library",
+      "bom-ref": "pkg:nim/bigints",
+      "name": "bigints",
+      "version": "20231006",
+      "externalReferences": [
+        {
+          "url": "https://github.com/ehmry/nim-bigints/archive/86ea14d31eea9275e1408ca34e6bfe9c99989a96.tar.gz",
+          "type": "source-distribution"
+        },
+        {
+          "url": "https://github.com/ehmry/nim-bigints.git",
+          "type": "vcs"
+        }
+      ],
+      "properties": [
+        {
+          "name": "nix:fod:method",
+          "value": "fetchzip"
+        },
+        {
+          "name": "nix:fod:path",
+          "value": "/nix/store/jvrm392g8adfsgf36prgwkbyd7vh5jsw-source"
+        },
+        {
+          "name": "nix:fod:rev",
+          "value": "86ea14d31eea9275e1408ca34e6bfe9c99989a96"
+        },
+        {
+          "name": "nix:fod:sha256",
+          "value": "15pcpmnk1bnw3k8769rjzcpg00nahyrypwbxs88jnwr4aczp99j4"
+        },
+        {
+          "name": "nix:fod:url",
+          "value": "https://github.com/ehmry/nim-bigints/archive/86ea14d31eea9275e1408ca34e6bfe9c99989a96.tar.gz"
+        },
+        {
+          "name": "nix:fod:ref",
+          "value": "20231006"
+        },
+        {
+          "name": "nix:fod:srcDir",
+          "value": "src"
+        }
+      ]
+    }
+  ],
+  "dependencies": [
+    {
+      "ref": "pkg:nim/preserves",
+      "dependsOn": [
+        "pkg:nim/npeg",
+        "pkg:nim/bigints"
+      ]
+    },
+    {
+      "ref": "pkg:nim/npeg",
+      "dependsOn": []
+    },
+    {
+      "ref": "pkg:nim/bigints",
+      "dependsOn": []
+    }
+  ]
+}
--- a/schema.bin
+++ b/schema.bin
@ -0,0 +1,8 @@
+´³schema·³version‘³definitions·³Ref´³rec´³lit³ref„´³tupleµ´³named³module´³refµ„³
+ModulePath„„´³named³name´³atom³Symbol„„„„„³Bundle´³rec´³lit³bundle„´³tupleµ´³named³modules´³refµ„³Modules„„„„„³Schema´³rec´³lit³schema„´³tupleµ´³dict·³version´³named³version´³refµ„³Version„„³definitions´³named³definitions´³refµ„³Definitions„„³embeddedType´³named³embeddedType´³refµ„³EmbeddedTypeName„„„„„„„³Binding´³rec´³lit³named„´³tupleµ´³named³name´³atom³Symbol„„´³named³pattern´³refµ„³
SimplePattern„„„„„³Modules´³dictof´³refµ„³
+ModulePath„´³refµ„³Schema„„³Pattern´³orµµ±
SimplePattern´³refµ„³
SimplePattern„„µ±CompoundPattern´³refµ„³CompoundPattern„„„„³Version´³lit‘„³AtomKind´³orµµ±Boolean´³lit³Boolean„„µ±Float´³lit³Float„„µ±Double´³lit³Double„„µ±
SignedInteger´³lit³
SignedInteger„„µ±String´³lit³String„„µ±
+ByteString´³lit³
+ByteString„„µ±Symbol´³lit³Symbol„„„„³
+Definition´³orµµ±or´³rec´³lit³or„´³tupleµ´³tuplePrefixµ´³named³pattern0´³refµ„³NamedAlternative„„´³named³pattern1´³refµ„³NamedAlternative„„„´³named³patternN´³seqof´³refµ„³NamedAlternative„„„„„„„„µ±and´³rec´³lit³and„´³tupleµ´³tuplePrefixµ´³named³pattern0´³refµ„³NamedPattern„„´³named³pattern1´³refµ„³NamedPattern„„„´³named³patternN´³seqof´³refµ„³NamedPattern„„„„„„„„µ±Pattern´³refµ„³Pattern„„„„³
+ModulePath´³seqof´³atom³Symbol„„³Definitions´³dictof´³atom³Symbol„´³refµ„³
+Definition„„³NamedPattern´³orµµ±named´³refµ„³Binding„„µ±	anonymous´³refµ„³Pattern„„„„³
SimplePattern´³orµµ±any´³lit³any„„µ±atom´³rec´³lit³atom„´³tupleµ´³named³atomKind´³refµ„³AtomKind„„„„„„µ±embedded´³rec´³lit³embedded„´³tupleµ´³named³	interface´³refµ„³
SimplePattern„„„„„„µ±lit´³rec´³lit³lit„´³tupleµ´³named³value³any„„„„„µ±seqof´³rec´³lit³seqof„´³tupleµ´³named³pattern´³refµ„³
SimplePattern„„„„„„µ±setof´³rec´³lit³setof„´³tupleµ´³named³pattern´³refµ„³
SimplePattern„„„„„„µ±dictof´³rec´³lit³dictof„´³tupleµ´³named³key´³refµ„³
SimplePattern„„´³named³value´³refµ„³
SimplePattern„„„„„„µ±Ref´³refµ„³Ref„„„„³CompoundPattern´³orµµ±rec´³rec´³lit³rec„´³tupleµ´³named³label´³refµ„³NamedPattern„„´³named³fields´³refµ„³NamedPattern„„„„„„µ±tuple´³rec´³lit³tuple„´³tupleµ´³named³patterns´³seqof´³refµ„³NamedPattern„„„„„„„µ±tuplePrefix´³rec´³lit³tuplePrefix„´³tupleµ´³named³fixed´³seqof´³refµ„³NamedPattern„„„´³named³variable´³refµ„³NamedSimplePattern„„„„„„µ±dict´³rec´³lit³dict„´³tupleµ´³named³entries´³refµ„³DictionaryEntries„„„„„„„„³EmbeddedTypeName´³orµµ±Ref´³refµ„³Ref„„µ±false´³lit€„„„„³NamedAlternative´³tupleµ´³named³variantLabel´³atom³String„„´³named³pattern´³refµ„³Pattern„„„„³DictionaryEntries´³dictof³any´³refµ„³NamedSimplePattern„„³NamedSimplePattern´³orµµ±named´³refµ„³Binding„„µ±	anonymous´³refµ„³
SimplePattern„„„„„³embeddedType€„„
--- a/schema.prs
+++ b/schema.prs
@ -0,0 +1,92 @@
+@<EmacsMode "-*- preserves -*-">
+
+; TODO: some kind of constants
+; TODO: rename "version" to "schema-version" ?
+
+version 1 .
+
+Bundle = <bundle @modules Modules>.
+Modules = { ModulePath: Schema ...:... }.
+
+Schema = <schema {
+  version: Version
+  embeddedType: EmbeddedTypeName
+  definitions: Definitions
+}>.
+
+; version 1 .
+Version = 1 .
+
+EmbeddedTypeName = Ref / #f.
+
+Definitions = { symbol: Definition ...:... }.
+
+Definition =
+  ; Pattern / Pattern / ...
+  / <or [@pattern0 NamedAlternative @pattern1 NamedAlternative @patternN NamedAlternative ...]>
+
+  ; Pattern & Pattern & ...
+  / <and [@pattern0 NamedPattern @pattern1 NamedPattern @patternN NamedPattern ...]>
+
+  ; Pattern
+  / Pattern
+.
+
+Pattern = SimplePattern / CompoundPattern .
+
+SimplePattern =
+  ; any
+  / =any
+
+  ; special builtins: bool, float, double, int, string, bytes, symbol
+  / <atom @atomKind AtomKind>
+
+  ; matches an embedded value in the input: #:p
+  / <embedded @interface SimplePattern>
+
+  ; =symbol, <<lit> any>, or plain non-symbol atom
+  / <lit @value any>
+
+  ; [p ...] ----> <seqof <ref p>>; see also tuplePrefix below.
+  / <seqof @pattern SimplePattern>
+
+  ; #{p} ----> <setof <ref p>>
+  / <setof @pattern SimplePattern>
+
+  ; {k: v, ...:...} ----> <dictof <ref k> <ref v>>
+  / <dictof @key SimplePattern @value SimplePattern>
+
+  ; symbol, symbol.symbol, symbol.symbol.symbol, ...
+  / Ref
+.
+
+CompoundPattern =
+  ; <label a b c> ----> <rec <lit label> <tuple [<ref a> <ref b> <ref c>]>>
+  ; except for record labels
+  ; <<rec> x y> ---> <rec <ref x> <ref y>>
+  / <rec @label NamedPattern @fields NamedPattern>
+
+  ; [a b c] ----> <tuple [<ref a> <ref b> <ref c>]>
+  / <tuple @patterns [NamedPattern ...]>
+
+  ; [a b c ...] ----> <tuplePrefix [<ref a> <ref b>] <seqof <ref c>>>
+  ; TODO: [@fixed0 NamedPattern @fixedN NamedPattern ...]
+  / <tuplePrefix @fixed [NamedPattern ...] @variable NamedSimplePattern>
+
+  ; {a: b, c: d} ----> <dict {a: <ref b>, c: <ref d>}>
+  / <dict @entries DictionaryEntries>
+.
+
+DictionaryEntries = { any: NamedSimplePattern ...:... }.
+
+AtomKind = =Boolean / =Float / =Double / =SignedInteger / =String / =ByteString / =Symbol .
+
+NamedAlternative = [@variantLabel string @pattern Pattern].
+
+NamedSimplePattern = @named Binding / @anonymous SimplePattern .
+NamedPattern = @named Binding / @anonymous Pattern .
+
+Binding = <named @name symbol @pattern SimplePattern>.
+
+Ref = <ref @module ModulePath @name symbol>.
+ModulePath = [symbol ...].
--- a/src/Tupfile
+++ b/src/Tupfile
@ -0,0 +1,2 @@
+include_rules
+: preserves.nim |> !nim_check |>
--- a/src/preserves.nim
+++ b/src/preserves.nim
--- a/src/preserves/Tupfile
+++ b/src/preserves/Tupfile
@ -0,0 +1,10 @@
+include_rules
+NIM_FLAGS += --path:$(TUP_CWD)/..
+NIM_FLAGS_preserves_schemac += -d:npegDotDir="../.."
+
+: foreach preserves_schema_nim.nim schemaparse.nim |> !nim_bin |> $(BIN_DIR)/%B | $(BIN_DIR)/<%B>
+
+DOT_FILES = ../../Atom.dot ../../Document.dot ../../Schema.dot
+: preserves_schemac.nim |> !nim_bin |> $(BIN_DIR)/preserves-schemac | $(DOT_FILES) $(BIN_DIR)/<preserves-schemac>
+: foreach $(DOT_FILES) |> dot -Tsvg -LO %f > %o |> ../../%B-Grammer-Graph.svg
+: foreach *hooks.nim  |> !nim_run |>
--- a/src/preserves/datehooks.nim
+++ b/src/preserves/datehooks.nim
@ -0,0 +1,44 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/times
+import ../preserves
+
+const
+  label = "rfc3339"
+  fullDateFormat = "yyyy-MM-dd"
+  partialTimeFormat = "HH:mm:ss"
+  fullTimeFormat = "HH:mm:sszzz"
+  dateTimeFormat = "yyyy-MM-dd'T'HH:mm:sszzz"
+
+proc toPreservesHook*(dt: DateTime): Value =  ## Wrap `$dt` in a record labelled `label`.
+  initRecord(label, toPreserves($dt))
+
+proc fromPreservesHook*(dt: var DateTime; pr: Value): bool =
+  ## Fill `dt` from a one-field `rfc3339` string record; false on shape or parse failure.
+  result = pr.isRecord(label, 1) and pr.record[0].isString
+  if result:
+    try:
+      let s = pr.record[0].string
+      let n = len(s)
+      if n == len(fullDateFormat):
+        dt = parse(s, fullDateFormat)
+      elif n == len(partialTimeFormat):
+        dt = parse(s, partialTimeFormat)
+      elif n > len(fullDateFormat) and s[len(fullDateFormat)] == 'T':
+        dt = parse(s, dateTimeFormat)  # a 'T' directly after the date part marks a date-time
+      elif n > len(partialTimeFormat):
+        dt = parse(s, fullTimeFormat)  # time plus offset, e.g. "Z" or "+03:00"
+      else:
+        result = false
+    except ValueError:
+      result = false
+
+runnableExamples:
+  import std/[times, unittest]
+  import preserves
+  var a, b: DateTime
+  a = now()
+  var pr = a.toPreservesHook()
+  check b.fromPreservesHook(pr)
+  check $a == $b
--- a/src/preserves/expressions.nim
+++ b/src/preserves/expressions.nim
@ -0,0 +1,92 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import
+  npeg,
+  ../preserves, ./pegs
+
+type
+  Frame = tuple[value: Value, pos: int]
+  Stack = seq[Frame]
+
+proc shrink(stack: var Stack; n: int) = stack.setLen(stack.len - n)
+
+template pushStack(v: Value) = stack.add((v, capture[0].si))
+
+template collectEntries(result: var seq[Value]; stack: var Stack) =
+  for frame in stack.mitems:
+    if frame.pos > capture[0].si:
+      result.add frame.value.move
+  stack.shrink result.len
+
+proc parseExpressions*(text: string): seq[Value] =
+  let parser = peg("Document", stack: Stack):
+
+    ws <- *{ ' ', '\t', '\r', '\n' }
+
+    Document <- *Expr * ws * !1
+
+    Annotation <-
+      ('@' * SimpleExpr) |
+      ('#' * {'\x20', '\x09', '\x21'} * @{'\r','\n'})
+
+    Trailer <- *(ws * Annotation)
+
+    Expr <- ws * (Punct | SimpleExpr) * Trailer
+
+    Punct <- {',', ';'} | +':':
+      pushStack initRecord("p", toSymbol $0)
+
+    SimpleExpr <-
+        Atom |
+        Compound |
+        Embedded |
+        Annotated
+
+    Embedded <- "#:" * SimpleExpr:
+      pushstack stack.pop.value.embed
+
+    Annotated <- Annotation * SimpleExpr
+
+    Compound <- Sequence | Record | Block | Group | Set
+
+    Sequence <-  '[' * *Expr * ws * ']':
+      var pr = Value(kind: pkSequence)
+      collectEntries(pr.sequence, stack)
+      pushStack pr
+
+    Record <- '<' * *Expr * ws * '>':
+      var pr = Value(kind: pkRecord)
+      collectEntries(pr.record, stack)
+      pr.record.add toSymbol"r"
+      pushStack pr
+
+    Block <- '{' * *Expr * ws * '}':
+      var pr = Value(kind: pkRecord)
+      collectEntries(pr.record, stack)
+      pr.record.add toSymbol"b"
+      pushStack pr
+
+    Group <- '(' * *Expr * ws * ')':
+      var pr = Value(kind: pkRecord)
+      collectEntries(pr.record, stack)
+      pr.record.add toSymbol"g"
+      pushStack pr
+
+    Set <- "#{" * *Expr * ws * '}':
+      var pr = Value(kind: pkRecord)
+      collectEntries(pr.record, stack)
+      pr.record.add toSymbol"s"
+      pushStack pr
+
+    Atom <- Preserves.Atom:
+      pushStack parsePreserves($0)
+
+  var stack: Stack
+  let match = parser.match(text, stack)
+  if not match.ok:
+    raise newException(ValueError, "failed to parse Preserves Expressions:\n" & text[match.matchMax..text.high])
+
+  result.setLen stack.len
+  for i, _ in result:
+    result[i] = move stack[i].value
--- a/src/preserves/jsonhooks.nim
+++ b/src/preserves/jsonhooks.nim
@ -1,47 +1,52 @@
-# SPDX-FileCopyrightText: 2021 ☭ Emery Hemingway
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
 # SPDX-License-Identifier: Unlicense

 import std/[json, tables]
 import ../preserves

-proc toPreserveHook*(js: JsonNode; E: typedesc): Preserve[E] =
+proc toPreservesHook*(js: JsonNode): Value =
  case js.kind
  of JString:
-    result = Preserve[E](kind: pkString, string: js.str)
+    result = js.str.toPreserves()
  of JInt:
-    result = Preserve[E](kind: pkSignedInteger, int: js.num)
+    result = js.num.toPreserves()
  of JFloat:
-    result = Preserve[E](kind: pkDouble, double: js.fnum)
+    result = js.fnum.toPreserves()
  of JBool:
    result = case js.bval
-      of false: toSymbol("false", E)
-      of true: toSymbol("true", E)
+      of false: toSymbol("false")
+      of true: toSymbol("true")
  of JNull:
-    result = toSymbol("null", E)
+    result = toSymbol("null")
  of JObject:
-    result = Preserve[E](kind: pkDictionary)
+    result = Value(kind: pkDictionary)
    for key, val in js.fields.pairs:
-      result[Preserve[E](kind: pkString, string: key)] = toPreserveHook(val, E)
+      result[Value(kind: pkSymbol, symbol: Symbol key)] = toPreservesHook(val)
  of JArray:
-    result = Preserve[E](kind: pkSequence,
-        sequence: newSeq[Preserve[E]](js.elems.len))
+    result = Value(kind: pkSequence,
+        sequence: newSeq[Value](js.elems.len))
    for i, e in js.elems:
-      result.sequence[i] = toPreserveHook(e, E)
+      result.sequence[i] = toPreservesHook(e)

-proc fromPreserveHook*[E](js: var JsonNode; prs: Preserve[E]): bool =
-  case prs.kind:
+proc fromPreservesHook*(js: var JsonNode; pr: Value): bool =
+  runnableExamples:
+    import std/json
+    var js = JsonNode()
+    var pr = js.toPreservesHook()
+    assert js.fromPreservesHook(pr)
+    fromJsonHook(pr, js)
+    js = toJsonHook(pr)
+  case pr.kind:
  of pkBoolean:
-    js = newJBool(prs.bool)
+    js = newJBool(pr.bool)
  of pkFloat:
-    js = newJFloat(prs.float)
-  of pkDouble:
-    js = newJFloat(prs.double)
-  of pkSignedInteger:
-    js = newJInt(prs.int)
+    js = newJFloat(pr.float)
+  of pkRegister:
+    js = newJInt(pr.register)
  of pkString:
-    js = newJString(prs.string)
+    js = newJString(pr.string)
  of pkSymbol:
-    case prs.symbol
+    case pr.symbol.string
    of "false":
      js = newJBool(false)
    of "true":
@ -52,31 +57,38 @@ proc fromPreserveHook*[E](js: var JsonNode; prs: Preserve[E]): bool =
      return false
  of pkSequence:
    js = newJArray()
-    js.elems.setLen(prs.sequence.len)
-    for i, val in prs.sequence:
-      if not fromPreserve(js.elems[i], val):
+    js.elems.setLen(pr.sequence.len)
+    for i, val in pr.sequence:
+      if not js.elems[i].fromPreservesHook(val):
        return false
+  of pkSet:
+    js = newJArray()
+    js.elems.setLen(pr.set.len)
+    var i: int
+    for val in pr.set:
+      if not js.elems[i].fromPreservesHook(val):
+        return false
+      inc i
  of pkDictionary:
    js = newJObject()
-    for (key, val) in prs.dict.items:
-      if key.kind != pkString:
+    for (key, val) in pr.dict.items:
+      case key.kind
+      of pkSymbol:
+        var jsVal: JsonNode
+        if not jsVal.fromPreservesHook(val): return false
+        js[string key.symbol] = jsVal
+      of pkString:
+        var jsVal: JsonNode
+        if not jsVal.fromPreservesHook(val): return false
+        js[key.string] = jsVal
+      else:
        return false
-      var jsVal: JsonNode
-      if not fromPreserve(jsVal, val): return false
-      js[key.string] = jsVal
  else: return false
  true

-proc toJsonHook*[E](pr: Preserve[E]): JsonNode =
-  if not fromPreserveHook(result, pr):
+proc toJsonHook*(pr: Value): JsonNode =
+  if not result.fromPreservesHook(pr):
    raise newException(ValueError, "cannot convert Preserves value to JSON")

-proc fromJsonHook*[E](pr: var Preserve[E]; js: JsonNode) =
-  pr = toPreserveHook(js, E)
-
-when isMainModule:
-  var js = JsonNode()
-  var pr = js.toPreserveHook(void)
-  assert fromPreserveHook(js, pr)
-  fromJsonHook(pr, js)
-  js = toJsonHook(pr)
+proc fromJsonHook*(pr: var Value; js: JsonNode) =
+  pr = toPreservesHook(js)
--- a/src/preserves/parse.nim
+++ b/src/preserves/parse.nim
@ -1,108 +0,0 @@
-# SPDX-FileCopyrightText: 2021 ☭ Emery Hemingway
-# SPDX-License-Identifier: Unlicense
-
-import std/[base64, parseutils, sets, strutils, tables]
-import npeg
-import ../preserves, ./pegs
-
-type
-  Value = Preserve[void]
-  Frame = tuple[value: Value, pos: int]
-  Stack = seq[Frame]
-
-proc shrink(stack: var Stack; n: int) = stack.setLen(stack.len - n)
-
-template pushStack(v: Value) = stack.add((v, capture[0].si))
-
-proc parsePreserves*(text: string): Preserve[void] {.gcsafe.} =
-  const pegParser = peg("Document", stack: Stack):
-    # Override rules from pegs.nim
-
-    Document <- Preserves.Document
-
-    Preserves.Record <- Preserves.Record:
-      var
-        record: seq[Value]
-        labelOff: int
-      while stack[labelOff].pos < capture[0].si:
-        inc labelOff
-      for i in labelOff.succ..stack.high:
-        record.add(move stack[i].value)
-      record.add(move stack[labelOff].value)
-      stack.shrink record.len
-      pushStack Value(kind: pkRecord, record: move record)
-
-    Preserves.Sequence <- Preserves.Sequence:
-      var sequence: seq[Value]
-      for frame in stack.mitems:
-        if frame.pos > capture[0].si:
-          sequence.add(move frame.value)
-      stack.shrink sequence.len
-      pushStack Value(kind: pkSequence, sequence: move sequence)
-
-    Preserves.Dictionary <- Preserves.Dictionary:
-      var prs = Value(kind: pkDictionary)
-      for i in countDown(stack.high.pred, 0, 2):
-        if stack[i].pos < capture[0].si: break
-        prs[move stack[i].value] = stack[i.succ].value
-      stack.shrink prs.dict.len*2
-      pushStack prs
-
-    Preserves.Set <- Preserves.Set:
-      var prs = Value(kind: pkSet)
-      for frame in stack.mitems:
-        if frame.pos > capture[0].si:
-          prs.incl(move frame.value)
-      stack.shrink prs.set.len
-      pushStack prs
-
-    Preserves.Boolean <- Preserves.Boolean:
-      case $0
-      of "#f": pushStack Value(kind: pkBoolean)
-      of "#t": pushStack Value(kind: pkBoolean, bool: true)
-      else: discard
-
-    Preserves.Float <- Preserves.Float:
-      pushStack Value(kind: pkFloat, float: parseFloat($1))
-
-    Preserves.Double <- Preserves.Double:
-      pushStack Value(kind: pkDouble)
-      let i = stack.high
-      discard parseBiggestFloat($0, stack[i].value.double)
-
-    Preserves.SignedInteger <- Preserves.SignedInteger:
-      pushStack Value(kind: pkSignedInteger, int: parseInt($0))
-
-    Preserves.String <- Preserves.String:
-      pushStack Value(kind: pkString, string: unescape($0))
-
-    Preserves.charByteString <- Preserves.charByteString:
-      let s = unescape($1)
-      pushStack Value(kind: pkByteString, bytes: cast[seq[byte]](s))
-
-    Preserves.hexByteString <- Preserves.hexByteString:
-      pushStack Value(kind: pkByteString, bytes: cast[seq[byte]](parseHexStr($1)))
-
-    Preserves.b64ByteString <- Preserves.b64ByteString:
-      pushStack Value(kind: pkByteString, bytes: cast[seq[byte]](base64.decode($1)))
-
-    Preserves.Symbol <- Preserves.Symbol:
-      pushStack Value(kind: pkSymbol, symbol: $0)
-
-    Preserves.Embedded <- Preserves.Embedded:
-      var v = stack.pop.value
-      v.embedded = true
-      pushStack v
-
-    Preserves.Compact <- Preserves.Compact:
-      pushStack decodePreserves(stack.pop.value.bytes, void)
-
-  var stack: Stack
-  let match = pegParser.match(text, stack)
-  if not match.ok:
-    raise newException(ValueError, "failed to parse Preserves:\n" & text[match.matchMax..text.high])
-  assert(stack.len == 1)
-  stack.pop.value
-
-when isMainModule:
-  assert(parsePreserves("#f") == Preserve())
--- a/src/preserves/pegs.nim
+++ b/src/preserves/pegs.nim
@ -1,72 +1,82 @@
-# SPDX-FileCopyrightText: 2021 ☭ Emery Hemingway
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
 # SPDX-License-Identifier: Unlicense

+## NPEG rules for Preserves.
+## For an explanation of the syntax see https://github.com/zevv/npeg/blob/master/README.md#syntax.
+
 import npeg, npeg/lib/utf8

 when defined(nimHasUsed): {.used.}

 grammar "Preserves":

+  ws <- *{ ' ', '\t', '\r', '\n' }
+  commas <- *(ws * ',') * ws
+  delimiter <- {
+      ' ', '\t', '\r', '\n',
+      '<', '>', '[', ']', '{', '}', '(', ')',
+      '#', ':', '"', '|', '@', ';', ','
+    } | !1
+
  Document <- Value * ws * !1

-  Value <-
-      (ws * (Record | Collection | Atom | Embedded | Compact)) |
-      (ws * '@' * Value * Value) |
-      (ws * ';' * @'\n' * Value)
+  Atom <- Boolean | Double | DoubleRaw | SignedInteger | String | ByteString | Symbol

  Collection <- Sequence | Dictionary | Set

-  Atom <- Boolean | Float | Double | SignedInteger | String | ByteString | Symbol
+  Value <- ws * (
+      Record | Collection | Atom | Embedded | Compact |
+      Annotation |
+      ('#' * @'\n' * Value) )

-  Record <- '<' * Value * *Value * ws * '>'
+  Record <- '<' * +Value * ws * '>'

-  Sequence <- '[' * ws * *(Value * ws) * ']'
+  Sequence <- '[' * *(commas * Value) * commas * ']'

-  Dictionary <- '{' * ws * *(Value * ws * ':' * ws * Value * ws) * '}'
+  Dictionary <- '{' * *(commas * Value * ws * ':' * Value) * commas * '}'

-  Set <- "#{" * ws * *(Value * ws) * '}'
+  Set <- "#{" * *(commas * Value) * commas * '}'

-  Boolean <- "#f" | "#t"
+  Boolean <- '#' * {'f', 't'} * &delimiter

-  Float <- >flt * 'f'
-  Double <- flt
-  SignedInteger <- int
-
-  nat <- '0' | (Digit-'0') * *Digit
-  int <- ?'-' * nat
+  nat <- +Digit
+  int <- ?('-'|'+') * nat
  frac <- '.' * +Digit
  exp <- 'e' * ?('-'|'+') * +Digit
  flt <- int * ((frac * exp) | frac | exp)

-  stringBody <- ?escape * *( +( {'\x20'..'\xff'} - {'"'} - {'\\'}) * *escape)
-  String <- '"' * stringBody * '"'
+  Double <- >flt * &delimiter
+
+  SignedInteger <- int * &delimiter
+
+  unescaped <- utf8.any - { '\x00'..'\x19', '"', '\\', '|' }
+  unicodeEscaped <- 'u' * Xdigit[4]
+  escaped <- {'\\', '/', 'b', 'f', 'n', 'r', 't'}
+  escape <- '\\'
+
+  char <- unescaped | '|' | (escape * (escaped | '"' | unicodeEscaped))
+  String <- '"' * >(*char) * '"'
+
+  binunescaped <- {' '..'!', '#'..'[', ']'..'~'}
+  binchar <- binunescaped | (escape * (escaped | '"' | ('x' * Xdigit[2])))

  ByteString <- charByteString | hexByteString | b64ByteString
-  charByteString <- '#' * >('"' * >(*binchar) * '"')
-  hexByteString <- "#x\"" * ws * >(*(Xdigit[2] * ws)) * '"'
-  b64ByteString <- "#[" * ws * >(*(base64char * ws)) * ']'
-
-  binchar <- binunescaped | (escape * (escaped | '"' | ('x' * Xdigit[2])))
-  binunescaped <- {'\20'..'\21', '#'..'[', ']'..'~'}
+  charByteString <- "#\"" * >(*binchar) * '"'
+  hexByteString <- "#x\"" * >(*(ws * Xdigit[2])) * ws * '"'
  base64char <- {'A'..'Z', 'a'..'z', '0'..'9', '+', '/', '-', '_', '='}
+  b64ByteString <- "#[" * >(*(ws * base64char)) * ws * ']'

-  Symbol <- (symstart * *symcont) | ('|' * *symchar * '|')
+  symchar <- (utf8.any - {'\\', '|'}) | (escape * (escaped | unicodeEscaped)) | "\\|"
+  QuotedSymbol <- '|' * >(*symchar) * '|'
+  sympunct <- {'~', '!', '$', '%', '^', '&', '*', '?', '_', '=', '+', '-', '/', '.'}
+  symuchar <- utf8.any - { 0..127 }
+  SymbolOrNumber <- >(+(Alpha | Digit | sympunct | symuchar))
+  Symbol <- QuotedSymbol | (SymbolOrNumber * &delimiter)

-  symstart <- Alpha | sympunct | symustart
-  symcont <- Alpha | sympunct | symustart | symucont | Digit | '-'
-  sympunct <- {'~', '!', '$', '%', '^', '&', '*', '?', '_', '=', '+', '/', '.'}
-  symchar <- unescaped | '"' | (escape * (escaped | '|' | ('u' * Xdigit)))
-  symustart <- utf8.any - {0..127}
-  symucont <- utf8.any - {0..127}
-    # TODO: exclude some unicode ranges
+  Embedded <- "#:" * Value

-  Embedded <- "#!" * Value
+  Annotation <- '@' * Value * Value

  Compact <- "#=" * ws * ByteString

-  unescaped <- utf8.any - escaped
-  unicodeEscaped <- 'u' * Xdigit[4]
-  escaped <- '\\' * ({'{', '"', '|', '\\', 'b', 'f', 'n', 'r', 't'} | unicodeEscaped)
-  escape <- '\\'
-
-  ws <- *(' ' | '\t' | '\r' | '\n' | ',')
+  DoubleRaw <- "#xd\"" * >((ws * Xdigit[2])[8]) * ws * '"'
--- a/src/preserves/preserves_schema_nim.nim
+++ b/src/preserves/preserves_schema_nim.nim
--- a/src/preserves/preserves_schemac.nim
+++ b/src/preserves/preserves_schemac.nim
@ -0,0 +1,58 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[hashes, os, parseopt, streams, strutils, tables]
+
+import ../preserves, ./schema, ./schemaparse
+
+when isMainModule:
+  # Command-line entry point.
+  #
+  # With `--no-bundle` the single argument is a schema file that is parsed
+  # and written to stdout in binary form. Otherwise the argument is a
+  # directory that is walked recursively; every `.prs` file is parsed and
+  # collected into a `Bundle`, which is then written to stdout.
+  let outStream = newFileStream(stdout)
+  var
+    inputPath = ""
+    noBundle = false
+
+  for kind, key, arg in getopt():
+    case kind
+    of cmdEnd: discard
+    of cmdArgument:
+      if inputPath != "":
+        quit "only a single path may be specified"
+      inputPath = key
+    of cmdLongOption:
+      if arg != "":
+        quit("flag does not take an argument: " & key & " " & arg)
+      case key
+      of "no-bundle": noBundle = true
+      else: quit(key & " flag not recognized")
+    else: quit(key & " flag not recognized")
+
+  if inputPath == "":
+    quit "input file(s) not specified"
+
+  if noBundle:
+    if not fileExists inputPath:
+      quit(inputPath & " does not exist or is not a file")
+    var schema = parsePreservesSchema(readFile(inputPath))
+    write(outStream, schema.toPreserves)
+
+  else:
+    var bundle: Bundle
+    if not dirExists inputPath:
+      quit "not a directory of schemas: " & inputPath
+    else:
+      for filePath in walkDirRec(inputPath, relative = true):
+        let (dirPath, fileName, fileExt) = splitFile(filePath)
+        if fileExt == ".prs":
+          var
+            scm = parsePreservesSchema(readFile(inputPath / filePath))
+            path: ModulePath
+          # The module path is the relative directory components followed by
+          # the file name without extension. Split on both separators so that
+          # platforms using '\\' produce the same module paths.
+          for e in split(dirPath, {DirSep, AltSep}):
+            if e != "": add(path, Symbol e)
+          add(path, Symbol fileName)
+          bundle.modules[path] = scm
+      if bundle.modules.len == 0:
+        quit "no schemas parsed"
+      else:
+        write(outStream, bundle.toPreserves)
+
+  close(outStream)
--- a/src/preserves/private/Tupfile
+++ b/src/preserves/private/Tupfile
@ -0,0 +1,9 @@
+include_rules
+GROUP = $(BIN_DIR)/<preserves_encode>
+: preserves_encode.nim |> !nim |> $(BIN_DIR)/preserves_encode | $(GROUP) {bin}
+!link = |> ^o symlink %o^ ln -s preserves_encode %o |> | $(GROUP)
+: {bin} |> !link |>  $(BIN_DIR)/preserves_decode
+: {bin} |> !link |>  $(BIN_DIR)/preserves_from_json
+: {bin} |> !link |>  $(BIN_DIR)/preserves_from_xml
+: {bin} |> !link |>  $(BIN_DIR)/preserves_to_json
+: {bin} |> !link |>  $(BIN_DIR)/preserves_to_xml
--- a/src/preserves/private/buffering.nim
+++ b/src/preserves/private/buffering.nim
@ -0,0 +1,79 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[assertions, endians, options, streams, strutils]
+import bigints
+import ./decoding, ./parsing, ./values
+
+type BufferedDecoder* = object
+  ## Type for buffering binary Preserves before decoding.
+  # `stream` holds the bytes fed so far.
+  stream: StringStream
+  # `appendPosition` is where the next `feed` writes, `decodePosition` is
+  # where the next `decode`/`parse` starts reading, and `maxSize` is the
+  # limit checked by `feed` (only enforced when greater than zero).
+  appendPosition, decodePosition, maxSize: int
+
+proc newBufferedDecoder*(maxSize = 4096): BufferedDecoder =
+  ## Create a new `BufferedDecoder`.
+  ##
+  ## `maxSize` is both the initial capacity of the internal buffer and the
+  ## limit on buffered bytes: `feed` raises `IOError` once the limit would
+  ## be exceeded. A `maxSize` that is not positive disables the limit.
+  runnableExamples:
+    import std/options
+    var
+      buf = newBufferedDecoder()
+      bin = encode(parsePreserves("<foobar>"))
+    buf.feed(bin[0..2])
+    buf.feed(bin[3..bin.high])
+    # `decode` yields `none` until a complete value has been buffered.
+    let pr = decode(buf)
+    assert pr.isSome
+    assert $(pr.get) == "<foobar>"
+  BufferedDecoder(
+      stream: newStringStream(newStringOfCap(maxSize)),
+      maxSize: maxSize,
+    )
+
+proc feed*(dec: var BufferedDecoder; buf: pointer; len: int) =
+  ## Append `len` bytes read from `buf` to the end of the buffered data.
+  ##
+  ## Raises `IOError` when a positive `maxSize` would be exceeded.
+  assert len > 0
+  let limited = dec.maxSize > 0
+  if limited and (dec.appendPosition + len) > dec.maxSize:
+    raise newException(IOError, "BufferedDecoder at maximum buffer size")
+  # Always write at the append cursor; a previous decode may have moved
+  # the stream position elsewhere.
+  setPosition(dec.stream, dec.appendPosition)
+  writeData(dec.stream, buf, len)
+  dec.appendPosition += len
+  assert getPosition(dec.stream) == dec.appendPosition
+
+proc feed*[T: byte|char](dec: var BufferedDecoder; data: openarray[T]) =
+  ## Append all of `data` to the buffer; empty input is ignored.
+  if len(data) == 0:
+    return
+  feed(dec, addr data[0], len(data))
+
+proc feed*[T: byte|char](dec: var BufferedDecoder; data: openarray[T]; slice: Slice[int]) =
+  ## Append the elements of `data` selected by the inclusive `slice`;
+  ## an empty or inverted slice is ignored.
+  let count = slice.b + 1 - slice.a
+  if count <= 0:
+    return
+  feed(dec, addr data[slice.a], count)
+
+proc decode*(dec: var BufferedDecoder): Option[Value] =
+  ## Try to decode one binary value from the buffered data.
+  ##
+  ## Returns `none` when nothing is buffered or when decoding raises
+  ## `IOError`; in that case the decode cursor is left where it was.
+  if dec.appendPosition <= 0:
+    return
+  assert(dec.decodePosition < dec.appendPosition)
+  setPosition(dec.stream, dec.decodePosition)
+  try:
+    result = some(decodePreserves(dec.stream))
+    dec.decodePosition = getPosition(dec.stream)
+    let drained = dec.decodePosition == dec.appendPosition
+    if drained:
+      # Everything fed so far has been consumed: rewind and reuse the buffer.
+      setPosition(dec.stream, 0)
+      setLen(dec.stream.data, 0)
+      dec.appendPosition = 0
+      dec.decodePosition = 0
+  except IOError:
+    discard
+
+proc parse*(dec: var BufferedDecoder): Option[Value] =
+  ## Try to parse the remaining buffered data as one text-encoded value.
+  ##
+  ## Returns `none` when nothing is buffered or when parsing raises
+  ## `IOError` or `ValueError`; in that case the cursor is left where it was.
+  if dec.appendPosition <= 0:
+    return
+  assert(dec.decodePosition < dec.appendPosition)
+  setPosition(dec.stream, dec.decodePosition)
+  try:
+    result = some(parsePreserves(readAll(dec.stream)))
+    dec.decodePosition = getPosition(dec.stream)
+    let drained = dec.decodePosition == dec.appendPosition
+    if drained:
+      # Everything fed so far has been consumed: rewind and reuse the buffer.
+      setPosition(dec.stream, 0)
+      setLen(dec.stream.data, 0)
+      dec.appendPosition = 0
+      dec.decodePosition = 0
+  except IOError, ValueError:
+    discard
--- a/src/preserves/private/decoding.nim
+++ b/src/preserves/private/decoding.nim
@ -0,0 +1,144 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[endians, options, streams, strutils]
+import bigints
+import ./values
+
+proc readVarint(s: Stream): uint =
+  ## Read a base-128 varint: seven payload bits per byte, least significant
+  ## group first, with the high bit set on every byte except the last.
+  var bitOffset = 0
+  while true:
+    let octet = uint(readUint8(s))
+    if (octet and 0x80) != 0x80:
+      # Final byte: no continuation bit.
+      result = result or (octet shl bitOffset)
+      break
+    result = result or ((octet and 0x7f) shl bitOffset)
+    bitOffset += 7
+
+proc decodePreserves*(s: Stream): Value {.gcsafe.}
+
+proc decodePreserves(s: Stream; tag: uint8): Value =
+  ## Decode a Preserves value from a binary-encoded stream.
+  ##
+  ## `tag` is the tag byte of the value, already consumed from `s`.
+  ## Raises `IOError` when the stream ends early or a read comes up short,
+  ## and `ValueError` for an unexpected end marker or an unknown tag.
+  const endMarker = 0x84
+  # Booleans are complete in the tag byte; handle them before checking
+  # whether more input is available.
+  case tag
+  of 0x80: return Value(kind: pkBoolean, bool: false)
+  of 0x81: return Value(kind: pkBoolean, bool: true)
+  else: discard
+  if s.atEnd:
+    raise newException(IOError, "End of Preserves stream")
+  case tag
+  of 0x85:
+    # Two values follow; the first is decoded and dropped, the second is
+    # the result.
+    discard decodePreserves(s)
+    result = decodePreserves(s)
+  of 0x86:
+    # The following value is marked as embedded.
+    result = decodePreserves(s)
+    result.embedded = true
+  of 0x87:
+    # Float: a length byte (4 or 8) followed by big-endian IEEE754 bytes.
+    result = Value(kind: pkFloat)
+    var N: int
+    let n = int s.readUint8()
+    case n
+    of 4:
+      var
+        buf: uint32
+        float: float32
+      N = s.readData(addr buf, sizeof(buf))
+      bigEndian32(addr float, addr buf)
+      result.float = BiggestFloat float
+    of 8:
+      var buf: uint64
+      N = s.readData(addr buf, sizeof(buf))
+      bigEndian64(addr result.float, addr buf)
+    else:
+      raise newException(IOError, "unhandled IEEE754 value of " & $n & " bytes")
+    if N != n: raise newException(IOError, "short read")
+  of 0xb0:
+    # Integer: varint byte count followed by big-endian two's complement.
+    var n = int s.readVarint()
+    if n <= sizeof(int):
+      # Fits in a machine int; a zero-length payload leaves the register 0.
+      result = Value(kind: pkRegister)
+      if n > 0:
+        var
+          buf: array[sizeof(int), byte]
+          off = buf.len - n
+        if s.readData(addr buf[off], n) != n:
+          raise newException(IOError, "short read")
+        if off > 0:
+          # Sign-extend into the leading bytes that were not read.
+          var fill: uint8 = if (buf[off] and 0x80) == 0x80'u8: 0xff else: 0x00'u8
+          for i in 0..<off: buf[i] = fill
+        when buf.len == 4:
+          bigEndian32(addr result.register, addr buf[0])
+        elif buf.len == 8:
+          bigEndian64(addr result.register, addr buf[0])
+        else: {.error: "int size " & $buf.len & " not supported here".}
+    else:
+      result = Value(kind: pkBigInt)
+      var buf = newSeq[byte](n)
+      if s.readData(addr buf[0], buf.len) != n:
+        raise newException(IOError, "short read")
+      if (buf[0] and 0x80) == 0x80:
+        # Negative: invert the bytes, load the magnitude, then -(x + 1).
+        for i, b in buf: buf[i] = not b
+        result.bigint.fromBytes(buf, bigEndian)
+        result.bigint = -(result.bigint.succ)
+      else:
+        result.bigint.fromBytes(buf, bigEndian)
+  of 0xb1:
+    # String: varint length followed by that many bytes.
+    result = Value(kind: pkString, string: newString(s.readVarint()))
+    if result.string.len > 0:
+      if s.readData(addr result.string[0], result.string.len) != result.string.len:
+        raise newException(IOError, "short read")
+  of 0xb2:
+    # Byte string: varint length followed by that many bytes.
+    var data = newSeq[byte](s.readVarint())
+    if data.len > 0:
+      let n = s.readData(addr data[0], data.len)
+      if n != data.len:
+        raise newException(IOError, "short read")
+    result = Value(kind: pkByteString, bytes: data)
+  of 0xb3:
+    # Symbol: varint length followed by that many bytes.
+    var data = newString(s.readVarint())
+    if data.len > 0:
+      let n = s.readData(addr data[0], data.len)
+      if n != data.len:
+        raise newException(IOError, "short read")
+    result = Value(kind: pkSymbol, symbol: Symbol data)
+  of 0xb4:
+    # Record: the label is decoded first, then fields until the end marker.
+    # The label is stored as the last element of `record`.
+    result = Value(kind: pkRecord)
+    var label = decodePreserves(s)
+    var tag = s.readUint8()
+    while tag != endMarker:
+      result.record.add decodePreserves(s, tag)
+      tag = s.readUint8()
+    result.record.add(move label)
+  of 0xb5:
+    # Sequence: values until the end marker.
+    result = Value(kind: pkSequence)
+    var tag = s.readUint8()
+    while tag != endMarker:
+      result.sequence.add decodePreserves(s, tag)
+      tag = s.readUint8()
+  of 0xb6:
+    # Set: values until the end marker.
+    result = Value(kind: pkSet)
+    var tag = s.readUint8()
+    while tag != endMarker:
+      incl(result, decodePreserves(s, tag))
+      tag = s.readUint8()
+  of 0xb7:
+    # Dictionary: alternating keys and values until the end marker.
+    result = Value(kind: pkDictionary)
+    var tag = s.readUint8()
+    while tag != endMarker:
+      result[decodePreserves(s, tag)] = decodePreserves(s)
+      tag = s.readUint8()
+  of endMarker:
+    # An end marker where a value was expected.
+    raise newException(ValueError, "invalid Preserves stream")
+  else:
+    raise newException(ValueError, "invalid Preserves tag byte 0x" & tag.toHex(2))
+
+proc decodePreserves*(s: Stream): Value {.gcsafe.} =
+  ## Read a tag byte from `s` and decode the binary value that it starts.
+  decodePreserves(s, readUint8(s))
+
+proc decodePreserves*(s: string): Value =
+  ## Decode one value from a string holding binary-encoded Preserves.
+  result = decodePreserves(newStringStream(s))
+
+proc decodePreserves*(s: seq[byte]): Value =
+  ## Decode a byte-string of binary-encoded Preserves.
+  # The bytes are reinterpreted as a `string` and handed to the string
+  # overload.
+  decodePreserves(cast[string](s))
--- a/src/preserves/private/encoding.nim
+++ b/src/preserves/private/encoding.nim
@ -0,0 +1,124 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[algorithm, assertions, endians, streams]
+import bigints
+import ./values
+
+proc writeVarint(s: Stream; n: Natural) =
+  ## Write `n` as a base-128 varint: seven bits per byte, least significant
+  ## group first, with the high bit set on every byte except the last.
+  var rest = n
+  while rest > 0x7f:
+    write(s, uint8((rest and 0x7f) or 0x80))
+    rest = rest shr 7
+  write(s, uint8(rest and 0x7f))
+
+proc write*(str: Stream; pr: Value) =
+  ## Write the binary-encoding of a Preserves value to a stream.
+  ##
+  ## Records must hold at least their label (stored last in `record`).
+  ## Dictionary entries are emitted ordered by the binary encoding of
+  ## their keys. Raises `ValueError` for values of kind `pkEmbedded`.
+  if pr.embedded: str.write(0x86'u8)
+  case pr.kind:
+  of pkBoolean:
+    case pr.bool
+    of false: str.write(0x80'u8)
+    of true: str.write(0x81'u8)
+  of pkFloat:
+    # Tag, length 8, then the IEEE754 double in big-endian byte order.
+    # `bigEndian64` copies on big-endian hosts and swaps elsewhere.
+    str.write("\x87\x08")
+    var be: float64
+    bigEndian64(addr be, addr pr.float)
+    str.write(be)
+  of pkRegister:
+    if pr.register == 0: str.write("\xb0\x00")
+    else:
+      const bufLen = sizeof(int)
+      var buf: array[bufLen, byte]
+      when bufLen == 4: bigEndian32(addr buf[0], addr pr.register)
+      elif bufLen == 8: bigEndian64(addr buf[0], addr pr.register)
+      else: {.error: "int size " & $bufLen & " not supported here".}
+      # Find the shortest two's complement form. Leading 0x00/0xff bytes are
+      # redundant while the following byte carries the same sign bit; when
+      # the first byte is neither, every byte is significant.
+      var start = 0
+      if buf[0] == 0x00 or buf[0] == 0xff:
+        while start < buf.high and buf[0] == buf[succ start]: inc start
+        if start < buf.high and (buf[succ start] and 0x80) == (buf[0] and 0x80): inc start
+      str.write('\xb0')
+      str.write(uint8(bufLen - start))
+      str.writeData(addr buf[start], bufLen - start)
+  of pkBigInt:
+    if pr.bigint.isZero: str.write("\xb0\x00")
+    elif pr.bigint.isNegative:
+      # Two's complement of a negative x is the bitwise inverse of -(x + 1).
+      var buf = pr.bigint.succ.toBytes(bigEndian)
+      for i, b in buf: buf[i] = not b
+      str.write('\xb0')
+      if (buf[0] and 0x80) != 0x80:
+        # Prepend a sign byte so that the value reads back as negative.
+        str.writeVarint(buf.len.succ)
+        str.write('\xff')
+      else:
+        str.writeVarint(buf.len)
+      str.write(cast[string](buf))
+    else:
+      var buf = pr.bigint.toBytes(bigEndian)
+      str.write('\xb0')
+      if (buf[0] and 0x80) != 0:
+        # Prepend a zero byte so that the value reads back as positive.
+        str.writeVarint(buf.len.succ)
+        str.write('\x00')
+      else:
+        str.writeVarint(buf.len)
+      str.write(cast[string](buf))
+  of pkString:
+    str.write(0xb1'u8)
+    str.writeVarint(pr.string.len)
+    str.write(pr.string)
+  of pkByteString:
+    str.write(0xb2'u8)
+    str.writeVarint(pr.bytes.len)
+    str.write(cast[string](pr.bytes))
+  of pkSymbol:
+    str.write(0xb3'u8)
+    str.writeVarint(pr.symbol.len)
+    str.write(string pr.symbol)
+  of pkRecord:
+    assert(pr.record.len > 0)
+    str.write(0xb4'u8)
+    # The label is kept last in memory but goes first on the wire.
+    str.write(pr.record[pr.record.high])
+    for i in 0..<pr.record.high:
+      str.write(pr.record[i])
+    str.write(0x84'u8)
+  of pkSequence:
+    str.write(0xb5'u8)
+    for e in pr.sequence:
+      str.write(e)
+    str.write(0x84'u8)
+  of pkSet:
+    str.write(0xb6'u8)
+    for val in pr.set.items:
+      str.write(val)
+    str.write(0x84'u8)
+  of pkDictionary:
+    var
+      keyIndices = newSeqOfCap[(string, int)](pr.dict.len)
+      keyBuffer = newStringStream()
+    for i in 0..pr.dict.high:
+      keyBuffer.write(pr.dict[i][0])
+      keyIndices.add((keyBuffer.data.move, i))
+      keyBuffer.setPosition(0)
+        # add each encoded key and its index to the seq
+    sort(keyIndices) do (a, b: (string, int)) -> int:
+      cmp(a[0], b[0])
+        # sort the seq by encoded keys
+    str.write(0xb7'u8)
+    for (keyBytes, i) in keyIndices:
+      str.write(keyBytes)
+      str.write(pr.dict[i][1])
+        # encode the values in sorted key order
+    str.write(0x84'u8)
+  of pkEmbedded:
+    # str.write(0x86'u8)
+    raise newException(ValueError, "cannot encode an embedded object")
+
+proc encode*(pr: Value): seq[byte] =
+  ## Encode `pr` to binary Preserves and return the bytes.
+  let buffer = newStringStream()
+  write(buffer, pr)
+  # Take the stream's backing string and reinterpret it as bytes.
+  result = cast[seq[byte]](move buffer.data)
--- a/src/preserves/private/macros.nim
+++ b/src/preserves/private/macros.nim
@ -0,0 +1,164 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+import std/[assertions, macros]
+
+const
+  nnkPragmaCallKinds = {nnkExprColonExpr, nnkCall, nnkCallStrLit}
+
+proc extractTypeImpl(n: NimNode): NimNode =
+  ## Resolve `n` to the node of its type definition: symbols are followed
+  ## through `getImpl`, type definitions yield their body, and object, ref
+  ## and ptr type nodes are returned unchanged.
+  case n.kind
+  of nnkSym:
+    # a symbol: look at whatever it was declared as
+    result = extractTypeImpl(getImpl(n))
+  of nnkObjectTy, nnkRefTy, nnkPtrTy:
+    result = n
+  of nnkBracketExpr:
+    if n.typeKind != ntyTypeDesc:
+      doAssert n.typeKind == ntyGenericInst
+      result = getImpl(n[0])
+    else:
+      result = extractTypeImpl(n[1])
+  of nnkTypeDef:
+    result = n[2]
+  else:
+    error("Invalid node to retrieve type implementation of: " & $n.kind)
+
+proc customPragmaNode(n: NimNode): NimNode =
+  ## Find the pragma list attached to whatever `n` refers to: a proc type,
+  ## a type, a routine or variable symbol, or an object field accessed
+  ## through a dot expression. Falls through without assigning a result
+  ## when nothing matches.
+  expectKind(n, {nnkSym, nnkDotExpr, nnkBracketExpr, nnkTypeOfExpr, nnkType, nnkCheckedFieldExpr})
+  var
+    typ = n.getTypeInst()
+
+  if typ.kind == nnkBracketExpr and typ.len > 1 and typ[1].kind == nnkProcTy:
+    return typ[1][1]
+  elif typ.typeKind == ntyTypeDesc:
+    typ = typ[1]
+    # strip generic instantiation brackets to reach the type symbol
+    while kind(typ) == nnkBracketExpr:
+      typ = typ[0]
+    let impl = getImpl(typ)
+    if impl.kind == nnkNilLit:
+      return impl
+    elif impl[0].kind == nnkPragmaExpr:
+      return impl[0][1]
+    else:
+      return impl[0] # handle types which don't have macro at all
+
+  if n.kind == nnkSym: # either an variable or a proc
+    let impl = n.getImpl()
+    if impl.kind in RoutineNodes:
+      return impl.pragma
+    elif impl.kind == nnkIdentDefs and impl[0].kind == nnkPragmaExpr:
+      return impl[0][1]
+    else:
+      # fall back to the pragmas of the symbol's type
+      let timpl = typ.getImpl()
+      if timpl.len>0 and timpl[0].len>1:
+        return timpl[0][1]
+      else:
+        return timpl
+
+  if n.kind in {nnkDotExpr, nnkCheckedFieldExpr}:
+    # Field access: search the object definition, then its parents, for a
+    # field with a matching name that carries pragmas.
+    let name = $(if n.kind == nnkCheckedFieldExpr: n[0][1] else: n[1])
+    var typInst = getTypeInst(if n.kind == nnkCheckedFieldExpr or n[0].kind == nnkHiddenDeref: n[0][0] else: n[0])
+    while typInst.kind in {nnkVarTy, nnkBracketExpr}:
+      typInst = typInst[0]
+    var typDef = getImpl(typInst)
+    while typDef != nil:
+      typDef.expectKind(nnkTypeDef)
+      let typ = typDef[2].extractTypeImpl()
+      if typ.kind notin {nnkRefTy, nnkPtrTy, nnkObjectTy}: break
+      let isRef = typ.kind in {nnkRefTy, nnkPtrTy}
+      if isRef and typ[0].kind in {nnkSym, nnkBracketExpr}: # defines ref type for another object(e.g. X = ref X)
+        typDef = getImpl(typ[0])
+      else: # object definition, maybe an object directly defined as a ref type
+        let
+          obj = (if isRef: typ[0] else: typ)
+        # Walk the record definition iteratively using an explicit stack of
+        # nodes still to visit.
+        var identDefsStack = newSeq[NimNode](obj[2].len)
+        for i in 0..<identDefsStack.len: identDefsStack[i] = obj[2][i]
+        while identDefsStack.len > 0:
+          var identDefs = identDefsStack.pop()
+
+          case identDefs.kind
+          of nnkRecList:
+            for child in identDefs.children:
+              identDefsStack.add(child)
+          of nnkRecCase:
+            # Add condition definition
+            identDefsStack.add(identDefs[0])
+            # Add branches
+            for i in 1 ..< identDefs.len:
+              identDefsStack.add(identDefs[i].last)
+          else:
+            # the last two children are skipped; only the field names are
+            # examined
+            for i in 0 .. identDefs.len - 3:
+              let varNode = identDefs[i]
+              if varNode.kind == nnkPragmaExpr:
+                var varName = varNode[0]
+                if varName.kind == nnkPostfix:
+                  # This is a public field. We are skipping the postfix *
+                  varName = varName[1]
+                if eqIdent($varName, name):
+                  return varNode[1]
+
+        if obj[1].kind == nnkOfInherit: # explore the parent object
+          typDef = getImpl(obj[1][0])
+        else:
+          typDef = nil
+
+macro hasCustomPragma*(n: typed, cp: typed{nkSym}): untyped =
+  ## Expands to `true` when `n` carries the custom pragma `cp`, otherwise
+  ## to `false`. `n` is expected to be a field access (`nnkDotExpr`), a
+  ## proc or a type.
+  ##
+  ## See also `getCustomPragmaVal`.
+  ##
+  ## .. code-block:: nim
+  ##   template myAttr() {.pragma.}
+  ##   type
+  ##     MyObj = object
+  ##       myField {.myAttr.}: int
+  ##
+  ##   proc myProc() {.myAttr.} = discard
+  ##
+  ##   var o: MyObj
+  ##   assert(o.myField.hasCustomPragma(myAttr))
+  ##   assert(myProc.hasCustomPragma(myAttr))
+  for p in customPragmaNode(n):
+    if p.kind == nnkSym:
+      # pragma written without arguments
+      if p == cp:
+        return newLit(true)
+    elif p.kind in nnkPragmaCallKinds and p.len > 0 and
+        p[0].kind == nnkSym and p[0] == cp:
+      # pragma written with arguments
+      return newLit(true)
+  return newLit(false)
+
+macro getCustomPragmaVal*(n: typed, cp: typed{nkSym}): untyped =
+  ## Expands to value of custom pragma `cp` of expression `n` which is expected
+  ## to be `nnkDotExpr`, a proc or a type.
+  ##
+  ## A pragma with a single argument expands to that argument; a pragma with
+  ## several arguments expands to a tuple constructor whose field names are
+  ## taken from the parameters of the pragma template.
+  ##
+  ## See also `hasCustomPragma`
+  ##
+  ## .. code-block:: nim
+  ##   template serializationKey(key: string) {.pragma.}
+  ##   type
+  ##     MyObj {.serializationKey: "mo".} = object
+  ##       myField {.serializationKey: "mf".}: int
+  ##   var o: MyObj
+  ##   assert(o.myField.getCustomPragmaVal(serializationKey) == "mf")
+  ##   assert(o.getCustomPragmaVal(serializationKey) == "mo")
+  ##   assert(MyObj.getCustomPragmaVal(serializationKey) == "mo")
+  result = nil
+  let pragmaNode = customPragmaNode(n)
+  for p in pragmaNode:
+    if p.kind in nnkPragmaCallKinds and p.len > 0 and p[0].kind == nnkSym and p[0] == cp:
+      if p.len == 2 or (p.len == 3 and p[1].kind == nnkSym and p[1].symKind == nskType):
+        result = p[1]
+      else:
+        # pair each formal parameter of the pragma template with the
+        # argument given at the use site
+        let def = p[0].getImpl[3]
+        result = newTree(nnkPar)
+        for i in 1 ..< def.len:
+          let key = def[i][0]
+          let val = p[i]
+          result.add newTree(nnkExprColonExpr, key, val)
+      break
+  if result.kind == nnkEmpty:
+    error(n.repr & " doesn't have a pragma named " & cp.repr()) # returning an empty node results in most cases in a cryptic error,
--- a/src/preserves/private/parsing.nim
+++ b/src/preserves/private/parsing.nim
@ -0,0 +1,271 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[assertions, base64, options, parseutils, strutils, unicode]
+from std/sequtils import insert
+
+import bigints, npeg
+
+import ../pegs
+import ./decoding, ./values
+
+type
+  Frame = tuple[value: Value, pos: int]
+  Stack = seq[Frame]
+
+proc shrink(stack: var Stack; n: int) =
+  ## Drop the last `n` frames from `stack`.
+  setLen(stack, len(stack) - n)
+
+template pushStack(v: Value) =
+  ## Push `v` with the start offset of the current capture; expects `stack`
+  ## and `capture` to be in scope where it is expanded.
+  add(stack, (v, capture[0].si))
+
+proc joinWhitespace(s: string): string =
+  ## Return `s` with all whitespace characters and commas removed.
+  const separators = Whitespace + {','}
+  result = newStringOfCap(len(s))
+  for ch in s:
+    if ch notin separators:
+      result.add ch
+
+template unescape*(buf: var string; capture: string) =
+  ## Append `capture` to `buf` with backslash escapes decoded.
+  ##
+  ## Recognised escapes are `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`, `\"`,
+  ## `\|` and `\uXXXX`. A high surrogate must be followed directly by a
+  ## `\uXXXX` low surrogate; the pair is combined into a single code point.
+  ## Raises `ValueError` for a broken surrogate pair. The `validate` calls
+  ## are resolved where the template is expanded (presumably inside an npeg
+  ## code block - confirm at call sites).
+  var i: int
+  while i < len(capture):
+    if capture[i] == '\\':
+      inc(i)
+      case capture[i]
+      of '\\': add(buf, char 0x5c)
+      of '/': add(buf, char 0x2f)
+      of 'b': add(buf, char 0x08)
+      of 'f': add(buf, char 0x0c)
+      of 'n': add(buf, char 0x0a)
+      of 'r': add(buf, char 0x0d)
+      of 't': add(buf, char 0x09)
+      of '"': add(buf, char 0x22)
+      of '|': add(buf, char 0x7c)
+        # quoted symbols may escape their delimiter
+      of 'u':
+        var short: uint16
+        inc(i)
+        discard parseHex(capture, short, i, 4)
+        inc(i, 3)
+          # `i` now rests on the last hex digit
+        if (short shr 11) != 0b11011:
+          # Any code unit outside U+D800..U+DFFF is a complete code point,
+          # including those at U+8000 and above.
+          add(buf, Rune(short).toUtf8)
+        elif (short shr 10) == 0b110110:
+          # High surrogate: six more characters (`\uXXXX`) must follow.
+          if i+6 >= capture.len:
+            raise newException(ValueError, "Invalid UTF-16 surrogate pair")
+          # Widen before shifting so that all ten payload bits survive.
+          var rune = ((uint32(short) and 0x3ff) shl 10) + 0x10000
+          validate(capture[i+1] == '\\')
+          validate(capture[i+2] == 'u')
+          inc(i, 3)
+          discard parseHex(capture, short, i, 4)
+          if (short shr 10) != 0b110111:
+            raise newException(ValueError, "Invalid UTF-16 surrogate pair")
+          inc(i, 3)
+          rune = rune or (uint32(short) and 0x3ff)
+          # Code points from surrogate pairs always take four UTF-8 bytes.
+          let j = buf.len
+          buf.setLen(buf.len+4)
+          rune.Rune.fastToUTF8Copy(buf, j, false)
+        else:
+          # A low surrogate without a preceding high surrogate.
+          raise newException(ValueError, "Invalid UTF-16 escape sequence " & capture)
+      else:
+        validate(false)
+    else:
+      add(buf, capture[i])
+    inc(i)
+
+template unescape(buf: var seq[byte]; capture: string) =
+  ## Append `capture` to `buf` as bytes with backslash escapes decoded,
+  ## including `\xHH` for arbitrary byte values. Any other escape letter
+  ## ends in `validate(false)`.
+  var pos: int
+  while pos < len(capture):
+    if capture[pos] != '\\':
+      add(buf, byte capture[pos])
+    else:
+      inc(pos)
+      case capture[pos]
+      of '\\': add(buf, byte('\\'))
+      of '/': add(buf, byte('/'))
+      of 'b': add(buf, byte('\b'))
+      of 'f': add(buf, byte('\f'))
+      of 'n': add(buf, byte('\n'))
+      of 'r': add(buf, byte('\r'))
+      of 't': add(buf, byte('\t'))
+      of '"': add(buf, byte('"'))
+      of 'x':
+        var octet: byte
+        inc(pos)
+        discard parseHex(capture, octet, pos, 2)
+        inc(pos)
+          # leave `pos` on the second hex digit
+        add(buf, octet)
+      else:
+        validate(false)
+    inc(pos)
+
+proc pushHexNibble[T](result: var T; c: char) =
+  ## Shift `result` left by four bits and OR in the value of hex digit `c`.
+  ## Characters that are not hex digits leave `result` untouched.
+  let nibble =
+    if c in {'0'..'9'}: T(ord(c) - ord('0'))
+    elif c in {'a'..'f'}: T(ord(c) - ord('a') + 10)
+    elif c in {'A'..'F'}: T(ord(c) - ord('A') + 10)
+    else: return
+  result = (result shl 4) or nibble
+
+proc parsePreserves*(text: string): Value =
+  ## Parse a text-encoded Preserves `string` to a Preserves `Value`.
+  ##
+  ## Raises `ValueError` when `text` does not match the grammar or when a
+  ## string contains invalid UTF-8.
+  # Parsed values are kept on a stack together with the input offset at
+  # which they started. Compound rules collect the frames that belong to
+  # them by comparing those offsets with the start of their own capture.
+  let pegParser = peg("Document", stack: Stack):
+    # Override rules from pegs.nim
+
+    Document <- Preserves.Document
+
+    Preserves.Record <- Preserves.Record:
+      var
+        record: seq[Value]
+        labelOff: int
+      # the label is the first frame that starts inside this capture
+      while stack[labelOff].pos < capture[0].si:
+        inc labelOff
+      for i in labelOff.succ..stack.high:
+        record.add(move stack[i].value)
+      # the label goes last in the record seq
+      record.add(move stack[labelOff].value)
+      stack.shrink record.len
+      pushStack Value(kind: pkRecord, record: move record)
+
+    Preserves.Sequence <- Preserves.Sequence:
+      var sequence: seq[Value]
+      for frame in stack.mitems:
+        if frame.pos > capture[0].si:
+          sequence.add(move frame.value)
+      stack.shrink sequence.len
+      pushStack Value(kind: pkSequence, sequence: move sequence)
+
+    Preserves.Dictionary <- Preserves.Dictionary:
+      var prs = Value(kind: pkDictionary)
+      # pop value/key pairs from the top until a frame from before this
+      # capture is reached
+      for i in countDown(stack.high.pred, 0, 2):
+        if stack[i].pos < capture[0].si: break
+        var
+          val = stack.pop.value
+          key = stack.pop.value
+        # a repeated key fails the match
+        for j in 0..prs.dict.high:
+          validate(prs.dict[j].key != key)
+        prs[key] = val
+      pushStack prs
+
+    Preserves.Set <- Preserves.Set:
+      var prs = Value(kind: pkSet)
+      for frame in stack.mitems:
+        if frame.pos > capture[0].si:
+          # a repeated element fails the match
+          for e in prs.set: validate(e != frame.value)
+          prs.incl(move frame.value)
+      stack.shrink prs.set.len
+      pushStack prs
+
+    Preserves.Boolean <- Preserves.Boolean:
+      case $0
+      of "#f": pushStack Value(kind: pkBoolean)
+      of "#t": pushStack Value(kind: pkBoolean, bool: true)
+      else: discard
+
+    Preserves.Double <- Preserves.Double:
+      pushStack Value(kind: pkFloat, float: parseFloat($1))
+
+    Preserves.DoubleRaw <- Preserves.DoubleRaw:
+      # the capture holds hex digits, possibly separated by whitespace;
+      # non-hex characters are skipped by pushHexNibble
+      var reg: uint64
+      for c in $1: pushHexNibble(reg, c)
+      pushStack Value(kind: pkFloat, float: cast[float64](reg))
+
+    Preserves.SignedInteger <- Preserves.SignedInteger:
+      # use a machine int when the value fits, otherwise keep the bigint
+      var
+        big = initBigInt($0)
+        small = toInt[int](big)
+      if small.isSome:
+        pushStack Value(kind: pkRegister, register: small.get)
+      else:
+        pushStack Value(kind: pkBigInt, bigint: big)
+
+    Preserves.String <- Preserves.String:
+      var v = Value(kind: pkString, string: newStringOfCap(len($1)))
+      unescape(v.string, $1)
+      if validateUtf8(v.string) != -1:
+        raise newException(ValueError, "Preserves text contains an invalid UTF-8 sequence")
+      pushStack v
+
+    Preserves.charByteString <- Preserves.charByteString:
+      var v = Value(kind: pkByteString, bytes: newSeqOfCap[byte](len($1)))
+      unescape(v.bytes, $1)
+      pushStack v
+
+    Preserves.hexByteString <- Preserves.hexByteString:
+      pushStack Value(kind: pkByteString, bytes: cast[seq[byte]](parseHexStr(joinWhitespace($1))))
+
+    Preserves.b64ByteString <- Preserves.b64ByteString:
+      pushStack Value(kind: pkByteString, bytes: cast[seq[byte]](base64.decode(joinWhitespace($1))))
+
+    Preserves.Symbol <- Preserves.Symbol:
+      var buf = newStringOfCap(len($1))
+      unescape(buf, $1)
+      pushStack Value(kind: pkSymbol, symbol: Symbol buf)
+
+    Preserves.Embedded <- Preserves.Embedded:
+      var v = stack.pop.value
+      v.embedded = true
+      pushStack v
+
+    Preserves.Annotation <- Preserves.Annotation:
+      # keep the annotated value and drop the annotation beneath it
+      var val = stack.pop.value
+      discard stack.pop.value
+      pushStack val
+
+    Preserves.Compact <- Preserves.Compact:
+      # the byte string on top of the stack holds a binary-encoded value
+      pushStack decodePreserves(stack.pop.value.bytes)
+
+  var stack: Stack
+  let match = pegParser.match(text, stack)
+  if not match.ok:
+    # report the input from the furthest position the parser reached
+    raise newException(ValueError, "failed to parse Preserves:\n" & text[match.matchMax..text.high])
+  assert(stack.len == 1)
+  stack.pop.value
+
+proc parsePreservesAtom*(text: string): Atom =
+  ## Parse a text-encoded Preserves `string` to a Preserves `Atom`.
+  ##
+  ## An optional leading `#:` is accepted and ignored. Raises `ValueError`
+  ## when `text` does not match or when a string contains invalid UTF-8.
+  # NOTE(review): this proc overrides `Preserves.Float`/`Preserves.FloatRaw`
+  # while `parsePreserves` overrides `Preserves.Double`/`Preserves.DoubleRaw`;
+  # confirm that both rule names exist in pegs.nim.
+  let pegParser = peg("Atom", a: Atom):
+    # Override rules from pegs.nim
+
+    Atom <- ?"#:" * Preserves.Atom
+
+    Preserves.Boolean <- Preserves.Boolean:
+      case $0
+      of "#f": a = Atom(kind: pkBoolean)
+      of "#t": a = Atom(kind: pkBoolean, bool: true)
+      else: discard
+
+    Preserves.Float <- Preserves.Float:
+      a = Atom(kind: pkFloat)
+      # the whole capture must be consumed by the float parser
+      validate(parseBiggestFloat($0, a.float) == len($0))
+
+    Preserves.FloatRaw <- Preserves.FloatRaw:
+      var reg: uint64
+      for c in $1: pushHexNibble(reg, c)
+      a = Atom(kind: pkFloat, float: cast[float64](reg))
+
+    Preserves.SignedInteger <- Preserves.SignedInteger:
+      # use a machine int when the value fits, otherwise keep the bigint
+      var
+        big = initBigInt($0)
+        small = toInt[int](big)
+      if small.isSome:
+        a = Atom(kind: pkRegister, register: small.get)
+      else:
+        a = Atom(kind: pkBigInt, bigint: big)
+
+    Preserves.String <- Preserves.String:
+      a = Atom(kind: pkString, string: newStringOfCap(len($1)))
+      unescape(a.string, $1)
+      if validateUtf8(a.string) != -1:
+        raise newException(ValueError, "Preserves text contains an invalid UTF-8 sequence")
+
+    Preserves.charByteString <- Preserves.charByteString:
+      a = Atom(kind: pkByteString, bytes: newSeqOfCap[byte](len($1)))
+      unescape(a.bytes, $1)
+
+    Preserves.hexByteString <- Preserves.hexByteString:
+      a = Atom(kind: pkByteString, bytes: cast[seq[byte]](parseHexStr(joinWhitespace($1))))
+
+    Preserves.b64ByteString <- Preserves.b64ByteString:
+      a = Atom(kind: pkByteString, bytes: cast[seq[byte]](base64.decode(joinWhitespace($1))))
+
+    Preserves.Symbol <- Preserves.Symbol:
+      var buf = newStringOfCap(len($1))
+      unescape(buf, $1)
+      a = Atom(kind: pkSymbol, symbol: Symbol buf)
+
+  # the parser writes directly into `result`
+  if not pegParser.match(text, result).ok:
+    raise newException(ValueError, "failed to parse Preserves atom: " & text)
--- a/src/preserves/private/preserves_encode.nim
+++ b/src/preserves/private/preserves_encode.nim
@ -1,10 +1,10 @@
 # SPDX-FileCopyrightText: 2021 ☭ Emery Hemingway
 # SPDX-License-Identifier: Unlicense

-import std/[json, options, streams]
+import std/[json, options, streams, xmlparser, xmltree]
 from std/os import extractFilename, paramStr

-import ../../preserves, ../../preserves/jsonhooks, ../../preserves/parse
+import ../../preserves, ../jsonhooks, ../xmlhooks

 when isMainModule:
  let command = extractFilename(paramStr 0)
@ -19,16 +19,28 @@ when isMainModule:
    of "preserves_from_json":
      let
        js = stdin.newFileStream.parseJson
-        pr = js.toPreserve
+        pr = js.toPreserves
+      stdout.newFileStream.write(pr)
+    of "preserves_from_xml":
+      let
+        xn = stdin.newFileStream.parseXml
+        pr = xn.toPreservesHook()
      stdout.newFileStream.write(pr)
    of "preserves_to_json":
      let
        pr = stdin.readAll.decodePreserves
-        js = preserveTo(pr, JsonNode)
+        js = preservesTo(pr, JsonNode)
      if js.isSome:
        stdout.writeLine(get js)
      else:
        quit("Preserves not convertable to JSON")
+    of "preserves_to_xml":
+      let pr = stdin.readAll.decodePreserves
+      var xn: XmlNode
+      if fromPreserves(xn, pr):
+        stdout.writeLine(xn)
+      else:
+        quit("Preserves not convertable to XML")
    else:
      quit("no behavior defined for " & command)
  except:
--- a/src/preserves/private/texts.nim
+++ b/src/preserves/private/texts.nim
@ -0,0 +1,168 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[assertions, base64, endians, sequtils, streams, strutils]
+when not defined(nimNoLibc):
+  import std/math
+import bigints
+import ./values
+
+const hexAlphabet = "0123456789abcdef"
+
+type TextMode* = enum textPreserves, textJson
+
+template writeEscaped(stream: Stream; text: string; delim: char) =
+  ## Write `text` to `stream` with escaping applied.
+  ##
+  ## `delim` is written as a backslash followed by `delim`; backslash,
+  ## backspace, form feed, newline, carriage return and tab get their
+  ## two-character escapes; the remaining bytes in 0x00..0x1f plus 0x7f
+  ## are written as `\u00` followed by two hex digits. Every other byte
+  ## is copied through unchanged. The surrounding delimiters themselves
+  ## are not written here.
+  ##
+  ## `delim` is used as a `case` label, and the call sites in this module
+  ## pass a character literal.
+  const escaped = { '"', '\\', '\b', '\f', '\n', '\r', '\t' }
+    # characters that already have a dedicated branch below
+  var
+    i: int
+    c: char
+  while i < text.len:
+    c = text[i]
+    case c
+    of delim:
+      write(stream, '\\')
+      write(stream, delim)
+    of '\\': write(stream, "\\\\")
+    of '\b': write(stream, "\\b")
+    of '\f': write(stream, "\\f")
+    of '\n': write(stream, "\\n")
+    of '\r': write(stream, "\\r")
+    of '\t': write(stream, "\\t")
+    of { '\x00'..'\x1f', '\x7f' } - escaped:
+      # do not use \x__ notation because
+      # it is a subset of \u____.
+      write(stream, "\\u00")
+      write(stream, c.uint8.toHex(2))
+    else: write(stream, c)
+    inc i
+
+proc writeSymbol(stream: Stream; sym: string) =
+  ## Write `sym` to `stream` in Preserves text syntax.
+  ##
+  ## The symbol is written bare only when it starts with an ASCII letter
+  ## and every byte is a letter, digit, one of the punctuation characters
+  ## listed in `bareRest`, or a non-ASCII (UTF-8) byte. Anything else -
+  ## the empty symbol, whitespace, control characters, quotes and
+  ## structural delimiters such as `<`, `[`, `{`, `:` or `#` - is written
+  ## in the `|...|` quoted form so that the output reads back as a single
+  ## symbol.
+  const
+    bareStart = {'A'..'Z', 'a'..'z'}
+    bareRest = bareStart + {'0'..'9'} +
+      {'~', '!', '$', '%', '^', '&', '*', '?', '_', '=', '+', '-', '/', '.'} +
+      {'\x80'..'\xff'}
+        # non-ASCII bytes are passed through without Unicode classification
+  if sym.len > 0 and sym[0] in bareStart and sym.allIt(it in bareRest):
+    write(stream, sym)
+  else:
+    write(stream, '|')
+    writeEscaped(stream, sym, '|')
+    write(stream, '|')
+
+proc writeFloatBytes(stream: Stream; f: float) =
+  ## Write `f` as `#xd"` followed by the sixteen lowercase hex digits of
+  ## its eight bytes in big-endian order and a closing `"`.
+  var raw: array[8, byte]
+  bigEndian64(addr raw[0], addr f)
+  stream.write("#xd\"")
+  for octet in raw:
+    let
+      upper = hexAlphabet[octet shr 4]
+      lower = hexAlphabet[octet and 0xf]
+    stream.write(upper)
+    stream.write(lower)
+  stream.write('"')
+
+proc writeText*(stream: Stream; pr: Value; mode = textPreserves) =
+  ## Encode Preserves to a `Stream` as text.
+  ##
+  ## `mode` only affects sequences and dictionaries: `textPreserves`
+  ## separates items with a space and writes `": "` between key and value,
+  ## `textJson` separates items with `,` and writes `:` between key and
+  ## value. All other kinds are written the same way in both modes.
+  ##
+  ## A value with the `embedded` flag set, and every `pkEmbedded` value,
+  ## is prefixed with `#:`.
+  if pr.embedded: write(stream, "#:")
+  case pr.kind:
+  of pkBoolean:
+    case pr.bool
+    of false: write(stream, "#f")
+    of true: write(stream, "#t")
+  of pkFloat:
+    when defined(nimNoLibc):
+      writeFloatBytes(stream, pr.float)
+        # IEE754-to-decimal is non-trivial
+    else:
+      # Only normal floats and zeroes are written in decimal; everything
+      # else (NaN, infinities, subnormals) uses the raw-bytes form.
+      if pr.float.classify in {fcNormal, fcZero, fcNegZero}:
+        write(stream, $pr.float)
+      else:
+        writeFloatBytes(stream, pr.float)
+  of pkRegister:
+    write(stream, $pr.register)
+  of pkBigInt:
+    write(stream, $pr.bigint)
+  of pkString:
+    write(stream, '"')
+    writeEscaped(stream, pr.string, '"')
+    write(stream, '"')
+  of pkByteString:
+    # Bytes are copied verbatim into the `#"..."` form, so it is only used
+    # when every byte is printable ASCII other than `"` and `\`. A
+    # backslash written verbatim would be read back as an escape sequence.
+    const verbatimBytes = {' '..'!', '#'..'[', ']'..'~'}
+    if pr.bytes.allIt(char(it) in verbatimBytes):
+      write(stream, "#\"")
+      write(stream, cast[string](pr.bytes))
+      write(stream, '"')
+    else:
+      if pr.bytes.len > 64:
+        write(stream, "#[") #]#
+        write(stream, base64.encode(pr.bytes))
+        write(stream, ']')
+      else:
+        write(stream, "#x\"")
+        for b in pr.bytes:
+          write(stream, hexAlphabet[b.int shr 4])
+          write(stream, hexAlphabet[b.int and 0xf])
+        write(stream, '"')
+  of pkSymbol:
+    writeSymbol(stream, pr.symbol.string)
+  of pkRecord:
+    assert(pr.record.len > 0)
+    # The label is stored last but written first.
+    write(stream, '<')
+    writeText(stream, pr.record[pr.record.high], mode)
+    for i in 0..<pr.record.high:
+      write(stream, ' ')
+      writeText(stream, pr.record[i], mode)
+    write(stream, '>')
+  of pkSequence:
+    write(stream, '[')
+    var insertSeperator: bool
+    case mode
+    of textPreserves:
+      for val in pr.sequence:
+        if insertSeperator: write(stream, ' ')
+        else: insertSeperator = true
+        writeText(stream, val, mode)
+    of textJson:
+      for val in pr.sequence:
+        if insertSeperator: write(stream, ',')
+        else: insertSeperator = true
+        writeText(stream, val, mode)
+    write(stream, ']')
+  of pkSet:
+    write(stream, "#{")
+    var insertSeperator: bool
+    for val in pr.set.items:
+      if insertSeperator: write(stream, ' ')
+      else: insertSeperator = true
+      writeText(stream, val, mode)
+    write(stream, '}')
+  of pkDictionary:
+    write(stream, '{')
+    var insertSeperator: bool
+    case mode
+    of textPreserves:
+      for (key, value) in pr.dict.items:
+        if insertSeperator: write(stream, ' ')
+        else: insertSeperator = true
+        writeText(stream, key, mode)
+        write(stream, ": ")
+        writeText(stream, value, mode)
+    of textJson:
+      for (key, value) in pr.dict.items:
+        if insertSeperator: write(stream, ',')
+        else: insertSeperator = true
+        writeText(stream, key, mode)
+        write(stream, ':')
+        writeText(stream, value, mode)
+    write(stream, '}')
+  of pkEmbedded:
+    # The `#:` prefix was already written above when the flag is set.
+    if not pr.embedded: write(stream, "#:")
+    if pr.embeddedRef.isNil:
+      write(stream, "<null>")
+    else:
+      when compiles($pr.embed):
+        write(stream, $pr.embed)
+      else:
+        write(stream, "…")
+
+proc `$`*(sym: Symbol): string =
+  ## Render `sym` the way `writeSymbol` writes it to a stream.
+  let buffer = newStringStream()
+  buffer.writeSymbol(string(sym))
+  move buffer.data
+
+proc `$`*(pr: Value): string =
+  ## Produce the text form of `pr` by running `writeText` in
+  ## `textPreserves` mode over an in-memory stream.
+  let buffer = newStringStream()
+  buffer.writeText(pr, textPreserves)
+  move buffer.data
--- a/src/preserves/private/values.nim
+++ b/src/preserves/private/values.nim
@ -0,0 +1,266 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[algorithm, hashes, options, sets, sequtils, tables]
+import bigints
+
+type
+  PreserveKind* = enum
+    pkBoolean, pkFloat, pkRegister, pkBigInt, pkString, pkByteString, pkSymbol,
+        pkRecord, pkSequence, pkSet, pkDictionary, pkEmbedded
+
+const
+  atomKinds* = {pkBoolean, pkFloat, pkRegister, pkBigInt, pkString, pkByteString, pkSymbol}
+  compoundKinds* = {pkRecord, pkSequence, pkSet, pkDictionary}
+
+type Symbol* = distinct string
+proc `<`*(x, y: Symbol): bool {.borrow.}
+proc `==`*(x, y: Symbol): bool {.borrow.}
+proc hash*(s: Symbol): Hash {.borrow.}
+proc len*(s: Symbol): int {.borrow.}
+
+type
+  Atom* = object
+    ## Atomic Preserves value.
+    ## Useful when a `const Value` is required.
+    case kind*: PreserveKind
+    of pkBoolean:
+      bool*: bool
+    of pkFloat:
+      float*: float
+    of pkRegister:
+      register*: int
+    of pkBigInt:
+      bigint*: BigInt
+    of pkString:
+      string*: string
+    of pkByteString:
+      bytes*: seq[byte]
+    of pkSymbol:
+      symbol*: Symbol
+    else:
+      # compound and embedded kinds carry no data in an `Atom`
+      discard
+
+  Value* = object
+    ## Preserves value of any kind: the same atomic branches as `Atom`
+    ## plus records, sequences, sets, dictionaries and embedded references.
+    case kind*: PreserveKind
+    of pkBoolean:
+      bool*: bool
+    of pkFloat:
+      float*: float
+    of pkRegister:
+      register*: int
+    of pkBigInt:
+      bigint*: BigInt
+    of pkString:
+      string*: string
+    of pkByteString:
+      bytes*: seq[byte]
+    of pkSymbol:
+      symbol*: Symbol
+    of pkRecord:
+      record*: seq[Value] # label is last
+    of pkSequence:
+      sequence*: seq[Value]
+    of pkSet:
+      # `incl` inserts in ascending `<` order and skips duplicates
+      set*: seq[Value]
+        # TODO: HashSet
+    of pkDictionary:
+      # the dictionary `[]=` inserts in ascending key order and
+      # overwrites the value of an existing key
+      dict*: seq[DictEntry]
+        # TODO: Tables
+    of pkEmbedded:
+      embeddedRef*: EmbeddedRef
+    embedded*: bool
+      ## Flag to mark embedded Preserves value
+
+  DictEntry* = tuple[key: Value, val: Value]
+    ## Key/value pair as stored in the `dict` field of a `Value`.
+
+  EmbeddedRef* = ref RootObj
+  EmbeddedObj* = RootObj
+    ## Object refs embedded in Preserves `Value`s must inherit from `EmbeddedObj`.
+    ## At the moment this is just an alias to `RootObj` but this may change in the future.
+
+func `==`*(x, y: Value): bool =
+  ## Check `x` and `y` for equivalence.
+  ##
+  ## Values of different kinds, or with different `embedded` flags, are
+  ## never equal. Floats are compared by bit pattern. Compound values are
+  ## equal when they have the same length and their elements are pairwise
+  ## equal in stored order. Embedded values are equal when they hold the
+  ## same reference.
+  if x.kind == y.kind and x.embedded == y.embedded:
+    case x.kind
+    of pkBoolean:
+      result = x.bool == y.bool
+    of pkFloat:
+      result = cast[uint64](x.float) == cast[uint64](y.float)
+    of pkRegister:
+      result = x.register == y.register
+    of pkBigInt:
+      result = x.bigint == y.bigint
+    of pkString:
+      result = x.string == y.string
+    of pkByteString:
+      result = x.bytes == y.bytes
+    of pkSymbol:
+      result = x.symbol == y.symbol
+    of pkRecord:
+      result = x.record.len == y.record.len
+      for i in 0..x.record.high:
+        if not result: break
+        result = result and (x.record[i] == y.record[i])
+    of pkSequence:
+      # Lengths must match first: otherwise a prefix would compare equal
+      # and a shorter `y` would be indexed out of bounds.
+      result = x.sequence.len == y.sequence.len
+      for i in 0..x.sequence.high:
+        if not result: break
+        result = result and (x.sequence[i] == y.sequence[i])
+    of pkSet:
+      result = x.set.len == y.set.len
+      for i in 0..x.set.high:
+        if not result: break
+        result = result and (x.set[i] == y.set[i])
+    of pkDictionary:
+      result = x.dict.len == y.dict.len
+      for i in 0..x.dict.high:
+        if not result: break
+        result = result and
+          (x.dict[i].key == y.dict[i].key) and
+          (x.dict[i].val == y.dict[i].val)
+    of pkEmbedded:
+      result = x.embeddedRef == y.embeddedRef
+
+proc `<`(x, y: string | seq[byte]): bool =
+  ## Lexicographic "less than": the first differing element decides,
+  ## and when one input is a prefix of the other the shorter one is less.
+  let shared = min(x.len, y.len)
+  var i = 0
+  while i < shared:
+    if x[i] != y[i]:
+      return x[i] < y[i]
+    inc i
+  result = x.len < y.len
+
+proc `<`*(x, y: Value): bool =
+  ## Preserves have a total order over values. Check if `x` is ordered before `y`.
+  ##
+  ## Values without the `embedded` flag order before values with it, then
+  ## values order by `kind`. Within a kind, compound values are compared
+  ## element by element: the first unequal pair decides, and if one value
+  ## is a prefix of the other the shorter one orders first. Records compare
+  ## their label (stored last) before their fields.
+  if x.embedded != y.embedded:
+    result = y.embedded
+  elif x.kind != y.kind:
+    result = x.kind < y.kind
+  else:
+    case x.kind
+    of pkBoolean:
+      result = (not x.bool) and y.bool
+    of pkFloat:
+      result = x.float < y.float
+    of pkRegister:
+      result = x.register < y.register
+    of pkBigInt:
+      result = x.bigint < y.bigint
+    of pkString:
+      result = x.string < y.string
+    of pkByteString:
+      result = x.bytes < y.bytes
+    of pkSymbol:
+      result = x.symbol < y.symbol
+    of pkRecord:
+      # An unequal label decides the order on its own; the fields are
+      # only consulted when the labels are equal.
+      if x.record[x.record.high] < y.record[y.record.high]: return true
+      if x.record[x.record.high] != y.record[y.record.high]: return false
+      for i in 0..<min(x.record.high, y.record.high):
+        if x.record[i] < y.record[i]: return true
+        if x.record[i] != y.record[i]: return false
+      result = x.record.len < y.record.len
+    of pkSequence:
+      for i in 0..min(x.sequence.high, y.sequence.high):
+        if x.sequence[i] < y.sequence[i]: return true
+        if x.sequence[i] != y.sequence[i]: return false
+      result = x.sequence.len < y.sequence.len
+    of pkSet:
+      for i in 0..min(x.set.high, y.set.high):
+        if x.set[i] < y.set[i]: return true
+        if x.set[i] != y.set[i]: return false
+      result = x.set.len < y.set.len
+    of pkDictionary:
+      # Entries are compared key first, then value; any unequal pair
+      # (smaller or greater) ends the comparison.
+      for i in 0..min(x.dict.high, y.dict.high):
+        if x.dict[i].key < y.dict[i].key: return true
+        if x.dict[i].key != y.dict[i].key: return false
+        if x.dict[i].val < y.dict[i].val: return true
+        if x.dict[i].val != y.dict[i].val: return false
+      result = x.dict.len < y.dict.len
+    of pkEmbedded:
+      result = x.embeddedRef < y.embeddedRef
+
+func cmp*(x, y: Value): int =
+  ## Three-way comparison by Preserves total ordering:
+  ## `0` when `x == y`, `-1` when `x < y`, otherwise `1`.
+  result =
+    if x == y: 0
+    elif x < y: -1
+    else: 1
+
+proc sort*(pr: var Value) =
+  ## Sort the `sequence` of `pr` in place by Preserves total ordering.
+  pr.sequence.sort(cmp)
+
+proc hash*(pr: Value): Hash =
+  ## Produce a `Hash` of `pr` for use with a `HashSet` or `Table`.
+  ##
+  ## The hash mixes the kind and the `embedded` flag with the payload;
+  ## compound values mix in the hash of every element in stored order.
+  ## Embedded references are hashed by identity.
+  var h = hash(pr.kind.int) !& hash(pr.embedded)
+  case pr.kind
+  of pkBoolean:
+    h = h !& hash(pr.bool)
+  of pkFloat:
+    h = h !& hash(pr.float)
+  of pkRegister:
+    h = h !& hash(pr.register)
+  of pkBigInt:
+    h = h !& hash(pr.bigint)
+  of pkString:
+    h = h !& hash(pr.string)
+  of pkByteString:
+    h = h !& hash(pr.bytes)
+  of pkSymbol:
+    h = h !& hash(string pr.symbol)
+  of pkRecord:
+    for val in pr.record:
+      h = h !& hash(val)
+  of pkSequence:
+    for val in pr.sequence:
+      h = h !& hash(val)
+  of pkSet:
+    for val in pr.set.items:
+      h = h !& hash(val)
+  of pkDictionary:
+    for (key, val) in pr.dict.items:
+      h = h !& hash(key) !& hash(val)
+  of pkEmbedded:
+    # Cast the reference itself instead of taking the address of its
+    # dereference, so that a nil `embeddedRef` is never dereferenced.
+    h = h !& hash(cast[uint](pr.embeddedRef))
+  !$h
+
+proc `[]`*(pr: Value; i: int): Value =
+  ## Fetch element `i` of a record or sequence.
+  ## Any other kind of value raises `ValueError`.
+  if pr.kind == pkRecord:
+    result = pr.record[i]
+  elif pr.kind == pkSequence:
+    result = pr.sequence[i]
+  else:
+    raise newException(ValueError, "Preserves value is not indexable")
+
+proc `[]=`*(pr: var Value; i: Natural; val: Value) =
+  ## Store `val` at position `i` of a record or sequence.
+  ## Any other kind of value raises `ValueError`.
+  if pr.kind == pkRecord:
+    pr.record[i] = val
+  elif pr.kind == pkSequence:
+    pr.sequence[i] = val
+  else:
+    raise newException(ValueError, "Preserves value is not indexable")
+
+proc `[]=`*(pr: var Value; key, val: Value) =
+  ## Set `key` to `val` in the Preserves dictionary `pr`.
+  ## A new entry is placed before the first entry with a greater key;
+  ## an entry with an equal key has its value replaced.
+  var slot = 0
+  while slot < pr.dict.len:
+    if key < pr.dict[slot].key:
+      pr.dict.insert([(key, val)], slot)
+      return
+    if key == pr.dict[slot].key:
+      pr.dict[slot].val = val
+      return
+    inc slot
+  # every existing key is smaller
+  pr.dict.add((key, val))
+
+proc incl*(pr: var Value; key: Value) =
+  ## Add `key` to the Preserves set `pr` unless an equal member exists.
+  ## The new member goes before the first member that is greater.
+  # TODO: binary search
+  var slot = 0
+  while slot < pr.set.len:
+    if key < pr.set[slot]:
+      pr.set.insert([key], slot)
+      return
+    if key == pr.set[slot]:
+      return
+    inc slot
+  pr.set.add(key)
+
+proc excl*(pr: var Value; key: Value) =
+  ## Remove the first member equal to `key` from the Preserves set `pr`.
+  ## Nothing happens when there is no such member.
+  var slot = 0
+  while slot < pr.set.len:
+    if pr.set[slot] == key:
+      delete(pr.set, slot..slot)
+      return
+    inc slot
--- a/src/preserves/schema.nim
+++ b/src/preserves/schema.nim
@ -1,13 +1,14 @@

 import
-  std/typetraits, preserves, std/tables
+  std/tables,
+  ../preserves

 type
  Ref* {.preservesRecord: "ref".} = object
    `module`*: ModulePath
-    `name`* {.preservesSymbol.}: string
+    `name`*: Symbol

-  ModulePath* = seq[string]
+  ModulePath* = seq[Symbol]
  Bundle* {.preservesRecord: "bundle".} = object
    `modules`*: Modules

@ -27,7 +28,7 @@ type
  CompoundPatternDict* {.preservesRecord: "dict".} = object
    `entries`*: DictionaryEntries

-  `CompoundPattern`* {.preservesOr.} = ref object
+  `CompoundPattern`* {.acyclic, preservesOr.} = ref object
    case orKind*: CompoundPatternKind
    of CompoundPatternKind.`rec`:
        `rec`*: CompoundPatternRec
@ -44,24 +45,23 @@ type
  
  Modules* = Table[ModulePath, Schema]
  EmbeddedTypeNameKind* {.pure.} = enum
-    `Ref`, `false`
-  `EmbeddedTypeName`* {.preservesOr.} = ref object
+    `false`, `Ref`
+  `EmbeddedTypeName`* {.preservesOr.} = object
    case orKind*: EmbeddedTypeNameKind
-    of EmbeddedTypeNameKind.`Ref`:
-        `ref`*: Ref
-
    of EmbeddedTypeNameKind.`false`:
        `false`* {.preservesLiteral: "#f".}: bool

+    of EmbeddedTypeNameKind.`Ref`:
+        `ref`*: Ref
+
  
-  `AtomKind`* {.preservesOr.} = enum
-    `Boolean`, `Float`, `Double`, `SignedInteger`, `String`, `ByteString`,
-    `Symbol`
-  Definitions* = Table[string, Definition]
-  DictionaryEntries* = Table[Preserve[void], NamedSimplePattern]
+  `AtomKind`* {.preservesOr, pure.} = enum
+    `Boolean`, `Double`, `SignedInteger`, `String`, `ByteString`, `Symbol`
+  Definitions* = Table[Symbol, Definition]
+  DictionaryEntries* = Table[Value, NamedSimplePattern]
  NamedPatternKind* {.pure.} = enum
    `named`, `anonymous`
-  `NamedPattern`* {.preservesOr.} = ref object
+  `NamedPattern`* {.acyclic, preservesOr.} = ref object
    case orKind*: NamedPatternKind
    of NamedPatternKind.`named`:
        `named`*: Binding
@ -79,7 +79,7 @@ type
    `interface`*: SimplePattern

  SimplePatternLit* {.preservesRecord: "lit".} = object
-    `value`*: Preserve[void]
+    `value`*: Value

  SimplePatternSeqof* {.preservesRecord: "seqof".} = object
    `pattern`*: SimplePattern
@ -91,7 +91,7 @@ type
    `key`*: SimplePattern
    `value`*: SimplePattern

-  `SimplePattern`* {.preservesOr.} = ref object
+  `SimplePattern`* {.acyclic, preservesOr.} = ref object
    case orKind*: SimplePatternKind
    of SimplePatternKind.`any`:
        `any`* {.preservesLiteral: "any".}: bool
@ -120,7 +120,7 @@ type
  
  NamedSimplePatternKind* {.pure.} = enum
    `named`, `anonymous`
-  `NamedSimplePattern`* {.preservesOr.} = ref object
+  `NamedSimplePattern`* {.preservesOr.} = object
    case orKind*: NamedSimplePatternKind
    of NamedSimplePatternKind.`named`:
        `named`*: Binding
@ -131,23 +131,23 @@ type
  
  DefinitionKind* {.pure.} = enum
    `or`, `and`, `Pattern`
-  DefinitionOrData* {.preservesTuple.} = object
+  DefinitionOrField0* {.preservesTuple.} = object
    `pattern0`*: NamedAlternative
    `pattern1`*: NamedAlternative
    `patternN`* {.preservesTupleTail.}: seq[NamedAlternative]

  DefinitionOr* {.preservesRecord: "or".} = object
-    `data`*: DefinitionOrData
+    `field0`*: DefinitionOrField0

-  DefinitionAndData* {.preservesTuple.} = object
+  DefinitionAndField0* {.preservesTuple.} = object
    `pattern0`*: NamedPattern
    `pattern1`*: NamedPattern
    `patternN`* {.preservesTupleTail.}: seq[NamedPattern]

  DefinitionAnd* {.preservesRecord: "and".} = object
-    `data`*: DefinitionAndData
+    `field0`*: DefinitionAndField0

-  `Definition`* {.preservesOr.} = ref object
+  `Definition`* {.preservesOr.} = object
    case orKind*: DefinitionKind
    of DefinitionKind.`or`:
        `or`*: DefinitionOr
@ -163,17 +163,17 @@ type
    `variantLabel`*: string
    `pattern`*: Pattern

-  SchemaData* {.preservesDictionary.} = object
-    `embeddedType`*: EmbeddedTypeName
-    `version`* {.preservesLiteral: "1".}: bool
+  SchemaField0* {.preservesDictionary.} = object
    `definitions`*: Definitions
+    `embeddedType`*: EmbeddedTypeName
+    `version`* {.preservesLiteral: "1".}: tuple[]

  Schema* {.preservesRecord: "schema".} = object
-    `data`*: SchemaData
+    `field0`*: SchemaField0

  PatternKind* {.pure.} = enum
    `SimplePattern`, `CompoundPattern`
-  `Pattern`* {.preservesOr.} = ref object
+  `Pattern`* {.acyclic, preservesOr.} = ref object
    case orKind*: PatternKind
    of PatternKind.`SimplePattern`:
        `simplepattern`*: SimplePattern
@ -183,7 +183,7 @@ type

  
  Binding* {.preservesRecord: "named".} = object
-    `name`* {.preservesSymbol.}: string
+    `name`*: Symbol
    `pattern`*: SimplePattern

 proc `$`*(x: Ref | ModulePath | Bundle | CompoundPattern | Modules |
@ -198,9 +198,9 @@ proc `$`*(x: Ref | ModulePath | Bundle | CompoundPattern | Modules |
    Schema |
    Pattern |
    Binding): string =
-  `$`(toPreserve(x))
+  `$`(toPreserves(x))

-proc encode*[E](x: Ref | ModulePath | Bundle | CompoundPattern | Modules |
+proc encode*(x: Ref | ModulePath | Bundle | CompoundPattern | Modules |
    EmbeddedTypeName |
    Definitions |
    DictionaryEntries |
@ -212,4 +212,4 @@ proc encode*[E](x: Ref | ModulePath | Bundle | CompoundPattern | Modules |
    Schema |
    Pattern |
    Binding): seq[byte] =
-  encode(toPreserve(x, E))
+  encode(toPreserves(x))
--- a/src/preserves/schemaparse.nim
+++ b/src/preserves/schemaparse.nim
@ -0,0 +1,319 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[strutils, tables]
+from std/os import absolutePath, isAbsolute, getCurrentDir, parentDir
+
+import npeg
+
+import ../preserves, ./schema, ./pegs
+
+type
+  Stack = seq[tuple[node: Value, pos: int]]
+  ParseState = object
+    schema: SchemaField0
+    stack: Stack
+    directory: string
+
+template takeStackAt(): seq[Value] =
+  ## Remove and return, in stack order, every node from the first stack
+  ## entry whose position is at or after the start of the current capture.
+  ## Expects `capture` and `p` to be in scope where it is expanded.
+  let mark = capture[0].si
+  var cut = 0
+  while cut < p.stack.len and p.stack[cut].pos < mark:
+    inc cut
+  var taken = newSeq[Value]()
+  for idx in cut ..< p.stack.len:
+    taken.add(move p.stack[idx].node)
+  p.stack.setLen(cut)
+  taken
+
+template takeStackAfter(): seq[Value] =
+  ## Remove and return, in stack order, every node from the first stack
+  ## entry whose position is strictly after the start of the current
+  ## capture. Expects `capture` and `p` to be in scope where it is expanded.
+  let mark = capture[0].si
+  var cut = 0
+  while cut < p.stack.len and p.stack[cut].pos <= mark:
+    inc cut
+  var taken = newSeq[Value]()
+  for idx in cut ..< p.stack.len:
+    taken.add(move p.stack[idx].node)
+  p.stack.setLen(cut)
+  taken
+
+template popStack(): Value =
+  ## Pop the top entry of the parse stack and yield its node.
+  ## Asserts that the stack is not empty and that the top entry was pushed
+  ## at or after the start of the current capture; the captured text is
+  ## used as the assertion message.
+  ## Expects `capture` and `p` to be in scope where it is expanded.
+  assert(p.stack.len > 0, capture[0].s)
+  assert(capture[0].si <= p.stack[p.stack.high].pos, capture[0].s)
+  p.stack.pop.node
+
+template pushStack(n: Value) =
+  ## Push `n` onto the parse stack, tagged with the start position of the
+  ## current capture. Entries from the first one positioned at or after
+  ## that start are discarded before `n` is added.
+  ## Expects `capture` and `p` to be in scope where it is expanded.
+  let pos = capture[0].si
+  var i: int
+  # find the first entry at or after the current capture
+  while i < p.stack.len and p.stack[i].pos < pos:
+    inc i
+  p.stack.setLen(i)
+  p.stack.add((n, pos))
+  assert(p.stack.len > 0, capture[0].s)
+
+proc toSymbolLit(s: string): Value =
+  ## Build the record `<lit s>` where both the label and `s` are symbols.
+  let
+    label = toSymbol("lit")
+    literal = toSymbol(s)
+  result = initRecord(label, literal)
+
+proc match(text: string; p: var ParseState)
+
+const parser = peg("Schema", p: ParseState):
+
+  Schema <- S * +Clause * !1
+
+  Clause <- (Version | EmbeddedTypeName | Include | Definition | +LineComment) * S * '.' * S
+
+  # At least one digit is required, so that a `version` clause without a
+  # number fails to match instead of passing "" to `parseInt`.
+  Version <- "version" * S * >(+Digit):
+    if parseInt($1) != 1: fail()
+
+  EmbeddedTypeName <- "embeddedType" * S * ("#f" | Ref):
+    if capture.len == 1:
+      var r = popStack()
+      p.schema.embeddedType =
+        EmbeddedTypeName(orKind: EmbeddedTypeNameKind.Ref)
+      validate p.schema.embeddedType.`ref`.fromPreserves(r)
+
+  Include <- "include" * S * '"' * >(+Preserves.char) * '"':
+    var path: string
+    unescape(path, $1)
+    path = absolutePath(path, p.directory)
+    var state = ParseState(
+        schema: move p.schema,
+        directory: parentDir path)
+    match(readFile path, state)
+    p.schema = move state.schema
+
+  Definition <- *LineComment * ?Annotation * id * '=' * S * (OrPattern | AndPattern | Pattern):
+    if p.schema.definitions.hasKey(Symbol $1):
+      raise newException(ValueError, "duplicate definition of " & $0)
+    var
+      node = popStack()
+      def: Definition
+    if not def.fromPreserves(node):
+      raise newException(ValueError, "failed to convert " & $1 & " to a Definition: " & $node)
+    p.schema.definitions[Symbol $1] = def
+    p.stack.setLen(0)
+
+  OrDelim <-  *LineComment * '/' * S * *LineComment
+  OrPattern <- ?OrDelim * AltPattern * +(S * OrDelim * AltPattern):
+    var node = initRecord(toSymbol("or"), takeStackAt().toPreserves)
+    pushStack node
+
+  AltPattern <-
+      AltNamed |
+      AltRecord |
+      AltRef |
+      AltLiteralPattern
+
+  AltNamed <- atId * ?Annotation * Pattern:
+    var n = toPreserves(@[toPreserves $1] & takeStackAt())
+    pushStack n
+
+  AltRecord <- '<' * id * *NamedPattern * '>':
+    var n = toPreserves @[
+      toPreserves $1,
+      initRecord(
+        toSymbol"rec",
+        toSymbolLit $1,
+        initRecord(toSymbol"tuple", toPreserves takeStackAt()))]
+    pushStack n
+
+  AltRef <- Ref:
+    var r = popStack()
+    var n = toPreserves @[r[1].symbol.string.toPreserves, r]
+    pushStack n
+
+  AltLiteralPattern <-
+      >Preserves.Boolean |
+      >Preserves.Double |
+      >Preserves.SignedInteger |
+      >Preserves.String |
+      '=' * >Preserves.Symbol:
+    var id = case $1
+    of "#f": "false"
+    of "#t": "true"
+    else: $1
+    var n = toPreserves @[
+      toPreserves id,
+      initRecord(toSymbol"lit", parsePreserves $1)]
+    pushStack n
+
+  AndPattern <- ?'&' * S * NamedPattern * +('&' * S * NamedPattern):
+    var node = initRecord(toSymbol("and"), toPreserves takeStackAt())
+    pushStack node
+
+  Pattern <- SimplePattern | CompoundPattern
+
+  SimplePattern <- (
+      AnyPattern |
+      AtomKindPattern |
+      EmbeddedPattern |
+      LiteralPattern |
+      SequenceOfPattern |
+      SetOfPattern |
+      DictOfPattern |
+      Ref ) * S
+
+  AnyPattern <- "any":
+    pushStack toSymbol"any"
+
+  AtomKindPattern <- Boolean | Double | SignedInteger | String | ByteString | Symbol
+
+  Boolean <- "bool":
+    pushStack initRecord(toSymbol"atom", toSymbol"Boolean")
+
+  Double <- "double":
+    pushStack initRecord(toSymbol"atom", toSymbol"Double")
+
+  SignedInteger <- "int":
+    pushStack initRecord(toSymbol"atom", toSymbol"SignedInteger")
+
+  String <- "string":
+    pushStack initRecord(toSymbol"atom", toSymbol"String")
+
+  ByteString <- "bytes":
+    pushStack initRecord(toSymbol"atom", toSymbol"ByteString")
+
+  Symbol <- "symbol":
+    pushStack initRecord(toSymbol"atom", toSymbol"Symbol")
+
+  EmbeddedPattern <- "#:" * SimplePattern:
+    var n = initRecord(toSymbol"embedded", popStack())
+    pushStack n
+
+  LiteralPattern <- ('=' * >symbol) | ("<<lit>" * >Preserves.Value * ">") | >nonSymbolAtom:
+    pushStack initRecord(toSymbol"lit", parsePreserves($1))
+
+  SequenceOfPattern <- '[' * S * SimplePattern * "..." * S * ']':
+    var n = initRecord(toSymbol"seqof", popStack())
+    pushStack n
+
+  SetOfPattern <- "#{" * S * SimplePattern * '}':
+    var n = initRecord(toSymbol"setof", popStack())
+    pushStack n
+
+  DictOfPattern <-
+      '{' * S *
+          ?Annotation * SimplePattern * ':' * S * SimplePattern * "...:..." * S *
+      '}':
+    var
+      val = popStack()
+      key = popStack()
+    var n = initRecord(toSymbol"dictof", key, val)
+    pushStack n
+
+  Ref <- >(Alpha * *Alnum) * *('.' * >(*Alnum)):
+    var path = initSequence()
+    for i in 1..<capture.len: path.sequence.add(toSymbol capture[i].s)
+    var name = pop(path.sequence)
+    var n = initRecord(toSymbol"ref", path, name)
+    pushStack n
+
+  CompoundPattern <- (
+      RecordPattern |
+      VariableRecordPattern |
+      TuplePattern |
+      VariableTuplePattern |
+      DictionaryPattern ) * S
+
+  RecordPattern <-
+      ("<<rec>" * S * NamedPattern * *NamedPattern * '>') |
+      ('<' * >Value * *(S * NamedPattern) * '>'):
+    if capture.len == 2:
+      var n = initRecord(toSymbol"rec",
+        toSymbolLit $1,
+        initRecord(toSymbol"tuple", toPreserves takeStackAfter()))
+      pushStack n
+    else:
+      var n = initRecord(toSymbol"rec", takeStackAfter())
+      pushStack n
+
+  VariableRecordPattern <- '<' * >Value * S * *(NamedPattern) * "..." * S * '>':
+    var fields = takeStackAfter()
+    var tail = fields.pop
+    tail[1] = initRecord(toSymbol"seqof", tail[1])
+    var n = initRecord(
+        toSymbol"rec",
+        toSymbolLit $1,
+        initRecord(toSymbol"tuplePrefix", toPreserves fields, tail))
+    pushStack n
+
+  TuplePattern <-
+      '[' * S * *NamedPattern * ']':
+    var n = initRecord(toSymbol"tuple", toPreserves takeStackAfter())
+    pushStack n
+
+  VariableTuplePattern <-
+      '[' * S * *NamedPattern * ?Pattern * "..." * S * ']':
+    var fields = takeStackAfter()
+    var tail = fields.pop
+    tail[1] = initRecord(toSymbol"seqof", tail[1])
+    var node = initRecord(toSymbol"tuplePrefix", toPreserves fields, tail)
+    pushStack node
+
+  DictionaryPattern <- '{' * S * *(*LineComment * >Value * S * ':' * S * NamedSimplePattern * ?',' * S) * '}':
+    var dict = initDictionary()
+    for i in countDown(pred capture.len, 1):
+      let key = toSymbol capture[i].s
+      dict[key] =  initRecord("named", key, popStack())
+    var n = initRecord(toSymbol"dict", dict)
+    pushStack n
+
+  NamedPattern <- ((atId * ?Annotation * SimplePattern) | Pattern):
+    if capture.len > 1:
+      var n = initRecord(toSymbol"named", toSymbol $1, popStack())
+      pushStack n
+
+  NamedSimplePattern <- ((atId * ?Annotation * SimplePattern) | SimplePattern):
+    if capture.len > 1:
+      var n = initRecord(toSymbol"named", toSymbol $1, popStack())
+      pushStack n
+
+  id <- >(Alpha * *Alnum) * S
+  atId <- ?Annotation * '@' * id
+
+  symbol <- Preserves.Symbol
+
+  nonSymbolAtom <-
+      Preserves.Boolean |
+      Preserves.Double |
+      Preserves.SignedInteger |
+      Preserves.String |
+      Preserves.ByteString
+
+  Value <- Preserves.Value:
+    discard
+
+  Annotation <- '@' * (Preserves.String | Preserves.Record) * S:
+    discard
+
+  S <- *{ ' ', '\t', '\r', '\n' }
+
+  LineComment <- '#' * @'\n' * S
+
+proc match(text: string; p: var ParseState) =
+  ## Run the schema grammar over `text`, accumulating into `p`.
+  ## Raises `ValueError` carrying the text up to the furthest matched
+  ## position when the grammar does not accept the input.
+  let outcome = parser.match(text, p)
+  if outcome.ok:
+    return
+  raise newException(ValueError, "failed to parse.\n" & text[0..<outcome.matchMax])
+
+proc parsePreservesSchema*(text: string; directory = getCurrentDir()): Schema =
+  ## Parse a Preserves schema.
+  ##
+  ## `directory` is the base against which the paths of `include` clauses
+  ## are resolved; it must not be empty.
+  ## Raises `ValueError` when `text` is not accepted by the grammar.
+  ##
+  ## Schemas in binary encoding should instead be parsed as Preserves
+  ## and converted to `Schema` with `fromPreserves` or `preservesTo`.
+  assert directory != ""
+  var p = ParseState(schema: SchemaField0(), directory: directory)
+  match(text, p)
+  Schema(field0: p.schema)
+
+when isMainModule:
+  # Run as a program: read a textual schema from stdin and, unless the
+  # input is empty, write the parsed schema to stdout as Preserves text.
+  import std/streams
+  let txt = readAll stdin
+  if txt != "":
+    let
+      scm = parsePreservesSchema(txt)
+      pr = toPreserves scm
+    stdout.newFileStream.writeText(pr, textPreserves)
--- a/src/preserves/sugar.nim
+++ b/src/preserves/sugar.nim
@ -0,0 +1,7 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import ../preserves, ./private/macros
+
+proc `%`*(v: bool|SomeFloat|SomeInteger|string|seq[byte]|Symbol): Value {.inline.} = v.toPreserves
+  ## Preserve an atomic Nim value.
+  ## Shorthand for calling `toPreserves` on `v`.
--- a/src/preserves/xmlhooks.nim
+++ b/src/preserves/xmlhooks.nim
@ -0,0 +1,86 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[parseutils, strtabs, xmltree]
+import ../preserves
+
+proc toPreservesFromString*(s: string): Value =
+  ## Guess a Preserves value from the text of an XML attribute.
+  ##
+  ## `false`/`no`/`off` and `true`/`yes`/`on` become booleans. Otherwise
+  ## the text becomes an integer when all of it parses as a decimal or
+  ## hexadecimal number, a float when all of it parses as a float, and a
+  ## string in every other case, including the empty string.
+  # This is a bad and slow thing to do, but that is XML.
+  case s
+  of "false", "no", "off":
+    result = toPreserves(false)
+  of "true", "yes", "on":
+    result = toPreserves(true)
+  of "":
+    # The parseutils routines report zero parsed characters for an empty
+    # input, which would equal `s.len` below and produce the integer 0.
+    result = toPreserves(s)
+  else:
+    var
+      n: BiggestInt
+      f: float
+    if parseBiggestInt(s, n) == s.len:
+      result = toPreserves(n)
+    elif parseHex(s, n) == s.len:
+      result = toPreserves(n)
+    elif parseFloat(s, f) == s.len:
+      result = toPreserves(f)
+    else:
+      result = toPreserves(s)
+
+proc toPreservesHook*(xn: XmlNode): Value =
+  ## Convert an XML element to a Preserves record labelled with the tag.
+  ##
+  ## The fields are, in order: a dictionary of the attributes when the
+  ## element has an attribute table, then either the element's inner text
+  ## as a single string (when it has children and none is an element) or
+  ## one field per child with comments dropped. A node that is not an
+  ## element leaves the result at its default value.
+  if xn.kind != xnElement:
+    return
+  result = Value(kind: pkRecord)
+  let attributes = xn.attrs
+  if not attributes.isNil:
+    var dict = initDictionary()
+    for name, raw in attributes.pairs:
+      dict[toSymbol(name)] = toPreservesFromString(raw)
+    result.record.add(dict)
+  # escaped text is broken up and must be concatenated
+  var hasElementChild = false
+  for child in xn.items:
+    if child.kind == xnElement:
+      hasElementChild = true
+      break
+  if xn.len > 0 and not hasElementChild:
+    result.record.add(toPreserves(xn.innerText))
+  else:
+    for child in xn.items:
+      case child.kind
+      of xnComment:
+        discard
+      of xnText, xnVerbatimText, xnCData, xnEntity:
+        result.record.add(toPreserves(text(child)))
+      of xnElement:
+        result.record.add(toPreservesHook(child))
+  # record labels are stored after the fields
+  result.record.add(toSymbol(xn.tag))
+
+proc toUnquotedString(pr: Value): string {.inline.} =
+  ## Text of `pr` for use as an XML attribute name or value: strings are
+  ## returned without quotes, booleans as `true`/`false`, and anything
+  ## else as its `$` form.
+  if pr.kind == pkString:
+    result = pr.string
+  elif pr.kind == pkBoolean:
+    result = (if pr.bool: "true" else: "false")
+  else:
+    result = $pr
+
+proc fromPreservesHook*(xn: var XmlNode; pr: Value): bool =
+  ## Build an XML element in `xn` from a record with a symbol label.
+  ##
+  ## A dictionary in the first field becomes the attributes, string fields
+  ## become text nodes and every other field is converted recursively as a
+  ## child element. Returns `false` when `pr` is not such a record or when
+  ## a child fails to convert; `xn` is left untouched in the first case.
+  if pr.kind != pkRecord or pr.label.kind != pkSymbol:
+    return false
+  xn = newElement($pr.label)
+  var position = 0
+  for field in pr.fields:
+    if position == 0 and field.kind == pkDictionary:
+      var attrPairs = newSeqOfCap[tuple[key, val: string]](field.dict.len)
+      for key, val in field.dict.items:
+        attrPairs.add((key.toUnquotedString, val.toUnquotedString))
+      xn.attrs = attrPairs.toXmlAttributes
+    elif field.kind == pkString:
+      xn.add newText(field.string)
+    else:
+      var child: XmlNode
+      if not fromPreservesHook(child, field):
+        return false
+      xn.add child
+    inc position
+  result = true
+
+when isMainModule:
+  # Smoke test: an empty element converted to Preserves must convert back.
+  var xn = newElement("foobar")
+  var pr = xn.toPreservesHook()
+  assert fromPreservesHook(xn, pr)
--- a/tests/Tupfile
+++ b/tests/Tupfile
@ -0,0 +1,7 @@
+include_rules
+NIM_FLAGS_test_samples += -d:upstreamTestfile="$(TUP_CWD)/../../preserves/tests/samples.pr"
+: foreach t*.nim |> !nim_run |> | ../<test>
+
+: $(BIN_DIR)/<preserves-schemac> \
+	|> $(BIN_DIR)/preserves-schemac --no-bundle ../../preserves/doc/demo.prs | xxd > %o \
+	|> demo.xxd
--- a/tests/config.nims
+++ b/tests/config.nims
@ -1 +1 @@
-switch("path", "$projectDir/../src")
+switch("path", "$projectDir/../src")
--- a/tests/test_buffereddecoder.nim
+++ b/tests/test_buffereddecoder.nim
@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: 2021 ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[options, unittest]
+import preserves
+
+suite "BufferedDecoder":
+
+  test "half-string":
+    # Feed one encoded byte string to the decoder in two pieces, at
+    # varying split points, and expect a complete value every time.
+    var
+      buf = newBufferedDecoder()
+      pr = Value(kind: pkByteString, bytes: newSeq[byte](23))
+    for i, _ in pr.bytes:
+      pr.bytes[i] = byte(i)
+    let bin = encode(pr)
+    for i in 0..32:
+      checkpoint $i
+      # the mask keeps the split offset within 0..15
+      let j = (i+2) and 0xf
+      feed(buf, bin[0..<j])
+      feed(buf, bin[j..bin.high])
+      var v = decode(buf)
+      check v.isSome
--- a/tests/test_conversions.nim
+++ b/tests/test_conversions.nim
@ -1,47 +1,104 @@
 # SPDX-FileCopyrightText: 2021 ☭ Emery Hemingway
 # SPDX-License-Identifier: Unlicense

-import std/[options, tables, unittest]
-import bigints, preserves
+import std/[options, tables, unittest, xmlparser, xmltree]
+import preserves, preserves/xmlhooks
+
+type
+  Route {.preservesRecord: "route".} = object
+    `transports`*: seq[Value]
+    `pathSteps`* {.preservesTupleTail.}: seq[Value]

 suite "conversions":
  test "dictionary":
    type Bar = tuple
      s: string
    type Foobar {.preservesDictionary.} = object
-      a, b: int
-      c: Bar
+      a: int
+      b: seq[int]
+      c {.preservesEmbedded.}: Bar
+      d: Option[bool]
+      e: Option[bool]
    let
-      c = Foobar(a: 1, b: 2, c: ("ku", ))
-      b = toPreserve(c)
-      a = preserveTo(b, Foobar)
-    check(a.isSome and (get(a) == c))
+      c = Foobar(a: 1, b: @[2], c: ("ku", ), e: some(true))
+      b = toPreserves(c)
+      a = preservesTo(b, Foobar)
+    check($b == """{a: 1 b: [2] c: #:["ku"] e: #t}""")
+    check(a.isSome)
+    if a.isSome: check(get(a) == c)
    check(b.kind == pkDictionary)

  test "records":
    type Bar {.preservesRecord: "bar".} = object
      s: string
    type Foobar {.preservesRecord: "foo".} = object
-      a, b: int
+      a: int
+      b: seq[int]
      c: Bar
    let
-      tup = Foobar(a: 1, b: 2, c: Bar(s: "ku", ))
-      prs = toPreserve(tup)
+      tup = Foobar(a: 1, b: @[2], c: Bar(s: "ku", ))
+      prs = toPreserves(tup)
    check(prs.kind == pkRecord)
-    check($prs == """<foo 1 2 <bar "ku">>""")
-    check(preserveTo(prs, Foobar) == some(tup))
+    check($prs == """<foo 1 [2] <bar "ku">>""")
+    check(preservesTo(prs, Foobar) == some(tup))

  test "tables":
    var a: Table[int, string]
    for i, s in ["a", "b", "c"]: a[i] = s
-    let b = toPreserve(a)
-    check($b == """{0: "a", 1: "b", 2: "c"}""")
+    let b = toPreserves(a)
+    check($b == """{0: "a" 1: "b" 2: "c"}""")
    var c: Table[int, string]
-    check(fromPreserve(c, b))
+    check(fromPreserves(c, b))
    check(a == c)

-suite "%":
-  template check(p: Preserve; s: string) =
+  test "XML":
+    # Parse an SVG document, convert it to Preserves and convert it back.
+    # Only the success of the reverse conversion is checked; the rebuilt
+    # tree `a` is not compared against the parsed tree `b`.
+    var a: XmlNode
+    var b = parseXML """
+      <?xml version="1.0" standalone="no"?>
+      <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+      <?xml version="1.0"?>
+      <svg xmlns="http://www.w3.org/2000/svg" width="10cm" height="3cm" viewBox="0 0 1000 300" version="1.1">
+        <desc>Example text01 - 'Hello, out there' in blue</desc>
+        <text x="250" y="150" font-family="Verdana" font-size="55" fill="blue">
+      Hello, out there
+      </text>
+        <!-- Show outline of canvas using 'rect' element -->
+        <rect x="1" y="1" width="998" height="298" fill="none" stroke="blue" stroke-width="2"/>
+      </svg>
+    """
+    var pr = toPreserves(b)
+    checkpoint $pr
+    check fromPreserves(a, pr)
+
+  test "preservesTupleTail":
+    # A parsed `route` record must convert into the `Route` object type.
+    # Only the success of the conversion is checked, not the field values.
+    let pr = parsePreserves """<route [<tcp "localhost" 1024>] <ref {oid: "syndicate" sig: #x"69ca300c1dbfa08fba692102dd82311a"}>>"""
+    var route: Route
+    check route.fromPreserves(pr)
+
+  test "embedded":
+    # A record converts to `Foo` only when its embedded field holds a
+    # `Bar`; a record embedding a `Baz` must be rejected.
+    type
+      Foo {.preservesRecord: "foo".} = object
+        n: int
+        bar {.preservesEmbedded.}: Bar
+      Bar = ref object of RootObj
+        x: int
+      Baz = ref object of RootObj
+        x: int
+
+    let a = initRecord("foo", 9.toPreserves, embed Bar(x: 768))
+    checkpoint $a
+    check a.preservesTo(Foo).isSome
+
+    let b = initRecord("foo", 2.toPreserves, embed Baz(x: 999))
+    checkpoint $b
+    check not b.preservesTo(Foo).isSome
+
+suite "toPreserve":
+  template check(p: Value; s: string) =
    test s: check($p == s)
-  check false.toPreserve, "#f"
-  check [0, 1, 2, 3].toPreserve, "[0 1 2 3]"
+  check false.toPreserves, "#f"
+  check [0, 1, 2, 3].toPreserves, "[0 1 2 3]"
+
+  test "toRecord":
+    let r = toRecord(Symbol"foo", "üks", "kaks", "kolm", {4..7})
+    check $r == """<foo "üks" "kaks" "kolm" #{4 5 6 7}>"""
--- a/tests/test_integers.nim
+++ b/tests/test_integers.nim
@ -1,74 +0,0 @@
-# SPDX-FileCopyrightText: 2021 ☭ Emery Hemingway
-# SPDX-License-Identifier: Unlicense
-
-import streams, strutils, unittest
-import bigints, preserves
-
-suite "native":
-  let testVectors = @[
-  (-257, "A1FEFF"),
-  (-256, "A1FF00"),
-  (-255, "A1FF01"),
-  (-254, "A1FF02"),
-  (-129, "A1FF7F"),
-  (-128, "A080"),
-  (-127, "A081"),
-  (-4, "A0FC"),
-  (-3, "9D"),
-  (-2, "9E"),
-  (-1, "9F"),
-  (0, "90"),
-  (1, "91"),
-  (12, "9C"),
-  (13, "A00D"),
-  (127, "A07F"),
-  (128, "A10080"),
-  (255, "A100FF"),
-  (256, "A10100"),
-  (131072, "A2020000"),
-  (32767, "A17FFF"),
-  (32768, "A2008000"),
-  (65535, "A200FFFF"),
-  (65536, "A2010000"),
-  ]
-
-  for (num, txt) in testVectors:
-    test $num:
-      let x = num.toPreserve
-      var stream = newStringStream()
-      stream.write(x)
-      block:
-        stream.setPosition(0)
-        let a = txt
-        let b = stream.readAll.toHex
-        check(b == a)
-      block:
-        stream.setPosition(0)
-        let y = stream.decodePreserves()
-        let a = num
-        let b = y.int
-        check(b == a)
-
-suite "big":
-  let testVectors = @[
-  ("87112285931760246646623899502532662132736",
-      "B012010000000000000000000000000000000000"),
-  ]
-
-  for (decimals, hex) in testVectors:
-    test decimals:
-      let big = initBigInt(decimals)
-      let x = big.toPreserve
-      var stream = newStringStream()
-      stream.write(x)
-      block:
-        stream.setPosition(0)
-        let a = hex
-        let b = stream.readAll.toHex
-        check(b == a)
-      block:
-        stream.setPosition(0)
-        let y = stream.decodePreserves()
-        let a = big
-        let b = y.bigint
-        check(b == a)
--- a/tests/test_p_exprs.nim
+++ b/tests/test_p_exprs.nim
@ -0,0 +1,83 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import
+  std/unittest,
+  preserves, preserves/expressions
+
+template testExpr(name, code, cntrl: string) {.dirty.} =
+  ## Declare a test called `name` that parses `code` with
+  ## `parseExpressions`, checks that exactly one expression results,
+  ## and compares it with `cntrl` parsed by `parsePreserves`.
+  test name:
+    checkpoint code
+    let
+      pr = parsePreserves cntrl
+      exprs = parseExpressions code
+    checkpoint $(exprs.toPreserves)
+    check exprs.len == 1
+    let px = exprs[0]
+    check px == pr
+
+suite "expression":
+
+  testExpr "date", """
+      <date 1821 (lookup-month "February") 3>
+    """, """
+      <r date 1821 <g lookup-month "February"> 3>
+    """
+
+  testExpr "r", "<>", "<r>"
+
+  testExpr "begin",
+    """(begin (println! (+ 1 2)) (+ 3 4))""",
+    """<g begin <g println! <g + 1 2>> <g + 3 4>>"""
+
+  testExpr "g",
+    """()""", """<g>"""
+
+  testExpr "groups",
+    """[() () ()]""", """[<g>, <g>, <g>]"""
+
+  testExpr "loop", """
+      {
+        setUp();
+        # Now enter the loop
+        loop: {
+            greet("World");
+        }
+        tearDown();
+      }
+    """,  """
+      <b
+      setUp <g> <p |;|>
+      # Now enter the loop
+      loop <p |:|> <b
+          greet <g "World"> <p |;|>
+      >
+      tearDown <g> <p |;|>
+    >
+  """
+
+  testExpr "+", """
+      [1 + 2.0, print "Hello", predicate: #t, foo, #:remote, bar]
+    """, """
+      [1 + 2.0 <p |,|> print "Hello" <p |,|> predicate <p |:|> #t <p |,|>
+   foo <p |,|> #:remote <p |,|> bar]
+   """
+
+  testExpr "set",
+    """#{1 2 3}""",  """<s 1 2 3>"""
+
+  testExpr "group-set",
+    """#{(read) (read) (read)}""",
+    """<s <g read> <g read> <g read>>"""
+
+  testExpr "block", """
+      {
+        optional name: string,
+        address: Address,
+      }
+    """, """
+      <b
+        optional name <p |:|> string <p |,|>
+        address <p |:|> Address <p |,|>
+      >
+    """
--- a/tests/test_parser.nim
+++ b/tests/test_parser.nim
@ -2,22 +2,22 @@
 # SPDX-License-Identifier: Unlicense

 import std/[strutils, unittest]
-import preserves, preserves/parse
+import preserves

 const examples = [
 ("""<capture <discard>>""", "\xB4\xB3\x07capture\xB4\xB3\x07discard\x84\x84"),
-("""[1 2 3 4]""", "\xB5\x91\x92\x93\x94\x84"),
-("""[-2 -1 0 1]""", "\xB5\x9E\x9F\x90\x91\x84"),
+("""[1 2 3 4]""", "\xB5\xB0\x01\x01\xB0\x01\x02\xB0\x01\x03\xB0\x01\x04\x84"),
+("""[-2 -1 0 1]""", "\xB5\xB0\x01\xFE\xB0\x01\xFF\xB0\x00\xB0\x01\x01\x84"),
 (""""hello"""", "\xB1\x05hello"),
+("""" \"hello\" """", "\xB1\x09 \"hello\" "),
 ("""["a" b #"c" [] #{} #t #f]""", "\xB5\xB1\x01a\xB3\x01b\xB2\x01c\xB5\x84\xB6\x84\x81\x80\x84"),
-("""-257""", "\xA1\xFE\xFF"),
-("""-1""", "\x9F"),
-("""0""", "\x90"),
-("""1""", "\x91"),
-("""255""", "\xA1\x00\xFF"),
-("""1.0f""", "\x82\x3F\x80\x00\x00"),
-("""1.0""", "\x83\x3F\xF0\x00\x00\x00\x00\x00\x00"),
-("""-1.202e300""", "\x83\xFE\x3C\xB7\xB7\x59\xBF\x04\x26"),
+("""-257""", "\xB0\x02\xFE\xFF"),
+("""-1""", "\xB0\x01\xFF"),
+("""0""", "\xB0\x00"),
+("""1""", "\xB0\x01\x01"),
+("""255""", "\xB0\x02\x00\xFF"),
+("""1.0""", "\x87\x08\x3F\xF0\x00\x00\x00\x00\x00\x00"),
+("""-1.202e300""", "\x87\x08\xFE\x3C\xB7\xB7\x59\xBF\x04\x26"),
 ("""#=#x"B4B30763617074757265B4B307646973636172648484"""", "\xB4\xB3\x07capture\xB4\xB3\x07discard\x84\x84"),
 ("""#f""", "\x80")
 ]
@ -38,3 +38,5 @@ suite "parse":
          a = encode test
          b = bin
        check(cast[string](a).toHex == b.toHex)
+      if test.isAtomic:
+        discard parsePreservesAtom(txt)
--- a/tests/test_rfc8259.nim
+++ b/tests/test_rfc8259.nim
@ -57,7 +57,7 @@ for i, jsText in testVectors:
    checkpoint(jsText)
    let
      control = parseJson jsText
-      x = control.toPreserve
+      x = control.toPreserves
    checkpoint($x)
    var stream = newStringStream()
    stream.write(x)
--- a/tests/test_samples.nim
+++ b/tests/test_samples.nim
@ -0,0 +1,108 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/unittest
+import preserves
+
+const upstreamTestfile {.strdefine.} = ""
+
+# Returns `pr` unchanged.
+# NOTE(review): presumably a placeholder for removing annotations - confirm.
+proc strip(pr: Value): Value = pr
+
+proc encodeBinary(pr: Value): Value =
+  ## Encode `pr` with `encode`, wrap the output as a `Value` and record
+  ## it as a checkpoint.
+  result = encode(pr).toPreserves
+  checkpoint("encoded binary: " & $result)
+
+proc looseEncodeBinary(pr: Value): Value =
+  ## Currently identical to `encodeBinary` apart from the checkpoint
+  ## label. Referenced only by a disabled check further down.
+  result = encode(pr).toPreserves
+  checkpoint("loose encoded binary: " & $result)
+
+proc annotatedBinary(pr: Value): Value =
+  ## Currently identical to `encodeBinary` apart from the checkpoint
+  ## label. Referenced only by disabled checks further down.
+  result = encode(pr).toPreserves
+  checkpoint("annotated binary: " & $result)
+
+proc decodeBinary(pr: Value): Value =
+  ## Decode the bytes held in `pr.bytes` with `decodePreserves`.
+  result = decodePreserves(pr.bytes)
+
+proc encodeText(pr: Value): Value =
+  ## Render `pr` as text with `$`, wrap the text as a `Value` and record
+  ## it as a checkpoint.
+  result = ($pr).toPreserves
+  checkpoint("encoded text: " & result.string)
+
+proc decodeText(pr: Value): Value =
+  ## Parse the text held in `pr.string` with `parsePreserves`.
+  result = parsePreserves(pr.string)
+  # the checkpoint records the input `pr`, not the parsed result
+  checkpoint("decoded text " & $pr)
+
+# The sample-driven tests run only when a sample file path was supplied
+# through the `upstreamTestfile` define.
+if upstreamTestfile != "":
+  let samples = readFile(upstreamTestfile).parsePreserves()
+  assert samples.isRecord("TestCases")
+
+  # Inputs of the test case currently being examined. They are declared
+  # outside the loops and only overwritten when a case matches `n`.
+  var binary, annotatedValue, stripped, text, bytes: Value
+
+  # One suite per check number `n`; every sample is visited in each suite
+  # and turned into a test when its label applies to that check.
+  for n in { 1..8, 20..22, 30..32 }:
+    suite $n:
+      for name, testcase in samples[0]:
+        assert testcase.isRecord
+        assert testcase.label.isSymbol
+        var testMatched: bool
+        case testcase.label.symbol.string
+        of "Test":
+          testMatched = (n in { 1..8 })
+          if testMatched:
+            binary = testcase[0]
+            annotatedValue = testcase[1]
+            stripped = strip(annotatedValue)
+        of "NondeterministicTest":
+          testMatched = (n in { 1..7 })
+          if testMatched:
+            binary = testcase[0]
+            annotatedValue = testcase[1]
+            stripped = strip(annotatedValue)
+        of "ParseError":
+          testMatched = (n in { 20 })
+          if testMatched: text = testcase[0]
+        of "ParseShort":
+          testMatched = (n in { 21 })
+          if testMatched: text = testcase[0]
+        of "ParseEOF":
+          testMatched = (n in { 22 })
+          if testMatched: text = testcase[0]
+        of "DecodeError":
+          testMatched = (n in { 30 })
+          if testMatched: bytes = testcase[0]
+        of "DecodeShort":
+          testMatched = (n in { 31 })
+          if testMatched: bytes = testcase[0]
+        of "DecodeEOF":
+          testMatched = (n in { 32 })
+          if testMatched: bytes = testcase[0]
+        else:
+          assert false
+
+        if testMatched:
+          test $name:
+            checkpoint $testcase
+            # checks 3, 4, 7 and 8 are disabled and currently do nothing
+            case n
+            of 1: check decodeBinary(encodeBinary(annotatedValue)) == stripped
+            of 2: check strip(decodeBinary(binary)) == stripped
+            of 3:
+              # check decodeBinary(binary) == annotatedValue
+              discard
+            of 4:
+              # check decodeBinary(annotatedBinary(annotatedValue)) == annotatedValue
+              discard
+            of 5: check decodeText(encodeText(stripped)) == stripped
+            of 6: check decodeText(encodeText(annotatedValue)) == annotatedValue
+            of 7:
+              # check annotatedBinary(annotatedValue) == binary
+              discard
+            of 8:
+              # check looseEncodeBinary(annotatedValue) == binary
+              discard
+            of 20, 21, 22:
+              # TODO: be specific about which error is raised
+              expect ValueError, IOError:
+                discard decodeText(text)
+            of 30, 31, 32:
+              expect ValueError, IOError:
+                discard decodeBinary(bytes)
+            else:
+              assert false
--- a/tests/test_schemas.nim
+++ b/tests/test_schemas.nim
@ -1,21 +0,0 @@
-# SPDX-FileCopyrightText: 2021 ☭ Emery Hemingway
-# SPDX-License-Identifier: Unlicense
-
-import std/[tables, options, os, unittest]
-import preserves, preserves/parse, preserves/schema
-
-suite "schema":
-
-  const
-    binPath = "upstream/schema/schema.bin"
-
-  test "convertability":
-    if not fileExists(binPath): skip()
-    else:
-      var
-        b = decodePreserves readFile(binPath)
-        scm = preserveTo(b, Schema)
-      check scm.isSome
-      if scm.isSome:
-        var a = toPreserve(get scm)
-        check(a == b)
--- a/tests/test_step.nim
+++ b/tests/test_step.nim
@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+import std/[options, sequtils, unittest]
+import preserves
+
+suite "step":
+  # Walk into `data` one step at a time; every step must find a value.
+  var data = parsePreserves """
+      <foo "bar" [ 0.0 {a: #f, "b": #t } ] >
+    """
+
+  var o = some data
+  for i in [1.toPreserves, 1.toPreserves, "b".toPreserves]:
+    test $i:
+      # each test continues from the value found by the previous one
+      o = step(get o, i)
+      check o.isSome
--- a/1
+++ b/1
@ -1 +0,0 @@
-Subproject commit b2c3032e7a9c5157aaea88a77be83438b7a23c58
				`@ -1 +0,0 @@`
				`Subproject commit b2c3032e7a9c5157aaea88a77be83438b7a23c58`