Merge branch 'main' into comment-syntax-hash-space

2023-10-31 21:15:41 +01:00 · 2023-10-31 21:15:41 +01:00 · fb63ac24b0
parent c053102d07 ec03bdb45f
commit fb63ac24b0
25 changed files with 603 additions and 53 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,5 @@
 _site/
 preserves-expressions.pdf
 preserves-binary.pdf
 preserves-schema.pdf
 preserves-text.pdf
--- a/7
+++ b/7
@ -1,6 +1,11 @@
 __ignored__ := $(shell ./setup.sh)
-PDFS=preserves.pdf preserves-text.pdf preserves-binary.pdf preserves-schema.pdf
+PDFS=\
 	preserves.pdf \
 	preserves-text.pdf \
 	preserves-binary.pdf \
 	preserves-schema.pdf \
 	preserves-expressions.pdf
 all: $(PDFS)
--- a/implementations/javascript/packages/core/src/bytes.ts
+++ b/implementations/javascript/packages/core/src/bytes.ts
@ -53,13 +53,17 @@ export class Bytes implements Preservable<any>, PreserveWritable<any> {
    /**
     * Decodes a hexadecimal string into a newly constructed `Bytes`.
     *
     * @param s hexadecimal text; two characters are consumed per output byte
     * @returns a `Bytes` constructed with size argument `s.length >> 1` and
     *          filled in by `Bytes._raw_fromHexInto`
     * @throws Error when `s` has an odd number of characters
     */
    static fromHex(s: string): Bytes {
        const hasOddLength = (s.length & 1) !== 0;
        if (hasOddLength) {
            throw new Error("Cannot decode odd-length hexadecimal string");
        }
        const byteCount = s.length >> 1;
        const decoded = new Bytes(byteCount);
        // Decode directly into the backing view of the new instance.
        Bytes._raw_fromHexInto(s, decoded._view);
        return decoded;
    }
    static _raw_fromHexInto(s: string, target: Uint8Array): void {
        const len = s.length >> 1;
        const result = new Bytes(len);
        for (let i = 0; i < len; i++) {
-            result._view[i] =
+            target[i] =
                (unhexDigit(s.charCodeAt(i << 1)) << 4) | unhexDigit(s.charCodeAt((i << 1) + 1));
        }
        return result;
    }
    static fromIO(io: string | BytesLike): string | Bytes {
@ -135,11 +139,11 @@ export class Bytes implements Preservable<any>, PreserveWritable<any> {
        return Bytes.isBytes(v) ? v : void 0;
    }
-    toHex(): string {
+    toHex(digit = hexDigit): string {
        var nibbles = [];
        for (let i = 0; i < this.length; i++) {
-            nibbles.push(hexDigit(this._view[i] >> 4));
+            nibbles.push(digit(this._view[i] >> 4));
-            nibbles.push(hexDigit(this._view[i] & 15));
+            nibbles.push(digit(this._view[i] & 15));
        }
        return nibbles.join('');
    }
--- a/implementations/javascript/packages/core/src/decoder.ts
+++ b/implementations/javascript/packages/core/src/decoder.ts
@ -4,7 +4,7 @@ import { Tag } from "./constants";
 import { Set, Dictionary } from "./dictionary";
 import { DoubleFloat, SingleFloat } from "./float";
 import { Record } from "./record";
-import { Bytes, BytesLike, underlying } from "./bytes";
+import { Bytes, BytesLike, underlying, hexDigit } from "./bytes";
 import { Value } from "./values";
 import { is } from "./is";
 import { embed, GenericEmbedded, Embedded, EmbeddedTypeDecode } from "./embedded";
@ -34,7 +34,7 @@ export interface TypedDecoder<T> {
    nextFloat(): SingleFloat | undefined;
    nextDouble(): DoubleFloat | undefined;
    nextEmbedded(): Embedded<T> | undefined;
-    nextSignedInteger(): number | undefined;
+    nextSignedInteger(): number | bigint | undefined;
    nextString(): string | undefined;
    nextByteString(): Bytes | undefined;
    nextSymbol(): symbol | undefined;
@ -130,15 +130,42 @@ export class DecoderState {
        return (this.nextbyte() === Tag.End) || (this.index--, false);
    }
-    nextint(n: number): number {
+    nextint(n: number): number | bigint {
-        // TODO: Bignums :-/
+        const start = this.index;
        if (n === 0) return 0;
        if (n > 7) return this.nextbigint(n);
        if (n === 7) {
            const highByte = this.packet[this.index];
            if ((highByte >= 0x20) && (highByte < 0xe0)) {
                return this.nextbigint(n);
            }
            // if highByte is 0xe0, we still might have a value
            // equal to (Number.MIN_SAFE_INTEGER-1).
        }
        let acc = this.nextbyte();
        if (acc & 0x80) acc -= 256;
        for (let i = 1; i < n; i++) acc = (acc * 256) + this.nextbyte();
        if (!Number.isSafeInteger(acc)) {
            this.index = start;
            return this.nextbigint(n);
        }
        return acc;
    }
    /**
     * Consumes `n` bytes with `nextbytes` and converts them to a `bigint`,
     * treating the first byte as the most significant and its top bit as
     * the sign (two's complement).
     *
     * @param n number of bytes to consume; `0` yields `BigInt(0)` without
     *          reading anything
     * @returns the decoded signed integer
     */
    nextbigint(n: number): bigint {
        if (n === 0) return BigInt(0);
        const raw = Bytes.from(this.nextbytes(n));
        const signBitSet = raw.get(0) >= 128;
        if (!signBitSet) {
            // Sign bit clear: the hex digits are the magnitude as-is.
            return BigInt('0x' + raw.toHex());
        }
        // Sign bit set: render every nibble complemented (15 - d), parse
        // that as an unsigned number, then bitwise-NOT the result to
        // recover the negative value.
        const complementedHex = raw.toHex(d => hexDigit(15 - d));
        return ~BigInt('0x' + complementedHex);
    }
    /**
     * Wraps `v` in a new `Annotated` when `includeAnnotations` is set;
     * otherwise hands `v` back unchanged.
     */
    wrap<T>(v: Value<T>): Value<T> {
        if (this.includeAnnotations) {
            return new Annotated(v);
        }
        return v;
    }
@ -306,7 +333,7 @@ export class Decoder<T = never> implements TypedDecoder<T> {
        });
    }
-    nextSignedInteger(): number | undefined {
+    nextSignedInteger(): number | bigint | undefined {
        return this.skipAnnotations((reset) => {
            switch (this.state.nextbyte()) {
                case Tag.SignedInteger: return this.state.nextint(this.state.varint());
--- a/implementations/javascript/packages/core/src/encoder.ts
+++ b/implementations/javascript/packages/core/src/encoder.ts
@ -1,5 +1,5 @@
 import { Tag } from "./constants";
-import { Bytes } from "./bytes";
+import { Bytes, unhexDigit } from "./bytes";
 import { Value } from "./values";
 import { EncodeError } from "./codec";
 import { Record, Tuple } from "./record";
@ -122,6 +122,13 @@ export class EncoderState {
        this.index += bs.length;
    }
    /**
     * Claims the next `count` bytes of the output and returns a
     * `Uint8Array` window over them so the caller can fill them in place.
     * Advances `this.index` past the claimed region.
     *
     * @param count number of bytes to claim
     * @returns a view of `count` bytes over `this.view.buffer`, starting at
     *          the index that was current when the call was made
     */
    claimbytes(count: number) {
        // Presumably guarantees capacity for `count` more bytes — confirm in makeroom.
        this.makeroom(count);
        const start = this.index;
        // NOTE(review): the offset is relative to the start of the underlying
        // buffer, so this assumes `this.view` begins at buffer offset 0 — confirm.
        const claimed = new Uint8Array(this.view.buffer, start, count);
        this.index = start + count;
        return claimed;
    }
    varint(v: number) {
        while (v >= 128) {
            this.emitbyte((v % 128) + 128);
@ -130,8 +137,9 @@ export class EncoderState {
        this.emitbyte(v);
    }
-    encodeint(v: number) {
+    encodeint(v: number | bigint) {
-        // TODO: Bignums :-/
+        if (typeof v === 'bigint') return this.encodebigint(v);
        this.emitbyte(Tag.SignedInteger);
        if (v === 0) {
@ -153,6 +161,37 @@ export class EncoderState {
        enc(bytecount, v);
    }
    /**
     * Emits `v` as a signed integer: the `Tag.SignedInteger` byte, a varint
     * byte count, then the value's two's-complement bytes, most significant
     * first. Zero is emitted as a byte count of 0 with no payload.
     *
     * The payload is produced as a hex string and padded to a whole number
     * of bytes. An extra sign byte is prepended only when the leading digit
     * would otherwise give the wrong sign.
     *
     * @param v the integer to encode
     */
    encodebigint(v: bigint) {
        this.emitbyte(Tag.SignedInteger);

        if (v === BigInt(0)) {
            this.emitbyte(0);
            return;
        }

        let digits: string;
        if (v > 0) {
            digits = v.toString(16);
            if (digits.length & 1) {
                // Odd digit count: a leading 0 nibble completes the top byte
                // and keeps its sign bit clear.
                digits = '0' + digits;
            } else if (unhexDigit(digits.charCodeAt(0)) >= 8) {
                // The top bit would read as a sign bit; add a zero byte.
                digits = '00' + digits;
            }
        } else {
            // For negative v, ~v is non-negative; complementing each of its
            // hex digits yields the low digits of v's two's-complement form.
            const complementOf = 'fedcba9876543210';
            const flipped: string[] = [];
            for (const ch of (~v).toString(16)) {
                flipped.push(complementOf[unhexDigit(ch.charCodeAt(0))]);
            }
            digits = flipped.join('');
            if (digits.length & 1) {
                // Odd digit count: a leading f nibble completes the top byte
                // and keeps its sign bit set.
                digits = 'f' + digits;
            } else if (unhexDigit(digits.charCodeAt(0)) < 8) {
                // The top bit would read as non-negative; add an 0xff byte.
                digits = 'ff' + digits;
            }
        }

        const byteCount = digits.length >> 1;
        this.varint(byteCount);
        Bytes._raw_fromHexInto(digits, this.claimbytes(byteCount));
    }
    encodebytes(tag: Tag, bs: Uint8Array) {
        this.emitbyte(tag);
        this.varint(bs.length);
@ -219,7 +258,7 @@ export class Encoder<T = object> {
        else if (typeof v === 'boolean') {
            this.state.emitbyte(v ? Tag.True : Tag.False);
        }
-        else if (typeof v === 'number') {
+        else if (typeof v === 'number' || typeof v === 'bigint') {
            this.state.encodeint(v);
        }
        else if (typeof v === 'string') {
--- a/implementations/javascript/packages/core/src/fold.ts
+++ b/implementations/javascript/packages/core/src/fold.ts
@ -28,7 +28,7 @@ export interface FoldMethods<T, R> {
    boolean(b: boolean): R;
    single(f: number): R;
    double(f: number): R;
-    integer(i: number): R;
+    integer(i: number | bigint): R;
    string(s: string): R;
    bytes(b: Bytes): R;
    symbol(s: symbol): R;
@ -47,7 +47,7 @@ export class VoidFold<T> implements FoldMethods<T, void> {
    boolean(b: boolean): void {}
    single(f: number): void {}
    double(f: number): void {}
-    integer(i: number): void {}
+    integer(i: number | bigint): void {}
    string(s: string): void {}
    bytes(b: Bytes): void {}
    symbol(s: symbol): void {}
@ -79,7 +79,7 @@ export abstract class ValueFold<T, R = T> implements FoldMethods<T, Value<R>> {
    double(f: number): Value<R> {
        return Double(f);
    }
-    integer(i: number): Value<R> {
+    integer(i: number | bigint): Value<R> {
        return i;
    }
    string(s: string): Value<R> {
@ -138,6 +138,8 @@ export function valueClass<T>(v: Value<T>): ValueClass {
            } else {
                return ValueClass.SignedInteger;
            }
        case 'bigint':
            return ValueClass.SignedInteger;
        case 'string':
            return ValueClass.String;
        case 'symbol':
@ -181,6 +183,8 @@ export function fold<T, R>(v: Value<T>, o: FoldMethods<T, R>): R {
                } else {
                    return o.integer(v);
                }
            case 'bigint':
                return o.integer(v);
            case 'string':
                return o.string(v);
            case 'symbol':
--- a/implementations/javascript/packages/core/src/fromjs.ts
+++ b/implementations/javascript/packages/core/src/fromjs.ts
@ -12,6 +12,7 @@ export function fromJS<T = GenericEmbedded>(x: any): Value<T> {
                throw new TypeError("Refusing to autoconvert non-integer number to Single or Double");
            }
            // FALL THROUGH
        case 'bigint':
        case 'string':
        case 'symbol':
        case 'boolean':
@ -19,7 +20,6 @@ export function fromJS<T = GenericEmbedded>(x: any): Value<T> {
        case 'undefined':
        case 'function':
        case 'bigint':
            break;
        case 'object':
--- a/implementations/javascript/packages/core/src/is.ts
+++ b/implementations/javascript/packages/core/src/is.ts
@ -12,7 +12,13 @@ export function is(a: any, b: any): boolean {
    if (isAnnotated(a)) a = a.item;
    if (isAnnotated(b)) b = b.item;
    if (Object.is(a, b)) return true;
-    if (typeof a !== typeof b) return false;
+    if (typeof a !== typeof b) {
        if ((typeof a === 'number' && typeof b === 'bigint') ||
            (typeof a === 'bigint' && typeof b === 'number')) {
            return a == b;
        }
        return false;
    }
    if (typeof a === 'object') {
        if (a === null || b === null) return false;
        if ('equals' in a && typeof a.equals === 'function') return a.equals(b, is);
--- a/implementations/javascript/packages/core/src/merge.ts
+++ b/implementations/javascript/packages/core/src/merge.ts
@ -7,6 +7,7 @@ import { Set, Dictionary } from "./dictionary";
 import { Annotated } from "./annotated";
 import { unannotate } from "./strip";
 import { embed, isEmbedded, Embedded } from "./embedded";
 import { isCompound } from "./compound";
 export function merge<T>(
    mergeEmbeddeds: (a: T, b: T) => T | undefined,
@ -18,7 +19,17 @@ export function merge<T>(
    }
    function walk(a: Value<T>, b: Value<T>): Value<T> {
-        if (a === b) return a;
+        if (a === b) {
            // Shortcut for merges of trivially identical values.
            return a;
        }
        if (!isCompound(a) && !isCompound(b)) {
            // Don't do expensive recursive comparisons for compounds.
            if (is(a, b)) {
                // Shortcut for merges of marginally less trivially identical values.
                return a;
            }
        }
        return fold<T, Value<T>>(a, {
            boolean: die,
            single(_f: number) { return is(a, b) ? a : die(); },
--- a/implementations/javascript/packages/core/src/reader.ts
+++ b/implementations/javascript/packages/core/src/reader.ts
@ -21,9 +21,8 @@ export interface ReaderOptions<T> extends ReaderStateOptions {
    embeddedDecode?: EmbeddedTypeDecode<T>;
 }
-type IntOrFloat = 'int' | 'float';
+const MAX_SAFE_INTEGERn = BigInt(Number.MAX_SAFE_INTEGER);
-type Numeric = number | SingleFloat | DoubleFloat;
+const MIN_SAFE_INTEGERn = BigInt(Number.MIN_SAFE_INTEGER);
 type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric;
 export const NUMBER_RE: RegExp = /^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$/;
 // Groups:
@ -174,9 +173,12 @@ export class ReaderState {
        const m = NUMBER_RE.exec(acc);
        if (m) {
            if (m[2] === void 0) {
-                let v = parseInt(m[1]);
+                let v = BigInt(m[1]);
-                if (Object.is(v, -0)) v = 0;
+                if (v <= MIN_SAFE_INTEGERn || v >= MAX_SAFE_INTEGERn) {
-                return v;
+                    return v;
                } else {
                    return Number(v);
                }
            } else if (m[7] === '') {
                return Double(parseFloat(m[1] + m[3]));
            } else {
--- a/implementations/javascript/packages/core/src/values.ts
+++ b/implementations/javascript/packages/core/src/values.ts
@ -15,7 +15,7 @@ export type Atom =
    | boolean
    | SingleFloat
    | DoubleFloat
-    | number
+    | number | bigint
    | string
    | Bytes
    | symbol;
--- a/implementations/javascript/packages/core/src/writer.ts
+++ b/implementations/javascript/packages/core/src/writer.ts
@ -278,6 +278,7 @@ export class Writer<T> {
                }
                break;
            }
            case 'bigint':
            case 'number':
                this.state.pieces.push('' + v);
                break;
@ -328,7 +329,9 @@ export class Writer<T> {
                }
                break;
            default:
-                throw new Error(`Internal error: unhandled in Preserves Writer.push for ${v}`);
+                ((_: never) => {
                    throw new Error(`Internal error: unhandled in Preserves Writer.push for ${v}`);
                })(v);
        }
        return this; // for chaining
    }
--- a/implementations/javascript/packages/core/test/codec.test.ts
+++ b/implementations/javascript/packages/core/test/codec.test.ts
@ -184,6 +184,71 @@ describe('encoding and decoding embeddeds', () => {
    });
 });
 describe('integer text parsing', () => {
    // One row per test: [title, text handed to `parse`, thunk building the
    // expected value]. Thunks keep BigInt construction inside the test body.
    const cases: Array<[string, string, () => number | bigint]> = [
        ['should work for zero', '0', () => 0],
        ['should work for smallish positive integers', '60000', () => 60000],
        ['should work for smallish negative integers', '-60000', () => -60000],
        ['should work for largeish positive integers',
         '1234567812345678123456781234567',
         () => BigInt("1234567812345678123456781234567")],
        ['should work for largeish negative integers',
         '-1234567812345678123456781234567',
         () => BigInt("-1234567812345678123456781234567")],
        ['should work for larger positive integers',
         '12345678123456781234567812345678',
         () => BigInt("12345678123456781234567812345678")],
        ['should work for larger negative integers',
         '-12345678123456781234567812345678',
         () => BigInt("-12345678123456781234567812345678")],
    ];

    for (const [title, text, expected] of cases) {
        it(title, () => {
            expect(parse(text)).is(expected());
        });
    }
 });
 describe('integer binary encoding', () => {
    // One row per test: [title, thunk building the value to encode,
    // expected encoding as hex]. Thunks keep BigInt construction inside the
    // test body.
    const cases: Array<[string, () => number | bigint, string]> = [
        ['should work for zero integers', () => 0, 'b000'],
        ['should work for zero bigints', () => BigInt(0), 'b000'],
        ['should work for smallish positive integers', () => 60000, 'b00300ea60'],
        ['should work for smallish negative integers', () => -60000, 'b003ff15a0'],
        ['should work for largeish positive integers',
         () => BigInt("1234567812345678123456781234567"),
         'b00d0f951a8f2b4b049d518b923187'],
        ['should work for largeish negative integers',
         () => BigInt("-1234567812345678123456781234567"),
         'b00df06ae570d4b4fb62ae746dce79'],
        ['should work for larger positive integers',
         () => BigInt("12345678123456781234567812345678"),
         'b00e009bd30997b0ee2e252f73b5ef4e'],
        ['should work for larger negative integers',
         () => BigInt("-12345678123456781234567812345678"),
         'b00eff642cf6684f11d1dad08c4a10b2'],
    ];

    for (const [title, value, hex] of cases) {
        it(title, () => {
            expect(encode(value())).is(Bytes.fromHex(hex));
        });
    }
 });
 describe('common test suite', () => {
    const samples_bin = fs.readFileSync(__dirname + '/../../../../../tests/samples.bin');
    const samples = decodeWithAnnotations(samples_bin, { embeddedDecode: genericEmbeddedTypeDecode });
--- a/implementations/javascript/packages/core/test/values.test.ts
+++ b/implementations/javascript/packages/core/test/values.test.ts
@ -1,4 +1,4 @@
-import { Single, Double, fromJS, Dictionary, IDENTITY_FOLD, fold, mapEmbeddeds, Value, embed } from '../src/index';
+import { Single, Double, fromJS, Dictionary, IDENTITY_FOLD, fold, mapEmbeddeds, Value, embed, preserves } from '../src/index';
 import './test-utils';
 describe('Single', () => {
@ -41,4 +41,51 @@ describe('fromJS', () => {
    it('should map integers to themselves', () => {
        expect(fromJS(1)).toBe(1);
    });
    it('should map bigints to themselves', () => {
        // `toBe` compares with Object.is, which treats equal bigint
        // primitives as identical, so this checks that fromJS returns the
        // bigint value it was given.
        expect(fromJS(BigInt("12345678123456781234567812345678")))
            .toBe(BigInt("12345678123456781234567812345678"));
    });
 });
 describe('is()', () => {
    // number vs number
    it('should compare small integers sensibly', () => {
        expect(3).is(3);
        expect(3).not.is(4);
    });

    // bigint vs bigint: equal values match even when built separately.
    it('should compare large integers sensibly', () => {
        const huge = BigInt("12345678123456781234567812345678");
        const hugePlusOne = BigInt("12345678123456781234567812345679");
        expect(huge).is(huge);
        expect(huge).is(BigInt("12345678123456781234567812345678"));
        expect(huge).not.is(hugePlusOne);
    });

    // number vs bigint: `toBe` keeps the two types apart, while `is`
    // compares by numeric value, in both argument orders.
    it('should compare mixed integers sensibly', () => {
        const huge = BigInt("12345678123456781234567812345678");
        const bigThree = BigInt("3");
        const bigFour = BigInt("4");

        expect(3).not.is(huge);
        expect(huge).not.is(3);

        expect(3).not.toBe(bigThree);
        expect(3).is(bigThree);
        expect(bigThree).not.toBe(3);
        expect(bigThree).is(3);

        expect(3).not.toBe(bigFour);
        expect(3).not.is(bigFour);
        expect(bigFour).not.toBe(3);
        expect(bigFour).not.is(3);
    });
 });
 describe('`preserves` formatter', () => {
    // One row per test: [title, thunk building the interpolated value,
    // expected formatted text].
    const cases: Array<[string, () => number | bigint, string]> = [
        ['should format numbers', () => 3, '>3<'],
        ['should format small bigints', () => BigInt("3"), '>3<'],
        ['should format big bigints',
         () => BigInt("12345678123456781234567812345678"),
         '>12345678123456781234567812345678<'],
    ];

    for (const [title, value, expected] of cases) {
        it(title, () => {
            expect(preserves`>${value()}<`).toBe(expected);
        });
    }
 });
--- a/implementations/python/tests/samples.bin
+++ b/implementations/python/tests/samples.bin
--- a/implementations/python/tests/samples.pr
+++ b/implementations/python/tests/samples.pr
@ -118,6 +118,9 @@
  float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
  float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
  float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
  int-98765432109876543210987654321098765432109: <Test #x"b012feddc125aed4226c770369269596ce3f0ad3" -98765432109876543210987654321098765432109>
  int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
  int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
  int-257: <Test #x"b002feff" -257>
  int-256: <Test #x"b002ff00" -256>
  int-255: <Test #x"b002ff01" -255>
@ -146,7 +149,10 @@
  int65536: <Test #x"b003010000" 65536>
  int131072: <Test #x"b003020000" 131072>
  int2500000000: <Test #x"b005009502f900" 2500000000>
  int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
  int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
  int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
  int98765432109876543210987654321098765432109: <Test #x"b01201223eda512bdd9388fc96d96a6931c0f52d" 98765432109876543210987654321098765432109>
  list0: <Test #x"b584" []>
  list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
  list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
--- a/implementations/racket/preserves/preserves/tests/samples.pr
+++ b/implementations/racket/preserves/preserves/tests/samples.pr
@ -118,6 +118,9 @@
  float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
  float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
  float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
  int-98765432109876543210987654321098765432109: <Test #x"b012feddc125aed4226c770369269596ce3f0ad3" -98765432109876543210987654321098765432109>
  int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
  int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
  int-257: <Test #x"b002feff" -257>
  int-256: <Test #x"b002ff00" -256>
  int-255: <Test #x"b002ff01" -255>
@ -146,7 +149,10 @@
  int65536: <Test #x"b003010000" 65536>
  int131072: <Test #x"b003020000" 131072>
  int2500000000: <Test #x"b005009502f900" 2500000000>
  int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
  int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
  int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
  int98765432109876543210987654321098765432109: <Test #x"b01201223eda512bdd9388fc96d96a6931c0f52d" 98765432109876543210987654321098765432109>
  list0: <Test #x"b584" []>
  list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
  list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
--- a/implementations/rust/preserves/Cargo.toml
+++ b/implementations/rust/preserves/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "preserves"
-version = "3.990.3"
+version = "3.990.4"
 authors = ["Tony Garnock-Jones <tonyg@leastfixedpoint.com>"]
 edition = "2018"
 description = "Implementation of the Preserves serialization format via serde."
--- a/implementations/rust/preserves/src/value/packed/writer.rs
+++ b/implementations/rust/preserves/src/value/packed/writer.rs
@ -289,7 +289,7 @@ impl Writer for BinaryOrderWriter {
 macro_rules! fits_in_bytes {
    ($v:ident, $limit:literal) => {{
        let bits = $limit * 8 - 1;
-        $v >= -(2 << bits) && $v < (2 << bits)
+        $v >= -(1 << bits) && $v < (1 << bits)
    }};
 }
--- a/preserves-expressions.md
+++ b/preserves-expressions.md
@ -0,0 +1,291 @@
 ---
 title: "P-expressions"
 ---
 Tony Garnock-Jones <tonyg@leastfixedpoint.com>  
 October 2023. Version 0.1.1.
 This document defines a grammar called *Preserves Expressions*
 (*P-expressions*, *pexprs*) that includes [ordinary Preserves text
 syntax](preserves-text.html) but offers extensions sufficient to support
 a Lisp- or Haskell-like programming notation.
 **Motivation.** The [text syntax](preserves-text.html) for Preserves
 works well for writing `Value`s, i.e. data. However, in some contexts,
 Preserves applications need a broader grammar that allows interleaving
 of *expressions* with data. Two examples are the [Preserves Schema
 language](preserves-schema.html) and the [Synit configuration scripting
 language](https://synit.org/book/operation/scripting.html), both of
 which (ab)use Preserves text syntax as a kind of programming notation.
 ## Preliminaries
 The P-expression grammar takes the text syntax grammar as its base and
 modifies it.
 <a id="whitespace"></a>
 **Whitespace.** Whitespace is redefined as any number of spaces, tabs,
 carriage returns, or line feeds. Commas are *not* considered whitespace
 in P-expressions.
                ws = *(%x20 / %x09 / CR / LF)
 <a id="delimiters"></a>
 **Delimiters.** Because commas are no longer included in class `ws`,
 class `delimiter` is widened to include them explicitly.
         delimiter = ws / ","
                   / "<" / ">" / "[" / "]" / "{" / "}"
                   / "#" / ":" / DQUOTE / "|" / "@" / ";"
 ## Grammar
 P-expressions add comma, semicolon, and sequences of one or more colons
 to the syntax class `Value`.
            Value =/ Comma / Semicolon / Colons
             Comma = ","
         Semicolon = ";"
            Colons = 1*":"
 Now that colon is in `Value`, the syntax for `Dictionary` is replaced
 with `Block` everywhere it is mentioned.
             Block = "{" *Value ws "}"
 New syntax for explicit uninterpreted grouping of sequences of values is
 introduced, and added to class `Value`.
            Value =/ ws Group
             Group = "(" *Value ws ")"
 Finally, class `Document` is replaced in order to allow standalone
 documents to directly comprise a sequence of multiple values.
          Document = *Value ws
 No changes to [the Preserves semantic model](preserves.html) are made.
 Every Preserves text-syntax term is a valid P-expression, but in general
 P-expressions must be rewritten or otherwise interpreted before a
 meaningful Preserves value can be arrived at ([see
 below](#reading-preserves)).
 ## <a id="annotations"></a>Annotations and Comments
 Annotations and comments attach to the term following them, just as in
 the ordinary text syntax. However, it is common in programming notations
 to allow comments at the end of a file or other sequential construct:
    {
        key: value
        # example of a comment at the end of a dictionary
    }
    # example of a comment at the end of the input file
 While the ordinary text syntax forbids comments in these positions,
 P-expressions allow them:
         Document =/ *Value Trailer ws
           Record =/ "<" Value *Value Trailer ws ">"
         Sequence =/  "[" *Value Trailer ws "]"
              Set =/ "#{" *Value Trailer ws "}"
            Block =/  "{" *Value Trailer ws "}"
           Trailer = 1*Annotation
 ## <a id="encoding-pexprs"></a>Encoding P-expressions as Preserves
 We write ⌜*p*⌝ for the encoding into Preserves of P-expression *p*.
 {:.pseudocode.equations}
 | ⌜·⌝ : **P-expression** | ⟶ | **Preserves** |
 Aside from members of the special classes `Group`, `Block`, `Comma`,
 `Semicolon`, `Colons`, and `Trailer`, P-expressions are encoded directly
 as Preserves data.
 {:.pseudocode.equations}
 | ⌜`[`*p* ...`]`⌝  | = | `[`⌜*p*⌝ ...`]`             |
 | ⌜`<`*p* ...`>`⌝  | = | `<`⌜*p*⌝ ...`>`             |
 | ⌜`#{`*p* ...`}`⌝ | = | `#{`⌜*p*⌝ ...`}`            |
 | ⌜`#!`*p*⌝        | = | `#!`⌜*p*⌝                   |
 | ⌜`@`*p* *q*⌝     | = | `@`⌜*p*⌝ ⌜*q*⌝              |
 | ⌜*p*⌝            | = | *p* when *p* ∈ **Atom** |
 All members of the special classes are encoded as Preserves
 dictionaries[^encoding-rationale].
 [^encoding-rationale]: In principle, it would be nice to use *records*
    for this purpose, but if we did so we would have to also encode
    usages of records!
 {:.pseudocode.equations}
 | ⌜`(`*p* ...`)`⌝ | = | `{g:[`⌜*p*⌝ ...`]}` |
 | ⌜`{`*p* ...`}`⌝ | = | `{b:[`⌜*p*⌝ ...`]}` |
 | ⌜`,`⌝           | = | `{s:|,|}`           |
 | ⌜`;`⌝           | = | `{s:|;|}`           |
 | ⌜`:` ...⌝       | = | `{s:|:` ...`|}`     |
 | ⌜*t*⌝           | = | ⌜*a*⌝ ... `{}`, where *a* ... are the annotations in *t* and *t* ∈ **Trailer** |
 The empty dictionary `{}` acts as an anchor for the annotations in a
 `Trailer`.
 We overload the ⌜·⌝ notation for encoding whole `Document`s into
 sequences of Preserves values.
 {:.pseudocode.equations}
 | ⌜·⌝ : **P-expression Document** | ⟶ | **Preserves Sequence** |
 | ⌜*p* ...⌝                       | = | `[`⌜*p*⌝ ...`]`        |
 ## <a id="reading-preserves"></a>Interpreting P-expressions as Preserves
 The [previous section](#encoding-pexprs) discussed ways of representing
 P-expressions using Preserves. Here, we discuss *interpreting*
 P-expressions *as* Preserves, so that (1) a Preserves datum (2) written
 using Preserves text syntax and then (3) read as a P-expression can be
 (4) interpreted from that P-expression to yield the original datum.
 A reader for P-expressions can be adapted to yield a reader for
 Preserves terms by processing (subterms of) each P-expression that the
 reader produces. The only subterms that need processing are the special
 classes mentioned above.
 1. Every `Group` or `Semicolon` that appears is an error.
 2. Every `Colons` with two or more colons in it is an error.
 3. Every `Comma` that appears is discarded.
 4. Every `Trailer` that appears is an error.[^discard-trailers-instead-of-error]
 5. Every `Block` must contain triplets of `Value`, `Colons` (with a
    single colon), `Value`. Any `Block` not following this pattern is an
    error. Each `Block` following the pattern is translated to a
    `Dictionary` containing a key/value pair for each triplet.
 [^discard-trailers-instead-of-error]: **Implementation note.** When
    implementing parsing of P-expressions into Preserves, consider
    offering an optional mode where trailing annotations `Trailer` are
    *discarded* instead of causing an error to be signalled.
 ## Appendix: Examples
 Examples are given as pairs of P-expressions and their Preserves
 text-syntax encodings.
 ### Individual P-expression `Value`s
 ```preserves
 ⌜<date 1821 (lookup-month "February") 3>⌝
 = <date 1821 {g:[lookup-month "February"]} 3>
 ```
 ```preserves
 ⌜(begin (println! (+ 1 2)) (+ 3 4))⌝
 = {g:[begin {g:[println! {g:[+ 1 2]}]} {g:[+ 3 4]}]}
 ```
 ```preserves
 ⌜()⌝
 = {g:[]}
 ⌜[() () ()]⌝
 = [{g:[]}, {g:[]}, {g:[]}]
 ```
 ```preserves
 ⌜{
      setUp();
      # Now enter the loop
      loop: {
          greet("World");
      }
      tearDown();
  }⌝
 = {b:[
      setUp {g:[]} {s:|;|}
      # Now enter the loop
      loop {s:|:|} {b:[
          greet {g:["World"]} {s:|;|}
      ]}
      tearDown {g:[]} {s:|;|}
  ]}
 ```
 ```preserves
 ⌜[1 + 2.0, print "Hello", predicate: #t, foo, #!remote, bar]⌝
 = [1 + 2.0 {s:|,|} print "Hello" {s:|,|} predicate {s:|:|} #t {s:|,|}
   foo {s:|,|} #!remote {s:|,|} bar]
 ```
 ```preserves
 ⌜{
      optional name: string,
      address: Address,
  }⌝
 = {b:[
      optional name {s:|:|} string {s:|,|}
      address {s:|:|} Address {s:|,|}
  ]}
 ```
 ### Whole `Document`s
 ```preserves
 ⌜{
      key: value
      # example of a comment at the end of a dictionary
  }
  # example of a comment at the end of the input file⌝
 = [ {b:[
        key {s:|:|} value
        @"example of a comment at the end of a dictionary" {}
    ]}
    @"example of a comment at the end of the input file"
    {}
  ]
 ```
 ## Appendix: Reading vs. Parsing
 Lisp systems first *read* streams of bytes into S-expressions and then
 *parse* those S-expressions into more abstract structures denoting
 various kinds of program syntax. [Separation of reading from parsing is
 what gives Lisp its syntactic
 flexibility.](http://calculist.org/blog/2012/04/17/homoiconicity-isnt-the-point/)
 Similarly, the Apple programming language
 [Dylan](https://en.wikipedia.org/wiki/Dylan_(programming_language))
 included a reader-parser split, with the Dylan reader producing
 *D-expressions* that are somewhat similar to P-expressions.
 Finally, the Racket dialects
 [Honu](https://docs.racket-lang.org/honu/index.html) and
 [Something](https://github.com/tonyg/racket-something) use a
 reader-parser-macro setup, where the reader produces Racket data, the
 parser produces "syntax" and is user-extensible, and Racket's own
 modular macro system rewrites this "syntax" down to core forms to be
 compiled to machine code.
 Similarly, when using P-expressions as the foundation for a language, a
 generic P-expression reader can then feed into special-purpose
 *parsers*. The reader captures the coarse syntactic structure of a
 program, and the parser refines this.
 Often, a parser will wish to extract structure from sequences of
 P-expression `Value`s.
 - A simple technique is repeated splitting of sequences; first by
   `Semicolon`, then by `Comma`, then by increasingly high binding-power
   operators.
 - More refined is to use a Pratt parser or similar
   ([1](https://en.wikipedia.org/wiki/Operator-precedence_parser),
   [2](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html),
   [3](https://github.com/tonyg/racket-something/blob/f6116bf3861b76970f5ce291a628476adef820b4/src/something/pratt.rkt))
   to build a parse tree using an extensible specification of the pre-,
   in-, and postfix operators involved.
 - Finally, if you treat sequences of `Value`s as pre-lexed token
   streams, almost any parsing formalism (such as [PEG
   parsing](https://en.wikipedia.org/wiki/Parsing_expression_grammar),
   [Ometa](https://en.wikipedia.org/wiki/OMeta), etc.) can be used to
   extract further syntactic structure.
 ## Notes
--- a/preserves-text.md
+++ b/preserves-text.md
@ -55,7 +55,7 @@ Standalone documents may have trailing whitespace.
 Any `Value` may be preceded by whitespace.
             Value = ws (Record / Collection / Atom / Embedded)
-        Collection = Sequence / Dictionary / Set
+        Collection = Sequence / Set / Dictionary
              Atom = Boolean / String / ByteString /
                     QuotedSymbol / SymbolOrNumber
@ -64,18 +64,18 @@ label-`Value` followed by its field-`Value`s.
            Record = "<" Value *Value ws ">"
-`Sequence`s are enclosed in square brackets. `Dictionary` values are
+`Sequence`s are enclosed in square brackets. `Set`s are written as
-curly-brace-enclosed colon-separated pairs of values. `Set`s are
+values enclosed by the tokens `#{` and `}`. `Dictionary` values are
-written as values enclosed by the tokens `#{` and
+curly-brace-enclosed colon-separated pairs of
-`}`.[^printing-collections] It is an error for a set to contain
+values.[^printing-collections] It is an error for a set to contain
 duplicate elements or for a dictionary to contain duplicate keys. When
-printing sets and dictionaries, implementations *SHOULD* order
+printing sets and dictionaries, implementations *SHOULD* order elements
-elements resp. keys with respect to the [total order over
+resp. keys with respect to the [total order over
 `Value`s](preserves.html#total-order).[^rationale-print-ordering]
-          Sequence = "[" *Value ws "]"
+          Sequence =  "["  *Value               ws "]"
-        Dictionary = "{" *(Value ws ":" Value) ws "}"
+               Set = "#{"  *Value               ws "}"
-               Set = "#{" *Value ws "}"
+        Dictionary =  "{" *(Value ws ":" Value) ws "}"
  [^printing-collections]: **Implementation note.** When implementing
    printing of `Value`s using the textual syntax, consider supporting
@ -273,7 +273,8 @@ value. Each annotation is, in turn, a `Value`, and may itself have
 annotations. The ordering of annotations attached to a `Value` is
 significant.
-            Value =/ ws "@" Value Value
+            Value =/ ws Annotation Value
        Annotation = "@" Value
 Each annotation is preceded by `@`; the underlying annotated value
 follows its annotations. Here we extend only the syntactic nonterminal
@ -283,7 +284,7 @@ named “`Value`” without altering the semantic class of `Value`s.
 interpreted as comments associated with that value. Comments are
 sufficiently common that special syntax exists for them.
-            Value =/ ws ("#" [(%x20 / %x09) linecomment]) (CR / LF) Value
+       Annotation =/ "#" [(%x20 / %x09) linecomment] (CR / LF)
       linecomment = *<any unicode scalar value except CR or LF>
 When written this way, everything between the hash-space or hash-tab and
--- a/preserves.css
+++ b/preserves.css
@ -1,6 +1,7 @@
 :root {
    --sans-font: "Open Sans", -apple-system, BlinkMacSystemFont, avenir next, avenir, segoe ui, helvetica neue, helvetica, Cantarell, Ubuntu, roboto, noto, arial, sans-serif;
    --serif-font: palatino, "Palatino Linotype", "Palatino LT STD", "URW Palladio L", "TeX Gyre Pagella", serif;
    --blockquote-indent: 40px;
 }
 body {
    font-family: var(--serif-font);
@ -230,6 +231,7 @@ table.postcard-grammar {
 blockquote {
    padding: 0.5rem 1rem;
    border-left: solid #4f81bd 2px;
    margin-left: var(--blockquote-indent);
    margin-right: 0;
 }
 blockquote :first-child {
@ -243,6 +245,10 @@ blockquote :last-child {
    background-color: #e9f0f9;
 }
 table.equations { width: auto; margin-left: var(--blockquote-indent); }
 table.equations tr > *:nth-child(1) { text-align: right; }
 table.equations tr > *:nth-child(2) { text-align: center; }
 blockquote.pseudocode {
    border-left: none;
    padding: 0;
--- a/preserves.md
+++ b/preserves.md
@ -104,8 +104,8 @@ the `totalOrder` predicate defined in section 5.10 of [IEEE Std
 A `Record` is a *labelled* tuple of `Value`s, the record's *fields*. A
 label can be any `Value`, but is usually a `Symbol`.[^extensibility]
-[^iri-labels] `Record`s are compared lexicographically: first by
+[^iri-labels] `Record`s are ordered first by label, then
-label, then by field sequence.
+lexicographically[^lexicographical-sequences] by field sequence.
  [^extensibility]: The [Racket](https://racket-lang.org/) programming
    language defines
@ -123,10 +123,25 @@ label, then by field sequence.
    it cannot be read as an IRI at all, and so the label simply stands
    for itself—for its own `Value`.
  [^lexicographical-sequences]: When comparing sequences of values for
    the total order, [lexicographical
    ordering](https://en.wikipedia.org/wiki/Lexicographic_order) is
    used. Elements are drawn pairwise from the two sequences to be
    compared. At the first pair whose elements differ according to the
    total order, the sequence holding the smaller element is the
    smaller sequence. If the end of one sequence is reached while the
    other sequence has elements remaining, the shorter sequence is
    considered smaller. Otherwise, all the elements compared equal and
    neither was longer than the other, so they compare equal. For example,
      - `[#f]` is ordered before `[foo]` because `Boolean` appears before `Symbol` in the kind ordering;
      - `[x]` before `[x y]` because there is no element remaining to compare against `y`;
      - `[a b]` before `[x]` because `a` is smaller than `x`; and
      - `[x y]` before `[x z]` because `y` is ordered before `z` according to the ordering rules for `Symbol`.
 ### Sequences.
 A `Sequence` is a sequence of `Value`s. `Sequence`s are compared
-lexicographically.
+lexicographically.[^lexicographical-sequences]
 ### Sets.
@ -134,15 +149,16 @@ A `Set` is an unordered finite set of `Value`s. It contains no
 duplicate values, following the [equivalence relation](#equivalence)
 induced by the total order on `Value`s. Two `Set`s are compared by
 sorting their elements ascending using the [total order](#total-order)
-and comparing the resulting `Sequence`s.
+and comparing the resulting `Sequence`s.[^lexicographical-sequences]
 ### Dictionaries.
 A `Dictionary` is an unordered finite collection of pairs of `Value`s.
 Each pair comprises a *key* and a *value*. Keys in a `Dictionary` are
 pairwise distinct. Instances of `Dictionary` are compared by
-lexicographic comparison of the sequences resulting from ordering each
+lexicographic[^lexicographical-sequences] comparison of the sequences
-`Dictionary`'s pairs in ascending order by key.
+resulting from ordering each `Dictionary`'s pairs in ascending order by
 key.
 ### Embeddeds.
@ -194,8 +210,12 @@ sequences use [the Preserves binary encoding](preserves-binary.html).
 The total ordering specified [above](#total-order) means that the following statements are true:
-    "bzz" < "c" < "caa" < #!"a"
+ - `"bzz"` &lt; `"c"` &lt; `"caa"` &lt; `#!"a"`
-    #t < 3.0f < 3.0 < 3 < "3" < |3| < [] < #!#t
+ - `#t` &lt; `3.0f` &lt; `3.0` &lt; `3` &lt; `"3"` &lt; `|3|` &lt; `[]` &lt; `#!#t`
 - `[#f]` &lt; `[foo]`, because `Boolean` appears before `Symbol` in the kind ordering
 - `[x]` &lt; `[x y]`, because there is no element remaining to compare against `y`
 - `[a b]` &lt; `[x]`, because `a` is smaller than `x`
 - `[x y]` &lt; `[x z]`, because `y` is ordered before `z`
 ### Simple examples.
--- a/tests/samples.bin
+++ b/tests/samples.bin
--- a/tests/samples.pr
+++ b/tests/samples.pr
@ -118,6 +118,9 @@
  float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
  float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
  float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
  int-98765432109876543210987654321098765432109: <Test #x"b012feddc125aed4226c770369269596ce3f0ad3" -98765432109876543210987654321098765432109>
  int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
  int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
  int-257: <Test #x"b002feff" -257>
  int-256: <Test #x"b002ff00" -256>
  int-255: <Test #x"b002ff01" -255>
@ -146,7 +149,10 @@
  int65536: <Test #x"b003010000" 65536>
  int131072: <Test #x"b003020000" 131072>
  int2500000000: <Test #x"b005009502f900" 2500000000>
  int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
  int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
  int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
  int98765432109876543210987654321098765432109: <Test #x"b01201223eda512bdd9388fc96d96a6931c0f52d" 98765432109876543210987654321098765432109>
  list0: <Test #x"b584" []>
  list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
  list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>