From cf50e00f80ad8ba51e9aaf410653bfd87660dd00 Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones <tonyg@leastfixedpoint.com>
Date: Tue, 31 Oct 2023 12:53:54 +0100
Subject: [PATCH 1/8] Repair failing TS bigint tests

---
 .../javascript/packages/core/src/bytes.ts     |  16 +++--
 .../javascript/packages/core/src/decoder.ts   |  37 ++++++++--
 .../javascript/packages/core/src/encoder.ts   |  47 +++++++++++--
 .../javascript/packages/core/src/fold.ts      |  10 ++-
 .../javascript/packages/core/src/fromjs.ts    |   2 +-
 .../javascript/packages/core/src/is.ts        |   8 ++-
 .../javascript/packages/core/src/merge.ts     |  13 +++-
 .../javascript/packages/core/src/reader.ts    |  14 ++--
 .../javascript/packages/core/src/values.ts    |   2 +-
 .../javascript/packages/core/src/writer.ts    |   5 +-
 .../packages/core/test/codec.test.ts          |  65 ++++++++++++++++++
 .../packages/core/test/values.test.ts         |  49 ++++++++++++-
 implementations/python/tests/samples.bin      | Bin 12124 -> 12436 bytes
 implementations/python/tests/samples.pr       |   4 ++
 .../preserves/preserves/tests/samples.pr      |   4 ++
 tests/samples.bin                             | Bin 12124 -> 12436 bytes
 tests/samples.pr                              |   4 ++
 17 files changed, 250 insertions(+), 30 deletions(-)
diff --git a/implementations/javascript/packages/core/src/bytes.ts b/implementations/javascript/packages/core/src/bytes.ts
index 89fd2a1..3f09e8a 100644
--- a/implementations/javascript/packages/core/src/bytes.ts
+++ b/implementations/javascript/packages/core/src/bytes.ts
@@ -53,13 +53,17 @@ export class Bytes implements Preservable<any>, PreserveWritable<any> {
 
     static fromHex(s: string): Bytes {
         if (s.length & 1) throw new Error("Cannot decode odd-length hexadecimal string");
+        const result = new Bytes(s.length >> 1);
+        Bytes._raw_fromHexInto(s, result._view);
+        return result;
+    }
+
+    static _raw_fromHexInto(s: string, target: Uint8Array): void {
         const len = s.length >> 1;
-        const result = new Bytes(len);
         for (let i = 0; i < len; i++) {
-            result._view[i] =
+            target[i] =
                 (unhexDigit(s.charCodeAt(i << 1)) << 4) | unhexDigit(s.charCodeAt((i << 1) + 1));
         }
-        return result;
     }
 
     static fromIO(io: string | BytesLike): string | Bytes {
@@ -135,11 +139,11 @@ export class Bytes implements Preservable<any>, PreserveWritable<any> {
         return Bytes.isBytes(v) ? v : void 0;
     }
 
-    toHex(): string {
+    toHex(digit = hexDigit): string {
         var nibbles = [];
         for (let i = 0; i < this.length; i++) {
-            nibbles.push(hexDigit(this._view[i] >> 4));
-            nibbles.push(hexDigit(this._view[i] & 15));
+            nibbles.push(digit(this._view[i] >> 4));
+            nibbles.push(digit(this._view[i] & 15));
         }
         return nibbles.join('');
     }
diff --git a/implementations/javascript/packages/core/src/decoder.ts b/implementations/javascript/packages/core/src/decoder.ts
index f552aa8..56eb106 100644
--- a/implementations/javascript/packages/core/src/decoder.ts
+++ b/implementations/javascript/packages/core/src/decoder.ts
@@ -4,7 +4,7 @@ import { Tag } from "./constants";
 import { Set, Dictionary } from "./dictionary";
 import { DoubleFloat, SingleFloat } from "./float";
 import { Record } from "./record";
-import { Bytes, BytesLike, underlying } from "./bytes";
+import { Bytes, BytesLike, underlying, hexDigit } from "./bytes";
 import { Value } from "./values";
 import { is } from "./is";
 import { embed, GenericEmbedded, Embedded, EmbeddedTypeDecode } from "./embedded";
@@ -34,7 +34,7 @@ export interface TypedDecoder<T> {
     nextFloat(): SingleFloat | undefined;
     nextDouble(): DoubleFloat | undefined;
     nextEmbedded(): Embedded<T> | undefined;
-    nextSignedInteger(): number | undefined;
+    nextSignedInteger(): number | bigint | undefined;
     nextString(): string | undefined;
     nextByteString(): Bytes | undefined;
     nextSymbol(): symbol | undefined;
@@ -130,15 +130,42 @@ export class DecoderState {
         return (this.nextbyte() === Tag.End) || (this.index--, false);
     }
 
-    nextint(n: number): number {
-        // TODO: Bignums :-/
+    nextint(n: number): number | bigint {
+        const start = this.index;
         if (n === 0) return 0;
+        if (n > 7) return this.nextbigint(n);
+        if (n === 7) {
+            const highByte = this.packet[this.index];
+            if ((highByte >= 0x20) && (highByte < 0xe0)) {
+                return this.nextbigint(n);
+            }
+            // if highByte is 0xe0, we still might have a value
+            // equal to (Number.MIN_SAFE_INTEGER-1).
+        }
         let acc = this.nextbyte();
         if (acc & 0x80) acc -= 256;
         for (let i = 1; i < n; i++) acc = (acc * 256) + this.nextbyte();
+        if (!Number.isSafeInteger(acc)) {
+            this.index = start;
+            return this.nextbigint(n);
+        }
         return acc;
     }
 
+    nextbigint(n: number): bigint {
+        if (n === 0) return BigInt(0);
+        const bs = Bytes.from(this.nextbytes(n));
+        if (bs.get(0) >= 128) {
+            // negative
+            const hex = bs.toHex(d => hexDigit(15 - d));
+            return ~BigInt('0x' + hex);
+        } else {
+            // (strictly) positive
+            const hex = bs.toHex();
+            return BigInt('0x' + hex);
+        }
+    }
+
     wrap<T>(v: Value<T>): Value<T> {
         return this.includeAnnotations ? new Annotated(v) : v;
     }
@@ -306,7 +333,7 @@ export class Decoder<T = never> implements TypedDecoder<T> {
         });
     }
 
-    nextSignedInteger(): number | undefined {
+    nextSignedInteger(): number | bigint | undefined {
         return this.skipAnnotations((reset) => {
             switch (this.state.nextbyte()) {
                 case Tag.SignedInteger: return this.state.nextint(this.state.varint());
diff --git a/implementations/javascript/packages/core/src/encoder.ts b/implementations/javascript/packages/core/src/encoder.ts
index 92b9a8c..c4942dc 100644
--- a/implementations/javascript/packages/core/src/encoder.ts
+++ b/implementations/javascript/packages/core/src/encoder.ts
@@ -1,5 +1,5 @@
 import { Tag } from "./constants";
-import { Bytes } from "./bytes";
+import { Bytes, unhexDigit } from "./bytes";
 import { Value } from "./values";
 import { EncodeError } from "./codec";
 import { Record, Tuple } from "./record";
@@ -122,6 +122,13 @@ export class EncoderState {
         this.index += bs.length;
     }
 
+    claimbytes(count: number) {
+        this.makeroom(count);
+        const view = new Uint8Array(this.view.buffer, this.index, count);
+        this.index += count;
+        return view;
+    }
+
     varint(v: number) {
         while (v >= 128) {
             this.emitbyte((v % 128) + 128);
@@ -130,8 +137,9 @@ export class EncoderState {
         this.emitbyte(v);
     }
 
-    encodeint(v: number) {
-        // TODO: Bignums :-/
+    encodeint(v: number | bigint) {
+        if (typeof v === 'bigint') return this.encodebigint(v);
+
         this.emitbyte(Tag.SignedInteger);
 
         if (v === 0) {
@@ -153,6 +161,37 @@ export class EncoderState {
         enc(bytecount, v);
     }
 
+    encodebigint(v: bigint) {
+        this.emitbyte(Tag.SignedInteger);
+
+        let hex: string;
+        if (v > 0) {
+            hex = v.toString(16);
+            if (hex.length & 1) {
+                hex = '0' + hex;
+            } else if (unhexDigit(hex.charCodeAt(0)) >= 8) {
+                hex = '00' + hex;
+            }
+        } else if (v < 0) {
+            const negatedHex = (~v).toString(16);
+            hex = '';
+            for (let i = 0; i < negatedHex.length; i++) {
+                hex = hex + 'fedcba9876543210'[unhexDigit(negatedHex.charCodeAt(i))];
+            }
+            if (hex.length & 1) {
+                hex = 'f' + hex;
+            } else if (unhexDigit(hex.charCodeAt(0)) < 8) {
+                hex = 'ff' + hex;
+            }
+        } else {
+            this.emitbyte(0);
+            return;
+        }
+
+        this.varint(hex.length >> 1);
+        Bytes._raw_fromHexInto(hex, this.claimbytes(hex.length >> 1));
+    }
+
     encodebytes(tag: Tag, bs: Uint8Array) {
         this.emitbyte(tag);
         this.varint(bs.length);
@@ -219,7 +258,7 @@ export class Encoder<T = object> {
         else if (typeof v === 'boolean') {
             this.state.emitbyte(v ? Tag.True : Tag.False);
         }
-        else if (typeof v === 'number') {
+        else if (typeof v === 'number' || typeof v === 'bigint') {
             this.state.encodeint(v);
         }
         else if (typeof v === 'string') {
diff --git a/implementations/javascript/packages/core/src/fold.ts b/implementations/javascript/packages/core/src/fold.ts
index fe04412..d65b024 100644
--- a/implementations/javascript/packages/core/src/fold.ts
+++ b/implementations/javascript/packages/core/src/fold.ts
@@ -28,7 +28,7 @@ export interface FoldMethods<T, R> {
     boolean(b: boolean): R;
     single(f: number): R;
     double(f: number): R;
-    integer(i: number): R;
+    integer(i: number | bigint): R;
     string(s: string): R;
     bytes(b: Bytes): R;
     symbol(s: symbol): R;
@@ -47,7 +47,7 @@ export class VoidFold<T> implements FoldMethods<T, void> {
     boolean(b: boolean): void {}
     single(f: number): void {}
     double(f: number): void {}
-    integer(i: number): void {}
+    integer(i: number | bigint): void {}
     string(s: string): void {}
     bytes(b: Bytes): void {}
     symbol(s: symbol): void {}
@@ -79,7 +79,7 @@ export abstract class ValueFold<T, R = T> implements FoldMethods<T, Value<R>> {
     double(f: number): Value<R> {
         return Double(f);
     }
-    integer(i: number): Value<R> {
+    integer(i: number | bigint): Value<R> {
         return i;
     }
     string(s: string): Value<R> {
@@ -138,6 +138,8 @@ export function valueClass<T>(v: Value<T>): ValueClass {
             } else {
                 return ValueClass.SignedInteger;
             }
+        case 'bigint':
+            return ValueClass.SignedInteger;
         case 'string':
             return ValueClass.String;
         case 'symbol':
@@ -181,6 +183,8 @@ export function fold<T, R>(v: Value<T>, o: FoldMethods<T, R>): R {
                 } else {
                     return o.integer(v);
                 }
+            case 'bigint':
+                return o.integer(v);
             case 'string':
                 return o.string(v);
             case 'symbol':
diff --git a/implementations/javascript/packages/core/src/fromjs.ts b/implementations/javascript/packages/core/src/fromjs.ts
index c676152..7e3c1d2 100644
--- a/implementations/javascript/packages/core/src/fromjs.ts
+++ b/implementations/javascript/packages/core/src/fromjs.ts
@@ -12,6 +12,7 @@ export function fromJS<T = GenericEmbedded>(x: any): Value<T> {
                 throw new TypeError("Refusing to autoconvert non-integer number to Single or Double");
             }
             // FALL THROUGH
+        case 'bigint':
         case 'string':
         case 'symbol':
         case 'boolean':
@@ -19,7 +20,6 @@ export function fromJS<T = GenericEmbedded>(x: any): Value<T> {
 
         case 'undefined':
         case 'function':
-        case 'bigint':
             break;
 
         case 'object':
diff --git a/implementations/javascript/packages/core/src/is.ts b/implementations/javascript/packages/core/src/is.ts
index 03551da..355f59c 100644
--- a/implementations/javascript/packages/core/src/is.ts
+++ b/implementations/javascript/packages/core/src/is.ts
@@ -12,7 +12,13 @@ export function is(a: any, b: any): boolean {
     if (isAnnotated(a)) a = a.item;
     if (isAnnotated(b)) b = b.item;
     if (Object.is(a, b)) return true;
-    if (typeof a !== typeof b) return false;
+    if (typeof a !== typeof b) {
+        if ((typeof a === 'number' && typeof b === 'bigint') ||
+            (typeof a === 'bigint' && typeof b === 'number')) {
+            return a == b;
+        }
+        return false;
+    }
     if (typeof a === 'object') {
         if (a === null || b === null) return false;
         if ('equals' in a && typeof a.equals === 'function') return a.equals(b, is);
diff --git a/implementations/javascript/packages/core/src/merge.ts b/implementations/javascript/packages/core/src/merge.ts
index fa8b215..052374f 100644
--- a/implementations/javascript/packages/core/src/merge.ts
+++ b/implementations/javascript/packages/core/src/merge.ts
@@ -7,6 +7,7 @@ import { Set, Dictionary } from "./dictionary";
 import { Annotated } from "./annotated";
 import { unannotate } from "./strip";
 import { embed, isEmbedded, Embedded } from "./embedded";
+import { isCompound } from "./compound";
 
 export function merge<T>(
     mergeEmbeddeds: (a: T, b: T) => T | undefined,
@@ -18,7 +19,17 @@ export function merge<T>(
     }
 
     function walk(a: Value<T>, b: Value<T>): Value<T> {
-        if (a === b) return a;
+        if (a === b) {
+            // Shortcut for merges of trivially identical values.
+            return a;
+        }
+        if (!isCompound(a) && !isCompound(b)) {
+            // Don't do expensive recursive comparisons for compounds.
+            if (is(a, b)) {
+                // Shortcut for merges of marginally less trivially identical values.
+                return a;
+            }
+        }
         return fold<T, Value<T>>(a, {
             boolean: die,
             single(_f: number) { return is(a, b) ? a : die(); },
diff --git a/implementations/javascript/packages/core/src/reader.ts b/implementations/javascript/packages/core/src/reader.ts
index 50bec7b..b17c86c 100644
--- a/implementations/javascript/packages/core/src/reader.ts
+++ b/implementations/javascript/packages/core/src/reader.ts
@@ -21,9 +21,8 @@ export interface ReaderOptions<T> extends ReaderStateOptions {
     embeddedDecode?: EmbeddedTypeDecode<T>;
 }
 
-type IntOrFloat = 'int' | 'float';
-type Numeric = number | SingleFloat | DoubleFloat;
-type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric;
+const MAX_SAFE_INTEGERn = BigInt(Number.MAX_SAFE_INTEGER);
+const MIN_SAFE_INTEGERn = BigInt(Number.MIN_SAFE_INTEGER);
 
 export const NUMBER_RE: RegExp = /^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$/;
 // Groups:
@@ -174,9 +173,12 @@ export class ReaderState {
         const m = NUMBER_RE.exec(acc);
         if (m) {
             if (m[2] === void 0) {
-                let v = parseInt(m[1]);
-                if (Object.is(v, -0)) v = 0;
-                return v;
+                let v = BigInt(m[1]);
+                if (v <= MIN_SAFE_INTEGERn || v >= MAX_SAFE_INTEGERn) {
+                    return v;
+                } else {
+                    return Number(v);
+                }
             } else if (m[7] === '') {
                 return Double(parseFloat(m[1] + m[3]));
             } else {
diff --git a/implementations/javascript/packages/core/src/values.ts b/implementations/javascript/packages/core/src/values.ts
index 1746bcb..1a030f5 100644
--- a/implementations/javascript/packages/core/src/values.ts
+++ b/implementations/javascript/packages/core/src/values.ts
@@ -15,7 +15,7 @@ export type Atom =
     | boolean
     | SingleFloat
     | DoubleFloat
-    | number
+    | number | bigint
     | string
     | Bytes
     | symbol;
diff --git a/implementations/javascript/packages/core/src/writer.ts b/implementations/javascript/packages/core/src/writer.ts
index 8409e5c..93c9d20 100644
--- a/implementations/javascript/packages/core/src/writer.ts
+++ b/implementations/javascript/packages/core/src/writer.ts
@@ -278,6 +278,7 @@ export class Writer<T> {
                 }
                 break;
             }
+            case 'bigint':
             case 'number':
                 this.state.pieces.push('' + v);
                 break;
@@ -328,7 +329,9 @@ export class Writer<T> {
                 }
                 break;
             default:
-                throw new Error(`Internal error: unhandled in Preserves Writer.push for ${v}`);
+                ((_: never) => {
+                    throw new Error(`Internal error: unhandled in Preserves Writer.push for ${v}`);
+                })(v);
         }
         return this; // for chaining
     }
diff --git a/implementations/javascript/packages/core/test/codec.test.ts b/implementations/javascript/packages/core/test/codec.test.ts
index 70f821d..9b0291c 100644
--- a/implementations/javascript/packages/core/test/codec.test.ts
+++ b/implementations/javascript/packages/core/test/codec.test.ts
@@ -184,6 +184,71 @@ describe('encoding and decoding embeddeds', () => {
     });
 });
 
+describe('integer text parsing', () => {
+    it('should work for zero', () => {
+        expect(parse('0')).is(0);
+    });
+
+    it('should work for smallish positive integers', () => {
+        expect(parse('60000')).is(60000);
+    });
+    it('should work for smallish negative integers', () => {
+        expect(parse('-60000')).is(-60000);
+    });
+
+    it('should work for largeish positive integers', () => {
+        expect(parse('1234567812345678123456781234567'))
+            .is(BigInt("1234567812345678123456781234567"));
+    });
+    it('should work for largeish negative integers', () => {
+        expect(parse('-1234567812345678123456781234567'))
+            .is(BigInt("-1234567812345678123456781234567"));
+    });
+
+    it('should work for larger positive integers', () => {
+        expect(parse('12345678123456781234567812345678'))
+            .is(BigInt("12345678123456781234567812345678"));
+    });
+    it('should work for larger negative integers', () => {
+        expect(parse('-12345678123456781234567812345678'))
+            .is(BigInt("-12345678123456781234567812345678"));
+    });
+});
+
+describe('integer binary encoding', () => {
+    it('should work for zero integers', () => {
+        expect(encode(0)).is(Bytes.fromHex('b000'));
+    });
+    it('should work for zero bigints', () => {
+        expect(encode(BigInt(0))).is(Bytes.fromHex('b000'));
+    });
+
+    it('should work for smallish positive integers', () => {
+        expect(encode(60000)).is(Bytes.fromHex('b00300ea60'));
+    });
+    it('should work for smallish negative integers', () => {
+        expect(encode(-60000)).is(Bytes.fromHex('b003ff15a0'));
+    });
+
+    it('should work for largeish positive integers', () => {
+        expect(encode(BigInt("1234567812345678123456781234567")))
+            .is(Bytes.fromHex('b00d0f951a8f2b4b049d518b923187'));
+    });
+    it('should work for largeish negative integers', () => {
+        expect(encode(BigInt("-1234567812345678123456781234567")))
+            .is(Bytes.fromHex('b00df06ae570d4b4fb62ae746dce79'));
+    });
+
+    it('should work for larger positive integers', () => {
+        expect(encode(BigInt("12345678123456781234567812345678")))
+            .is(Bytes.fromHex('b00e009bd30997b0ee2e252f73b5ef4e'));
+    });
+    it('should work for larger negative integers', () => {
+        expect(encode(BigInt("-12345678123456781234567812345678")))
+            .is(Bytes.fromHex('b00eff642cf6684f11d1dad08c4a10b2'));
+    });
+});
+
 describe('common test suite', () => {
     const samples_bin = fs.readFileSync(__dirname + '/../../../../../tests/samples.bin');
     const samples = decodeWithAnnotations(samples_bin, { embeddedDecode: genericEmbeddedTypeDecode });
diff --git a/implementations/javascript/packages/core/test/values.test.ts b/implementations/javascript/packages/core/test/values.test.ts
index 11ad12d..7a91ba6 100644
--- a/implementations/javascript/packages/core/test/values.test.ts
+++ b/implementations/javascript/packages/core/test/values.test.ts
@@ -1,4 +1,4 @@
-import { Single, Double, fromJS, Dictionary, IDENTITY_FOLD, fold, mapEmbeddeds, Value, embed } from '../src/index';
+import { Single, Double, fromJS, Dictionary, IDENTITY_FOLD, fold, mapEmbeddeds, Value, embed, preserves } from '../src/index';
 import './test-utils';
 
 describe('Single', () => {
@@ -41,4 +41,51 @@ describe('fromJS', () => {
     it('should map integers to themselves', () => {
         expect(fromJS(1)).toBe(1);
     });
+
+    it('should map bigints to themselves', () => {
+        expect(fromJS(BigInt("12345678123456781234567812345678")))
+            .toBe(BigInt("12345678123456781234567812345678"));;
+    });
+});
+
+describe('is()', () => {
+    it('should compare small integers sensibly', () => {
+        expect(3).is(3);
+        expect(3).not.is(4);
+    });
+    it('should compare large integers sensibly', () => {
+        const a = BigInt("12345678123456781234567812345678");
+        const b = BigInt("12345678123456781234567812345679");
+        expect(a).is(a);
+        expect(a).is(BigInt("12345678123456781234567812345678"));
+        expect(a).not.is(b);
+    });
+    it('should compare mixed integers sensibly', () => {
+        const a = BigInt("12345678123456781234567812345678");
+        const b = BigInt("3");
+        const c = BigInt("4");
+        expect(3).not.is(a);
+        expect(a).not.is(3);
+        expect(3).not.toBe(b);
+        expect(3).is(b);
+        expect(b).not.toBe(3);
+        expect(b).is(3);
+        expect(3).not.toBe(c);
+        expect(3).not.is(c);
+        expect(c).not.toBe(3);
+        expect(c).not.is(3);
+    });
+});
+
+describe('`preserves` formatter', () => {
+    it('should format numbers', () => {
+        expect(preserves`>${3}<`).toBe('>3<');
+    });
+    it('should format small bigints', () => {
+        expect(preserves`>${BigInt("3")}<`).toBe('>3<');
+    });
+    it('should format big bigints', () => {
+        expect(preserves`>${BigInt("12345678123456781234567812345678")}<`)
+            .toBe('>12345678123456781234567812345678<');
+    });
 });
diff --git a/implementations/python/tests/samples.bin b/implementations/python/tests/samples.bin
index 70ebf1b3d6e50ae9165070fa3f157cead9483098..dbb41bf694a41c0e886b7b8d17639655d431ff03 100644
GIT binary patch
delta 324
zcmcZ;HzjeyZ@qe@%)AmqBV!X&Gjj_Z^p?#mA*sbBoA@{I@=ulO*Y;+a8`wR`upN!t
zvRN5umM#uMV1^?Z_95$OLH(62zmwLL<esbCzzgETxe(I`7-j)8O<)5b!|cnP(>J`+
vQ`Ilt`rZ$n-?CW+tK%%typCkx{}i2X8UBJ7Z(Zo|64<nX50yXpr(QAuFjaNW

delta 11
ScmbP|cqeYdZ@tOJ28jS98wFqh

diff --git a/implementations/python/tests/samples.pr b/implementations/python/tests/samples.pr
index 4646594..df8ae0b 100644
--- a/implementations/python/tests/samples.pr
+++ b/implementations/python/tests/samples.pr
@@ -118,6 +118,8 @@
   float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
   float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
   float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
+  int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
+  int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
   int-257: <Test #x"b002feff" -257>
   int-256: <Test #x"b002ff00" -256>
   int-255: <Test #x"b002ff01" -255>
@@ -146,6 +148,8 @@
   int65536: <Test #x"b003010000" 65536>
   int131072: <Test #x"b003020000" 131072>
   int2500000000: <Test #x"b005009502f900" 2500000000>
+  int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
+  int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
   int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
   list0: <Test #x"b584" []>
   list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
diff --git a/implementations/racket/preserves/preserves/tests/samples.pr b/implementations/racket/preserves/preserves/tests/samples.pr
index 4646594..df8ae0b 100644
--- a/implementations/racket/preserves/preserves/tests/samples.pr
+++ b/implementations/racket/preserves/preserves/tests/samples.pr
@@ -118,6 +118,8 @@
   float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
   float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
   float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
+  int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
+  int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
   int-257: <Test #x"b002feff" -257>
   int-256: <Test #x"b002ff00" -256>
   int-255: <Test #x"b002ff01" -255>
@@ -146,6 +148,8 @@
   int65536: <Test #x"b003010000" 65536>
   int131072: <Test #x"b003020000" 131072>
   int2500000000: <Test #x"b005009502f900" 2500000000>
+  int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
+  int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
   int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
   list0: <Test #x"b584" []>
   list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
diff --git a/tests/samples.bin b/tests/samples.bin
index 70ebf1b3d6e50ae9165070fa3f157cead9483098..dbb41bf694a41c0e886b7b8d17639655d431ff03 100644
GIT binary patch
delta 324
zcmcZ;HzjeyZ@qe@%)AmqBV!X&Gjj_Z^p?#mA*sbBoA@{I@=ulO*Y;+a8`wR`upN!t
zvRN5umM#uMV1^?Z_95$OLH(62zmwLL<esbCzzgETxe(I`7-j)8O<)5b!|cnP(>J`+
vQ`Ilt`rZ$n-?CW+tK%%typCkx{}i2X8UBJ7Z(Zo|64<nX50yXpr(QAuFjaNW

delta 11
ScmbP|cqeYdZ@tOJ28jS98wFqh

diff --git a/tests/samples.pr b/tests/samples.pr
index 4646594..df8ae0b 100644
--- a/tests/samples.pr
+++ b/tests/samples.pr
@@ -118,6 +118,8 @@
   float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
   float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
   float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
+  int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
+  int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
   int-257: <Test #x"b002feff" -257>
   int-256: <Test #x"b002ff00" -256>
   int-255: <Test #x"b002ff01" -255>
@@ -146,6 +148,8 @@
   int65536: <Test #x"b003010000" 65536>
   int131072: <Test #x"b003020000" 131072>
   int2500000000: <Test #x"b005009502f900" 2500000000>
+  int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
+  int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
   int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
   list0: <Test #x"b584" []>
   list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>

From 8276a50552de3d6d56c2a47694c6270549c0da75 Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones <tonyg@leastfixedpoint.com>
Date: Tue, 31 Oct 2023 13:27:03 +0100
Subject: [PATCH 2/8] Repair error in Rust integer width calculation

---
 implementations/python/tests/samples.bin      | Bin 12436 -> 12629 bytes
 implementations/python/tests/samples.pr       |   2 ++
 .../preserves/preserves/tests/samples.pr      |   2 ++
 .../rust/preserves/src/value/packed/writer.rs |   2 +-
 tests/samples.bin                             | Bin 12436 -> 12629 bytes
 tests/samples.pr                              |   2 ++
 6 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/implementations/python/tests/samples.bin b/implementations/python/tests/samples.bin
index dbb41bf694a41c0e886b7b8d17639655d431ff03..d6008aaab2fa3445f5efce4545e376e67137ad9e 100644
GIT binary patch
delta 204
zcmbP|cr|ImB!l|RI+=MTmKNq_rY6Qlh6V&!TQ;+Vq!yQK64@ZcsAP95Q2XxWjz80G
zW@Q>4_^OLdtYxz<(0pB-#=4-}ie$sTy9ZU*T~W#@XU<fcI_;c2*X0dDm|`t0EdU6!
BN>%^>

delta 10
RcmcbbG$nDvBm<_F762S!1gQW3

diff --git a/implementations/python/tests/samples.pr b/implementations/python/tests/samples.pr
index df8ae0b..a390467 100644
--- a/implementations/python/tests/samples.pr
+++ b/implementations/python/tests/samples.pr
@@ -118,6 +118,7 @@
   float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
   float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
   float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
+  int-98765432109876543210987654321098765432109: <Test #x"b012feddc125aed4226c770369269596ce3f0ad3" -98765432109876543210987654321098765432109>
   int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
   int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
   int-257: <Test #x"b002feff" -257>
@@ -151,6 +152,7 @@
   int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
   int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
   int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
+  int98765432109876543210987654321098765432109: <Test #x"b01201223eda512bdd9388fc96d96a6931c0f52d" 98765432109876543210987654321098765432109>
   list0: <Test #x"b584" []>
   list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
   list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
diff --git a/implementations/racket/preserves/preserves/tests/samples.pr b/implementations/racket/preserves/preserves/tests/samples.pr
index df8ae0b..a390467 100644
--- a/implementations/racket/preserves/preserves/tests/samples.pr
+++ b/implementations/racket/preserves/preserves/tests/samples.pr
@@ -118,6 +118,7 @@
   float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
   float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
   float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
+  int-98765432109876543210987654321098765432109: <Test #x"b012feddc125aed4226c770369269596ce3f0ad3" -98765432109876543210987654321098765432109>
   int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
   int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
   int-257: <Test #x"b002feff" -257>
@@ -151,6 +152,7 @@
   int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
   int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
   int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
+  int98765432109876543210987654321098765432109: <Test #x"b01201223eda512bdd9388fc96d96a6931c0f52d" 98765432109876543210987654321098765432109>
   list0: <Test #x"b584" []>
   list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
   list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
diff --git a/implementations/rust/preserves/src/value/packed/writer.rs b/implementations/rust/preserves/src/value/packed/writer.rs
index 72d6294..0fad0b2 100644
--- a/implementations/rust/preserves/src/value/packed/writer.rs
+++ b/implementations/rust/preserves/src/value/packed/writer.rs
@@ -289,7 +289,7 @@ impl Writer for BinaryOrderWriter {
 macro_rules! fits_in_bytes {
     ($v:ident, $limit:literal) => {{
         let bits = $limit * 8 - 1;
-        $v >= -(2 << bits) && $v < (2 << bits)
+        $v >= -(1 << bits) && $v < (1 << bits)
     }};
 }
 
diff --git a/tests/samples.bin b/tests/samples.bin
index dbb41bf694a41c0e886b7b8d17639655d431ff03..d6008aaab2fa3445f5efce4545e376e67137ad9e 100644
GIT binary patch
delta 204
zcmbP|cr|ImB!l|RI+=MTmKNq_rY6Qlh6V&!TQ;+Vq!yQK64@ZcsAP95Q2XxWjz80G
zW@Q>4_^OLdtYxz<(0pB-#=4-}ie$sTy9ZU*T~W#@XU<fcI_;c2*X0dDm|`t0EdU6!
BN>%^>

delta 10
RcmcbbG$nDvBm<_F762S!1gQW3

diff --git a/tests/samples.pr b/tests/samples.pr
index df8ae0b..a390467 100644
--- a/tests/samples.pr
+++ b/tests/samples.pr
@@ -118,6 +118,7 @@
   float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
   float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
   float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
+  int-98765432109876543210987654321098765432109: <Test #x"b012feddc125aed4226c770369269596ce3f0ad3" -98765432109876543210987654321098765432109>
   int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
   int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
   int-257: <Test #x"b002feff" -257>
@@ -151,6 +152,7 @@
   int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
   int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
   int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
+  int98765432109876543210987654321098765432109: <Test #x"b01201223eda512bdd9388fc96d96a6931c0f52d" 98765432109876543210987654321098765432109>
   list0: <Test #x"b584" []>
   list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
   list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>

From 47b4c072682e2a9bb1ce665bcea5178d759f4d15 Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones <tonyg@leastfixedpoint.com>
Date: Tue, 31 Oct 2023 13:27:35 +0100
Subject: [PATCH 3/8] Release independent packages

preserves@3.990.4

Generated by cargo-workspaces
---
 implementations/rust/preserves/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/implementations/rust/preserves/Cargo.toml b/implementations/rust/preserves/Cargo.toml
index a7e525b..36c662e 100644
--- a/implementations/rust/preserves/Cargo.toml
+++ b/implementations/rust/preserves/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "preserves"
-version = "3.990.3"
+version = "3.990.4"
 authors = ["Tony Garnock-Jones <tonyg@leastfixedpoint.com>"]
 edition = "2018"
 description = "Implementation of the Preserves serialization format via serde."

From 982d916b613a63c1c932e52d175f9a535ebf8726 Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones <tonyg@leastfixedpoint.com>
Date: Tue, 31 Oct 2023 17:34:37 +0100
Subject: [PATCH 4/8] Minor presentation tweak to make Sequence/Set/Dictionary
 line up in the grammar like it does in the abstract model definition

---
 preserves-text.md | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/preserves-text.md b/preserves-text.md
index 97b145b..b7dfef0 100644
--- a/preserves-text.md
+++ b/preserves-text.md
@@ -55,7 +55,7 @@ Standalone documents may have trailing whitespace.
 Any `Value` may be preceded by whitespace.
 
              Value = ws (Record / Collection / Atom / Embedded)
-        Collection = Sequence / Dictionary / Set
+        Collection = Sequence / Set / Dictionary
               Atom = Boolean / String / ByteString /
                      QuotedSymbol / SymbolOrNumber
 
@@ -64,18 +64,18 @@ label-`Value` followed by its field-`Value`s.
 
             Record = "<" Value *Value ws ">"
 
-`Sequence`s are enclosed in square brackets. `Dictionary` values are
-curly-brace-enclosed colon-separated pairs of values. `Set`s are
-written as values enclosed by the tokens `#{` and
-`}`.[^printing-collections] It is an error for a set to contain
+`Sequence`s are enclosed in square brackets. `Set`s are written as
+values enclosed by the tokens `#{` and `}`. `Dictionary` values are
+curly-brace-enclosed colon-separated pairs of
+values.[^printing-collections] It is an error for a set to contain
 duplicate elements or for a dictionary to contain duplicate keys. When
-printing sets and dictionaries, implementations *SHOULD* order
-elements resp. keys with respect to the [total order over
+printing sets and dictionaries, implementations *SHOULD* order elements
+resp. keys with respect to the [total order over
 `Value`s](preserves.html#total-order).[^rationale-print-ordering]
 
-          Sequence = "[" *Value ws "]"
-        Dictionary = "{" *(Value ws ":" Value) ws "}"
-               Set = "#{" *Value ws "}"
+          Sequence =  "["  *Value               ws "]"
+               Set = "#{"  *Value               ws "}"
+        Dictionary =  "{" *(Value ws ":" Value) ws "}"
 
   [^printing-collections]: **Implementation note.** When implementing
     printing of `Value`s using the textual syntax, consider supporting

From a69444f08585f56d244a92e2ab6f49fe52d16000 Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones <tonyg@leastfixedpoint.com>
Date: Tue, 31 Oct 2023 17:37:09 +0100
Subject: [PATCH 5/8] preserves-expressions.md

---
 .gitignore               |   1 +
 Makefile                 |   7 +-
 preserves-expressions.md | 210 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 217 insertions(+), 1 deletion(-)
 create mode 100644 preserves-expressions.md

diff --git a/.gitignore b/.gitignore
index 1fc3cf7..f631d1d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 _site/
+preserves-expressions.pdf
 preserves-binary.pdf
 preserves-schema.pdf
 preserves-text.pdf
diff --git a/Makefile b/Makefile
index 5f8abc8..ac1dc8b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,11 @@
 __ignored__ := $(shell ./setup.sh)
 
-PDFS=preserves.pdf preserves-text.pdf preserves-binary.pdf preserves-schema.pdf
+PDFS=\
+	preserves.pdf \
+	preserves-text.pdf \
+	preserves-binary.pdf \
+	preserves-schema.pdf \
+	preserves-expressions.pdf
 
 all: $(PDFS)
 
diff --git a/preserves-expressions.md b/preserves-expressions.md
new file mode 100644
index 0000000..2adf01e
--- /dev/null
+++ b/preserves-expressions.md
@@ -0,0 +1,210 @@
+---
+title: "P-expressions"
+---
+
+Tony Garnock-Jones <tonyg@leastfixedpoint.com>  
+October 2023. Version 0.1.0.
+
+This document defines a grammar called *Preserves Expressions*
+(*P-expressions*, *pexprs*) that includes [ordinary Preserves text
+syntax](preserves-text.html) but offers extensions sufficient to support
+a Lisp- or Haskell-like programming notation.
+
+**Motivation.** The [text syntax](preserves-text.html) for Preserves
+works well for writing `Value`s, i.e. data. However, in some contexts,
+Preserves applications need a broader grammar that allows interleaving
+of *expressions* with data. Two examples are the [Preserves Schema
+language](preserves-schema.html) and the [Synit configuration scripting
+language](https://synit.org/book/operation/scripting.html), both of
+which (ab)use Preserves text syntax as a kind of programming notation.
+
+## Preliminaries
+
+The P-expression grammar takes the text syntax grammar as its base and
+modifies it.
+
+<a id="whitespace"></a>
+**Whitespace.** Whitespace is redefined as any number of spaces, tabs,
+carriage returns, or line feeds. Commas are *not* considered whitespace
+in P-expressions.
+
+                ws = *(%x20 / %x09 / CR / LF)
+
+<a id="delimiters"></a>
+**Delimiters.** Because commas are no longer included in class `ws`,
+class `delimiter` is widened to include them explicitly.
+
+         delimiter = ws / ","
+                   / "<" / ">" / "[" / "]" / "{" / "}"
+                   / "#" / ":" / DQUOTE / "|" / "@" / ";"
+
+## Grammar
+
+P-expressions add comma, semicolon, and sequences of one or more colons
+to the syntax class `Value`.
+
+            Value =/ Comma / Semicolon / Colons
+             Comma = ","
+         Semicolon = ";"
+            Colons = 1*":"
+
+Now that colon is in `Value`, the syntax for `Dictionary` is replaced
+with `Block` everywhere it is mentioned.
+
+             Block =  "{" *Value ws "}"
+
+New syntax for explicit uninterpreted grouping of sequences of values is
+introduced, and added to class `Value`.
+
+            Value =/ ws Group
+             Group = "(" *Value ws ")"
+
+Finally, class `Document` is replaced in order to allow standalone
+documents to directly comprise a sequence of multiple values.
+
+          Document = *Value ws
+
+No changes to [the Preserves semantic model](preserves.html) are made.
+Every Preserves text-syntax term is a valid P-expression, but in general
+P-expressions must be rewritten or otherwise interpreted before a
+meaningful Preserves value can be arrived at.
+
+## Encoding P-expressions as Preserves
+
+Aside from the special classes `Group`, `Block`, `Comma`, `Semicolon` or
+`Colons`, P-expressions are directly encodable as Preserves data. All
+members of the special classes are encoded as Preserves text
+`Dictionary`[^encoding-rationale] values:
+
+[^encoding-rationale]: In principle, it would be nice to use *records*
+    for this purpose, but if we did so we would have to also encode
+    usages of records!
+
+{:.pseudocode}
+> ⌜`(`*p* ...`)`⌝ ⟶ `{g:[`⌜*p*⌝ ...`]}`
+> ⌜`{`*p* ...`}`⌝ ⟶ `{b:[`⌜*p*⌝ ...`]}`
+> ⌜`,`⌝ ⟶ `{s:|,|}`
+> ⌜`;`⌝ ⟶ `{s:|;|}`
+> ⌜`:` ...⌝ ⟶ `{s:|:` ...`|}`
+
+## Appendix: Examples
+
+Examples are given as pairs of P-expressions and their Preserves
+text-syntax encodings.
+
+```preserves
+ ⌜<date 1821 (lookup-month "February") 3>⌝
+= <date 1821 {g:[lookup-month "February"]} 3>
+```
+
+```preserves
+ ⌜(begin (println! (+ 1 2)) (+ 3 4))⌝
+= {g:[begin {g:[println! {g:[+ 1 2]}]} {g:[+ 3 4]}]}
+```
+
+```preserves
+ ⌜()⌝
+= {g:[]}
+
+ ⌜[() () ()]⌝
+= [{g:[]}, {g:[]}, {g:[]}]
+```
+
+```preserves
+ ⌜{
+      setUp();
+      # Now enter the loop
+      loop: {
+          greet("World");
+      }
+      tearDown();
+  }⌝
+= {b:[
+      setUp {g:[]} {s:|;|}
+      # Now enter the loop
+      loop {s:|:|} {b:[
+          greet {g:["World"]} {s:|;|}
+      ]}
+      tearDown {g:[]} {s:|;|}
+  ]}
+```
+
+```preserves
+ ⌜[1 + 2.0, print "Hello", predicate: #t, foo, #!remote, bar]⌝
+= [1 + 2.0 {s:|,|} print "Hello" {s:|,|} predicate {s:|:|} #t {s:|,|}
+   foo {s:|,|} #!remote {s:|,|} bar]
+```
+
+```preserves
+ ⌜{
+      optional name: string,
+      address: Address,
+  }⌝
+= {b:[
+      optional name {s:|:|} string {s:|,|}
+      address {s:|:|} Address {s:|,|}
+  ]}
+```
+
+## Appendix: Using a P-expression reader to read Preserves
+
+A reader for P-expressions can be adapted to yield a reader for
+Preserves terms by processing (subterms of) each P-expression that the
+reader produces. The only subterms that need processing are the special
+classes mentioned above.
+
+ 1. Every `Group` or `Semicolon` that appears is an error.
+ 2. Every `Colons` with two or more colons in it is an error.
+ 3. Every `Comma` that appears is removed from its container.
+ 4. Every `Block` must contain triplets of `Value`, `Colons` (with a
+    single colon), `Value`. Any `Block` not following this pattern is an
+    error. Each `Block` following the pattern is translated to a
+    `Dictionary` containing a key/value pair for each triplet.
+
+## Appendix: Reading vs. Parsing
+
+Lisp systems first *read* streams of bytes into S-expressions and then
+*parse* those S-expressions into more abstract structures denoting
+various kinds of program syntax. [Separation of reading from parsing is
+what gives Lisp its syntactic
+flexibility.](http://calculist.org/blog/2012/04/17/homoiconicity-isnt-the-point/)
+
+Similarly, the Apple programming language
+[Dylan](https://en.wikipedia.org/wiki/Dylan_(programming_language))
+included a reader-parser split, with the Dylan reader producing
+*D-expressions* that are somewhat similar to P-expressions.
+
+Finally, the Racket dialects
+[Honu](https://docs.racket-lang.org/honu/index.html) and
+[Something](https://github.com/tonyg/racket-something) use a
+reader-parser-macro setup, where the reader produces Racket data, the
+parser produces "syntax" and is user-extensible, and Racket's own
+modular macro system rewrites this "syntax" down to core forms to be
+compiled to machine code.
+
+Similarly, when using P-expressions as the foundation for a language, a
+generic P-expression reader can then feed into special-purpose
+*parsers*. The reader captures the coarse syntactic structure of a
+program, and the parser refines this.
+
+Often, a parser will wish to extract structure from sequences of
+P-expression `Value`s.
+
+ - A simple technique is repeated splitting of sequences; first by
+   `Semicolon`, then by `Comma`, then by increasingly high binding-power
+   operators.
+
+ - More refined is to use a Pratt parser or similar
+   ([1](https://en.wikipedia.org/wiki/Operator-precedence_parser),
+   [2](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html),
+   [3](https://github.com/tonyg/racket-something/blob/f6116bf3861b76970f5ce291a628476adef820b4/src/something/pratt.rkt))
+   to build a parse tree using an extensible specification of the pre-,
+   in-, and postfix operators involved.
+
+ - Finally, if you treat sequences of `Value`s as pre-lexed token
+   streams, almost any parsing formalism (such as [PEG
+   parsing](https://en.wikipedia.org/wiki/Parsing_expression_grammar),
+   [OMeta](https://en.wikipedia.org/wiki/OMeta), etc.) can be used to
+   extract further syntactic structure.
+
+## Notes

From c18e9dd1fe170d2ae8e620f04f98df049594d10d Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones <tonyg@leastfixedpoint.com>
Date: Tue, 31 Oct 2023 18:06:05 +0100
Subject: [PATCH 6/8] Tweaks

---
 preserves-expressions.md | 73 ++++++++++++++++++++++++++--------------
 preserves.css            |  6 ++++
 2 files changed, 53 insertions(+), 26 deletions(-)

diff --git a/preserves-expressions.md b/preserves-expressions.md
index 2adf01e..7901621 100644
--- a/preserves-expressions.md
+++ b/preserves-expressions.md
@@ -67,25 +67,61 @@ documents to directly comprise a sequence of multiple values.
 No changes to [the Preserves semantic model](preserves.html) are made.
 Every Preserves text-syntax term is a valid P-expression, but in general
 P-expressions must be rewritten or otherwise interpreted before a
-meaningful Preserves value can be arrived at.
+meaningful Preserves value can be arrived at ([see
+below](#reading-preserves)).
 
-## Encoding P-expressions as Preserves
+## <a id="encoding-pexprs"></a>Encoding P-expressions as Preserves
+
+We write ⌜*p*⌝ for the encoding into Preserves of P-expression *p*.
+
+{:.pseudocode.equations}
+| ⌜·⌝ | : | **P-expression** ⟶ **Preserves** |
 
 Aside from the special classes `Group`, `Block`, `Comma`, `Semicolon` or
-`Colons`, P-expressions are directly encodable as Preserves data. All
-members of the special classes are encoded as Preserves text
-`Dictionary`[^encoding-rationale] values:
+`Colons`, P-expressions are encoded directly as Preserves data.
+
+{:.pseudocode.equations}
+| ⌜`[`*p* ...`]`⌝  | = | `[`⌜*p*⌝ ...`]`             |
+| ⌜`<`*p* ...`>`⌝  | = | `<`⌜*p*⌝ ...`>`             |
+| ⌜`#{`*p* ...`}`⌝ | = | `#{`⌜*p*⌝ ...`}`            |
+| ⌜`#!`*p*⌝        | = | `#!`⌜*p*⌝                   |
+| ⌜`@`*p* *q*⌝     | = | `@`⌜*p*⌝ ⌜*q*⌝              |
+| ⌜*p*⌝            | = | *p* **when** *p* ∈ **Atom** |
+
+All members of the special classes are encoded as Preserves text
+`Dictionary`[^encoding-rationale] values.
 
 [^encoding-rationale]: In principle, it would be nice to use *records*
     for this purpose, but if we did so we would have to also encode
     usages of records!
 
-{:.pseudocode}
-> ⌜`(`*p* ...`)`⌝ ⟶ `{g:[`⌜*p*⌝ ...`]}`
-> ⌜`{`*p* ...`}`⌝ ⟶ `{b:[`⌜*p*⌝ ...`]}`
-> ⌜`,`⌝ ⟶ `{s:|,|}`
-> ⌜`;`⌝ ⟶ `{s:|;|}`
-> ⌜`:` ...⌝ ⟶ `{s:|:` ...`|}`
+{:.pseudocode.equations}
+| ⌜`(`*p* ...`)`⌝ | = | `{g:[`⌜*p*⌝ ...`]}` |
+| ⌜`{`*p* ...`}`⌝ | = | `{b:[`⌜*p*⌝ ...`]}` |
+| ⌜`,`⌝           | = | `{s:|,|}`           |
+| ⌜`;`⌝           | = | `{s:|;|}`           |
+| ⌜`:` ...⌝       | = | `{s:|:` ...`|}`     |
+
+## <a id="reading-preserves"></a>Interpreting P-expressions as Preserves
+
+The [previous section](#encoding-pexprs) discussed ways of representing
+P-expressions using Preserves. Here, we discuss *interpreting*
+P-expressions *as* Preserves, so that (1) a Preserves datum (2) written
+using Preserves text syntax and then (3) read as a P-expression can be
+(4) interpreted from that P-expression to yield the original datum.
+
+A reader for P-expressions can be adapted to yield a reader for
+Preserves terms by processing (subterms of) each P-expression that the
+reader produces. The only subterms that need processing are the special
+classes mentioned above.
+
+ 1. Every `Group` or `Semicolon` that appears is an error.
+ 2. Every `Colons` with two or more colons in it is an error.
+ 3. Every `Comma` that appears is removed from its container.
+ 4. Every `Block` must contain triplets of `Value`, `Colons` (with a
+    single colon), `Value`. Any `Block` not following this pattern is an
+    error. Each `Block` following the pattern is translated to a
+    `Dictionary` containing a key/value pair for each triplet.
 
 ## Appendix: Examples
 
@@ -146,21 +182,6 @@ text-syntax encodings.
   ]}
 ```
 
-## Appendix: Using a P-expression reader to read Preserves
-
-A reader for P-expressions can be adapted to yield a reader for
-Preserves terms by processing (subterms of) each P-expression that the
-reader produces. The only subterms that need processing are the special
-classes mentioned above.
-
- 1. Every `Group` or `Semicolon` that appears is an error.
- 2. Every `Colons` with two or more colons in it is an error.
- 3. Every `Comma` that appears is removed from its container.
- 4. Every `Block` must contain triplets of `Value`, `Colons` (with a
-    single colon), `Value`. Any `Block` not following this pattern is an
-    error. Each `Block` following the pattern is translated to a
-    `Dictionary` containing a key/value pair for each triplet.
-
 ## Appendix: Reading vs. Parsing
 
 Lisp systems first *read* streams of bytes into S-expressions and then
diff --git a/preserves.css b/preserves.css
index 9c34209..7322cc7 100644
--- a/preserves.css
+++ b/preserves.css
@@ -1,6 +1,7 @@
 :root {
     --sans-font: "Open Sans", -apple-system, BlinkMacSystemFont, avenir next, avenir, segoe ui, helvetica neue, helvetica, Cantarell, Ubuntu, roboto, noto, arial, sans-serif;
     --serif-font: palatino, "Palatino Linotype", "Palatino LT STD", "URW Palladio L", "TeX Gyre Pagella", serif;
+    --blockquote-indent: 40px;
 }
 body {
     font-family: var(--serif-font);
@@ -230,6 +231,7 @@ table.postcard-grammar {
 blockquote {
     padding: 0.5rem 1rem;
     border-left: solid #4f81bd 2px;
+    margin-left: var(--blockquote-indent);
     margin-right: 0;
 }
 blockquote :first-child {
@@ -243,6 +245,10 @@ blockquote :last-child {
     background-color: #e9f0f9;
 }
 
+table.equations { width: auto; margin-left: var(--blockquote-indent); }
+table.equations tr > *:nth-child(1) { text-align: right; }
+table.equations tr > *:nth-child(2) { text-align: center; }
+
 blockquote.pseudocode {
     border-left: none;
     padding: 0;

From 23e0e59dafca603a1a8e1f4f4ea9c4000f1029ac Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones <tonyg@leastfixedpoint.com>
Date: Tue, 31 Oct 2023 19:32:06 +0100
Subject: [PATCH 7/8] Trailing comments

---
 preserves-expressions.md | 78 +++++++++++++++++++++++++++++++++++-----
 preserves-text.md        |  5 +--
 2 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/preserves-expressions.md b/preserves-expressions.md
index 7901621..abcc252 100644
--- a/preserves-expressions.md
+++ b/preserves-expressions.md
@@ -3,7 +3,7 @@ title: "P-expressions"
 ---
 
 Tony Garnock-Jones <tonyg@leastfixedpoint.com>  
-October 2023. Version 0.1.0.
+October 2023. Version 0.1.1.
 
 This document defines a grammar called *Preserves Expressions*
 (*P-expressions*, *pexprs*) that includes [ordinary Preserves text
@@ -51,7 +51,7 @@ to the syntax class `Value`.
 Now that colon is in `Value`, the syntax for `Dictionary` is replaced
 with `Block` everywhere it is mentioned.
 
-             Block =  "{" *Value ws "}"
+             Block = "{" *Value ws "}"
 
 New syntax for explicit uninterpreted grouping of sequences of values is
 introduced, and added to class `Value`.
@@ -70,15 +70,39 @@ P-expressions must be rewritten or otherwise interpreted before a
 meaningful Preserves value can be arrived at ([see
 below](#reading-preserves)).
 
+## <a id="annotations"></a>Annotations and Comments
+
+Annotations and comments attach to the term following them, just as in
+the ordinary text syntax. However, it is common in programming notations
+to allow comments at the end of a file or other sequential construct:
+
+    {
+        key: value
+        # example of a comment at the end of a dictionary
+    }
+    # example of a comment at the end of the input file
+
+While the ordinary text syntax forbids comments in these positions,
+P-expressions allow them:
+
+         Document =/ *Value Trailer ws
+           Record =/ "<" Value *Value Trailer ws ">"
+         Sequence =/  "[" *Value Trailer ws "]"
+              Set =/ "#{" *Value Trailer ws "}"
+            Block =/  "{" *Value Trailer ws "}"
+
+           Trailer = 1*Annotation
+
 ## <a id="encoding-pexprs"></a>Encoding P-expressions as Preserves
 
 We write ⌜*p*⌝ for the encoding into Preserves of P-expression *p*.
 
 {:.pseudocode.equations}
-| ⌜·⌝ | : | **P-expression** ⟶ **Preserves** |
+| ⌜·⌝ : **P-expression** | ⟶ | **Preserves** |
 
-Aside from the special classes `Group`, `Block`, `Comma`, `Semicolon` or
-`Colons`, P-expressions are encoded directly as Preserves data.
+Aside from the special classes `Group`, `Block`, `Comma`, `Semicolon`,
+`Colons`, or `Trailer`, P-expressions are encoded directly as Preserves
+data.
 
 {:.pseudocode.equations}
 | ⌜`[`*p* ...`]`⌝  | = | `[`⌜*p*⌝ ...`]`             |
@@ -86,10 +110,10 @@ Aside from the special classes `Group`, `Block`, `Comma`, `Semicolon` or
 | ⌜`#{`*p* ...`}`⌝ | = | `#{`⌜*p*⌝ ...`}`            |
 | ⌜`#!`*p*⌝        | = | `#!`⌜*p*⌝                   |
 | ⌜`@`*p* *q*⌝     | = | `@`⌜*p*⌝ ⌜*q*⌝              |
-| ⌜*p*⌝            | = | *p* **when** *p* ∈ **Atom** |
+| ⌜*p*⌝            | = | *p* when *p* ∈ **Atom** |
 
-All members of the special classes are encoded as Preserves text
-`Dictionary`[^encoding-rationale] values.
+All members of the special classes are encoded as Preserves
+dictionaries[^encoding-rationale].
 
 [^encoding-rationale]: In principle, it would be nice to use *records*
     for this purpose, but if we did so we would have to also encode
@@ -101,6 +125,17 @@ All members of the special classes are encoded as Preserves text
 | ⌜`,`⌝           | = | `{s:|,|}`           |
 | ⌜`;`⌝           | = | `{s:|;|}`           |
 | ⌜`:` ...⌝       | = | `{s:|:` ...`|}`     |
+| ⌜*t*⌝           | = | ⌜*a*⌝ ... `{}`, where *a* ... are the annotations in *t* and *t* ∈ **Trailer** |
+
+The empty dictionary `{}` acts as an anchor for the annotations in a
+`Trailer`.
+
+We overload the ⌜·⌝ notation for encoding whole `Document`s into
+sequences of Preserves values.
+
+{:.pseudocode.equations}
+| ⌜·⌝ : **P-expression Document** | ⟶ | **Preserves Sequence** |
+| ⌜*p* ...⌝                       | = | `[`⌜*p*⌝ ...`]`        |
 
 ## <a id="reading-preserves"></a>Interpreting P-expressions as Preserves
 
@@ -117,17 +152,25 @@ classes mentioned above.
 
  1. Every `Group` or `Semicolon` that appears is an error.
  2. Every `Colons` with two or more colons in it is an error.
- 3. Every `Comma` that appears is removed from its container.
+ 3. Every `Comma` that appears is discarded.
+ 3. Every `Trailer` that appears is an error.[^discard-trailers-instead-of-error]
  4. Every `Block` must contain triplets of `Value`, `Colons` (with a
     single colon), `Value`. Any `Block` not following this pattern is an
     error. Each `Block` following the pattern is translated to a
     `Dictionary` containing a key/value pair for each triplet.
 
+[^discard-trailers-instead-of-error]: **Implementation note.** When
+    implementing parsing of P-expressions into Preserves, consider
+    offering an optional mode in which trailing annotations (`Trailer`s)
+    are *discarded* instead of causing an error to be signalled.
+
 ## Appendix: Examples
 
 Examples are given as pairs of P-expressions and their Preserves
 text-syntax encodings.
 
+### Individual P-expression `Value`s
+
 ```preserves
  ⌜<date 1821 (lookup-month "February") 3>⌝
 = <date 1821 {g:[lookup-month "February"]} 3>
@@ -182,6 +225,23 @@ text-syntax encodings.
   ]}
 ```
 
+### Whole `Document`s
+
+```preserves
+ ⌜{
+      key: value
+      # example of a comment at the end of a dictionary
+  }
+  # example of a comment at the end of the input file⌝
+= [ {b:[
+        key {s:|:|} value
+        @"example of a comment at the end of a dictionary" {}
+    ]}
+    @"example of a comment at the end of the input file"
+    {}
+  ]
+```
+
 ## Appendix: Reading vs. Parsing
 
 Lisp systems first *read* streams of bytes into S-expressions and then
diff --git a/preserves-text.md b/preserves-text.md
index b7dfef0..00942a4 100644
--- a/preserves-text.md
+++ b/preserves-text.md
@@ -273,7 +273,8 @@ value. Each annotation is, in turn, a `Value`, and may itself have
 annotations. The ordering of annotations attached to a `Value` is
 significant.
 
-            Value =/ ws "@" Value Value
+            Value =/ ws Annotation Value
+        Annotation = "@" Value
 
 Each annotation is preceded by `@`; the underlying annotated value
 follows its annotations. Here we extend only the syntactic nonterminal
@@ -283,7 +284,7 @@ named “`Value`” without altering the semantic class of `Value`s.
 interpreted as comments associated with that value. Comments are
 sufficiently common that special syntax exists for them.
 
-            Value =/ ws ";" linecomment (CR / LF) Value
+       Annotation =/ ";" linecomment (CR / LF)
        linecomment = *<any unicode scalar value except CR or LF>
 
 When written this way, everything between the `;` and the end of the line

From ec03bdb45ff48bd37e4e62d8a4cce28fd7d5c44c Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones <tonyg@leastfixedpoint.com>
Date: Tue, 31 Oct 2023 20:00:18 +0100
Subject: [PATCH 8/8] Clarify lexicographical ordering

---
 preserves.md | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/preserves.md b/preserves.md
index 1936975..8646805 100644
--- a/preserves.md
+++ b/preserves.md
@@ -104,8 +104,8 @@ the `totalOrder` predicate defined in section 5.10 of [IEEE Std
 
 A `Record` is a *labelled* tuple of `Value`s, the record's *fields*. A
 label can be any `Value`, but is usually a `Symbol`.[^extensibility]
-[^iri-labels] `Record`s are compared lexicographically: first by
-label, then by field sequence.
+[^iri-labels] `Record`s are ordered first by label, then
+lexicographically[^lexicographical-sequences] by field sequence.
 
   [^extensibility]: The [Racket](https://racket-lang.org/) programming
     language defines
@@ -123,10 +123,25 @@ label, then by field sequence.
     it cannot be read as an IRI at all, and so the label simply stands
     for itself—for its own `Value`.
 
+  [^lexicographical-sequences]: When comparing sequences of values for
+    the total order, [lexicographical
+    ordering](https://en.wikipedia.org/wiki/Lexicographic_order) is
+    used. Elements are drawn pairwise, in order, from the two sequences
+    being compared. At the first pair whose elements differ under the
+    total order, the sequence holding the smaller element is the smaller
+    sequence. If the end of one sequence is reached while the other
+    sequence has elements remaining, the shorter sequence is considered
+    smaller. Otherwise, all the elements compared equal and neither
+    sequence was longer, so the two sequences compare equal. For example,
+      - `[#f]` is ordered before `[foo]` because `Boolean` appears before `Symbol` in the kind ordering;
+      - `[x]` before `[x y]` because there is no element remaining to compare against `y`;
+      - `[a b]` before `[x]` because `a` is smaller than `x`; and
+      - `[x y]` before `[x z]` because `y` is ordered before `z` according to the ordering rules for `Symbol`.
+
 ### Sequences.
 
 A `Sequence` is a sequence of `Value`s. `Sequence`s are compared
-lexicographically.
+lexicographically.[^lexicographical-sequences]
 
 ### Sets.
 
@@ -134,15 +149,16 @@ A `Set` is an unordered finite set of `Value`s. It contains no
 duplicate values, following the [equivalence relation](#equivalence)
 induced by the total order on `Value`s. Two `Set`s are compared by
 sorting their elements ascending using the [total order](#total-order)
-and comparing the resulting `Sequence`s.
+and comparing the resulting `Sequence`s.[^lexicographical-sequences]
 
 ### Dictionaries.
 
 A `Dictionary` is an unordered finite collection of pairs of `Value`s.
 Each pair comprises a *key* and a *value*. Keys in a `Dictionary` are
 pairwise distinct. Instances of `Dictionary` are compared by
-lexicographic comparison of the sequences resulting from ordering each
-`Dictionary`'s pairs in ascending order by key.
+lexicographic[^lexicographical-sequences] comparison of the sequences
+resulting from ordering each `Dictionary`'s pairs in ascending order by
+key.
 
 ### Embeddeds.
 
@@ -194,8 +210,12 @@ sequences use [the Preserves binary encoding](preserves-binary.html).
 
 The total ordering specified [above](#total-order) means that the following statements are true:
 
-    "bzz" < "c" < "caa" < #!"a"
-    #t < 3.0f < 3.0 < 3 < "3" < |3| < [] < #!#t
+ - `"bzz"` &lt; `"c"` &lt; `"caa"` &lt; `#!"a"`
+ - `#t` &lt; `3.0f` &lt; `3.0` &lt; `3` &lt; `"3"` &lt; `|3|` &lt; `[]` &lt; `#!#t`
+ - `[#f]` &lt; `[foo]`, because `Boolean` appears before `Symbol` in the kind ordering
+ - `[x]` &lt; `[x y]`, because there is no element remaining to compare against `y`
+ - `[a b]` &lt; `[x]`, because `a` is smaller than `x`
+ - `[x y]` &lt; `[x z]`, because `y` is ordered before `z`
 
 ### Simple examples.