From cf50e00f80ad8ba51e9aaf410653bfd87660dd00 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 31 Oct 2023 12:53:54 +0100 Subject: [PATCH 1/8] Repair failing TS bigint tests --- .../javascript/packages/core/src/bytes.ts | 16 +++-- .../javascript/packages/core/src/decoder.ts | 37 ++++++++-- .../javascript/packages/core/src/encoder.ts | 47 +++++++++++-- .../javascript/packages/core/src/fold.ts | 10 ++- .../javascript/packages/core/src/fromjs.ts | 2 +- .../javascript/packages/core/src/is.ts | 8 ++- .../javascript/packages/core/src/merge.ts | 13 +++- .../javascript/packages/core/src/reader.ts | 14 ++-- .../javascript/packages/core/src/values.ts | 2 +- .../javascript/packages/core/src/writer.ts | 5 +- .../packages/core/test/codec.test.ts | 65 ++++++++++++++++++ .../packages/core/test/values.test.ts | 49 ++++++++++++- implementations/python/tests/samples.bin | Bin 12124 -> 12436 bytes implementations/python/tests/samples.pr | 4 ++ .../preserves/preserves/tests/samples.pr | 4 ++ tests/samples.bin | Bin 12124 -> 12436 bytes tests/samples.pr | 4 ++ 17 files changed, 250 insertions(+), 30 deletions(-) diff --git a/implementations/javascript/packages/core/src/bytes.ts b/implementations/javascript/packages/core/src/bytes.ts index 89fd2a1..3f09e8a 100644 --- a/implementations/javascript/packages/core/src/bytes.ts +++ b/implementations/javascript/packages/core/src/bytes.ts @@ -53,13 +53,17 @@ export class Bytes implements Preservable, PreserveWritable { static fromHex(s: string): Bytes { if (s.length & 1) throw new Error("Cannot decode odd-length hexadecimal string"); + const result = new Bytes(s.length >> 1); + Bytes._raw_fromHexInto(s, result._view); + return result; + } + + static _raw_fromHexInto(s: string, target: Uint8Array): void { const len = s.length >> 1; - const result = new Bytes(len); for (let i = 0; i < len; i++) { - result._view[i] = + target[i] = (unhexDigit(s.charCodeAt(i << 1)) << 4) | unhexDigit(s.charCodeAt((i << 1) + 1)); } - return result; } 
static fromIO(io: string | BytesLike): string | Bytes { @@ -135,11 +139,11 @@ export class Bytes implements Preservable, PreserveWritable { return Bytes.isBytes(v) ? v : void 0; } - toHex(): string { + toHex(digit = hexDigit): string { var nibbles = []; for (let i = 0; i < this.length; i++) { - nibbles.push(hexDigit(this._view[i] >> 4)); - nibbles.push(hexDigit(this._view[i] & 15)); + nibbles.push(digit(this._view[i] >> 4)); + nibbles.push(digit(this._view[i] & 15)); } return nibbles.join(''); } diff --git a/implementations/javascript/packages/core/src/decoder.ts b/implementations/javascript/packages/core/src/decoder.ts index f552aa8..56eb106 100644 --- a/implementations/javascript/packages/core/src/decoder.ts +++ b/implementations/javascript/packages/core/src/decoder.ts @@ -4,7 +4,7 @@ import { Tag } from "./constants"; import { Set, Dictionary } from "./dictionary"; import { DoubleFloat, SingleFloat } from "./float"; import { Record } from "./record"; -import { Bytes, BytesLike, underlying } from "./bytes"; +import { Bytes, BytesLike, underlying, hexDigit } from "./bytes"; import { Value } from "./values"; import { is } from "./is"; import { embed, GenericEmbedded, Embedded, EmbeddedTypeDecode } from "./embedded"; @@ -34,7 +34,7 @@ export interface TypedDecoder { nextFloat(): SingleFloat | undefined; nextDouble(): DoubleFloat | undefined; nextEmbedded(): Embedded | undefined; - nextSignedInteger(): number | undefined; + nextSignedInteger(): number | bigint | undefined; nextString(): string | undefined; nextByteString(): Bytes | undefined; nextSymbol(): symbol | undefined; @@ -130,15 +130,42 @@ export class DecoderState { return (this.nextbyte() === Tag.End) || (this.index--, false); } - nextint(n: number): number { - // TODO: Bignums :-/ + nextint(n: number): number | bigint { + const start = this.index; if (n === 0) return 0; + if (n > 7) return this.nextbigint(n); + if (n === 7) { + const highByte = this.packet[this.index]; + if ((highByte >= 0x20) && (highByte 
< 0xe0)) { + return this.nextbigint(n); + } + // if highByte is 0xe0, we still might have a value + // equal to (Number.MIN_SAFE_INTEGER-1). + } let acc = this.nextbyte(); if (acc & 0x80) acc -= 256; for (let i = 1; i < n; i++) acc = (acc * 256) + this.nextbyte(); + if (!Number.isSafeInteger(acc)) { + this.index = start; + return this.nextbigint(n); + } return acc; } + nextbigint(n: number): bigint { + if (n === 0) return BigInt(0); + const bs = Bytes.from(this.nextbytes(n)); + if (bs.get(0) >= 128) { + // negative + const hex = bs.toHex(d => hexDigit(15 - d)); + return ~BigInt('0x' + hex); + } else { + // (strictly) positive + const hex = bs.toHex(); + return BigInt('0x' + hex); + } + } + wrap(v: Value): Value { return this.includeAnnotations ? new Annotated(v) : v; } @@ -306,7 +333,7 @@ export class Decoder implements TypedDecoder { }); } - nextSignedInteger(): number | undefined { + nextSignedInteger(): number | bigint | undefined { return this.skipAnnotations((reset) => { switch (this.state.nextbyte()) { case Tag.SignedInteger: return this.state.nextint(this.state.varint()); diff --git a/implementations/javascript/packages/core/src/encoder.ts b/implementations/javascript/packages/core/src/encoder.ts index 92b9a8c..c4942dc 100644 --- a/implementations/javascript/packages/core/src/encoder.ts +++ b/implementations/javascript/packages/core/src/encoder.ts @@ -1,5 +1,5 @@ import { Tag } from "./constants"; -import { Bytes } from "./bytes"; +import { Bytes, unhexDigit } from "./bytes"; import { Value } from "./values"; import { EncodeError } from "./codec"; import { Record, Tuple } from "./record"; @@ -122,6 +122,13 @@ export class EncoderState { this.index += bs.length; } + claimbytes(count: number) { + this.makeroom(count); + const view = new Uint8Array(this.view.buffer, this.index, count); + this.index += count; + return view; + } + varint(v: number) { while (v >= 128) { this.emitbyte((v % 128) + 128); @@ -130,8 +137,9 @@ export class EncoderState { 
this.emitbyte(v); } - encodeint(v: number) { - // TODO: Bignums :-/ + encodeint(v: number | bigint) { + if (typeof v === 'bigint') return this.encodebigint(v); + this.emitbyte(Tag.SignedInteger); if (v === 0) { @@ -153,6 +161,37 @@ export class EncoderState { enc(bytecount, v); } + encodebigint(v: bigint) { + this.emitbyte(Tag.SignedInteger); + + let hex: string; + if (v > 0) { + hex = v.toString(16); + if (hex.length & 1) { + hex = '0' + hex; + } else if (unhexDigit(hex.charCodeAt(0)) >= 8) { + hex = '00' + hex; + } + } else if (v < 0) { + const negatedHex = (~v).toString(16); + hex = ''; + for (let i = 0; i < negatedHex.length; i++) { + hex = hex + 'fedcba9876543210'[unhexDigit(negatedHex.charCodeAt(i))]; + } + if (hex.length & 1) { + hex = 'f' + hex; + } else if (unhexDigit(hex.charCodeAt(0)) < 8) { + hex = 'ff' + hex; + } + } else { + this.emitbyte(0); + return; + } + + this.varint(hex.length >> 1); + Bytes._raw_fromHexInto(hex, this.claimbytes(hex.length >> 1)); + } + encodebytes(tag: Tag, bs: Uint8Array) { this.emitbyte(tag); this.varint(bs.length); @@ -219,7 +258,7 @@ export class Encoder { else if (typeof v === 'boolean') { this.state.emitbyte(v ? 
Tag.True : Tag.False); } - else if (typeof v === 'number') { + else if (typeof v === 'number' || typeof v === 'bigint') { this.state.encodeint(v); } else if (typeof v === 'string') { diff --git a/implementations/javascript/packages/core/src/fold.ts b/implementations/javascript/packages/core/src/fold.ts index fe04412..d65b024 100644 --- a/implementations/javascript/packages/core/src/fold.ts +++ b/implementations/javascript/packages/core/src/fold.ts @@ -28,7 +28,7 @@ export interface FoldMethods { boolean(b: boolean): R; single(f: number): R; double(f: number): R; - integer(i: number): R; + integer(i: number | bigint): R; string(s: string): R; bytes(b: Bytes): R; symbol(s: symbol): R; @@ -47,7 +47,7 @@ export class VoidFold implements FoldMethods { boolean(b: boolean): void {} single(f: number): void {} double(f: number): void {} - integer(i: number): void {} + integer(i: number | bigint): void {} string(s: string): void {} bytes(b: Bytes): void {} symbol(s: symbol): void {} @@ -79,7 +79,7 @@ export abstract class ValueFold implements FoldMethods> { double(f: number): Value { return Double(f); } - integer(i: number): Value { + integer(i: number | bigint): Value { return i; } string(s: string): Value { @@ -138,6 +138,8 @@ export function valueClass(v: Value): ValueClass { } else { return ValueClass.SignedInteger; } + case 'bigint': + return ValueClass.SignedInteger; case 'string': return ValueClass.String; case 'symbol': @@ -181,6 +183,8 @@ export function fold(v: Value, o: FoldMethods): R { } else { return o.integer(v); } + case 'bigint': + return o.integer(v); case 'string': return o.string(v); case 'symbol': diff --git a/implementations/javascript/packages/core/src/fromjs.ts b/implementations/javascript/packages/core/src/fromjs.ts index c676152..7e3c1d2 100644 --- a/implementations/javascript/packages/core/src/fromjs.ts +++ b/implementations/javascript/packages/core/src/fromjs.ts @@ -12,6 +12,7 @@ export function fromJS(x: any): Value { throw new 
TypeError("Refusing to autoconvert non-integer number to Single or Double"); } // FALL THROUGH + case 'bigint': case 'string': case 'symbol': case 'boolean': @@ -19,7 +20,6 @@ export function fromJS(x: any): Value { case 'undefined': case 'function': - case 'bigint': break; case 'object': diff --git a/implementations/javascript/packages/core/src/is.ts b/implementations/javascript/packages/core/src/is.ts index 03551da..355f59c 100644 --- a/implementations/javascript/packages/core/src/is.ts +++ b/implementations/javascript/packages/core/src/is.ts @@ -12,7 +12,13 @@ export function is(a: any, b: any): boolean { if (isAnnotated(a)) a = a.item; if (isAnnotated(b)) b = b.item; if (Object.is(a, b)) return true; - if (typeof a !== typeof b) return false; + if (typeof a !== typeof b) { + if ((typeof a === 'number' && typeof b === 'bigint') || + (typeof a === 'bigint' && typeof b === 'number')) { + return a == b; + } + return false; + } if (typeof a === 'object') { if (a === null || b === null) return false; if ('equals' in a && typeof a.equals === 'function') return a.equals(b, is); diff --git a/implementations/javascript/packages/core/src/merge.ts b/implementations/javascript/packages/core/src/merge.ts index fa8b215..052374f 100644 --- a/implementations/javascript/packages/core/src/merge.ts +++ b/implementations/javascript/packages/core/src/merge.ts @@ -7,6 +7,7 @@ import { Set, Dictionary } from "./dictionary"; import { Annotated } from "./annotated"; import { unannotate } from "./strip"; import { embed, isEmbedded, Embedded } from "./embedded"; +import { isCompound } from "./compound"; export function merge( mergeEmbeddeds: (a: T, b: T) => T | undefined, @@ -18,7 +19,17 @@ export function merge( } function walk(a: Value, b: Value): Value { - if (a === b) return a; + if (a === b) { + // Shortcut for merges of trivially identical values. + return a; + } + if (!isCompound(a) && !isCompound(b)) { + // Don't do expensive recursive comparisons for compounds. 
+ if (is(a, b)) { + // Shortcut for merges of marginally less trivially identical values. + return a; + } + } return fold>(a, { boolean: die, single(_f: number) { return is(a, b) ? a : die(); }, diff --git a/implementations/javascript/packages/core/src/reader.ts b/implementations/javascript/packages/core/src/reader.ts index 50bec7b..b17c86c 100644 --- a/implementations/javascript/packages/core/src/reader.ts +++ b/implementations/javascript/packages/core/src/reader.ts @@ -21,9 +21,8 @@ export interface ReaderOptions extends ReaderStateOptions { embeddedDecode?: EmbeddedTypeDecode; } -type IntOrFloat = 'int' | 'float'; -type Numeric = number | SingleFloat | DoubleFloat; -type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric; +const MAX_SAFE_INTEGERn = BigInt(Number.MAX_SAFE_INTEGER); +const MIN_SAFE_INTEGERn = BigInt(Number.MIN_SAFE_INTEGER); export const NUMBER_RE: RegExp = /^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$/; // Groups: @@ -174,9 +173,12 @@ export class ReaderState { const m = NUMBER_RE.exec(acc); if (m) { if (m[2] === void 0) { - let v = parseInt(m[1]); - if (Object.is(v, -0)) v = 0; - return v; + let v = BigInt(m[1]); + if (v <= MIN_SAFE_INTEGERn || v >= MAX_SAFE_INTEGERn) { + return v; + } else { + return Number(v); + } } else if (m[7] === '') { return Double(parseFloat(m[1] + m[3])); } else { diff --git a/implementations/javascript/packages/core/src/values.ts b/implementations/javascript/packages/core/src/values.ts index 1746bcb..1a030f5 100644 --- a/implementations/javascript/packages/core/src/values.ts +++ b/implementations/javascript/packages/core/src/values.ts @@ -15,7 +15,7 @@ export type Atom = | boolean | SingleFloat | DoubleFloat - | number + | number | bigint | string | Bytes | symbol; diff --git a/implementations/javascript/packages/core/src/writer.ts b/implementations/javascript/packages/core/src/writer.ts index 8409e5c..93c9d20 100644 --- a/implementations/javascript/packages/core/src/writer.ts +++ 
b/implementations/javascript/packages/core/src/writer.ts @@ -278,6 +278,7 @@ export class Writer { } break; } + case 'bigint': case 'number': this.state.pieces.push('' + v); break; @@ -328,7 +329,9 @@ export class Writer { } break; default: - throw new Error(`Internal error: unhandled in Preserves Writer.push for ${v}`); + ((_: never) => { + throw new Error(`Internal error: unhandled in Preserves Writer.push for ${v}`); + })(v); } return this; // for chaining } diff --git a/implementations/javascript/packages/core/test/codec.test.ts b/implementations/javascript/packages/core/test/codec.test.ts index 70f821d..9b0291c 100644 --- a/implementations/javascript/packages/core/test/codec.test.ts +++ b/implementations/javascript/packages/core/test/codec.test.ts @@ -184,6 +184,71 @@ describe('encoding and decoding embeddeds', () => { }); }); +describe('integer text parsing', () => { + it('should work for zero', () => { + expect(parse('0')).is(0); + }); + + it('should work for smallish positive integers', () => { + expect(parse('60000')).is(60000); + }); + it('should work for smallish negative integers', () => { + expect(parse('-60000')).is(-60000); + }); + + it('should work for largeish positive integers', () => { + expect(parse('1234567812345678123456781234567')) + .is(BigInt("1234567812345678123456781234567")); + }); + it('should work for largeish negative integers', () => { + expect(parse('-1234567812345678123456781234567')) + .is(BigInt("-1234567812345678123456781234567")); + }); + + it('should work for larger positive integers', () => { + expect(parse('12345678123456781234567812345678')) + .is(BigInt("12345678123456781234567812345678")); + }); + it('should work for larger negative integers', () => { + expect(parse('-12345678123456781234567812345678')) + .is(BigInt("-12345678123456781234567812345678")); + }); +}); + +describe('integer binary encoding', () => { + it('should work for zero integers', () => { + expect(encode(0)).is(Bytes.fromHex('b000')); + }); + it('should 
work for zero bigints', () => { + expect(encode(BigInt(0))).is(Bytes.fromHex('b000')); + }); + + it('should work for smallish positive integers', () => { + expect(encode(60000)).is(Bytes.fromHex('b00300ea60')); + }); + it('should work for smallish negative integers', () => { + expect(encode(-60000)).is(Bytes.fromHex('b003ff15a0')); + }); + + it('should work for largeish positive integers', () => { + expect(encode(BigInt("1234567812345678123456781234567"))) + .is(Bytes.fromHex('b00d0f951a8f2b4b049d518b923187')); + }); + it('should work for largeish negative integers', () => { + expect(encode(BigInt("-1234567812345678123456781234567"))) + .is(Bytes.fromHex('b00df06ae570d4b4fb62ae746dce79')); + }); + + it('should work for larger positive integers', () => { + expect(encode(BigInt("12345678123456781234567812345678"))) + .is(Bytes.fromHex('b00e009bd30997b0ee2e252f73b5ef4e')); + }); + it('should work for larger negative integers', () => { + expect(encode(BigInt("-12345678123456781234567812345678"))) + .is(Bytes.fromHex('b00eff642cf6684f11d1dad08c4a10b2')); + }); +}); + describe('common test suite', () => { const samples_bin = fs.readFileSync(__dirname + '/../../../../../tests/samples.bin'); const samples = decodeWithAnnotations(samples_bin, { embeddedDecode: genericEmbeddedTypeDecode }); diff --git a/implementations/javascript/packages/core/test/values.test.ts b/implementations/javascript/packages/core/test/values.test.ts index 11ad12d..7a91ba6 100644 --- a/implementations/javascript/packages/core/test/values.test.ts +++ b/implementations/javascript/packages/core/test/values.test.ts @@ -1,4 +1,4 @@ -import { Single, Double, fromJS, Dictionary, IDENTITY_FOLD, fold, mapEmbeddeds, Value, embed } from '../src/index'; +import { Single, Double, fromJS, Dictionary, IDENTITY_FOLD, fold, mapEmbeddeds, Value, embed, preserves } from '../src/index'; import './test-utils'; describe('Single', () => { @@ -41,4 +41,51 @@ describe('fromJS', () => { it('should map integers to themselves', 
() => { expect(fromJS(1)).toBe(1); }); + + it('should map bigints to themselves', () => { + expect(fromJS(BigInt("12345678123456781234567812345678"))) + .toBe(BigInt("12345678123456781234567812345678"));; + }); +}); + +describe('is()', () => { + it('should compare small integers sensibly', () => { + expect(3).is(3); + expect(3).not.is(4); + }); + it('should compare large integers sensibly', () => { + const a = BigInt("12345678123456781234567812345678"); + const b = BigInt("12345678123456781234567812345679"); + expect(a).is(a); + expect(a).is(BigInt("12345678123456781234567812345678")); + expect(a).not.is(b); + }); + it('should compare mixed integers sensibly', () => { + const a = BigInt("12345678123456781234567812345678"); + const b = BigInt("3"); + const c = BigInt("4"); + expect(3).not.is(a); + expect(a).not.is(3); + expect(3).not.toBe(b); + expect(3).is(b); + expect(b).not.toBe(3); + expect(b).is(3); + expect(3).not.toBe(c); + expect(3).not.is(c); + expect(c).not.toBe(3); + expect(c).not.is(3); + }); +}); + +describe('`preserves` formatter', () => { + it('should format numbers', () => { + expect(preserves`>${3}<`).toBe('>3<'); + }); + it('should format small bigints', () => { + expect(preserves`>${BigInt("3")}<`).toBe('>3<'); + }); + it('should format big bigints', () => { + expect(preserves`>${BigInt("12345678123456781234567812345678")}<`) + .toBe('>12345678123456781234567812345678<'); + }); }); diff --git a/implementations/python/tests/samples.bin b/implementations/python/tests/samples.bin index 70ebf1b3d6e50ae9165070fa3f157cead9483098..dbb41bf694a41c0e886b7b8d17639655d431ff03 100644 GIT binary patch delta 324 zcmcZ;HzjeyZ@qe@%)AmqBV!X&Gjj_Z^p?#mA*sbBoA@{I@=ulO*Y;+a8`wR`upN!t zvRN5umM#uMV1^?Z_95$OLH(62zmwLLJ`+ vQ`Ilt`rZ$n-?CW+tK%%typCkx{}i2X8UBJ7Z(Zo|64 float15: @"-qNaN" float16: @"-qNaN" + int-12345678123456781234567812345678: + int-1234567812345678123456781234567: int-257: int-256: int-255: @@ -146,6 +148,8 @@ int65536: int131072: int2500000000: + 
int1234567812345678123456781234567: + int12345678123456781234567812345678: int87112285931760246646623899502532662132736: list0: list4: diff --git a/implementations/racket/preserves/preserves/tests/samples.pr b/implementations/racket/preserves/preserves/tests/samples.pr index 4646594..df8ae0b 100644 --- a/implementations/racket/preserves/preserves/tests/samples.pr +++ b/implementations/racket/preserves/preserves/tests/samples.pr @@ -118,6 +118,8 @@ float14: @"+qNaN" float15: @"-qNaN" float16: @"-qNaN" + int-12345678123456781234567812345678: + int-1234567812345678123456781234567: int-257: int-256: int-255: @@ -146,6 +148,8 @@ int65536: int131072: int2500000000: + int1234567812345678123456781234567: + int12345678123456781234567812345678: int87112285931760246646623899502532662132736: list0: list4: diff --git a/tests/samples.bin b/tests/samples.bin index 70ebf1b3d6e50ae9165070fa3f157cead9483098..dbb41bf694a41c0e886b7b8d17639655d431ff03 100644 GIT binary patch delta 324 zcmcZ;HzjeyZ@qe@%)AmqBV!X&Gjj_Z^p?#mA*sbBoA@{I@=ulO*Y;+a8`wR`upN!t zvRN5umM#uMV1^?Z_95$OLH(62zmwLLJ`+ vQ`Ilt`rZ$n-?CW+tK%%typCkx{}i2X8UBJ7Z(Zo|64 float15: @"-qNaN" float16: @"-qNaN" + int-12345678123456781234567812345678: + int-1234567812345678123456781234567: int-257: int-256: int-255: @@ -146,6 +148,8 @@ int65536: int131072: int2500000000: + int1234567812345678123456781234567: + int12345678123456781234567812345678: int87112285931760246646623899502532662132736: list0: list4: From 8276a50552de3d6d56c2a47694c6270549c0da75 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 31 Oct 2023 13:27:03 +0100 Subject: [PATCH 2/8] Repair error in Rust integer width calculation --- implementations/python/tests/samples.bin | Bin 12436 -> 12629 bytes implementations/python/tests/samples.pr | 2 ++ .../preserves/preserves/tests/samples.pr | 2 ++ .../rust/preserves/src/value/packed/writer.rs | 2 +- tests/samples.bin | Bin 12436 -> 12629 bytes tests/samples.pr | 2 ++ 6 files changed, 7 insertions(+), 1 deletion(-) 
diff --git a/implementations/python/tests/samples.bin b/implementations/python/tests/samples.bin index dbb41bf694a41c0e886b7b8d17639655d431ff03..d6008aaab2fa3445f5efce4545e376e67137ad9e 100644 GIT binary patch delta 204 zcmbP|cr|ImB!l|RI+=MTmKNq_rY6Qlh6V&!TQ;+Vq!yQK64@ZcsAP95Q2XxWjz80G zW@Q>4_^OLdtYxz<(0pB-#=4-}ie$sTy9ZU*T~W#@XU%^> delta 10 RcmcbbG$nDvBm<_F762S!1gQW3 diff --git a/implementations/python/tests/samples.pr b/implementations/python/tests/samples.pr index df8ae0b..a390467 100644 --- a/implementations/python/tests/samples.pr +++ b/implementations/python/tests/samples.pr @@ -118,6 +118,7 @@ float14: @"+qNaN" float15: @"-qNaN" float16: @"-qNaN" + int-98765432109876543210987654321098765432109: int-12345678123456781234567812345678: int-1234567812345678123456781234567: int-257: @@ -151,6 +152,7 @@ int1234567812345678123456781234567: int12345678123456781234567812345678: int87112285931760246646623899502532662132736: + int98765432109876543210987654321098765432109: list0: list4: list4a: diff --git a/implementations/racket/preserves/preserves/tests/samples.pr b/implementations/racket/preserves/preserves/tests/samples.pr index df8ae0b..a390467 100644 --- a/implementations/racket/preserves/preserves/tests/samples.pr +++ b/implementations/racket/preserves/preserves/tests/samples.pr @@ -118,6 +118,7 @@ float14: @"+qNaN" float15: @"-qNaN" float16: @"-qNaN" + int-98765432109876543210987654321098765432109: int-12345678123456781234567812345678: int-1234567812345678123456781234567: int-257: @@ -151,6 +152,7 @@ int1234567812345678123456781234567: int12345678123456781234567812345678: int87112285931760246646623899502532662132736: + int98765432109876543210987654321098765432109: list0: list4: list4a: diff --git a/implementations/rust/preserves/src/value/packed/writer.rs b/implementations/rust/preserves/src/value/packed/writer.rs index 72d6294..0fad0b2 100644 --- a/implementations/rust/preserves/src/value/packed/writer.rs +++ 
b/implementations/rust/preserves/src/value/packed/writer.rs @@ -289,7 +289,7 @@ impl Writer for BinaryOrderWriter { macro_rules! fits_in_bytes { ($v:ident, $limit:literal) => {{ let bits = $limit * 8 - 1; - $v >= -(2 << bits) && $v < (2 << bits) + $v >= -(1 << bits) && $v < (1 << bits) }}; } diff --git a/tests/samples.bin b/tests/samples.bin index dbb41bf694a41c0e886b7b8d17639655d431ff03..d6008aaab2fa3445f5efce4545e376e67137ad9e 100644 GIT binary patch delta 204 zcmbP|cr|ImB!l|RI+=MTmKNq_rY6Qlh6V&!TQ;+Vq!yQK64@ZcsAP95Q2XxWjz80G zW@Q>4_^OLdtYxz<(0pB-#=4-}ie$sTy9ZU*T~W#@XU%^> delta 10 RcmcbbG$nDvBm<_F762S!1gQW3 diff --git a/tests/samples.pr b/tests/samples.pr index df8ae0b..a390467 100644 --- a/tests/samples.pr +++ b/tests/samples.pr @@ -118,6 +118,7 @@ float14: @"+qNaN" float15: @"-qNaN" float16: @"-qNaN" + int-98765432109876543210987654321098765432109: int-12345678123456781234567812345678: int-1234567812345678123456781234567: int-257: @@ -151,6 +152,7 @@ int1234567812345678123456781234567: int12345678123456781234567812345678: int87112285931760246646623899502532662132736: + int98765432109876543210987654321098765432109: list0: list4: list4a: From 47b4c072682e2a9bb1ce665bcea5178d759f4d15 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 31 Oct 2023 13:27:35 +0100 Subject: [PATCH 3/8] Release independent packages preserves@3.990.4 Generated by cargo-workspaces --- implementations/rust/preserves/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/implementations/rust/preserves/Cargo.toml b/implementations/rust/preserves/Cargo.toml index a7e525b..36c662e 100644 --- a/implementations/rust/preserves/Cargo.toml +++ b/implementations/rust/preserves/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "preserves" -version = "3.990.3" +version = "3.990.4" authors = ["Tony Garnock-Jones "] edition = "2018" description = "Implementation of the Preserves serialization format via serde." 
From 982d916b613a63c1c932e52d175f9a535ebf8726 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 31 Oct 2023 17:34:37 +0100 Subject: [PATCH 4/8] Minor presentation tweak to make Sequence/Set/Dictionary line up in the grammar like it does in the abstract model definition --- preserves-text.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/preserves-text.md b/preserves-text.md index 97b145b..b7dfef0 100644 --- a/preserves-text.md +++ b/preserves-text.md @@ -55,7 +55,7 @@ Standalone documents may have trailing whitespace. Any `Value` may be preceded by whitespace. Value = ws (Record / Collection / Atom / Embedded) - Collection = Sequence / Dictionary / Set + Collection = Sequence / Set / Dictionary Atom = Boolean / String / ByteString / QuotedSymbol / SymbolOrNumber @@ -64,18 +64,18 @@ label-`Value` followed by its field-`Value`s. Record = "<" Value *Value ws ">" -`Sequence`s are enclosed in square brackets. `Dictionary` values are -curly-brace-enclosed colon-separated pairs of values. `Set`s are -written as values enclosed by the tokens `#{` and -`}`.[^printing-collections] It is an error for a set to contain +`Sequence`s are enclosed in square brackets. `Set`s are written as +values enclosed by the tokens `#{` and `}`. `Dictionary` values are +curly-brace-enclosed colon-separated pairs of +values.[^printing-collections] It is an error for a set to contain duplicate elements or for a dictionary to contain duplicate keys. When -printing sets and dictionaries, implementations *SHOULD* order -elements resp. keys with respect to the [total order over +printing sets and dictionaries, implementations *SHOULD* order elements +resp. 
keys with respect to the [total order over `Value`s](preserves.html#total-order).[^rationale-print-ordering] - Sequence = "[" *Value ws "]" - Dictionary = "{" *(Value ws ":" Value) ws "}" - Set = "#{" *Value ws "}" + Sequence = "[" *Value ws "]" + Set = "#{" *Value ws "}" + Dictionary = "{" *(Value ws ":" Value) ws "}" [^printing-collections]: **Implementation note.** When implementing printing of `Value`s using the textual syntax, consider supporting From a69444f08585f56d244a92e2ab6f49fe52d16000 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 31 Oct 2023 17:37:09 +0100 Subject: [PATCH 5/8] preserves-expressions.md --- .gitignore | 1 + Makefile | 7 +- preserves-expressions.md | 210 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 preserves-expressions.md diff --git a/.gitignore b/.gitignore index 1fc3cf7..f631d1d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ _site/ +preserves-expressions.pdf preserves-binary.pdf preserves-schema.pdf preserves-text.pdf diff --git a/Makefile b/Makefile index 5f8abc8..ac1dc8b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,11 @@ __ignored__ := $(shell ./setup.sh) -PDFS=preserves.pdf preserves-text.pdf preserves-binary.pdf preserves-schema.pdf +PDFS=\ + preserves.pdf \ + preserves-text.pdf \ + preserves-binary.pdf \ + preserves-schema.pdf \ + preserves-expressions.pdf all: $(PDFS) diff --git a/preserves-expressions.md b/preserves-expressions.md new file mode 100644 index 0000000..2adf01e --- /dev/null +++ b/preserves-expressions.md @@ -0,0 +1,210 @@ +--- +title: "P-expressions" +--- + +Tony Garnock-Jones +October 2023. Version 0.1.0. + +This document defines a grammar called *Preserves Expressions* +(*P-expressions*, *pexprs*) that includes [ordinary Preserves text +syntax](preserves-text.html) but offers extensions sufficient to support +a Lisp- or Haskell-like programming notation. 
+ +**Motivation.** The [text syntax](preserves-text.html) for Preserves +works well for writing `Value`s, i.e. data. However, in some contexts, +Preserves applications need a broader grammar that allows interleaving +of *expressions* with data. Two examples are the [Preserves Schema +language](preserves-schema.html) and the [Synit configuration scripting +language](https://synit.org/book/operation/scripting.html), both of +which (ab)use Preserves text syntax as a kind of programming notation. + +## Preliminaries + +The P-expression grammar takes the text syntax grammar as its base and +modifies it. + + +**Whitespace.** Whitespace is redefined as any number of spaces, tabs, +carriage returns, or line feeds. Commas are *not* considered whitespace +in P-expressions. + + ws = *(%x20 / %x09 / CR / LF) + + +**Delimiters.** Because commas are no longer included in class `ws`, +class `delimiter` is widened to include them explicitly. + + delimiter = ws / "," + / "<" / ">" / "[" / "]" / "{" / "}" + / "#" / ":" / DQUOTE / "|" / "@" / ";" + +## Grammar + +P-expressions add comma, semicolon, and sequences of one or more colons +to the syntax class `Value`. + + Value =/ Comma / Semicolon / Colons + Comma = "," + Semicolon = ";" + Colons = 1*":" + +Now that colon is in `Value`, the syntax for `Dictionary` is replaced +with `Block` everywhere it is mentioned. + + Block = "{" *Value ws "}" + +New syntax for explicit uninterpreted grouping of sequences of values is +introduced, and added to class `Value`. + + Value =/ ws Group + Group = "(" *Value ws ")" + +Finally, class `Document` is replaced in order to allow standalone +documents to directly comprise a sequence of multiple values. + + Document = *Value ws + +No changes to [the Preserves semantic model](preserves.html) are made. +Every Preserves text-syntax term is a valid P-expression, but in general +P-expressions must be rewritten or otherwise interpreted before a +meaningful Preserves value can be arrived at. 
+ +## Encoding P-expressions as Preserves + +Aside from the special classes `Group`, `Block`, `Comma`, `Semicolon` or +`Colons`, P-expressions are directly encodable as Preserves data. All +members of the special classes are encoded as Preserves text +`Dictionary`[^encoding-rationale] values: + +[^encoding-rationale]: In principle, it would be nice to use *records* + for this purpose, but if we did so we would have to also encode + usages of records! + +{:.pseudocode} +> ⌜`(`*p* ...`)`⌝ ⟶ `{g:[`⌜*p*⌝ ...`]}` +> ⌜`{`*p* ...`}`⌝ ⟶ `{b:[`⌜*p*⌝ ...`]}` +> ⌜`,`⌝ ⟶ `{s:|,|}` +> ⌜`;`⌝ ⟶ `{s:|;|}` +> ⌜`:` ...⌝ ⟶ `{s:|:` ...`|}` + +## Appendix: Examples + +Examples are given as pairs of P-expressions and their Preserves +text-syntax encodings. + +```preserves + ⌜⌝ += +``` + +```preserves + ⌜(begin (println! (+ 1 2)) (+ 3 4))⌝ += {g:[begin {g:[println! {g:[+ 1 2]}]} {g:[+ 3 4]}]} +``` + +```preserves + ⌜()⌝ += {g:[]} + + ⌜[() () ()]⌝ += [{g:[]}, {g:[]}, {g:[]}] +``` + +```preserves + ⌜{ + setUp(); + # Now enter the loop + loop: { + greet("World"); + } + tearDown(); + }⌝ += {b:[ + setUp {g:[]} {s:|;|} + # Now enter the loop + loop {s:|:|} {b:[ + greet {g:["World"]} {s:|;|} + ]} + tearDown {g:[]} {s:|;|} + ]} +``` + +```preserves + ⌜[1 + 2.0, print "Hello", predicate: #t, foo, #!remote, bar]⌝ += [1 + 2.0 {s:|,|} print "Hello" {s:|,|} predicate {s:|:|} #t {s:|,|} + foo {s:|,|} #!remote {s:|,|} bar] +``` + +```preserves + ⌜{ + optional name: string, + address: Address, + }⌝ += {b:[ + optional name {s:|:|} string {s:|,|} + address {s:|:|} Address {s:|,|} + ]} +``` + +## Appendix: Using a P-expression reader to read Preserves + +A reader for P-expressions can be adapted to yield a reader for +Preserves terms by processing (subterms of) each P-expression that the +reader produces. The only subterms that need processing are the special +classes mentioned above. + + 1. Every `Group` or `Semicolon` that appears is an error. + 2. 
Every `Colons` with two or more colons in it is an error. + 3. Every `Comma` that appears is removed from its container. + 4. Every `Block` must contain triplets of `Value`, `Colons` (with a + single colon), `Value`. Any `Block` not following this pattern is an + error. Each `Block` following the pattern is translated to a + `Dictionary` containing a key/value pair for each triplet. + +## Appendix: Reading vs. Parsing + +Lisp systems first *read* streams of bytes into S-expressions and then +*parse* those S-expressions into more abstract structures denoting +various kinds of program syntax. [Separation of reading from parsing is +what gives Lisp its syntactic +flexibility.](http://calculist.org/blog/2012/04/17/homoiconicity-isnt-the-point/) + +Similarly, the Apple programming language +[Dylan](https://en.wikipedia.org/wiki/Dylan_(programming_language)) +included a reader-parser split, with the Dylan reader producing +*D-expressions* that are somewhat similar to P-expressions. + +Finally, the Racket dialects +[Honu](https://docs.racket-lang.org/honu/index.html) and +[Something](https://github.com/tonyg/racket-something) use a +reader-parser-macro setup, where the reader produces Racket data, the +parser produces "syntax" and is user-extensible, and Racket's own +modular macro system rewrites this "syntax" down to core forms to be +compiled to machine code. + +Similarly, when using P-expressions as the foundation for a language, a +generic P-expression reader can then feed into special-purpose +*parsers*. The reader captures the coarse syntactic structure of a +program, and the parser refines this. + +Often, a parser will wish to extract structure from sequences of +P-expression `Value`s. + + - A simple technique is repeated splitting of sequences; first by + `Semicolon`, then by `Comma`, then by increasingly high binding-power + operators. 
+ + - More refined is to use a Pratt parser or similar + ([1](https://en.wikipedia.org/wiki/Operator-precedence_parser), + [2](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html), + [3](https://github.com/tonyg/racket-something/blob/f6116bf3861b76970f5ce291a628476adef820b4/src/something/pratt.rkt)) + to build a parse tree using an extensible specification of the pre-, + in-, and postfix operators involved. + + - Finally, if you treat sequences of `Value`s as pre-lexed token + streams, almost any parsing formalism (such as [PEG + parsing](https://en.wikipedia.org/wiki/Parsing_expression_grammar), + [OMeta](https://en.wikipedia.org/wiki/OMeta), etc.) can be used to + extract further syntactic structure. + +## Notes From c18e9dd1fe170d2ae8e620f04f98df049594d10d Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 31 Oct 2023 18:06:05 +0100 Subject: [PATCH 6/8] Tweaks --- preserves-expressions.md | 73 ++++++++++++++++++++++++++-------------- preserves.css | 6 ++++ 2 files changed, 53 insertions(+), 26 deletions(-) diff --git a/preserves-expressions.md b/preserves-expressions.md index 2adf01e..7901621 100644 --- a/preserves-expressions.md +++ b/preserves-expressions.md @@ -67,25 +67,61 @@ documents to directly comprise a sequence of multiple values. No changes to [the Preserves semantic model](preserves.html) are made. Every Preserves text-syntax term is a valid P-expression, but in general P-expressions must be rewritten or otherwise interpreted before a -meaningful Preserves value can be arrived at. +meaningful Preserves value can be arrived at ([see +below](#reading-preserves)). -## Encoding P-expressions as Preserves +## Encoding P-expressions as Preserves + +We write ⌜*p*⌝ for the encoding into Preserves of P-expression *p*. 
+ +{:.pseudocode.equations} +| ⌜·⌝ | : | **P-expression** ⟶ **Preserves** | Aside from the special classes `Group`, `Block`, `Comma`, `Semicolon` or -`Colons`, P-expressions are directly encodable as Preserves data. All -members of the special classes are encoded as Preserves text -`Dictionary`[^encoding-rationale] values: +`Colons`, P-expressions are encoded directly as Preserves data. + +{:.pseudocode.equations} +| ⌜`[`*p* ...`]`⌝ | = | `[`⌜*p*⌝ ...`]` | +| ⌜`<`*p* ...`>`⌝ | = | `<`⌜*p*⌝ ...`>` | +| ⌜`#{`*p* ...`}`⌝ | = | `#{`⌜*p*⌝ ...`}` | +| ⌜`#!`*p*⌝ | = | `#!`⌜*p*⌝ | +| ⌜`@`*p* *q*⌝ | = | `@`⌜*p*⌝ ⌜*q*⌝ | +| ⌜*p*⌝ | = | *p* **when** *p* ∈ **Atom** | + +All members of the special classes are encoded as Preserves text +`Dictionary`[^encoding-rationale] values. [^encoding-rationale]: In principle, it would be nice to use *records* for this purpose, but if we did so we would have to also encode usages of records! -{:.pseudocode} -> ⌜`(`*p* ...`)`⌝ ⟶ `{g:[`⌜*p*⌝ ...`]}` -> ⌜`{`*p* ...`}`⌝ ⟶ `{b:[`⌜*p*⌝ ...`]}` -> ⌜`,`⌝ ⟶ `{s:|,|}` -> ⌜`;`⌝ ⟶ `{s:|;|}` -> ⌜`:` ...⌝ ⟶ `{s:|:` ...`|}` +{:.pseudocode.equations} +| ⌜`(`*p* ...`)`⌝ | = | `{g:[`⌜*p*⌝ ...`]}` | +| ⌜`{`*p* ...`}`⌝ | = | `{b:[`⌜*p*⌝ ...`]}` | +| ⌜`,`⌝ | = | `{s:|,|}` | +| ⌜`;`⌝ | = | `{s:|;|}` | +| ⌜`:` ...⌝ | = | `{s:|:` ...`|}` | + +## Interpreting P-expressions as Preserves + +The [previous section](#encoding-pexprs) discussed ways of representing +P-expressions using Preserves. Here, we discuss *interpreting* +P-expressions *as* Preserves, so that (1) a Preserves datum (2) written +using Preserves text syntax and then (3) read as a P-expression can be +(4) interpreted from that P-expression to yield the original datum. + +A reader for P-expressions can be adapted to yield a reader for +Preserves terms by processing (subterms of) each P-expression that the +reader produces. The only subterms that need processing are the special +classes mentioned above. + + 1. 
Every `Group` or `Semicolon` that appears is an error. + 2. Every `Colons` with two or more colons in it is an error. + 3. Every `Comma` that appears is removed from its container. + 4. Every `Block` must contain triplets of `Value`, `Colons` (with a + single colon), `Value`. Any `Block` not following this pattern is an + error. Each `Block` following the pattern is translated to a + `Dictionary` containing a key/value pair for each triplet. ## Appendix: Examples @@ -146,21 +182,6 @@ text-syntax encodings. ]} ``` -## Appendix: Using a P-expression reader to read Preserves - -A reader for P-expressions can be adapted to yield a reader for -Preserves terms by processing (subterms of) each P-expression that the -reader produces. The only subterms that need processing are the special -classes mentioned above. - - 1. Every `Group` or `Semicolon` that appears is an error. - 2. Every `Colons` with two or more colons in it is an error. - 3. Every `Comma` that appears is removed from its container. - 4. Every `Block` must contain triplets of `Value`, `Colons` (with a - single colon), `Value`. Any `Block` not following this pattern is an - error. Each `Block` following the pattern is translated to a - `Dictionary` containing a key/value pair for each triplet. - ## Appendix: Reading vs. 
Parsing Lisp systems first *read* streams of bytes into S-expressions and then diff --git a/preserves.css b/preserves.css index 9c34209..7322cc7 100644 --- a/preserves.css +++ b/preserves.css @@ -1,6 +1,7 @@ :root { --sans-font: "Open Sans", -apple-system, BlinkMacSystemFont, avenir next, avenir, segoe ui, helvetica neue, helvetica, Cantarell, Ubuntu, roboto, noto, arial, sans-serif; --serif-font: palatino, "Palatino Linotype", "Palatino LT STD", "URW Palladio L", "TeX Gyre Pagella", serif; + --blockquote-indent: 40px; } body { font-family: var(--serif-font); @@ -230,6 +231,7 @@ table.postcard-grammar { blockquote { padding: 0.5rem 1rem; border-left: solid #4f81bd 2px; + margin-left: var(--blockquote-indent); margin-right: 0; } blockquote :first-child { @@ -243,6 +245,10 @@ blockquote :last-child { background-color: #e9f0f9; } +table.equations { width: auto; margin-left: var(--blockquote-indent); } +table.equations tr > *:nth-child(1) { text-align: right; } +table.equations tr > *:nth-child(2) { text-align: center; } + blockquote.pseudocode { border-left: none; padding: 0; From 23e0e59dafca603a1a8e1f4f4ea9c4000f1029ac Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 31 Oct 2023 19:32:06 +0100 Subject: [PATCH 7/8] Trailing comments --- preserves-expressions.md | 78 +++++++++++++++++++++++++++++++++++----- preserves-text.md | 5 +-- 2 files changed, 72 insertions(+), 11 deletions(-) diff --git a/preserves-expressions.md b/preserves-expressions.md index 7901621..abcc252 100644 --- a/preserves-expressions.md +++ b/preserves-expressions.md @@ -3,7 +3,7 @@ title: "P-expressions" --- Tony Garnock-Jones -October 2023. Version 0.1.0. +October 2023. Version 0.1.1. This document defines a grammar called *Preserves Expressions* (*P-expressions*, *pexprs*) that includes [ordinary Preserves text @@ -51,7 +51,7 @@ to the syntax class `Value`. Now that colon is in `Value`, the syntax for `Dictionary` is replaced with `Block` everywhere it is mentioned. 
- Block = "{" *Value ws "}" + Block = "{" *Value ws "}" New syntax for explicit uninterpreted grouping of sequences of values is introduced, and added to class `Value`. @@ -70,15 +70,39 @@ P-expressions must be rewritten or otherwise interpreted before a meaningful Preserves value can be arrived at ([see below](#reading-preserves)). +## Annotations and Comments + +Annotations and comments attach to the term following them, just as in +the ordinary text syntax. However, it is common in programming notations +to allow comments at the end of a file or other sequential construct: + + { + key: value + # example of a comment at the end of a dictionary + } + # example of a comment at the end of the input file + +While the ordinary text syntax forbids comments in these positions, +P-expressions allow them: + + Document =/ *Value Trailer ws + Record =/ "<" Value *Value Trailer ws ">" + Sequence =/ "[" *Value Trailer ws "]" + Set =/ "#{" *Value Trailer ws "}" + Block =/ "{" *Value Trailer ws "}" + + Trailer = 1*Annotation + ## Encoding P-expressions as Preserves We write ⌜*p*⌝ for the encoding into Preserves of P-expression *p*. {:.pseudocode.equations} -| ⌜·⌝ | : | **P-expression** ⟶ **Preserves** | +| ⌜·⌝ : **P-expression** | ⟶ | **Preserves** | -Aside from the special classes `Group`, `Block`, `Comma`, `Semicolon` or -`Colons`, P-expressions are encoded directly as Preserves data. +Aside from the special classes `Group`, `Block`, `Comma`, `Semicolon`, +`Colons`, or `Trailer`, P-expressions are encoded directly as Preserves +data. 
{:.pseudocode.equations} | ⌜`[`*p* ...`]`⌝ | = | `[`⌜*p*⌝ ...`]` | @@ -86,10 +110,10 @@ Aside from the special classes `Group`, `Block`, `Comma`, `Semicolon` or | ⌜`#{`*p* ...`}`⌝ | = | `#{`⌜*p*⌝ ...`}` | | ⌜`#!`*p*⌝ | = | `#!`⌜*p*⌝ | | ⌜`@`*p* *q*⌝ | = | `@`⌜*p*⌝ ⌜*q*⌝ | -| ⌜*p*⌝ | = | *p* **when** *p* ∈ **Atom** | +| ⌜*p*⌝ | = | *p* when *p* ∈ **Atom** | -All members of the special classes are encoded as Preserves text -`Dictionary`[^encoding-rationale] values. +All members of the special classes are encoded as Preserves +dictionaries[^encoding-rationale]. [^encoding-rationale]: In principle, it would be nice to use *records* for this purpose, but if we did so we would have to also encode @@ -101,6 +125,17 @@ All members of the special classes are encoded as Preserves text | ⌜`,`⌝ | = | `{s:|,|}` | | ⌜`;`⌝ | = | `{s:|;|}` | | ⌜`:` ...⌝ | = | `{s:|:` ...`|}` | +| ⌜*t*⌝ | = | ⌜*a*⌝ ... `{}`, where *a* ... are the annotations in *t* and *t* ∈ **Trailer** | + +The empty dictionary `{}` acts as an anchor for the annotations in a +`Trailer`. + +We overload the ⌜·⌝ notation for encoding whole `Document`s into +sequences of Preserves values. + +{:.pseudocode.equations} +| ⌜·⌝ : **P-expression Document** | ⟶ | **Preserves Sequence** | +| ⌜*p* ...⌝ | = | `[`⌜*p*⌝ ...`]` | ## Interpreting P-expressions as Preserves @@ -117,17 +152,25 @@ classes mentioned above. 1. Every `Group` or `Semicolon` that appears is an error. 2. Every `Colons` with two or more colons in it is an error. - 3. Every `Comma` that appears is removed from its container. + 3. Every `Comma` that appears is discarded. + 3. Every `Trailer` that appears is an error.[^discard-trailers-instead-of-error] 4. Every `Block` must contain triplets of `Value`, `Colons` (with a single colon), `Value`. Any `Block` not following this pattern is an error. Each `Block` following the pattern is translated to a `Dictionary` containing a key/value pair for each triplet. 
+[^discard-trailers-instead-of-error]: **Implementation note.** When + implementing parsing of P-expressions into Preserves, consider + offering an optional mode where trailing annotations `Trailer` are + *discarded* instead of causing an error to be signalled. + ## Appendix: Examples Examples are given as pairs of P-expressions and their Preserves text-syntax encodings. +### Individual P-expression `Value`s + ```preserves ⌜⌝ = @@ -182,6 +225,23 @@ text-syntax encodings. ]} ``` +### Whole `Document`s + +```preserves + ⌜{ + key: value + # example of a comment at the end of a dictionary + } + # example of a comment at the end of the input file⌝ += [ {b:[ + key {s:|:|} value + @"example of a comment at the end of a dictionary" {} + ]} + @"example of a comment at the end of the input file" + {} + ] +``` + ## Appendix: Reading vs. Parsing Lisp systems first *read* streams of bytes into S-expressions and then diff --git a/preserves-text.md b/preserves-text.md index b7dfef0..00942a4 100644 --- a/preserves-text.md +++ b/preserves-text.md @@ -273,7 +273,8 @@ value. Each annotation is, in turn, a `Value`, and may itself have annotations. The ordering of annotations attached to a `Value` is significant. - Value =/ ws "@" Value Value + Value =/ ws Annotation Value + Annotation = "@" Value Each annotation is preceded by `@`; the underlying annotated value follows its annotations. Here we extend only the syntactic nonterminal @@ -283,7 +284,7 @@ named “`Value`” without altering the semantic class of `Value`s. interpreted as comments associated with that value. Comments are sufficiently common that special syntax exists for them. 
- Value =/ ws ";" linecomment (CR / LF) Value + Annotation =/ ";" linecomment (CR / LF) linecomment = *<any unicode scalar value except CR or LF> When written this way, everything between the `;` and the end of the line From ec03bdb45ff48bd37e4e62d8a4cce28fd7d5c44c Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 31 Oct 2023 20:00:18 +0100 Subject: [PATCH 8/8] Clarify lexicographical ordering --- preserves.md | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/preserves.md b/preserves.md index 1936975..8646805 100644 --- a/preserves.md +++ b/preserves.md @@ -104,8 +104,8 @@ the `totalOrder` predicate defined in section 5.10 of [IEEE Std A `Record` is a *labelled* tuple of `Value`s, the record's *fields*. A label can be any `Value`, but is usually a `Symbol`.[^extensibility] -[^iri-labels] `Record`s are compared lexicographically: first by -label, then by field sequence. +[^iri-labels] `Record`s are ordered first by label, then +lexicographically[^lexicographical-sequences] by field sequence. [^extensibility]: The [Racket](https://racket-lang.org/) programming language defines @@ -123,10 +123,25 @@ label, then by field sequence. it cannot be read as an IRI at all, and so the label simply stands for itself—for its own `Value`. + [^lexicographical-sequences]: When comparing sequences of values for + the total order, [lexicographical + ordering](https://en.wikipedia.org/wiki/Lexicographic_order) is + used. Elements are drawn pairwise, in order, from the two sequences. + At the first pair that differs according to the total order, the + sequence the smaller element was drawn from is the smaller of the + sequences. If the end of one sequence is reached, while the other + sequence has elements remaining, the shorter sequence is considered + smaller. Otherwise, all the elements compared equal and neither was + longer than the other, so they compare equal. 
For example, + - `[#f]` is ordered before `[foo]` because `Boolean` appears before `Symbol` in the kind ordering; + - `[x]` before `[x y]` because there is no element remaining to compare against `y`; + - `[a b]` before `[x]` because `a` is smaller than `x`; and + - `[x y]` before `[x z]` because `y` is ordered before `z` according to the ordering rules for `Symbol`. + ### Sequences. A `Sequence` is a sequence of `Value`s. `Sequence`s are compared -lexicographically. +lexicographically.[^lexicographical-sequences] ### Sets. @@ -134,15 +149,16 @@ A `Set` is an unordered finite set of `Value`s. It contains no duplicate values, following the [equivalence relation](#equivalence) induced by the total order on `Value`s. Two `Set`s are compared by sorting their elements ascending using the [total order](#total-order) -and comparing the resulting `Sequence`s. +and comparing the resulting `Sequence`s.[^lexicographical-sequences] ### Dictionaries. A `Dictionary` is an unordered finite collection of pairs of `Value`s. Each pair comprises a *key* and a *value*. Keys in a `Dictionary` are pairwise distinct. Instances of `Dictionary` are compared by -lexicographic comparison of the sequences resulting from ordering each -`Dictionary`'s pairs in ascending order by key. +lexicographic[^lexicographical-sequences] comparison of the sequences +resulting from ordering each `Dictionary`'s pairs in ascending order by +key. ### Embeddeds. @@ -194,8 +210,12 @@ sequences use [the Preserves binary encoding](preserves-binary.html). 
The total ordering specified [above](#total-order) means that the following statements are true: - "bzz" < "c" < "caa" < #!"a" - #t < 3.0f < 3.0 < 3 < "3" < |3| < [] < #!#t + - `"bzz"` < `"c"` < `"caa"` < `#!"a"` + - `#t` < `3.0f` < `3.0` < `3` < `"3"` < `|3|` < `[]` < `#!#t` + - `[#f]` < `[foo]`, because `Boolean` appears before `Symbol` in the kind ordering + - `[x]` < `[x y]`, because there is no element remaining to compare against `y` + - `[a b]` < `[x]`, because `a` is smaller than `x` + - `[x y]` < `[x z]`, because `y` is ordered before `z` ### Simple examples.