Merge branch 'main' into comment-syntax-hash-space
This commit is contained in:
commit
fb63ac24b0
|
@ -1,4 +1,5 @@
|
||||||
_site/
|
_site/
|
||||||
|
preserves-expressions.pdf
|
||||||
preserves-binary.pdf
|
preserves-binary.pdf
|
||||||
preserves-schema.pdf
|
preserves-schema.pdf
|
||||||
preserves-text.pdf
|
preserves-text.pdf
|
||||||
|
|
7
Makefile
7
Makefile
|
@ -1,6 +1,11 @@
|
||||||
__ignored__ := $(shell ./setup.sh)
|
__ignored__ := $(shell ./setup.sh)
|
||||||
|
|
||||||
PDFS=preserves.pdf preserves-text.pdf preserves-binary.pdf preserves-schema.pdf
|
PDFS=\
|
||||||
|
preserves.pdf \
|
||||||
|
preserves-text.pdf \
|
||||||
|
preserves-binary.pdf \
|
||||||
|
preserves-schema.pdf \
|
||||||
|
preserves-expressions.pdf
|
||||||
|
|
||||||
all: $(PDFS)
|
all: $(PDFS)
|
||||||
|
|
||||||
|
|
|
@ -53,13 +53,17 @@ export class Bytes implements Preservable<any>, PreserveWritable<any> {
|
||||||
|
|
||||||
static fromHex(s: string): Bytes {
|
static fromHex(s: string): Bytes {
|
||||||
if (s.length & 1) throw new Error("Cannot decode odd-length hexadecimal string");
|
if (s.length & 1) throw new Error("Cannot decode odd-length hexadecimal string");
|
||||||
|
const result = new Bytes(s.length >> 1);
|
||||||
|
Bytes._raw_fromHexInto(s, result._view);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static _raw_fromHexInto(s: string, target: Uint8Array): void {
|
||||||
const len = s.length >> 1;
|
const len = s.length >> 1;
|
||||||
const result = new Bytes(len);
|
|
||||||
for (let i = 0; i < len; i++) {
|
for (let i = 0; i < len; i++) {
|
||||||
result._view[i] =
|
target[i] =
|
||||||
(unhexDigit(s.charCodeAt(i << 1)) << 4) | unhexDigit(s.charCodeAt((i << 1) + 1));
|
(unhexDigit(s.charCodeAt(i << 1)) << 4) | unhexDigit(s.charCodeAt((i << 1) + 1));
|
||||||
}
|
}
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static fromIO(io: string | BytesLike): string | Bytes {
|
static fromIO(io: string | BytesLike): string | Bytes {
|
||||||
|
@ -135,11 +139,11 @@ export class Bytes implements Preservable<any>, PreserveWritable<any> {
|
||||||
return Bytes.isBytes(v) ? v : void 0;
|
return Bytes.isBytes(v) ? v : void 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
toHex(): string {
|
toHex(digit = hexDigit): string {
|
||||||
var nibbles = [];
|
var nibbles = [];
|
||||||
for (let i = 0; i < this.length; i++) {
|
for (let i = 0; i < this.length; i++) {
|
||||||
nibbles.push(hexDigit(this._view[i] >> 4));
|
nibbles.push(digit(this._view[i] >> 4));
|
||||||
nibbles.push(hexDigit(this._view[i] & 15));
|
nibbles.push(digit(this._view[i] & 15));
|
||||||
}
|
}
|
||||||
return nibbles.join('');
|
return nibbles.join('');
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@ import { Tag } from "./constants";
|
||||||
import { Set, Dictionary } from "./dictionary";
|
import { Set, Dictionary } from "./dictionary";
|
||||||
import { DoubleFloat, SingleFloat } from "./float";
|
import { DoubleFloat, SingleFloat } from "./float";
|
||||||
import { Record } from "./record";
|
import { Record } from "./record";
|
||||||
import { Bytes, BytesLike, underlying } from "./bytes";
|
import { Bytes, BytesLike, underlying, hexDigit } from "./bytes";
|
||||||
import { Value } from "./values";
|
import { Value } from "./values";
|
||||||
import { is } from "./is";
|
import { is } from "./is";
|
||||||
import { embed, GenericEmbedded, Embedded, EmbeddedTypeDecode } from "./embedded";
|
import { embed, GenericEmbedded, Embedded, EmbeddedTypeDecode } from "./embedded";
|
||||||
|
@ -34,7 +34,7 @@ export interface TypedDecoder<T> {
|
||||||
nextFloat(): SingleFloat | undefined;
|
nextFloat(): SingleFloat | undefined;
|
||||||
nextDouble(): DoubleFloat | undefined;
|
nextDouble(): DoubleFloat | undefined;
|
||||||
nextEmbedded(): Embedded<T> | undefined;
|
nextEmbedded(): Embedded<T> | undefined;
|
||||||
nextSignedInteger(): number | undefined;
|
nextSignedInteger(): number | bigint | undefined;
|
||||||
nextString(): string | undefined;
|
nextString(): string | undefined;
|
||||||
nextByteString(): Bytes | undefined;
|
nextByteString(): Bytes | undefined;
|
||||||
nextSymbol(): symbol | undefined;
|
nextSymbol(): symbol | undefined;
|
||||||
|
@ -130,15 +130,42 @@ export class DecoderState {
|
||||||
return (this.nextbyte() === Tag.End) || (this.index--, false);
|
return (this.nextbyte() === Tag.End) || (this.index--, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
nextint(n: number): number {
|
nextint(n: number): number | bigint {
|
||||||
// TODO: Bignums :-/
|
const start = this.index;
|
||||||
if (n === 0) return 0;
|
if (n === 0) return 0;
|
||||||
|
if (n > 7) return this.nextbigint(n);
|
||||||
|
if (n === 7) {
|
||||||
|
const highByte = this.packet[this.index];
|
||||||
|
if ((highByte >= 0x20) && (highByte < 0xe0)) {
|
||||||
|
return this.nextbigint(n);
|
||||||
|
}
|
||||||
|
// if highByte is 0xe0, we still might have a value
|
||||||
|
// equal to (Number.MIN_SAFE_INTEGER-1).
|
||||||
|
}
|
||||||
let acc = this.nextbyte();
|
let acc = this.nextbyte();
|
||||||
if (acc & 0x80) acc -= 256;
|
if (acc & 0x80) acc -= 256;
|
||||||
for (let i = 1; i < n; i++) acc = (acc * 256) + this.nextbyte();
|
for (let i = 1; i < n; i++) acc = (acc * 256) + this.nextbyte();
|
||||||
|
if (!Number.isSafeInteger(acc)) {
|
||||||
|
this.index = start;
|
||||||
|
return this.nextbigint(n);
|
||||||
|
}
|
||||||
return acc;
|
return acc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nextbigint(n: number): bigint {
|
||||||
|
if (n === 0) return BigInt(0);
|
||||||
|
const bs = Bytes.from(this.nextbytes(n));
|
||||||
|
if (bs.get(0) >= 128) {
|
||||||
|
// negative
|
||||||
|
const hex = bs.toHex(d => hexDigit(15 - d));
|
||||||
|
return ~BigInt('0x' + hex);
|
||||||
|
} else {
|
||||||
|
// (strictly) positive
|
||||||
|
const hex = bs.toHex();
|
||||||
|
return BigInt('0x' + hex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
wrap<T>(v: Value<T>): Value<T> {
|
wrap<T>(v: Value<T>): Value<T> {
|
||||||
return this.includeAnnotations ? new Annotated(v) : v;
|
return this.includeAnnotations ? new Annotated(v) : v;
|
||||||
}
|
}
|
||||||
|
@ -306,7 +333,7 @@ export class Decoder<T = never> implements TypedDecoder<T> {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
nextSignedInteger(): number | undefined {
|
nextSignedInteger(): number | bigint | undefined {
|
||||||
return this.skipAnnotations((reset) => {
|
return this.skipAnnotations((reset) => {
|
||||||
switch (this.state.nextbyte()) {
|
switch (this.state.nextbyte()) {
|
||||||
case Tag.SignedInteger: return this.state.nextint(this.state.varint());
|
case Tag.SignedInteger: return this.state.nextint(this.state.varint());
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import { Tag } from "./constants";
|
import { Tag } from "./constants";
|
||||||
import { Bytes } from "./bytes";
|
import { Bytes, unhexDigit } from "./bytes";
|
||||||
import { Value } from "./values";
|
import { Value } from "./values";
|
||||||
import { EncodeError } from "./codec";
|
import { EncodeError } from "./codec";
|
||||||
import { Record, Tuple } from "./record";
|
import { Record, Tuple } from "./record";
|
||||||
|
@ -122,6 +122,13 @@ export class EncoderState {
|
||||||
this.index += bs.length;
|
this.index += bs.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
claimbytes(count: number) {
|
||||||
|
this.makeroom(count);
|
||||||
|
const view = new Uint8Array(this.view.buffer, this.index, count);
|
||||||
|
this.index += count;
|
||||||
|
return view;
|
||||||
|
}
|
||||||
|
|
||||||
varint(v: number) {
|
varint(v: number) {
|
||||||
while (v >= 128) {
|
while (v >= 128) {
|
||||||
this.emitbyte((v % 128) + 128);
|
this.emitbyte((v % 128) + 128);
|
||||||
|
@ -130,8 +137,9 @@ export class EncoderState {
|
||||||
this.emitbyte(v);
|
this.emitbyte(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
encodeint(v: number) {
|
encodeint(v: number | bigint) {
|
||||||
// TODO: Bignums :-/
|
if (typeof v === 'bigint') return this.encodebigint(v);
|
||||||
|
|
||||||
this.emitbyte(Tag.SignedInteger);
|
this.emitbyte(Tag.SignedInteger);
|
||||||
|
|
||||||
if (v === 0) {
|
if (v === 0) {
|
||||||
|
@ -153,6 +161,37 @@ export class EncoderState {
|
||||||
enc(bytecount, v);
|
enc(bytecount, v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
encodebigint(v: bigint) {
|
||||||
|
this.emitbyte(Tag.SignedInteger);
|
||||||
|
|
||||||
|
let hex: string;
|
||||||
|
if (v > 0) {
|
||||||
|
hex = v.toString(16);
|
||||||
|
if (hex.length & 1) {
|
||||||
|
hex = '0' + hex;
|
||||||
|
} else if (unhexDigit(hex.charCodeAt(0)) >= 8) {
|
||||||
|
hex = '00' + hex;
|
||||||
|
}
|
||||||
|
} else if (v < 0) {
|
||||||
|
const negatedHex = (~v).toString(16);
|
||||||
|
hex = '';
|
||||||
|
for (let i = 0; i < negatedHex.length; i++) {
|
||||||
|
hex = hex + 'fedcba9876543210'[unhexDigit(negatedHex.charCodeAt(i))];
|
||||||
|
}
|
||||||
|
if (hex.length & 1) {
|
||||||
|
hex = 'f' + hex;
|
||||||
|
} else if (unhexDigit(hex.charCodeAt(0)) < 8) {
|
||||||
|
hex = 'ff' + hex;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
this.emitbyte(0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.varint(hex.length >> 1);
|
||||||
|
Bytes._raw_fromHexInto(hex, this.claimbytes(hex.length >> 1));
|
||||||
|
}
|
||||||
|
|
||||||
encodebytes(tag: Tag, bs: Uint8Array) {
|
encodebytes(tag: Tag, bs: Uint8Array) {
|
||||||
this.emitbyte(tag);
|
this.emitbyte(tag);
|
||||||
this.varint(bs.length);
|
this.varint(bs.length);
|
||||||
|
@ -219,7 +258,7 @@ export class Encoder<T = object> {
|
||||||
else if (typeof v === 'boolean') {
|
else if (typeof v === 'boolean') {
|
||||||
this.state.emitbyte(v ? Tag.True : Tag.False);
|
this.state.emitbyte(v ? Tag.True : Tag.False);
|
||||||
}
|
}
|
||||||
else if (typeof v === 'number') {
|
else if (typeof v === 'number' || typeof v === 'bigint') {
|
||||||
this.state.encodeint(v);
|
this.state.encodeint(v);
|
||||||
}
|
}
|
||||||
else if (typeof v === 'string') {
|
else if (typeof v === 'string') {
|
||||||
|
|
|
@ -28,7 +28,7 @@ export interface FoldMethods<T, R> {
|
||||||
boolean(b: boolean): R;
|
boolean(b: boolean): R;
|
||||||
single(f: number): R;
|
single(f: number): R;
|
||||||
double(f: number): R;
|
double(f: number): R;
|
||||||
integer(i: number): R;
|
integer(i: number | bigint): R;
|
||||||
string(s: string): R;
|
string(s: string): R;
|
||||||
bytes(b: Bytes): R;
|
bytes(b: Bytes): R;
|
||||||
symbol(s: symbol): R;
|
symbol(s: symbol): R;
|
||||||
|
@ -47,7 +47,7 @@ export class VoidFold<T> implements FoldMethods<T, void> {
|
||||||
boolean(b: boolean): void {}
|
boolean(b: boolean): void {}
|
||||||
single(f: number): void {}
|
single(f: number): void {}
|
||||||
double(f: number): void {}
|
double(f: number): void {}
|
||||||
integer(i: number): void {}
|
integer(i: number | bigint): void {}
|
||||||
string(s: string): void {}
|
string(s: string): void {}
|
||||||
bytes(b: Bytes): void {}
|
bytes(b: Bytes): void {}
|
||||||
symbol(s: symbol): void {}
|
symbol(s: symbol): void {}
|
||||||
|
@ -79,7 +79,7 @@ export abstract class ValueFold<T, R = T> implements FoldMethods<T, Value<R>> {
|
||||||
double(f: number): Value<R> {
|
double(f: number): Value<R> {
|
||||||
return Double(f);
|
return Double(f);
|
||||||
}
|
}
|
||||||
integer(i: number): Value<R> {
|
integer(i: number | bigint): Value<R> {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
string(s: string): Value<R> {
|
string(s: string): Value<R> {
|
||||||
|
@ -138,6 +138,8 @@ export function valueClass<T>(v: Value<T>): ValueClass {
|
||||||
} else {
|
} else {
|
||||||
return ValueClass.SignedInteger;
|
return ValueClass.SignedInteger;
|
||||||
}
|
}
|
||||||
|
case 'bigint':
|
||||||
|
return ValueClass.SignedInteger;
|
||||||
case 'string':
|
case 'string':
|
||||||
return ValueClass.String;
|
return ValueClass.String;
|
||||||
case 'symbol':
|
case 'symbol':
|
||||||
|
@ -181,6 +183,8 @@ export function fold<T, R>(v: Value<T>, o: FoldMethods<T, R>): R {
|
||||||
} else {
|
} else {
|
||||||
return o.integer(v);
|
return o.integer(v);
|
||||||
}
|
}
|
||||||
|
case 'bigint':
|
||||||
|
return o.integer(v);
|
||||||
case 'string':
|
case 'string':
|
||||||
return o.string(v);
|
return o.string(v);
|
||||||
case 'symbol':
|
case 'symbol':
|
||||||
|
|
|
@ -12,6 +12,7 @@ export function fromJS<T = GenericEmbedded>(x: any): Value<T> {
|
||||||
throw new TypeError("Refusing to autoconvert non-integer number to Single or Double");
|
throw new TypeError("Refusing to autoconvert non-integer number to Single or Double");
|
||||||
}
|
}
|
||||||
// FALL THROUGH
|
// FALL THROUGH
|
||||||
|
case 'bigint':
|
||||||
case 'string':
|
case 'string':
|
||||||
case 'symbol':
|
case 'symbol':
|
||||||
case 'boolean':
|
case 'boolean':
|
||||||
|
@ -19,7 +20,6 @@ export function fromJS<T = GenericEmbedded>(x: any): Value<T> {
|
||||||
|
|
||||||
case 'undefined':
|
case 'undefined':
|
||||||
case 'function':
|
case 'function':
|
||||||
case 'bigint':
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'object':
|
case 'object':
|
||||||
|
|
|
@ -12,7 +12,13 @@ export function is(a: any, b: any): boolean {
|
||||||
if (isAnnotated(a)) a = a.item;
|
if (isAnnotated(a)) a = a.item;
|
||||||
if (isAnnotated(b)) b = b.item;
|
if (isAnnotated(b)) b = b.item;
|
||||||
if (Object.is(a, b)) return true;
|
if (Object.is(a, b)) return true;
|
||||||
if (typeof a !== typeof b) return false;
|
if (typeof a !== typeof b) {
|
||||||
|
if ((typeof a === 'number' && typeof b === 'bigint') ||
|
||||||
|
(typeof a === 'bigint' && typeof b === 'number')) {
|
||||||
|
return a == b;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (typeof a === 'object') {
|
if (typeof a === 'object') {
|
||||||
if (a === null || b === null) return false;
|
if (a === null || b === null) return false;
|
||||||
if ('equals' in a && typeof a.equals === 'function') return a.equals(b, is);
|
if ('equals' in a && typeof a.equals === 'function') return a.equals(b, is);
|
||||||
|
|
|
@ -7,6 +7,7 @@ import { Set, Dictionary } from "./dictionary";
|
||||||
import { Annotated } from "./annotated";
|
import { Annotated } from "./annotated";
|
||||||
import { unannotate } from "./strip";
|
import { unannotate } from "./strip";
|
||||||
import { embed, isEmbedded, Embedded } from "./embedded";
|
import { embed, isEmbedded, Embedded } from "./embedded";
|
||||||
|
import { isCompound } from "./compound";
|
||||||
|
|
||||||
export function merge<T>(
|
export function merge<T>(
|
||||||
mergeEmbeddeds: (a: T, b: T) => T | undefined,
|
mergeEmbeddeds: (a: T, b: T) => T | undefined,
|
||||||
|
@ -18,7 +19,17 @@ export function merge<T>(
|
||||||
}
|
}
|
||||||
|
|
||||||
function walk(a: Value<T>, b: Value<T>): Value<T> {
|
function walk(a: Value<T>, b: Value<T>): Value<T> {
|
||||||
if (a === b) return a;
|
if (a === b) {
|
||||||
|
// Shortcut for merges of trivially identical values.
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
if (!isCompound(a) && !isCompound(b)) {
|
||||||
|
// Don't do expensive recursive comparisons for compounds.
|
||||||
|
if (is(a, b)) {
|
||||||
|
// Shortcut for merges of marginally less trivially identical values.
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
}
|
||||||
return fold<T, Value<T>>(a, {
|
return fold<T, Value<T>>(a, {
|
||||||
boolean: die,
|
boolean: die,
|
||||||
single(_f: number) { return is(a, b) ? a : die(); },
|
single(_f: number) { return is(a, b) ? a : die(); },
|
||||||
|
|
|
@ -21,9 +21,8 @@ export interface ReaderOptions<T> extends ReaderStateOptions {
|
||||||
embeddedDecode?: EmbeddedTypeDecode<T>;
|
embeddedDecode?: EmbeddedTypeDecode<T>;
|
||||||
}
|
}
|
||||||
|
|
||||||
type IntOrFloat = 'int' | 'float';
|
const MAX_SAFE_INTEGERn = BigInt(Number.MAX_SAFE_INTEGER);
|
||||||
type Numeric = number | SingleFloat | DoubleFloat;
|
const MIN_SAFE_INTEGERn = BigInt(Number.MIN_SAFE_INTEGER);
|
||||||
type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric;
|
|
||||||
|
|
||||||
export const NUMBER_RE: RegExp = /^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$/;
|
export const NUMBER_RE: RegExp = /^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$/;
|
||||||
// Groups:
|
// Groups:
|
||||||
|
@ -174,9 +173,12 @@ export class ReaderState {
|
||||||
const m = NUMBER_RE.exec(acc);
|
const m = NUMBER_RE.exec(acc);
|
||||||
if (m) {
|
if (m) {
|
||||||
if (m[2] === void 0) {
|
if (m[2] === void 0) {
|
||||||
let v = parseInt(m[1]);
|
let v = BigInt(m[1]);
|
||||||
if (Object.is(v, -0)) v = 0;
|
if (v <= MIN_SAFE_INTEGERn || v >= MAX_SAFE_INTEGERn) {
|
||||||
return v;
|
return v;
|
||||||
|
} else {
|
||||||
|
return Number(v);
|
||||||
|
}
|
||||||
} else if (m[7] === '') {
|
} else if (m[7] === '') {
|
||||||
return Double(parseFloat(m[1] + m[3]));
|
return Double(parseFloat(m[1] + m[3]));
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -15,7 +15,7 @@ export type Atom =
|
||||||
| boolean
|
| boolean
|
||||||
| SingleFloat
|
| SingleFloat
|
||||||
| DoubleFloat
|
| DoubleFloat
|
||||||
| number
|
| number | bigint
|
||||||
| string
|
| string
|
||||||
| Bytes
|
| Bytes
|
||||||
| symbol;
|
| symbol;
|
||||||
|
|
|
@ -278,6 +278,7 @@ export class Writer<T> {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case 'bigint':
|
||||||
case 'number':
|
case 'number':
|
||||||
this.state.pieces.push('' + v);
|
this.state.pieces.push('' + v);
|
||||||
break;
|
break;
|
||||||
|
@ -328,7 +329,9 @@ export class Writer<T> {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Error(`Internal error: unhandled in Preserves Writer.push for ${v}`);
|
((_: never) => {
|
||||||
|
throw new Error(`Internal error: unhandled in Preserves Writer.push for ${v}`);
|
||||||
|
})(v);
|
||||||
}
|
}
|
||||||
return this; // for chaining
|
return this; // for chaining
|
||||||
}
|
}
|
||||||
|
|
|
@ -184,6 +184,71 @@ describe('encoding and decoding embeddeds', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('integer text parsing', () => {
|
||||||
|
it('should work for zero', () => {
|
||||||
|
expect(parse('0')).is(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should work for smallish positive integers', () => {
|
||||||
|
expect(parse('60000')).is(60000);
|
||||||
|
});
|
||||||
|
it('should work for smallish negative integers', () => {
|
||||||
|
expect(parse('-60000')).is(-60000);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should work for largeish positive integers', () => {
|
||||||
|
expect(parse('1234567812345678123456781234567'))
|
||||||
|
.is(BigInt("1234567812345678123456781234567"));
|
||||||
|
});
|
||||||
|
it('should work for largeish negative integers', () => {
|
||||||
|
expect(parse('-1234567812345678123456781234567'))
|
||||||
|
.is(BigInt("-1234567812345678123456781234567"));
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should work for larger positive integers', () => {
|
||||||
|
expect(parse('12345678123456781234567812345678'))
|
||||||
|
.is(BigInt("12345678123456781234567812345678"));
|
||||||
|
});
|
||||||
|
it('should work for larger negative integers', () => {
|
||||||
|
expect(parse('-12345678123456781234567812345678'))
|
||||||
|
.is(BigInt("-12345678123456781234567812345678"));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('integer binary encoding', () => {
|
||||||
|
it('should work for zero integers', () => {
|
||||||
|
expect(encode(0)).is(Bytes.fromHex('b000'));
|
||||||
|
});
|
||||||
|
it('should work for zero bigints', () => {
|
||||||
|
expect(encode(BigInt(0))).is(Bytes.fromHex('b000'));
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should work for smallish positive integers', () => {
|
||||||
|
expect(encode(60000)).is(Bytes.fromHex('b00300ea60'));
|
||||||
|
});
|
||||||
|
it('should work for smallish negative integers', () => {
|
||||||
|
expect(encode(-60000)).is(Bytes.fromHex('b003ff15a0'));
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should work for largeish positive integers', () => {
|
||||||
|
expect(encode(BigInt("1234567812345678123456781234567")))
|
||||||
|
.is(Bytes.fromHex('b00d0f951a8f2b4b049d518b923187'));
|
||||||
|
});
|
||||||
|
it('should work for largeish negative integers', () => {
|
||||||
|
expect(encode(BigInt("-1234567812345678123456781234567")))
|
||||||
|
.is(Bytes.fromHex('b00df06ae570d4b4fb62ae746dce79'));
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should work for larger positive integers', () => {
|
||||||
|
expect(encode(BigInt("12345678123456781234567812345678")))
|
||||||
|
.is(Bytes.fromHex('b00e009bd30997b0ee2e252f73b5ef4e'));
|
||||||
|
});
|
||||||
|
it('should work for larger negative integers', () => {
|
||||||
|
expect(encode(BigInt("-12345678123456781234567812345678")))
|
||||||
|
.is(Bytes.fromHex('b00eff642cf6684f11d1dad08c4a10b2'));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('common test suite', () => {
|
describe('common test suite', () => {
|
||||||
const samples_bin = fs.readFileSync(__dirname + '/../../../../../tests/samples.bin');
|
const samples_bin = fs.readFileSync(__dirname + '/../../../../../tests/samples.bin');
|
||||||
const samples = decodeWithAnnotations(samples_bin, { embeddedDecode: genericEmbeddedTypeDecode });
|
const samples = decodeWithAnnotations(samples_bin, { embeddedDecode: genericEmbeddedTypeDecode });
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import { Single, Double, fromJS, Dictionary, IDENTITY_FOLD, fold, mapEmbeddeds, Value, embed } from '../src/index';
|
import { Single, Double, fromJS, Dictionary, IDENTITY_FOLD, fold, mapEmbeddeds, Value, embed, preserves } from '../src/index';
|
||||||
import './test-utils';
|
import './test-utils';
|
||||||
|
|
||||||
describe('Single', () => {
|
describe('Single', () => {
|
||||||
|
@ -41,4 +41,51 @@ describe('fromJS', () => {
|
||||||
it('should map integers to themselves', () => {
|
it('should map integers to themselves', () => {
|
||||||
expect(fromJS(1)).toBe(1);
|
expect(fromJS(1)).toBe(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should map bigints to themselves', () => {
|
||||||
|
expect(fromJS(BigInt("12345678123456781234567812345678")))
|
||||||
|
.toBe(BigInt("12345678123456781234567812345678"));;
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('is()', () => {
|
||||||
|
it('should compare small integers sensibly', () => {
|
||||||
|
expect(3).is(3);
|
||||||
|
expect(3).not.is(4);
|
||||||
|
});
|
||||||
|
it('should compare large integers sensibly', () => {
|
||||||
|
const a = BigInt("12345678123456781234567812345678");
|
||||||
|
const b = BigInt("12345678123456781234567812345679");
|
||||||
|
expect(a).is(a);
|
||||||
|
expect(a).is(BigInt("12345678123456781234567812345678"));
|
||||||
|
expect(a).not.is(b);
|
||||||
|
});
|
||||||
|
it('should compare mixed integers sensibly', () => {
|
||||||
|
const a = BigInt("12345678123456781234567812345678");
|
||||||
|
const b = BigInt("3");
|
||||||
|
const c = BigInt("4");
|
||||||
|
expect(3).not.is(a);
|
||||||
|
expect(a).not.is(3);
|
||||||
|
expect(3).not.toBe(b);
|
||||||
|
expect(3).is(b);
|
||||||
|
expect(b).not.toBe(3);
|
||||||
|
expect(b).is(3);
|
||||||
|
expect(3).not.toBe(c);
|
||||||
|
expect(3).not.is(c);
|
||||||
|
expect(c).not.toBe(3);
|
||||||
|
expect(c).not.is(3);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('`preserves` formatter', () => {
|
||||||
|
it('should format numbers', () => {
|
||||||
|
expect(preserves`>${3}<`).toBe('>3<');
|
||||||
|
});
|
||||||
|
it('should format small bigints', () => {
|
||||||
|
expect(preserves`>${BigInt("3")}<`).toBe('>3<');
|
||||||
|
});
|
||||||
|
it('should format big bigints', () => {
|
||||||
|
expect(preserves`>${BigInt("12345678123456781234567812345678")}<`)
|
||||||
|
.toBe('>12345678123456781234567812345678<');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
Binary file not shown.
|
@ -118,6 +118,9 @@
|
||||||
float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
|
float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
|
||||||
float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
|
float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
|
||||||
float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
|
float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
|
||||||
|
int-98765432109876543210987654321098765432109: <Test #x"b012feddc125aed4226c770369269596ce3f0ad3" -98765432109876543210987654321098765432109>
|
||||||
|
int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
|
||||||
|
int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
|
||||||
int-257: <Test #x"b002feff" -257>
|
int-257: <Test #x"b002feff" -257>
|
||||||
int-256: <Test #x"b002ff00" -256>
|
int-256: <Test #x"b002ff00" -256>
|
||||||
int-255: <Test #x"b002ff01" -255>
|
int-255: <Test #x"b002ff01" -255>
|
||||||
|
@ -146,7 +149,10 @@
|
||||||
int65536: <Test #x"b003010000" 65536>
|
int65536: <Test #x"b003010000" 65536>
|
||||||
int131072: <Test #x"b003020000" 131072>
|
int131072: <Test #x"b003020000" 131072>
|
||||||
int2500000000: <Test #x"b005009502f900" 2500000000>
|
int2500000000: <Test #x"b005009502f900" 2500000000>
|
||||||
|
int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
|
||||||
|
int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
|
||||||
int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
|
int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
|
||||||
|
int98765432109876543210987654321098765432109: <Test #x"b01201223eda512bdd9388fc96d96a6931c0f52d" 98765432109876543210987654321098765432109>
|
||||||
list0: <Test #x"b584" []>
|
list0: <Test #x"b584" []>
|
||||||
list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
|
list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
|
||||||
list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
|
list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
|
||||||
|
|
|
@ -118,6 +118,9 @@
|
||||||
float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
|
float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
|
||||||
float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
|
float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
|
||||||
float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
|
float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
|
||||||
|
int-98765432109876543210987654321098765432109: <Test #x"b012feddc125aed4226c770369269596ce3f0ad3" -98765432109876543210987654321098765432109>
|
||||||
|
int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
|
||||||
|
int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
|
||||||
int-257: <Test #x"b002feff" -257>
|
int-257: <Test #x"b002feff" -257>
|
||||||
int-256: <Test #x"b002ff00" -256>
|
int-256: <Test #x"b002ff00" -256>
|
||||||
int-255: <Test #x"b002ff01" -255>
|
int-255: <Test #x"b002ff01" -255>
|
||||||
|
@ -146,7 +149,10 @@
|
||||||
int65536: <Test #x"b003010000" 65536>
|
int65536: <Test #x"b003010000" 65536>
|
||||||
int131072: <Test #x"b003020000" 131072>
|
int131072: <Test #x"b003020000" 131072>
|
||||||
int2500000000: <Test #x"b005009502f900" 2500000000>
|
int2500000000: <Test #x"b005009502f900" 2500000000>
|
||||||
|
int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
|
||||||
|
int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
|
||||||
int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
|
int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
|
||||||
|
int98765432109876543210987654321098765432109: <Test #x"b01201223eda512bdd9388fc96d96a6931c0f52d" 98765432109876543210987654321098765432109>
|
||||||
list0: <Test #x"b584" []>
|
list0: <Test #x"b584" []>
|
||||||
list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
|
list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
|
||||||
list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
|
list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "preserves"
|
name = "preserves"
|
||||||
version = "3.990.3"
|
version = "3.990.4"
|
||||||
authors = ["Tony Garnock-Jones <tonyg@leastfixedpoint.com>"]
|
authors = ["Tony Garnock-Jones <tonyg@leastfixedpoint.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
description = "Implementation of the Preserves serialization format via serde."
|
description = "Implementation of the Preserves serialization format via serde."
|
||||||
|
|
|
@ -289,7 +289,7 @@ impl Writer for BinaryOrderWriter {
|
||||||
macro_rules! fits_in_bytes {
|
macro_rules! fits_in_bytes {
|
||||||
($v:ident, $limit:literal) => {{
|
($v:ident, $limit:literal) => {{
|
||||||
let bits = $limit * 8 - 1;
|
let bits = $limit * 8 - 1;
|
||||||
$v >= -(2 << bits) && $v < (2 << bits)
|
$v >= -(1 << bits) && $v < (1 << bits)
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,291 @@
|
||||||
|
---
|
||||||
|
title: "P-expressions"
|
||||||
|
---
|
||||||
|
|
||||||
|
Tony Garnock-Jones <tonyg@leastfixedpoint.com>
|
||||||
|
October 2023. Version 0.1.1.
|
||||||
|
|
||||||
|
This document defines a grammar called *Preserves Expressions*
|
||||||
|
(*P-expressions*, *pexprs*) that includes [ordinary Preserves text
|
||||||
|
syntax](preserves-text.html) but offers extensions sufficient to support
|
||||||
|
a Lisp- or Haskell-like programming notation.
|
||||||
|
|
||||||
|
**Motivation.** The [text syntax](preserves-text.html) for Preserves
|
||||||
|
works well for writing `Value`s, i.e. data. However, in some contexts,
|
||||||
|
Preserves applications need a broader grammar that allows interleaving
|
||||||
|
of *expressions* with data. Two examples are the [Preserves Schema
|
||||||
|
language](preserves-schema.html) and the [Synit configuration scripting
|
||||||
|
language](https://synit.org/book/operation/scripting.html), both of
|
||||||
|
which (ab)use Preserves text syntax as a kind of programming notation.
|
||||||
|
|
||||||
|
## Preliminaries
|
||||||
|
|
||||||
|
The P-expression grammar takes the text syntax grammar as its base and
|
||||||
|
modifies it.
|
||||||
|
|
||||||
|
<a id="whitespace"></a>
|
||||||
|
**Whitespace.** Whitespace is redefined as any number of spaces, tabs,
|
||||||
|
carriage returns, or line feeds. Commas are *not* considered whitespace
|
||||||
|
in P-expressions.
|
||||||
|
|
||||||
|
ws = *(%x20 / %x09 / CR / LF)
|
||||||
|
|
||||||
|
<a id="delimiters"></a>
|
||||||
|
**Delimiters.** Because commas are no longer included in class `ws`,
|
||||||
|
class `delimiter` is widened to include them explicitly.
|
||||||
|
|
||||||
|
delimiter = ws / ","
|
||||||
|
/ "<" / ">" / "[" / "]" / "{" / "}"
|
||||||
|
/ "#" / ":" / DQUOTE / "|" / "@" / ";"
|
||||||
|
|
||||||
|
## Grammar
|
||||||
|
|
||||||
|
P-expressions add comma, semicolon, and sequences of one or more colons
|
||||||
|
to the syntax class `Value`.
|
||||||
|
|
||||||
|
Value =/ Comma / Semicolon / Colons
|
||||||
|
Comma = ","
|
||||||
|
Semicolon = ";"
|
||||||
|
Colons = 1*":"
|
||||||
|
|
||||||
|
Now that colon is in `Value`, the syntax for `Dictionary` is replaced
|
||||||
|
with `Block` everywhere it is mentioned.
|
||||||
|
|
||||||
|
Block = "{" *Value ws "}"
|
||||||
|
|
||||||
|
New syntax for explicit uninterpreted grouping of sequences of values is
|
||||||
|
introduced, and added to class `Value`.
|
||||||
|
|
||||||
|
Value =/ ws Group
|
||||||
|
Group = "(" *Value ws ")"
|
||||||
|
|
||||||
|
Finally, class `Document` is replaced in order to allow standalone
|
||||||
|
documents to directly comprise a sequence of multiple values.
|
||||||
|
|
||||||
|
Document = *Value ws
|
||||||
|
|
||||||
|
No changes to [the Preserves semantic model](preserves.html) are made.
|
||||||
|
Every Preserves text-syntax term is a valid P-expression, but in general
|
||||||
|
P-expressions must be rewritten or otherwise interpreted before a
|
||||||
|
meaningful Preserves value can be arrived at ([see
|
||||||
|
below](#reading-preserves)).
|
||||||
|
|
||||||
|
## <a id="annotations"></a>Annotations and Comments
|
||||||
|
|
||||||
|
Annotations and comments attach to the term following them, just as in
|
||||||
|
the ordinary text syntax. However, it is common in programming notations
|
||||||
|
to allow comments at the end of a file or other sequential construct:
|
||||||
|
|
||||||
|
{
|
||||||
|
key: value
|
||||||
|
# example of a comment at the end of a dictionary
|
||||||
|
}
|
||||||
|
# example of a comment at the end of the input file
|
||||||
|
|
||||||
|
While the ordinary text syntax forbids comments in these positions,
|
||||||
|
P-expressions allow them:
|
||||||
|
|
||||||
|
Document =/ *Value Trailer ws
|
||||||
|
Record =/ "<" Value *Value Trailer ws ">"
|
||||||
|
Sequence =/ "[" *Value Trailer ws "]"
|
||||||
|
Set =/ "#{" *Value Trailer ws "}"
|
||||||
|
Block =/ "{" *Value Trailer ws "}"
|
||||||
|
|
||||||
|
Trailer = 1*Annotation
|
||||||
|
|
||||||
|
## <a id="encoding-pexprs"></a>Encoding P-expressions as Preserves
|
||||||
|
|
||||||
|
We write ⌜*p*⌝ for the encoding into Preserves of P-expression *p*.
|
||||||
|
|
||||||
|
{:.pseudocode.equations}
|
||||||
|
| ⌜·⌝ : **P-expression** | ⟶ | **Preserves** |
|
||||||
|
|
||||||
|
Aside from the special classes `Group`, `Block`, `Comma`, `Semicolon`,
|
||||||
|
`Colons`, and `Trailer`, P-expressions are encoded directly as Preserves
|
||||||
|
data.
|
||||||
|
|
||||||
|
{:.pseudocode.equations}
|
||||||
|
| ⌜`[`*p* ...`]`⌝ | = | `[`⌜*p*⌝ ...`]` |
|
||||||
|
| ⌜`<`*p* ...`>`⌝ | = | `<`⌜*p*⌝ ...`>` |
|
||||||
|
| ⌜`#{`*p* ...`}`⌝ | = | `#{`⌜*p*⌝ ...`}` |
|
||||||
|
| ⌜`#!`*p*⌝ | = | `#!`⌜*p*⌝ |
|
||||||
|
| ⌜`@`*p* *q*⌝ | = | `@`⌜*p*⌝ ⌜*q*⌝ |
|
||||||
|
| ⌜*p*⌝ | = | *p* when *p* ∈ **Atom** |
|
||||||
|
|
||||||
|
All members of the special classes are encoded as Preserves
|
||||||
|
dictionaries[^encoding-rationale].
|
||||||
|
|
||||||
|
[^encoding-rationale]: In principle, it would be nice to use *records*
|
||||||
|
for this purpose, but if we did so we would have to also encode
|
||||||
|
usages of records!
|
||||||
|
|
||||||
|
{:.pseudocode.equations}
|
||||||
|
| ⌜`(`*p* ...`)`⌝ | = | `{g:[`⌜*p*⌝ ...`]}` |
|
||||||
|
| ⌜`{`*p* ...`}`⌝ | = | `{b:[`⌜*p*⌝ ...`]}` |
|
||||||
|
| ⌜`,`⌝ | = | `{s:|,|}` |
|
||||||
|
| ⌜`;`⌝ | = | `{s:|;|}` |
|
||||||
|
| ⌜`:` ...⌝ | = | `{s:|:` ...`|}` |
|
||||||
|
| ⌜*t*⌝ | = | ⌜*a*⌝ ... `{}`, where *a* ... are the annotations in *t* and *t* ∈ **Trailer** |
|
||||||
|
|
||||||
|
The empty dictionary `{}` acts as an anchor for the annotations in a
|
||||||
|
`Trailer`.
|
||||||
|
|
||||||
|
We overload the ⌜·⌝ notation for encoding whole `Document`s into
|
||||||
|
sequences of Preserves values.
|
||||||
|
|
||||||
|
{:.pseudocode.equations}
|
||||||
|
| ⌜·⌝ : **P-expression Document** | ⟶ | **Preserves Sequence** |
|
||||||
|
| ⌜*p* ...⌝ | = | `[`⌜*p*⌝ ...`]` |
|
||||||
|
|
||||||
|
## <a id="reading-preserves"></a>Interpreting P-expressions as Preserves
|
||||||
|
|
||||||
|
The [previous section](#encoding-pexprs) discussed ways of representing
|
||||||
|
P-expressions using Preserves. Here, we discuss *interpreting*
|
||||||
|
P-expressions *as* Preserves, so that (1) a Preserves datum (2) written
|
||||||
|
using Preserves text syntax and then (3) read as a P-expression can be
|
||||||
|
(4) interpreted from that P-expression to yield the original datum.
|
||||||
|
|
||||||
|
A reader for P-expressions can be adapted to yield a reader for
|
||||||
|
Preserves terms by processing (subterms of) each P-expression that the
|
||||||
|
reader produces. The only subterms that need processing are the special
|
||||||
|
classes mentioned above.
|
||||||
|
|
||||||
|
1. Every `Group` or `Semicolon` that appears is an error.
|
||||||
|
2. Every `Colons` with two or more colons in it is an error.
|
||||||
|
3. Every `Comma` that appears is discarded.
|
||||||
|
4. Every `Trailer` that appears is an error.[^discard-trailers-instead-of-error]
|
||||||
|
5. Every `Block` must contain triplets of `Value`, `Colons` (with a
|
||||||
|
single colon), `Value`. Any `Block` not following this pattern is an
|
||||||
|
error. Each `Block` following the pattern is translated to a
|
||||||
|
`Dictionary` containing a key/value pair for each triplet.
|
||||||
|
|
||||||
|
[^discard-trailers-instead-of-error]: **Implementation note.** When
|
||||||
|
implementing parsing of P-expressions into Preserves, consider
|
||||||
|
offering an optional mode where trailing annotations `Trailer` are
|
||||||
|
*discarded* instead of causing an error to be signalled.
|
||||||
|
|
||||||
|
## Appendix: Examples
|
||||||
|
|
||||||
|
Examples are given as pairs of P-expressions and their Preserves
|
||||||
|
text-syntax encodings.
|
||||||
|
|
||||||
|
### Individual P-expression `Value`s
|
||||||
|
|
||||||
|
```preserves
|
||||||
|
⌜<date 1821 (lookup-month "February") 3>⌝
|
||||||
|
= <date 1821 {g:[lookup-month "February"]} 3>
|
||||||
|
```
|
||||||
|
|
||||||
|
```preserves
|
||||||
|
⌜(begin (println! (+ 1 2)) (+ 3 4))⌝
|
||||||
|
= {g:[begin {g:[println! {g:[+ 1 2]}]} {g:[+ 3 4]}]}
|
||||||
|
```
|
||||||
|
|
||||||
|
```preserves
|
||||||
|
⌜()⌝
|
||||||
|
= {g:[]}
|
||||||
|
|
||||||
|
⌜[() () ()]⌝
|
||||||
|
= [{g:[]}, {g:[]}, {g:[]}]
|
||||||
|
```
|
||||||
|
|
||||||
|
```preserves
|
||||||
|
⌜{
|
||||||
|
setUp();
|
||||||
|
# Now enter the loop
|
||||||
|
loop: {
|
||||||
|
greet("World");
|
||||||
|
}
|
||||||
|
tearDown();
|
||||||
|
}⌝
|
||||||
|
= {b:[
|
||||||
|
setUp {g:[]} {s:|;|}
|
||||||
|
# Now enter the loop
|
||||||
|
loop {s:|:|} {b:[
|
||||||
|
greet {g:["World"]} {s:|;|}
|
||||||
|
]}
|
||||||
|
tearDown {g:[]} {s:|;|}
|
||||||
|
]}
|
||||||
|
```
|
||||||
|
|
||||||
|
```preserves
|
||||||
|
⌜[1 + 2.0, print "Hello", predicate: #t, foo, #!remote, bar]⌝
|
||||||
|
= [1 + 2.0 {s:|,|} print "Hello" {s:|,|} predicate {s:|:|} #t {s:|,|}
|
||||||
|
foo {s:|,|} #!remote {s:|,|} bar]
|
||||||
|
```
|
||||||
|
|
||||||
|
```preserves
|
||||||
|
⌜{
|
||||||
|
optional name: string,
|
||||||
|
address: Address,
|
||||||
|
}⌝
|
||||||
|
= {b:[
|
||||||
|
optional name {s:|:|} string {s:|,|}
|
||||||
|
address {s:|:|} Address {s:|,|}
|
||||||
|
]}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Whole `Document`s
|
||||||
|
|
||||||
|
```preserves
|
||||||
|
⌜{
|
||||||
|
key: value
|
||||||
|
# example of a comment at the end of a dictionary
|
||||||
|
}
|
||||||
|
# example of a comment at the end of the input file⌝
|
||||||
|
= [ {b:[
|
||||||
|
key {s:|:|} value
|
||||||
|
@"example of a comment at the end of a dictionary" {}
|
||||||
|
]}
|
||||||
|
@"example of a comment at the end of the input file"
|
||||||
|
{}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Appendix: Reading vs. Parsing
|
||||||
|
|
||||||
|
Lisp systems first *read* streams of bytes into S-expressions and then
|
||||||
|
*parse* those S-expressions into more abstract structures denoting
|
||||||
|
various kinds of program syntax. [Separation of reading from parsing is
|
||||||
|
what gives Lisp its syntactic
|
||||||
|
flexibility.](http://calculist.org/blog/2012/04/17/homoiconicity-isnt-the-point/)
|
||||||
|
|
||||||
|
Similarly, the Apple programming language
|
||||||
|
[Dylan](https://en.wikipedia.org/wiki/Dylan_(programming_language))
|
||||||
|
included a reader-parser split, with the Dylan reader producing
|
||||||
|
*D-expressions* that are somewhat similar to P-expressions.
|
||||||
|
|
||||||
|
Finally, the Racket dialects
|
||||||
|
[Honu](https://docs.racket-lang.org/honu/index.html) and
|
||||||
|
[Something](https://github.com/tonyg/racket-something) use a
|
||||||
|
reader-parser-macro setup, where the reader produces Racket data, the
|
||||||
|
parser produces "syntax" and is user-extensible, and Racket's own
|
||||||
|
modular macro system rewrites this "syntax" down to core forms to be
|
||||||
|
compiled to machine code.
|
||||||
|
|
||||||
|
Similarly, when using P-expressions as the foundation for a language, a
|
||||||
|
generic P-expression reader can then feed into special-purpose
|
||||||
|
*parsers*. The reader captures the coarse syntactic structure of a
|
||||||
|
program, and the parser refines this.
|
||||||
|
|
||||||
|
Often, a parser will wish to extract structure from sequences of
|
||||||
|
P-expression `Value`s.
|
||||||
|
|
||||||
|
- A simple technique is repeated splitting of sequences; first by
|
||||||
|
`Semicolon`, then by `Comma`, then by increasingly high binding-power
|
||||||
|
operators.
|
||||||
|
|
||||||
|
- More refined is to use a Pratt parser or similar
|
||||||
|
([1](https://en.wikipedia.org/wiki/Operator-precedence_parser),
|
||||||
|
[2](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html),
|
||||||
|
[3](https://github.com/tonyg/racket-something/blob/f6116bf3861b76970f5ce291a628476adef820b4/src/something/pratt.rkt))
|
||||||
|
to build a parse tree using an extensible specification of the pre-,
|
||||||
|
in-, and postfix operators involved.
|
||||||
|
|
||||||
|
- Finally, if you treat sequences of `Value`s as pre-lexed token
|
||||||
|
streams, almost any parsing formalism (such as [PEG
|
||||||
|
parsing](https://en.wikipedia.org/wiki/Parsing_expression_grammar),
|
||||||
|
[OMeta](https://en.wikipedia.org/wiki/OMeta), etc.) can be used to
|
||||||
|
extract further syntactic structure.
|
||||||
|
|
||||||
|
## Notes
|
|
@ -55,7 +55,7 @@ Standalone documents may have trailing whitespace.
|
||||||
Any `Value` may be preceded by whitespace.
|
Any `Value` may be preceded by whitespace.
|
||||||
|
|
||||||
Value = ws (Record / Collection / Atom / Embedded)
|
Value = ws (Record / Collection / Atom / Embedded)
|
||||||
Collection = Sequence / Dictionary / Set
|
Collection = Sequence / Set / Dictionary
|
||||||
Atom = Boolean / String / ByteString /
|
Atom = Boolean / String / ByteString /
|
||||||
QuotedSymbol / SymbolOrNumber
|
QuotedSymbol / SymbolOrNumber
|
||||||
|
|
||||||
|
@ -64,18 +64,18 @@ label-`Value` followed by its field-`Value`s.
|
||||||
|
|
||||||
Record = "<" Value *Value ws ">"
|
Record = "<" Value *Value ws ">"
|
||||||
|
|
||||||
`Sequence`s are enclosed in square brackets. `Dictionary` values are
|
`Sequence`s are enclosed in square brackets. `Set`s are written as
|
||||||
curly-brace-enclosed colon-separated pairs of values. `Set`s are
|
values enclosed by the tokens `#{` and `}`. `Dictionary` values are
|
||||||
written as values enclosed by the tokens `#{` and
|
curly-brace-enclosed colon-separated pairs of
|
||||||
`}`.[^printing-collections] It is an error for a set to contain
|
values.[^printing-collections] It is an error for a set to contain
|
||||||
duplicate elements or for a dictionary to contain duplicate keys. When
|
duplicate elements or for a dictionary to contain duplicate keys. When
|
||||||
printing sets and dictionaries, implementations *SHOULD* order
|
printing sets and dictionaries, implementations *SHOULD* order elements
|
||||||
elements resp. keys with respect to the [total order over
|
resp. keys with respect to the [total order over
|
||||||
`Value`s](preserves.html#total-order).[^rationale-print-ordering]
|
`Value`s](preserves.html#total-order).[^rationale-print-ordering]
|
||||||
|
|
||||||
Sequence = "[" *Value ws "]"
|
Sequence = "[" *Value ws "]"
|
||||||
Dictionary = "{" *(Value ws ":" Value) ws "}"
|
Set = "#{" *Value ws "}"
|
||||||
Set = "#{" *Value ws "}"
|
Dictionary = "{" *(Value ws ":" Value) ws "}"
|
||||||
|
|
||||||
[^printing-collections]: **Implementation note.** When implementing
|
[^printing-collections]: **Implementation note.** When implementing
|
||||||
printing of `Value`s using the textual syntax, consider supporting
|
printing of `Value`s using the textual syntax, consider supporting
|
||||||
|
@ -273,7 +273,8 @@ value. Each annotation is, in turn, a `Value`, and may itself have
|
||||||
annotations. The ordering of annotations attached to a `Value` is
|
annotations. The ordering of annotations attached to a `Value` is
|
||||||
significant.
|
significant.
|
||||||
|
|
||||||
Value =/ ws "@" Value Value
|
Value =/ ws Annotation Value
|
||||||
|
Annotation = "@" Value
|
||||||
|
|
||||||
Each annotation is preceded by `@`; the underlying annotated value
|
Each annotation is preceded by `@`; the underlying annotated value
|
||||||
follows its annotations. Here we extend only the syntactic nonterminal
|
follows its annotations. Here we extend only the syntactic nonterminal
|
||||||
|
@ -283,7 +284,7 @@ named “`Value`” without altering the semantic class of `Value`s.
|
||||||
interpreted as comments associated with that value. Comments are
|
interpreted as comments associated with that value. Comments are
|
||||||
sufficiently common that special syntax exists for them.
|
sufficiently common that special syntax exists for them.
|
||||||
|
|
||||||
Value =/ ws ("#" [(%x20 / %x09) linecomment]) (CR / LF) Value
|
Annotation =/ "#" [(%x20 / %x09) linecomment] (CR / LF)
|
||||||
linecomment = *<any unicode scalar value except CR or LF>
|
linecomment = *<any unicode scalar value except CR or LF>
|
||||||
|
|
||||||
When written this way, everything between the hash-space or hash-tab and
|
When written this way, everything between the hash-space or hash-tab and
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
:root {
|
:root {
|
||||||
--sans-font: "Open Sans", -apple-system, BlinkMacSystemFont, avenir next, avenir, segoe ui, helvetica neue, helvetica, Cantarell, Ubuntu, roboto, noto, arial, sans-serif;
|
--sans-font: "Open Sans", -apple-system, BlinkMacSystemFont, avenir next, avenir, segoe ui, helvetica neue, helvetica, Cantarell, Ubuntu, roboto, noto, arial, sans-serif;
|
||||||
--serif-font: palatino, "Palatino Linotype", "Palatino LT STD", "URW Palladio L", "TeX Gyre Pagella", serif;
|
--serif-font: palatino, "Palatino Linotype", "Palatino LT STD", "URW Palladio L", "TeX Gyre Pagella", serif;
|
||||||
|
--blockquote-indent: 40px;
|
||||||
}
|
}
|
||||||
body {
|
body {
|
||||||
font-family: var(--serif-font);
|
font-family: var(--serif-font);
|
||||||
|
@ -230,6 +231,7 @@ table.postcard-grammar {
|
||||||
blockquote {
|
blockquote {
|
||||||
padding: 0.5rem 1rem;
|
padding: 0.5rem 1rem;
|
||||||
border-left: solid #4f81bd 2px;
|
border-left: solid #4f81bd 2px;
|
||||||
|
margin-left: var(--blockquote-indent);
|
||||||
margin-right: 0;
|
margin-right: 0;
|
||||||
}
|
}
|
||||||
blockquote :first-child {
|
blockquote :first-child {
|
||||||
|
@ -243,6 +245,10 @@ blockquote :last-child {
|
||||||
background-color: #e9f0f9;
|
background-color: #e9f0f9;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
table.equations { width: auto; margin-left: var(--blockquote-indent); }
|
||||||
|
table.equations tr > *:nth-child(1) { text-align: right; }
|
||||||
|
table.equations tr > *:nth-child(2) { text-align: center; }
|
||||||
|
|
||||||
blockquote.pseudocode {
|
blockquote.pseudocode {
|
||||||
border-left: none;
|
border-left: none;
|
||||||
padding: 0;
|
padding: 0;
|
||||||
|
|
36
preserves.md
36
preserves.md
|
@ -104,8 +104,8 @@ the `totalOrder` predicate defined in section 5.10 of [IEEE Std
|
||||||
|
|
||||||
A `Record` is a *labelled* tuple of `Value`s, the record's *fields*. A
|
A `Record` is a *labelled* tuple of `Value`s, the record's *fields*. A
|
||||||
label can be any `Value`, but is usually a `Symbol`.[^extensibility]
|
label can be any `Value`, but is usually a `Symbol`.[^extensibility]
|
||||||
[^iri-labels] `Record`s are compared lexicographically: first by
|
[^iri-labels] `Record`s are ordered first by label, then
|
||||||
label, then by field sequence.
|
lexicographically[^lexicographical-sequences] by field sequence.
|
||||||
|
|
||||||
[^extensibility]: The [Racket](https://racket-lang.org/) programming
|
[^extensibility]: The [Racket](https://racket-lang.org/) programming
|
||||||
language defines
|
language defines
|
||||||
|
@ -123,10 +123,25 @@ label, then by field sequence.
|
||||||
it cannot be read as an IRI at all, and so the label simply stands
|
it cannot be read as an IRI at all, and so the label simply stands
|
||||||
for itself—for its own `Value`.
|
for itself—for its own `Value`.
|
||||||
|
|
||||||
|
[^lexicographical-sequences]: When comparing sequences of values for
|
||||||
|
the total order, [lexicographical
|
||||||
|
ordering](https://en.wikipedia.org/wiki/Lexicographic_order) is
|
||||||
|
used. Elements are drawn pairwise from the two sequences to be
|
||||||
|
compared. If one is smaller than the other according to the total
|
||||||
|
order, the sequence it was drawn from is the smaller of the
|
||||||
|
sequences. If the end of one sequence is reached, while the other
|
||||||
|
sequence has elements remaining, the shorter sequence is considered
|
||||||
|
smaller. Otherwise, all the elements compared equal and neither was
|
||||||
|
longer than the other, so they compare equal. For example,
|
||||||
|
- `[#f]` is ordered before `[foo]` because `Boolean` appears before `Symbol` in the kind ordering;
|
||||||
|
- `[x]` before `[x y]` because there is no element remaining to compare against `y`;
|
||||||
|
- `[a b]` before `[x]` because `a` is smaller than `x`; and
|
||||||
|
- `[x y]` before `[x z]` because `y` is ordered before `z` according to the ordering rules for `Symbol`.
|
||||||
|
|
||||||
### Sequences.
|
### Sequences.
|
||||||
|
|
||||||
A `Sequence` is a sequence of `Value`s. `Sequence`s are compared
|
A `Sequence` is a sequence of `Value`s. `Sequence`s are compared
|
||||||
lexicographically.
|
lexicographically.[^lexicographical-sequences]
|
||||||
|
|
||||||
### Sets.
|
### Sets.
|
||||||
|
|
||||||
|
@ -134,15 +149,16 @@ A `Set` is an unordered finite set of `Value`s. It contains no
|
||||||
duplicate values, following the [equivalence relation](#equivalence)
|
duplicate values, following the [equivalence relation](#equivalence)
|
||||||
induced by the total order on `Value`s. Two `Set`s are compared by
|
induced by the total order on `Value`s. Two `Set`s are compared by
|
||||||
sorting their elements ascending using the [total order](#total-order)
|
sorting their elements ascending using the [total order](#total-order)
|
||||||
and comparing the resulting `Sequence`s.
|
and comparing the resulting `Sequence`s.[^lexicographical-sequences]
|
||||||
|
|
||||||
### Dictionaries.
|
### Dictionaries.
|
||||||
|
|
||||||
A `Dictionary` is an unordered finite collection of pairs of `Value`s.
|
A `Dictionary` is an unordered finite collection of pairs of `Value`s.
|
||||||
Each pair comprises a *key* and a *value*. Keys in a `Dictionary` are
|
Each pair comprises a *key* and a *value*. Keys in a `Dictionary` are
|
||||||
pairwise distinct. Instances of `Dictionary` are compared by
|
pairwise distinct. Instances of `Dictionary` are compared by
|
||||||
lexicographic comparison of the sequences resulting from ordering each
|
lexicographic[^lexicographical-sequences] comparison of the sequences
|
||||||
`Dictionary`'s pairs in ascending order by key.
|
resulting from ordering each `Dictionary`'s pairs in ascending order by
|
||||||
|
key.
|
||||||
|
|
||||||
### Embeddeds.
|
### Embeddeds.
|
||||||
|
|
||||||
|
@ -194,8 +210,12 @@ sequences use [the Preserves binary encoding](preserves-binary.html).
|
||||||
|
|
||||||
The total ordering specified [above](#total-order) means that the following statements are true:
|
The total ordering specified [above](#total-order) means that the following statements are true:
|
||||||
|
|
||||||
"bzz" < "c" < "caa" < #!"a"
|
- `"bzz"` < `"c"` < `"caa"` < `#!"a"`
|
||||||
#t < 3.0f < 3.0 < 3 < "3" < |3| < [] < #!#t
|
- `#t` < `3.0f` < `3.0` < `3` < `"3"` < `|3|` < `[]` < `#!#t`
|
||||||
|
- `[#f]` < `[foo]`, because `Boolean` appears before `Symbol` in the kind ordering
|
||||||
|
- `[x]` < `[x y]`, because there is no element remaining to compare against `y`
|
||||||
|
- `[a b]` < `[x]`, because `a` is smaller than `x`
|
||||||
|
- `[x y]` < `[x z]`, because `y` is ordered before `z`
|
||||||
|
|
||||||
### Simple examples.
|
### Simple examples.
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -118,6 +118,9 @@
|
||||||
float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
|
float14: @"+qNaN" <Test #x"87047fc00111" #xf"7fc00111">
|
||||||
float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
|
float15: @"-qNaN" <Test #x"8704ffc00001" #xf"ffc00001">
|
||||||
float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
|
float16: @"-qNaN" <Test #x"8704ffc00111" #xf"ffc00111">
|
||||||
|
int-98765432109876543210987654321098765432109: <Test #x"b012feddc125aed4226c770369269596ce3f0ad3" -98765432109876543210987654321098765432109>
|
||||||
|
int-12345678123456781234567812345678: <Test #x"b00eff642cf6684f11d1dad08c4a10b2" -12345678123456781234567812345678>
|
||||||
|
int-1234567812345678123456781234567: <Test #x"b00df06ae570d4b4fb62ae746dce79" -1234567812345678123456781234567>
|
||||||
int-257: <Test #x"b002feff" -257>
|
int-257: <Test #x"b002feff" -257>
|
||||||
int-256: <Test #x"b002ff00" -256>
|
int-256: <Test #x"b002ff00" -256>
|
||||||
int-255: <Test #x"b002ff01" -255>
|
int-255: <Test #x"b002ff01" -255>
|
||||||
|
@ -146,7 +149,10 @@
|
||||||
int65536: <Test #x"b003010000" 65536>
|
int65536: <Test #x"b003010000" 65536>
|
||||||
int131072: <Test #x"b003020000" 131072>
|
int131072: <Test #x"b003020000" 131072>
|
||||||
int2500000000: <Test #x"b005009502f900" 2500000000>
|
int2500000000: <Test #x"b005009502f900" 2500000000>
|
||||||
|
int1234567812345678123456781234567: <Test #x"b00d0f951a8f2b4b049d518b923187" 1234567812345678123456781234567>
|
||||||
|
int12345678123456781234567812345678: <Test #x"b00e009bd30997b0ee2e252f73b5ef4e" 12345678123456781234567812345678>
|
||||||
int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
|
int87112285931760246646623899502532662132736: <Test #x"b012010000000000000000000000000000000000" 87112285931760246646623899502532662132736>
|
||||||
|
int98765432109876543210987654321098765432109: <Test #x"b01201223eda512bdd9388fc96d96a6931c0f52d" 98765432109876543210987654321098765432109>
|
||||||
list0: <Test #x"b584" []>
|
list0: <Test #x"b584" []>
|
||||||
list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
|
list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
|
||||||
list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
|
list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
|
||||||
|
|
Loading…
Reference in New Issue