diff --git a/implementations/javascript/package.json b/implementations/javascript/package.json index 69939aa..8854316 100644 --- a/implementations/javascript/package.json +++ b/implementations/javascript/package.json @@ -1,6 +1,6 @@ { "name": "preserves", - "version": "0.4.0", + "version": "0.5.0", "description": "Experimental data serialization format", "homepage": "https://gitlab.com/preserves/preserves", "license": "Apache-2.0", diff --git a/implementations/javascript/src/codec.ts b/implementations/javascript/src/codec.ts index fda9b3d..8ccbd35 100644 --- a/implementations/javascript/src/codec.ts +++ b/implementations/javascript/src/codec.ts @@ -14,6 +14,17 @@ import { PreserveOn } from './symbols'; export type ErrorType = 'DecodeError' | 'EncodeError' | 'ShortPacket'; export const ErrorType = Symbol.for('ErrorType'); +export type Encodable = + Value | Preservable | Iterable> | ArrayBufferView; + +export interface Preservable { + [PreserveOn](encoder: Encoder): void; +} + +export function isPreservable(v: any): v is Preservable { + return typeof v === 'object' && v !== null && typeof v[PreserveOn] === 'function'; +} + export abstract class PreservesCodecError { abstract get [ErrorType](): ErrorType; @@ -53,16 +64,17 @@ export class ShortPacket extends DecodeError { } } -export interface DecoderOptions { +export interface DecoderOptions { includeAnnotations?: boolean; + decodePointer?: (v: Value) => T; } -export class Decoder { +export class Decoder { packet: Uint8Array; index: number; - options: DecoderOptions; + options: DecoderOptions; - constructor(packet: BytesLike = new Uint8Array(0), options: DecoderOptions = {}) { + constructor(packet: BytesLike = new Uint8Array(0), options: DecoderOptions = {}) { this.packet = underlying(packet); this.index = 0; this.options = options; @@ -104,7 +116,7 @@ export class Decoder { return matched; } - nextvalues(): Value[] { + nextvalues(): Value[] { const result = []; while (!this.peekend()) result.push(this.next()); return result; @@ -119,12 +131,12 @@ export class Decoder { return acc; } - wrap(v: Value): Value { + wrap(v: Value): Value { return this.includeAnnotations ? new Annotated(v) : v; } - static dictionaryFromArray(vs: Value[]): Dictionary { - const d = new Dictionary(); + static dictionaryFromArray(vs: Value[]): Dictionary> { + const d = new Dictionary>(); if (vs.length % 2) throw new DecodeError("Missing dictionary value"); for (let i = 0; i < vs.length; i += 2) { d.set(vs[i], vs[i+1]); @@ -132,14 +144,14 @@ export class Decoder { return d; } - unshiftAnnotation(a: Value, v: Annotated) { + unshiftAnnotation(a: Value, v: Annotated) { if (this.includeAnnotations) { v.annotations.unshift(a); } return v; } - next(): Value { + next(): Value { const tag = this.nextbyte(); switch (tag) { case Tag.False: return this.wrap(false); @@ -149,9 +161,16 @@ export class Decoder { case Tag.End: throw new DecodeError("Unexpected Compound end marker"); case Tag.Annotation: { const a = this.next(); - const v = this.next() as Annotated; + const v = this.next() as Annotated; return this.unshiftAnnotation(a, v); } + case Tag.Pointer: { + const d = this.options.decodePointer; + if (d === void 0) { + throw new DecodeError("No decodePointer function supplied"); + } + return this.wrap(d(this.next())); + } case Tag.SignedInteger: return this.wrap(this.nextint(this.varint())); case Tag.String: return this.wrap(Bytes.from(this.nextbytes(this.varint())).fromUtf8()); case Tag.ByteString: return this.wrap(Bytes.from(this.nextbytes(this.varint()))); @@ -192,30 +211,35 @@ export class Decoder { } } -export function decode(bs: BytesLike, options?: DecoderOptions) { +export function decode(bs: BytesLike, options?: DecoderOptions) { return new Decoder(bs, options).next(); } -export function decodeWithAnnotations(bs: BytesLike, options: DecoderOptions = {}): Annotated { - return decode(bs, { ... options, includeAnnotations: true }) as Annotated; +export function decodeWithAnnotations(bs: BytesLike, options: DecoderOptions = {}): Annotated { + return decode(bs, { ... options, includeAnnotations: true }) as Annotated; } -export interface EncoderOptions { +export interface EncoderOptions { canonical?: boolean; includeAnnotations?: boolean; + encodePointer?: (v: T) => Value; } function chunkStr(bs: Uint8Array): string { return String.fromCharCode.apply(null, bs as any as number[]); } -export class Encoder { +function isIterable(v: any): v is Iterable { + return typeof v === 'object' && v !== null && typeof v[Symbol.iterator] === 'function'; +} + +export class Encoder { chunks: Array; view: DataView; index: number; - options: EncoderOptions; + options: EncoderOptions; - constructor(options: EncoderOptions = {}) { + constructor(options: EncoderOptions = {}) { this.chunks = []; this.view = new DataView(new ArrayBuffer(256)); this.index = 0; @@ -310,7 +334,7 @@ export class Encoder { this.emitbytes(bs); } - encodevalues(tag: Tag, items: Iterable) { + encodevalues(tag: Tag, items: Iterable>) { this.emitbyte(tag); for (let i of items) { this.push(i); } this.emitbyte(Tag.End); @@ -322,8 +346,11 @@ export class Encoder { this.emitbyte(Tag.End); } - push(v: any) { - if (typeof v?.[PreserveOn] === 'function') { + push(v: Encodable) { + if (isPreservable(v)) { + v[PreserveOn](this as unknown as Encoder); + } + else if (isPreservable(v)) { v[PreserveOn](this); } else if (typeof v === 'boolean') { @@ -355,23 +382,36 @@ export class Encoder { else if (Array.isArray(v)) { this.encodevalues(Tag.Sequence, v); } - else if (typeof v?.[Symbol.iterator] === 'function') { - this.encodevalues(Tag.Sequence, v as Iterable); + else if (isIterable>(v)) { + this.encodevalues(Tag.Sequence, v as Iterable>); } else { - throw new EncodeError("Cannot encode", v); + const e = this.options.encodePointer ?? pointerId; + this.emitbyte(Tag.Pointer); + this.push(e(v)); } return this; // for chaining } } -export function encode(v: any, options?: EncoderOptions): Bytes { +export function encode(v: Encodable, options?: EncoderOptions): Bytes { return new Encoder(options).push(v).contents(); } +let _nextId = 0; +const _registry = new WeakMap(); +export function pointerId(v: object): number { + let id = _registry.get(v); + if (id === void 0) { + id = _nextId++; + _registry.set(v, id); + } + return id; +} + const _canonicalEncoder = new Encoder({ canonical: true }); let _usingCanonicalEncoder = false; -export function canonicalEncode(v: any, options?: EncoderOptions): Bytes { +export function canonicalEncode(v: Encodable, options?: EncoderOptions): Bytes { if (options === void 0 && !_usingCanonicalEncoder) { _usingCanonicalEncoder = true; const bs = _canonicalEncoder.push(v).contents(); @@ -382,10 +422,10 @@ export function canonicalEncode(v: any, options?: EncoderOptions): Bytes { } } -export function canonicalString(v: any): string { +export function canonicalString(v: Encodable): string { return _canonicalEncoder.push(v).contentsString(); } -export function encodeWithAnnotations(v: any, options: EncoderOptions = {}): Bytes { +export function encodeWithAnnotations(v: Encodable, options: EncoderOptions = {}): Bytes { return encode(v, { ... options, includeAnnotations: true }); } diff --git a/implementations/javascript/src/constants.ts b/implementations/javascript/src/constants.ts index f017b9e..461a417 100644 --- a/implementations/javascript/src/constants.ts +++ b/implementations/javascript/src/constants.ts @@ -5,6 +5,7 @@ export enum Tag { Double, End, Annotation, + Pointer, SmallInteger_lo = 0x90, MediumInteger_lo = 0xa0, diff --git a/implementations/javascript/src/text.ts b/implementations/javascript/src/text.ts index cd2c9d8..199b781 100644 --- a/implementations/javascript/src/text.ts +++ b/implementations/javascript/src/text.ts @@ -12,7 +12,7 @@ export function stringify(x: any): string { } } -export function preserves(pieces: TemplateStringsArray, ...values: Value[]): string { +export function preserves(pieces: TemplateStringsArray, ...values: Value[]): string { const result = [pieces[0]]; values.forEach((v, i) => { result.push(stringify(v)); diff --git a/implementations/javascript/src/values.ts b/implementations/javascript/src/values.ts index 3bc0511..b699816 100644 --- a/implementations/javascript/src/values.ts +++ b/implementations/javascript/src/values.ts @@ -2,22 +2,22 @@ import { PreserveOn, AsPreserve } from './symbols'; import { Tag } from './constants'; -import { Encoder, canonicalEncode, canonicalString } from './codec'; +import { Encoder, canonicalEncode, canonicalString, Preservable } from './codec'; import { stringify } from './text'; import { _iterMap, FlexMap, FlexSet } from './flex'; const textEncoder = new TextEncoder(); const textDecoder = new TextDecoder(); -export type Value = Atom | Compound | Annotated; +export type Value = Atom | Compound | T | Annotated; export type Atom = boolean | Single | Double | number | string | Bytes | symbol; -export type Compound = Record | Array | Set | Dictionary; +export type Compound = Record | Array> | Set | Dictionary>; export const IsPreservesRecord = Symbol.for('IsPreservesRecord'); export const IsPreservesBytes = Symbol.for('IsPreservesBytes'); export const IsPreservesAnnotated = Symbol.for('IsPreservesAnnotated'); -export function fromJS(x: any): Value { +export function fromJS(x: any): Value { switch (typeof x) { case 'number': if (!Number.isInteger(x)) { @@ -32,6 +32,7 @@ export function fromJS(x: any): Value { case 'undefined': case 'function': + case 'bigint': break; case 'object': @@ -41,16 +42,20 @@ export function fromJS(x: any): Value { if (typeof x[AsPreserve] === 'function') { return x[AsPreserve](); } - if (Record.isRecord(x)) { + if (Record.isRecord(x)) { return x; } if (Array.isArray(x)) { - return x.map(fromJS); + return (x as Array>).map>(fromJS); } if (ArrayBuffer.isView(x) || x instanceof ArrayBuffer) { return Bytes.from(x); } - return Dictionary.fromJS(x); + // Just... assume it's a T. + return (x as T); + + default: + break; } throw new TypeError("Cannot represent JavaScript value as Preserves: " + x); @@ -89,12 +94,12 @@ export abstract class Float { static isDouble = (x: any): x is Double => Float.isFloat(x, 'Double'); } -export class Single extends Float { - [AsPreserve](): Value { +export class Single extends Float implements Preservable { + [AsPreserve](): Value { return this; } - [PreserveOn](encoder: Encoder) { + [PreserveOn](encoder: Encoder) { encoder.emitbyte(Tag.Float); encoder.makeroom(4); encoder.view.setFloat32(encoder.index, this.value, false); @@ -110,12 +115,12 @@ export class Single extends Float { } } -export class Double extends Float { - [AsPreserve](): Value { +export class Double extends Float implements Preservable { + [AsPreserve](): Value { return this; } - [PreserveOn](encoder: Encoder) { + [PreserveOn](encoder: Encoder) { encoder.emitbyte(Tag.Double); encoder.makeroom(8); encoder.view.setFloat64(encoder.index, this.value, false); @@ -133,7 +138,7 @@ export class Double extends Float { export type BytesLike = Bytes | Uint8Array; -export class Bytes { +export class Bytes implements Preservable { readonly _view: Uint8Array; constructor(maybeByteIterable: any = new Uint8Array()) { @@ -249,7 +254,7 @@ export class Bytes { return this.asPreservesText(); } - [AsPreserve](): Value { + [AsPreserve](): Value { return this; } @@ -282,7 +287,7 @@ export class Bytes { return nibbles.join(''); } - [PreserveOn](encoder: Encoder) { + [PreserveOn](encoder: Encoder) { encoder.emitbyte(Tag.ByteString); encoder.varint(this.length); encoder.emitbytes(this._view); @@ -313,13 +318,15 @@ export function underlying(b: Bytes | Uint8Array): Uint8Array { } declare global { - interface Boolean { asPreservesText(): string; } - interface Number { asPreservesText(): string; } - interface String { asPreservesText(): string; } - interface Symbol { asPreservesText(): string; } - interface Array { asPreservesText(): string; } + interface Object { asPreservesText(): string; } } +Object.defineProperty(Object.prototype, 'asPreservesText', { + enumerable: false, + writable: true, + value: function(): string { return '#!' + JSON.stringify(this); } +}); + Boolean.prototype.asPreservesText = function (): string { return this ? '#t' : '#f'; }; @@ -338,7 +345,7 @@ Symbol.prototype.asPreservesText = function (): string { }; Array.prototype.asPreservesText = function (): string { - return '[' + this.map((i: Value) => i.asPreservesText()).join(', ') + ']'; + return '[' + this.map((i: Value) => i.asPreservesText()).join(', ') + ']'; }; // Uint8Array / TypedArray methods @@ -409,10 +416,10 @@ keys lastIndexOf reduce reduceRight some toLocaleString values`.split(/\s+/)) Bytes.prototype[Symbol.iterator] = function () { return this._view[Symbol.iterator](); }; })(); -export class Record extends Array { - readonly label: Value; +export class Record extends Array> { + readonly label: Value; - constructor(label: Value, fieldsJS: any[]) { + constructor(label: Value, fieldsJS: any[]) { if (arguments.length === 1) { // Using things like someRecord.map() involves the runtime // apparently instantiating instances of this.constructor @@ -430,15 +437,15 @@ export class Record extends Array { Object.freeze(this); } - get(index: number, defaultValue?: Value): Value | undefined { + get(index: number, defaultValue?: Value): Value | undefined { return (index < this.length) ? this[index] : defaultValue; } - set(index: number, newValue: Value): Record { + set(index: number, newValue: Value): Record { return new Record(this.label, this.map((f, i) => (i === index) ? newValue : f)); } - getConstructorInfo(): RecordConstructorInfo { + getConstructorInfo(): RecordConstructorInfo { return { label: this.label, arity: this.length }; } @@ -448,13 +455,13 @@ export class Record extends Array { this.every((f, i) => is(f, other.get(i))); } - hashCode(): number { - let h = hash(this.label); - this.forEach((f) => h = ((31 * h) + hash(f)) | 0); - return h; - } + // hashCode(): number { + // let h = hash(this.label); + // this.forEach((f) => h = ((31 * h) + hash(f)) | 0); + // return h; + // } - static fallbackToString: (f: Value) => string = (_f) => ''; + static fallbackToString: (f: Value) => string = (_f) => ''; toString(): string { return this.asPreservesText(); @@ -475,26 +482,26 @@ export class Record extends Array { }).join(', ') + ')'; } - static makeConstructor(labelSymbolText: string, fieldNames: string[]) { + static makeConstructor(labelSymbolText: string, fieldNames: string[]): RecordConstructor { return Record.makeBasicConstructor(Symbol.for(labelSymbolText), fieldNames); } - static makeBasicConstructor(label0: any, fieldNames: string[]): RecordConstructor { - const label = fromJS(label0); + static makeBasicConstructor(label0: any, fieldNames: string[]): RecordConstructor { + const label = fromJS(label0); const arity = fieldNames.length; - const ctor: RecordConstructor = (...fields: any[]) => { + const ctor: RecordConstructor = (...fields: any[]): Record => { if (fields.length !== arity) { throw new Error("Record: cannot instantiate " + (label && label.toString()) + " expecting " + arity + " fields with " + fields.length + " fields"); } - return new Record(label, fields); + return new Record(label, fields); }; const constructorInfo = { label, arity }; ctor.constructorInfo = constructorInfo; - ctor.isClassOf = (v: any): v is Record => Record.isClassOf(constructorInfo, v); + ctor.isClassOf = (v: any): v is Record => Record.isClassOf(constructorInfo, v); ctor._ = {}; fieldNames.forEach((name, i) => { - ctor._[name] = function (r: any): Value | undefined { + ctor._[name] = function (r: any): Value | undefined { if (!ctor.isClassOf(r)) { throw new Error("Record: attempt to retrieve field "+label.toString()+"."+name+ " from non-"+label.toString()+": "+(r && r.toString())); @@ -505,7 +512,7 @@ export class Record extends Array { return ctor; } - [PreserveOn](encoder: Encoder) { + [PreserveOn](encoder: Encoder) { encoder.emitbyte(Tag.Record); encoder.push(this.label); this.forEach((f) => encoder.push(f)); @@ -516,24 +523,24 @@ export class Record extends Array { return true; } - static isRecord(x: any): x is Record { + static isRecord(x: any): x is Record { return !!x?.[IsPreservesRecord]; } - static isClassOf(ci: RecordConstructorInfo, v: any): v is Record { + static isClassOf(ci: RecordConstructorInfo, v: any): v is Record { return (Record.isRecord(v)) && is(ci.label, v.label) && (ci.arity === v.length); } } -export interface RecordConstructor { - (...fields: any[]): Record; - constructorInfo: RecordConstructorInfo; - isClassOf(v: any): v is Record; - _: { [getter: string]: (r: any) => Value | undefined }; +export interface RecordConstructor { + (...fields: any[]): Record; + constructorInfo: RecordConstructorInfo; + isClassOf(v: any): v is Record; + _: { [getter: string]: (r: any) => Value | undefined }; } -export interface RecordConstructorInfo { - label: Value; +export interface RecordConstructorInfo { + label: Value; arity: number; } @@ -544,7 +551,7 @@ export function is(a: any, b: any): boolean { if (typeof a !== typeof b) return false; if (typeof a === 'object') { if (a === null || b === null) return false; - if ('equals' in a) return a.equals(b, is); + if ('equals' in a && typeof a.equals === 'function') return a.equals(b, is); if (Array.isArray(a) && Array.isArray(b)) { if (a.length !== b.length) return false; for (let i = 0; i < a.length; i++) if (!is(a[i], b[i])) return false; @@ -554,36 +561,32 @@ export function is(a: any, b: any): boolean { return false; } -export function hash(a: Value): number { - throw new Error("shouldBeImplemented"); // TODO -} - export type DictionaryType = 'Dictionary' | 'Set'; export const DictionaryType = Symbol.for('DictionaryType'); -export class Dictionary extends FlexMap { +export class Dictionary extends FlexMap, V> { get [DictionaryType](): DictionaryType { return 'Dictionary'; } - static isDictionary(x: any): x is Dictionary { + static isDictionary(x: any): x is Dictionary { return x?.[DictionaryType] === 'Dictionary'; } - static fromJS(x: object): Dictionary { - if (Dictionary.isDictionary(x)) return x as Dictionary; - const d = new Dictionary(); + static fromJS(x: object): Dictionary> { + if (Dictionary.isDictionary(x)) return x as Dictionary>; + const d = new Dictionary>(); Object.entries(x).forEach(([key, value]) => d.set(key, fromJS(value))); return d; } - constructor(items?: Iterable) { + constructor(items?: Iterable) { const iter = items?.[Symbol.iterator](); super(canonicalString, iter === void 0 ? void 0 : _iterMap(iter, ([k,v]) => [fromJS(k), v])); } - mapEntries(f: (entry: [Value, T]) => [Value, R]): Dictionary { - const result = new Dictionary(); + mapEntries(f: (entry: [Value, V]) => [Value, W]): Dictionary { + const result = new Dictionary(); for (let oldEntry of this.entries()) { const newEntry = f(oldEntry); result.set(newEntry[0], newEntry[1]) @@ -598,7 +601,7 @@ export class Dictionary extends FlexMap { '}'; } - clone(): Dictionary { + clone(): Dictionary { return new Dictionary(this); } @@ -608,7 +611,7 @@ export class Dictionary extends FlexMap { get [Symbol.toStringTag]() { return 'Dictionary'; } - [PreserveOn](encoder: Encoder) { + [PreserveOn](encoder: Encoder) { if (encoder.canonical) { const pieces = Array.from(this).map(([k, v]) => Bytes.concat([canonicalEncode(k), canonicalEncode(v)])); @@ -618,33 +621,33 @@ export class Dictionary extends FlexMap { encoder.emitbyte(Tag.Dictionary); this.forEach((v, k) => { encoder.push(k); - encoder.push(v); + encoder.push(v as unknown as Value); // Suuuuuuuper unsound }); encoder.emitbyte(Tag.End); } } } -export class Set extends FlexSet { +export class Set extends FlexSet> { get [DictionaryType](): DictionaryType { return 'Set'; } - static isSet(x: any): x is Set { + static isSet(x: any): x is Set { return x?.[DictionaryType] === 'Set'; } constructor(items?: Iterable) { const iter = items?.[Symbol.iterator](); - super(canonicalString, iter === void 0 ? void 0 : _iterMap(iter, fromJS)); + super(canonicalString, iter === void 0 ? void 0 : _iterMap>(iter, fromJS)); } - map(f: (value: Value) => Value): Set { + map(f: (value: Value) => Value): Set { return new Set(_iterMap(this[Symbol.iterator](), f)); } - filter(f: (value: Value) => boolean): Set { - const result = new Set(); + filter(f: (value: Value) => boolean): Set { + const result = new Set(); for (let k of this) if (f(k)) result.add(k); return result; } @@ -659,13 +662,13 @@ export class Set extends FlexSet { '}'; } - clone(): Set { + clone(): Set { return new Set(this); } get [Symbol.toStringTag]() { return 'Set'; } - [PreserveOn](encoder: Encoder) { + [PreserveOn](encoder: Encoder) { if (encoder.canonical) { const pieces = Array.from(this).map(k => canonicalEncode(k)); pieces.sort(Bytes.compare); @@ -676,20 +679,20 @@ export class Set extends FlexSet { } } -export class Annotated { - readonly annotations: Array; - readonly item: Value; +export class Annotated { + readonly annotations: Array>; + readonly item: Value; - constructor(item: Value) { + constructor(item: Value) { this.annotations = []; this.item = item; } - [AsPreserve](): Value { + [AsPreserve](): Value { return this; } - [PreserveOn](encoder: Encoder) { + [PreserveOn](encoder: Encoder) { if (encoder.includeAnnotations) { for (const a of this.annotations) { encoder.emitbyte(Tag.Annotation); @@ -703,9 +706,9 @@ export class Annotated { return is(this.item, Annotated.isAnnotated(other) ? other.item : other); } - hashCode(): number { - return hash(this.item); - } + // hashCode(): number { + // return hash(this.item); + // } toString(): string { return this.asPreservesText(); @@ -720,30 +723,30 @@ export class Annotated { return true; } - static isAnnotated(x: any): x is Annotated { + static isAnnotated(x: any): x is Annotated { return !!x?.[IsPreservesAnnotated]; } } -export function peel(v: Value): Value { +export function peel(v: Value): Value { return strip(v, 1); } -export function strip(v: Value, depth: number = Infinity) { - function step(v: Value, depth: number): Value { +export function strip(v: Value, depth: number = Infinity): Value { + function step(v: Value, depth: number): Value { if (depth === 0) return v; - if (!Annotated.isAnnotated(v)) return v; + if (!Annotated.isAnnotated(v)) return v; const nextDepth = depth - 1; - function walk(v: Value) { return step(v, nextDepth); } + function walk(v: Value): Value { return step(v, nextDepth); } - if (Record.isRecord(v.item)) { + if (Record.isRecord(v.item)) { return new Record(step(v.item.label, depth), v.item.map(walk)); } else if (Array.isArray(v.item)) { return v.item.map(walk); - } else if (Set.isSet(v.item)) { + } else if (Set.isSet(v.item)) { return v.item.map(walk); - } else if (Dictionary.isDictionary(v.item)) { + } else if (Dictionary.isDictionary>(v.item)) { return v.item.mapEntries((e) => [walk(e[0]), walk(e[1])]); } else if (Annotated.isAnnotated(v.item)) { throw new Error("Improper annotation structure"); @@ -754,8 +757,8 @@ export function strip(v: Value, depth: number = Infinity) { return step(v, depth); } -export function annotate(v0: Value, ...anns: Value[]) { - const v = Annotated.isAnnotated(v0) ? v0 : new Annotated(v0); +export function annotate(v0: Value, ...anns: Value[]): Annotated { + const v = Annotated.isAnnotated(v0) ? v0 : new Annotated(v0); anns.forEach((a) => v.annotations.push(a)); return v; } diff --git a/implementations/javascript/test/codec.test.ts b/implementations/javascript/test/codec.test.ts index bb8efef..b437e55 100644 --- a/implementations/javascript/test/codec.test.ts +++ b/implementations/javascript/test/codec.test.ts @@ -1,21 +1,43 @@ import { Value, Dictionary, - decode, decodeWithAnnotations, encodeWithAnnotations, canonicalEncode, + decode, decodeWithAnnotations, encode, encodeWithAnnotations, canonicalEncode, DecodeError, ShortPacket, Bytes, Record, annotate, strip, peel, preserves, fromJS, + Constants, } from '../src/index'; +const { Tag } = Constants; import './test-utils'; import * as fs from 'fs'; -const Discard = Record.makeConstructor('discard', []); -const Capture = Record.makeConstructor('capture', ['pattern']); -const Observe = Record.makeConstructor('observe', ['pattern']); +class Pointer { + v: Value; + + constructor(v: Value) { + this.v = v; + } + + equals(other: any, is: (a: any, b: any) => boolean) { + return Object.is(other.constructor, this.constructor) && is(this.v, other.v); + } +} + +function decodePointer(v: Value): Pointer { + return new Pointer(strip(v)); +} + +function encodePointer(w: Pointer): Value { + return w.v; +} + +const Discard = Record.makeConstructor('discard', []); +const Capture = Record.makeConstructor('capture', ['pattern']); +const Observe = Record.makeConstructor('observe', ['pattern']); describe('record constructors', () => { it('should have constructorInfo', () => { @@ -61,7 +83,7 @@ describe('parsing from subarray', () => { describe('reusing buffer space', () => { it('should be done safely, even with nested dictionaries', () => { - expect(canonicalEncode(fromJS(['aaa', {a: 1}, 'zzz'])).toHex()).is( + expect(canonicalEncode(fromJS(['aaa', Dictionary.fromJS({a: 1}), 'zzz'])).toHex()).is( `b5 b103616161 b7 @@ -72,46 +94,115 @@ describe('reusing buffer space', () => { }); }); +describe('encoding and decoding pointers', () => { + it('should encode using pointerId when no function has been supplied', () => { + const A1 = ({a: 1}); + const A2 = ({a: 1}); + const bs1 = canonicalEncode(A1); + const bs2 = canonicalEncode(A2); + const bs3 = canonicalEncode(A1); + expect(bs1.get(0)).toBe(Tag.Pointer); + expect(bs2.get(0)).toBe(Tag.Pointer); + expect(bs3.get(0)).toBe(Tag.Pointer); + // Can't really check the value assigned to the object. But we + // can check that it's different to a similar object! + expect(bs1).not.is(bs2); + expect(bs1).is(bs3); + }); + it('should refuse to decode pointers when no function has been supplied', () => { + expect(() => decode(Bytes.from([Tag.Pointer, Tag.SmallInteger_lo]))) + .toThrow('No decodePointer function supplied'); + }); + it('should encode properly', () => { + const objects: object[] = []; + const A = {a: 1}; + const B = {b: 2}; + expect(encode( + [A, B], + { + encodePointer(v: object): Value { + objects.push(v); + return objects.length - 1; + } + })).is(Bytes.from([Tag.Sequence, + Tag.Pointer, Tag.SmallInteger_lo, + Tag.Pointer, Tag.SmallInteger_lo + 1, + Tag.End])); + expect(objects).is([A, B]); + }); + it('should decode properly', () => { + const X = {x: 123}; + const Y = {y: 456}; + const objects: object[] = [X, Y]; + expect(decode(Bytes.from([ + Tag.Sequence, + Tag.Pointer, Tag.SmallInteger_lo, + Tag.Pointer, Tag.SmallInteger_lo + 1, + Tag.End + ]), { + decodePointer(v: Value): object { + if (typeof v !== 'number' || v < 0 || v >= objects.length) { + throw new Error("Unknown pointer target"); + } + return objects[v]; + } + })).is([X, Y]); + }); + it('should store pointers embedded in map keys correctly', () => { + const A1 = ({a: 1}); + const A2 = ({a: 1}); + const m = new Dictionary>(); + m.set([A1], 1); + m.set([A2], 2); + expect(m.get(A1)).toBeUndefined(); + expect(m.get([A1])).toBe(1); + expect(m.get([A2])).toBe(2); + expect(m.get([{a: 1}])).toBeUndefined(); + A1.a = 3; + expect(m.get([A1])).toBe(1); + }); +}); + describe('common test suite', () => { const samples_bin = fs.readFileSync(__dirname + '/../../../tests/samples.bin'); - const samples = decodeWithAnnotations(samples_bin); + const samples = decodeWithAnnotations(samples_bin, { decodePointer }); const TestCases = Record.makeConstructor('TestCases', ['cases']); function DS(bs: Bytes) { - return decode(bs); + return decode(bs, { decodePointer }); } function D(bs: Bytes) { - return decodeWithAnnotations(bs); + return decodeWithAnnotations(bs, { decodePointer }); } - function E(v: Value) { - return encodeWithAnnotations(v); + function E(v: Value) { + return encodeWithAnnotations(v, { encodePointer }); } interface ExpectedValues { - [testName: string]: { value: Value } | { forward: Value, back: Value }; + [testName: string]: { value: Value } | { forward: Value, back: Value }; } const expectedValues: ExpectedValues = { - annotation1: { forward: annotate(9, "abc"), + annotation1: { forward: annotate(9, "abc"), back: 9 }, - annotation2: { forward: annotate([[], annotate([], "x")], "abc", "def"), + annotation2: { forward: annotate([[], annotate([], "x")], "abc", "def"), back: [[], []] }, - annotation3: { forward: annotate(5, - annotate(2, 1), - annotate(4, 3)), + annotation3: { forward: annotate(5, + annotate(2, 1), + annotate(4, 3)), back: 5 }, - annotation5: { forward: annotate(new Record(Symbol.for('R'), + annotation5: { forward: annotate(new Record(Symbol.for('R'), + [annotate(Symbol.for('f'), + Symbol.for('af'))]), + Symbol.for('ar')), + back: new Record(Symbol.for('R'), [Symbol.for('f')]) }, + annotation6: { forward: new Record(annotate(Symbol.for('R'), + Symbol.for('ar')), [annotate(Symbol.for('f'), Symbol.for('af'))]), - Symbol.for('ar')), - back: new Record(Symbol.for('R'), [Symbol.for('f')]) }, - annotation6: { forward: new Record(annotate(Symbol.for('R'), - Symbol.for('ar')), - [annotate(Symbol.for('f'), - Symbol.for('af'))]), - back: new Record(Symbol.for('R'), [Symbol.for('f')]) }, - annotation7: { forward: annotate([], Symbol.for('a'), Symbol.for('b'), Symbol.for('c')), + back: new Record(Symbol.for('R'), [Symbol.for('f')]) }, + annotation7: { forward: annotate([], Symbol.for('a'), Symbol.for('b'), Symbol.for('c')), back: [] }, list1: { forward: [1, 2, 3, 4], back: [1, 2, 3, 4] }, @@ -123,7 +214,7 @@ describe('common test suite', () => { type Variety = 'normal' | 'nondeterministic' | 'decode'; - function runTestCase(variety: Variety, tName: string, binaryForm: Bytes, annotatedTextForm: Value) { + function runTestCase(variety: Variety, tName: string, binaryForm: Bytes, annotatedTextForm: Value) { describe(tName, () => { const textForm = strip(annotatedTextForm); const {forward, back} = (function () { @@ -150,10 +241,10 @@ describe('common test suite', () => { }); } - const tests = peel(TestCases._.cases(peel(samples))!) as Dictionary; - tests.forEach((t0: Value, tName0: Value) => { + const tests = peel(TestCases._.cases(peel(samples))!) as Dictionary>; + tests.forEach((t0: Value, tName0: Value) => { const tName = Symbol.keyFor(strip(tName0) as symbol)!; - const t = peel(t0) as Record; + const t = peel(t0) as Record; switch (t.label) { case Symbol.for('Test'): runTestCase('normal', tName, strip(t[0]) as Bytes, t[1]); diff --git a/implementations/javascript/test/test-utils.ts b/implementations/javascript/test/test-utils.ts index 404b364..2562512 100644 --- a/implementations/javascript/test/test-utils.ts +++ b/implementations/javascript/test/test-utils.ts @@ -4,7 +4,7 @@ import '../src/node_support'; declare global { namespace jest { interface Matchers { - is(expected: Value): R; + is(expected: Value): R; toThrowFilter(f: (e: Error) => boolean): R; } } diff --git a/implementations/python/preserves/preserves.py b/implementations/python/preserves/preserves.py index 34b9662..7eaf2ba 100644 --- a/implementations/python/preserves/preserves.py +++ b/implementations/python/preserves/preserves.py @@ -257,11 +257,12 @@ def annotate(v, *anns): return v class Decoder(Codec): - def __init__(self, packet=b'', include_annotations=False): + def __init__(self, packet=b'', include_annotations=False, decode_pointer=None): super(Decoder, self).__init__() self.packet = packet self.index = 0 self.include_annotations = include_annotations + self.decode_pointer = decode_pointer def extend(self, data): self.packet = self.packet[self.index:] + data @@ -327,6 +328,10 @@ class Decoder(Codec): a = self.next() v = self.next() return self.unshift_annotation(a, v) + if tag == 0x86: + if self.decode_pointer is None: + raise DecodeError('No decode_pointer function supplied') + return self.wrap(self.decode_pointer(self.next())) if tag >= 0x90 and tag <= 0x9f: return self.wrap(tag - (0xa0 if tag > 0x9c else 0x90)) if tag >= 0xa0 and tag <= 0xaf: return self.wrap(self.nextint(tag - 0xa0 + 1)) if tag == 0xb0: return self.wrap(self.nextint(self.varint())) @@ -350,16 +355,17 @@ class Decoder(Codec): self.index = start return None -def decode(bs): - return Decoder(packet=bs).next() +def decode(bs, **kwargs): + return Decoder(packet=bs, **kwargs).next() -def decode_with_annotations(bs): - return Decoder(packet=bs, include_annotations=True).next() +def decode_with_annotations(bs, **kwargs): + return Decoder(packet=bs, include_annotations=True, **kwargs).next() class Encoder(Codec): - def __init__(self): + def __init__(self, encode_pointer=id): super(Encoder, self).__init__() self.buffer = bytearray() + self.encode_pointer = encode_pointer def contents(self): return bytes(self.buffer) @@ -428,10 +434,12 @@ class Encoder(Codec): try: i = iter(v) except TypeError: - raise EncodeError('Cannot encode %r' % (v,)) + self.buffer.append(0x86) + self.append(self.encode_pointer(v)) + return self.encodevalues(5, i) -def encode(v): - e = Encoder() +def encode(v, **kwargs): + e = Encoder(**kwargs) e.append(v) return e.contents() diff --git a/implementations/python/preserves/test_preserves.py b/implementations/python/preserves/test_preserves.py index 9386991..ec55392 100644 --- a/implementations/python/preserves/test_preserves.py +++ b/implementations/python/preserves/test_preserves.py @@ -157,6 +157,37 @@ class CodecTests(unittest.TestCase): self._roundtrip((False,) * 100, _buf(0xb5, b'\x80' * 100, 0x84)) self._roundtrip((False,) * 200, _buf(0xb5, b'\x80' * 200, 0x84)) + def test_pointer_id(self): + class A: + def __init__(self, a): + self.a = a + a1 = A(1) + a2 = A(1) + self.assertNotEqual(_e(a1), _e(a2)) + self.assertEqual(_e(a1), _e(a1)) + from .preserves import _ord + self.assertEqual(_ord(_e(a1)[0]), 0x86) + self.assertEqual(_ord(_e(a2)[0]), 0x86) + + def test_decode_pointer_absent(self): + with self.assertRaises(DecodeError): + decode(b'\x86\xa0\xff') + + def test_encode_pointer(self): + objects = [] + def enc(p): + objects.append(p) + return len(objects) - 1 + self.assertEqual(encode([object(), object()], encode_pointer = enc), + b'\xb5\x86\x90\x86\x91\x84') + + def test_decode_pointer(self): + objects = [123, 234] + def dec(v): + return objects[v] + self.assertEqual(decode(b'\xb5\x86\x90\x86\x91\x84', decode_pointer = dec), + (123, 234)) + def add_method(d, tName, fn): if hasattr(fn, 'func_name'): # python2 @@ -223,11 +254,26 @@ def install_exn_test(d, tName, bs, check_proc): self.fail('did not fail as expected') add_method(d, tName, test_exn) +class Pointer: + def __init__(self, v): + self.v = strip_annotations(v) + + @staticmethod + def value(i): + return i.v + + def __eq__(self, other): + if other.__class__ is self.__class__: + return self.v == other.v + + def __hash__(self): + return hash(self.v) + class CommonTestSuite(unittest.TestCase): import os with open(os.path.join(os.path.dirname(__file__), '../../../tests/samples.bin'), 'rb') as f: - samples = Decoder(f.read(), include_annotations=True).next() + samples = Decoder(f.read(), include_annotations=True, decode_pointer=Pointer).next() TestCases = Record.makeConstructor('TestCases', 'cases') @@ -257,13 +303,13 @@ class CommonTestSuite(unittest.TestCase): raise Exception('Unsupported test kind', t.key) def DS(self, bs): - return decode(bs) + return decode(bs, decode_pointer=Pointer) def D(self, bs): - return decode_with_annotations(bs) + return decode_with_annotations(bs, decode_pointer=Pointer) def E(self, v): - return encode(v) + return encode(v, encode_pointer=Pointer.value) class RecordTests(unittest.TestCase): def test_getters(self): diff --git a/implementations/python/setup.py b/implementations/python/setup.py index c3940cd..10c5fa5 100644 --- a/implementations/python/setup.py +++ b/implementations/python/setup.py @@ -5,7 +5,7 @@ except ImportError: setup( name="preserves", - version="0.4.0", + version="0.5.0", author="Tony Garnock-Jones", author_email="tonyg@leastfixedpoint.com", license="Apache Software License", diff --git a/implementations/racket/preserves/preserves/jelly.rkt b/implementations/racket/preserves/preserves/jelly.rkt index 54383e4..45d13b3 100644 --- a/implementations/racket/preserves/preserves/jelly.rkt +++ b/implementations/racket/preserves/preserves/jelly.rkt @@ -11,6 +11,7 @@ (struct record (label fields) #:transparent) (struct float (value) #:transparent) ;; a marker for single-precision I/O (struct annotated (annotations item) #:transparent) +(struct pointer (value) #:transparent) ;;--------------------------------------------------------------------------- ;; Reader @@ -29,6 +30,7 @@ (match (next) [(annotated as i) (annotated (cons a as) i)] [i (annotated (list a) i)]))] + [#x86 (pointer (next))] [(? (between #x90 #x9C) v) (- v #x90)] [(? (between #x9D #x9F) v) (- v #xA0)] [(? (between #xA0 #xAF) v) (next-integer (- v #xA0 -1))] @@ -85,6 +87,8 @@ (for [(a (in-list as))] (write-byte #x85 out-port) (output a)) (output v)] + [(pointer v) (write-byte #x86 out-port) (output v)] + [(? integer?) (cond [(<= -3 v -1) (write-byte (+ v #xA0) out-port)] [(<= 0 v 12) (write-byte (+ v #x90) out-port)] diff --git a/implementations/racket/preserves/preserves/main.rkt b/implementations/racket/preserves/preserves/main.rkt index e7556fb..ef03b91 100644 --- a/implementations/racket/preserves/preserves/main.rkt +++ b/implementations/racket/preserves/preserves/main.rkt @@ -25,8 +25,14 @@ (define (read-preserve [in-port (current-input-port)] #:read-syntax? [read-syntax? #f] + #:decode-pointer [decode-pointer #f] #:source [source (object-name in-port)]) (define b (peek-byte in-port)) (cond [(eof-object? b) b] - [(<= #x80 b #xBF) (read-preserve/binary in-port #:read-syntax? read-syntax?)] - [else (read-preserve/text in-port #:read-syntax? read-syntax? #:source source)])) + [(<= #x80 b #xBF) (read-preserve/binary in-port + #:read-syntax? read-syntax? + #:decode-pointer decode-pointer)] + [else (read-preserve/text in-port + #:read-syntax? read-syntax? + #:decode-pointer decode-pointer + #:source source)])) diff --git a/implementations/racket/preserves/preserves/object-id.rkt b/implementations/racket/preserves/preserves/object-id.rkt new file mode 100644 index 0000000..9a59ff2 --- /dev/null +++ b/implementations/racket/preserves/preserves/object-id.rkt @@ -0,0 +1,9 @@ +#lang racket/base + +(provide object-id) + +(define table (make-weak-hasheq)) +(define next 0) + +(define (object-id x) + (hash-ref! table x (lambda () (let ((v next)) (set! next (+ v 1)) v)))) diff --git a/implementations/racket/preserves/preserves/order.rkt b/implementations/racket/preserves/preserves/order.rkt index fd1b19d..f1d156c 100644 --- a/implementations/racket/preserves/preserves/order.rkt +++ b/implementations/racket/preserves/preserves/order.rkt @@ -32,7 +32,7 @@ [(? list?) 8] [(? set?) 9] [(? dict?) 10] - [_ (error 'preserve-order "Cannot compare value ~v" v)])) + [_ 11])) (define-syntax chain-order (syntax-rules () diff --git a/implementations/racket/preserves/preserves/read-binary.rkt b/implementations/racket/preserves/preserves/read-binary.rkt index a498a96..47334a0 100644 --- a/implementations/racket/preserves/preserves/read-binary.rkt +++ b/implementations/racket/preserves/preserves/read-binary.rkt @@ -13,9 +13,12 @@ (define (default-on-short) (error 'read-preserve/binary "Short Preserves binary")) (define (default-on-fail message . args) (error 'read-preserve/binary (apply format message args))) +(define (default-decode-pointer v) + (error 'read-preserve/binary "No decode-pointer function supplied")) (define (bytes->preserve bs #:read-syntax? [read-syntax? #f] + #:decode-pointer [decode-pointer #f] #:on-short [on-short default-on-short] [on-fail default-on-fail]) (call-with-input-bytes @@ -23,6 +26,7 @@ (lambda (p) (match (read-preserve/binary p #:read-syntax? read-syntax? + #:decode-pointer decode-pointer #:on-short on-short on-fail) [(? eof-object?) (on-short)] @@ -32,9 +36,11 @@ (define (read-preserve/binary [in-port (current-input-port)] #:read-syntax? [read-syntax? #f] + #:decode-pointer [decode-pointer0 #f] #:on-short [on-short default-on-short] [on-fail default-on-fail]) (define read-annotations? read-syntax?) + (define decode-pointer (or decode-pointer0 default-decode-pointer)) (let/ec return (define (next) (wrap (pos) (next* (next-byte)))) @@ -70,6 +76,7 @@ (if read-annotations? (annotate (next) a) (next)))] + [#x86 (decode-pointer (next))] [(? (between #x90 #x9C) v) (- v #x90)] [(? (between #x9D #x9F) v) (- v #xA0)] [(? (between #xA0 #xAF) v) (next-integer (- v #xA0 -1))] diff --git a/implementations/racket/preserves/preserves/read-text.rkt b/implementations/racket/preserves/preserves/read-text.rkt index 941b2ab..2f59682 100644 --- a/implementations/racket/preserves/preserves/read-text.rkt +++ b/implementations/racket/preserves/preserves/read-text.rkt @@ -24,13 +24,18 @@ pos #f)) +(define (default-decode-pointer v) + (error 'read-preserve/text "No decode-pointer function supplied")) + (define (string->preserve s #:read-syntax? [read-syntax? #f] + #:decode-pointer [decode-pointer #f] #:source [source ""]) (define p (open-input-string s)) (when read-syntax? (port-count-lines! p)) (define v (read-preserve/text p #:read-syntax? read-syntax? + #:decode-pointer decode-pointer #:source source)) (when (eof-object? v) (parse-error* #:raise-proc raise-read-eof-error p source "Unexpected end of input")) @@ -48,8 +53,10 @@ (define (read-preserve/text [in-port (current-input-port)] #:read-syntax? [read-syntax? #f] + #:decode-pointer [decode-pointer0 #f] #:source [source (object-name in-port)]) (define read-annotations? read-syntax?) + (define decode-pointer (or decode-pointer0 default-decode-pointer)) ;;--------------------------------------------------------------------------- ;; Core of parser @@ -89,6 +96,7 @@ (apply parse-error (string-append "Embedded binary value: " message) args)) #:read-syntax? read-syntax? #:on-short (lambda () (parse-error "Incomplete embedded binary value")))] + [#\! (decode-pointer (next))] [c (parse-error "Invalid # syntax: ~v" c)])] [#\< (match (read-sequence #\>) diff --git a/implementations/racket/preserves/preserves/tests/test-main.rkt b/implementations/racket/preserves/preserves/tests/test-main.rkt index a2305ec..2117208 100644 --- a/implementations/racket/preserves/preserves/tests/test-main.rkt +++ b/implementations/racket/preserves/preserves/tests/test-main.rkt @@ -9,14 +9,22 @@ (require racket/runtime-path) (require syntax/srcloc) +(struct pointer (value) #:transparent) + +(define (pointer/no-annotations v) + (pointer (strip-annotations v))) + (define (d bs #:allow-invalid-prefix? [allow-invalid-prefix? #f]) (for [(i (in-range 1 (- (bytes-length bs) 1)))] - (define result (bytes->preserve (subbytes bs 0 i) #:on-short (lambda () 'short) void)) + (define result (bytes->preserve (subbytes bs 0 i) + #:decode-pointer pointer/no-annotations + #:on-short (lambda () 'short) void)) (when (and (not (eq? result 'short)) (not (and allow-invalid-prefix? (void? result)))) (error 'd "~a-byte prefix of ~v does not read as short; result: ~v" i bs result))) (bytes->preserve bs #:read-syntax? #t + #:decode-pointer pointer/no-annotations #:on-short (lambda () 'short) void)) @@ -125,16 +133,31 @@ (match (hash-ref samples-txt-expected t-name text-form) [(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations [v (values v v #t)])) - (check-equal? text-form back loc) ;; expectation 1 - (check-equal? (d-strip (preserve->bytes text-form)) back loc) ;; expectation 2 - (check-equal? (d-strip (preserve->bytes forward)) back loc) ;; expectation 3 - (check-equal? (d-strip binary-form) back loc) ;; expectation 4 - (check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5 - (check-equal? (d (preserve->bytes annotated-text-form)) annotated-text-form loc) ;; expectation 6 - (check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7 - (check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8 + (check-equal? text-form back loc) ;; expectation 1 + (check-equal? (d-strip (preserve->bytes #:encode-pointer pointer-value text-form)) + back + loc) ;; expectation 2 + (check-equal? (d-strip (preserve->bytes #:encode-pointer pointer-value forward)) + back + loc) ;; expectation 3 + (check-equal? (d-strip binary-form) back loc) ;; expectation 4 + (check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5 + (check-equal? (d (preserve->bytes #:encode-pointer pointer-value annotated-text-form)) + annotated-text-form + loc) ;; expectation 6 + (check-equal? (string->preserve #:decode-pointer pointer/no-annotations + (preserve->string #:encode-pointer pointer-value text-form)) + back + loc) ;; expectation 7 + (check-equal? (string->preserve #:decode-pointer pointer/no-annotations + (preserve->string #:encode-pointer pointer-value forward)) + back + loc) ;; expectation 8 ;; similar to 8: - (check-equal? (string->preserve (preserve->string annotated-text-form) #:read-syntax? #t) + (check-equal? (string->preserve #:decode-pointer pointer/no-annotations + (preserve->string #:encode-pointer pointer-value + annotated-text-form) + #:read-syntax? #t) annotated-text-form loc) (when (and (not (memq variety '(decode))) @@ -142,13 +165,16 @@ (and can-execute-nondet-with-canonicalization?))) ;; expectations 9 and 10 (check-equal? (preserve->bytes forward + #:encode-pointer pointer-value #:canonicalizing? #t #:write-annotations? #t) binary-form loc)) (unless (memq variety '(decode nondeterministic)) ;; expectation 11 - (check-equal? (preserve->bytes annotated-text-form #:write-annotations? #t) + (check-equal? (preserve->bytes annotated-text-form + #:encode-pointer pointer-value + #:write-annotations? #t) binary-form loc))) @@ -157,7 +183,10 @@ (testfile (call-with-input-file path (lambda (p) (port-count-lines! p) - (read-preserve p #:read-syntax? #t #:source path))))) + (read-preserve p + #:read-syntax? #t + #:decode-pointer pointer/no-annotations + #:source path))))) (match-define (peel-annotations `#s(TestCases ,tests)) testfile) (for [((t-name* t*) (in-hash (annotated-item tests)))] (define t-name (strip-annotations t-name*)) diff --git a/implementations/racket/preserves/preserves/tool.rkt b/implementations/racket/preserves/preserves/tool.rkt index 6923b77..0701b47 100644 --- a/implementations/racket/preserves/preserves/tool.rkt +++ b/implementations/racket/preserves/preserves/tool.rkt @@ -43,19 +43,21 @@ ["--no-annotations" "Strip annotations" (set! annotations? #f)]) + (struct pointer (value) #:transparent) + (let loop ((count count)) (when (positive? count) (define v ((if annotations? values strip-annotations) (match input-format - ['any (read-preserve #:read-syntax? #t #:source "")] - ['text (read-preserve/text #:read-syntax? #t #:source "")] - ['binary (read-preserve/binary #:read-syntax? #t)]))) + ['any (read-preserve #:read-syntax? #t #:decode-pointer pointer #:source "")] + ['text (read-preserve/text #:read-syntax? #t #:decode-pointer pointer #:source "")] + ['binary (read-preserve/binary #:decode-pointer pointer #:read-syntax? #t)]))) (when (not (eof-object? v)) (void (match output-format ['text - (write-preserve/text v #:indent indent?) + (write-preserve/text v #:indent indent? #:encode-pointer pointer-value) (newline)] ['binary - (write-preserve/binary v #:write-annotations? #t)])) + (write-preserve/binary v #:encode-pointer pointer-value #:write-annotations? #t)])) (flush-output) (loop (- count 1)))))) diff --git a/implementations/racket/preserves/preserves/write-binary.rkt b/implementations/racket/preserves/preserves/write-binary.rkt index f4a1f80..cf128c0 100644 --- a/implementations/racket/preserves/preserves/write-binary.rkt +++ b/implementations/racket/preserves/preserves/write-binary.rkt @@ -9,21 +9,26 @@ (require "float.rkt") (require "annotation.rkt") (require "varint.rkt") +(require "object-id.rkt") (require racket/set) (require racket/dict) (require (only-in racket/list flatten)) (define (preserve->bytes v #:canonicalizing? [canonicalizing? #t] + #:encode-pointer [encode-pointer #f] #:write-annotations? [write-annotations? (not canonicalizing?)]) (call-with-output-bytes (lambda (p) (write-preserve/binary v p #:canonicalizing? canonicalizing? + #:encode-pointer encode-pointer #:write-annotations? write-annotations?)))) (define (write-preserve/binary v [out-port (current-output-port)] #:canonicalizing? [canonicalizing? #t] + #:encode-pointer [encode-pointer0 #f] #:write-annotations? [write-annotations? (not canonicalizing?)]) + (define encode-pointer (or encode-pointer0 object-id)) (define (output-byte b) (write-byte b out-port)) @@ -115,6 +120,8 @@ [(? set?) (with-seq 6 (output-set v))] [(? dict?) (with-seq 7 (output-dict v))] - [_ (error 'write-preserve/binary "Invalid value: ~v" v)])) + [other + (output-byte #x86) + (output (encode-pointer other))])) (output v)) diff --git a/implementations/racket/preserves/preserves/write-text.rkt b/implementations/racket/preserves/preserves/write-text.rkt index 6e0a999..a0381a1 100644 --- a/implementations/racket/preserves/preserves/write-text.rkt +++ b/implementations/racket/preserves/preserves/write-text.rkt @@ -12,6 +12,7 @@ (require "annotation.rkt") (require "float.rkt") (require "record.rkt") +(require "object-id.rkt") (require racket/dict) (require racket/set) (require (only-in racket/port with-output-to-string)) @@ -24,7 +25,9 @@ (define (write-preserve/text v0 [o (current-output-port)] #:indent [indent-amount0 #f] + #:encode-pointer [encode-pointer0 #f] #:write-annotations? [write-annotations? #t]) + (define encode-pointer (or encode-pointer0 object-id)) (define indent-amount (match indent-amount0 [#f 0] [#t 2] ;; a default @@ -164,15 +167,18 @@ [(? list?) (write-sequence distance "[" "," "]" write-value v)] [(? set?) (write-sequence distance "#{" "," "}" write-value (set->list v))] [(? dict?) (write-sequence distance "{" "," "}" write-key-value (dict->list v))] - - [_ (error 'write-preserve/text "Cannot encode value ~v" v)])) + [other + (! "#!") + (write-value distance (encode-pointer other))])) (write-value 0 v0)) (define (preserve->string v0 #:indent [indent-amount #f] + #:encode-pointer [encode-pointer #f] #:write-annotations? [write-annotations? #t]) (with-output-to-string (lambda () (write-preserve/text v0 #:indent indent-amount + #:encode-pointer encode-pointer #:write-annotations? write-annotations?)))) diff --git a/implementations/rust/src/error.rs b/implementations/rust/src/error.rs index 54ac674..721dc37 100644 --- a/implementations/rust/src/error.rs +++ b/implementations/rust/src/error.rs @@ -41,6 +41,8 @@ pub enum ExpectedKind { Set, Dictionary, + Pointer, + SequenceOrSet, // Because of hacking up serde's data model: see open_sequence_or_set etc. Option, diff --git a/implementations/rust/src/value/mod.rs b/implementations/rust/src/value/mod.rs index 97469a6..2ad25fa 100644 --- a/implementations/rust/src/value/mod.rs +++ b/implementations/rust/src/value/mod.rs @@ -21,7 +21,6 @@ pub use value::Domain; pub use value::IOValue; pub use value::Map; pub use value::NestedValue; -pub use value::NullDomain; pub use value::PlainValue; pub use value::RcValue; pub use value::Set; diff --git a/implementations/rust/src/value/packed/constants.rs b/implementations/rust/src/value/packed/constants.rs index 2b20132..ea93806 100644 --- a/implementations/rust/src/value/packed/constants.rs +++ b/implementations/rust/src/value/packed/constants.rs @@ -8,6 +8,7 @@ pub enum Tag { Double, End, Annotation, + Pointer, SmallInteger(i8), MediumInteger(u8), SignedInteger, @@ -46,6 +47,7 @@ impl TryFrom for Tag { 0x83 => Ok(Self::Double), 0x84 => Ok(Self::End), 0x85 => Ok(Self::Annotation), + 0x86 => Ok(Self::Pointer), 0x90..=0x9c => Ok(Self::SmallInteger((v - 0x90) as i8)), 0x9d..=0x9f => Ok(Self::SmallInteger((v - 0x90) as i8 - 16)), 0xa0..=0xaf => Ok(Self::MediumInteger(v - 0xa0 + 1)), @@ -71,6 +73,7 @@ impl From for u8 { Tag::Double => 0x83, Tag::End => 0x84, Tag::Annotation => 0x85, + Tag::Pointer => 0x86, Tag::SmallInteger(v) => if v < 0 { (v + 16) as u8 + 0x90 } else { v as u8 + 0x90 }, Tag::MediumInteger(count) => count - 1 + 0xa0, Tag::SignedInteger => 0xb0, diff --git a/implementations/rust/src/value/packed/reader.rs b/implementations/rust/src/value/packed/reader.rs index a71d15b..e838381 100644 --- a/implementations/rust/src/value/packed/reader.rs +++ b/implementations/rust/src/value/packed/reader.rs @@ -5,7 +5,7 @@ use std::convert::TryFrom; use std::convert::TryInto; use std::marker::PhantomData; use super::super::signed_integer::SignedInteger; -use super::super::value::{Value, NestedValue, IOValue, FALSE, TRUE, Map, Set, Record, Annotations}; +use super::super::value::{Value, NestedValue, Domain, IOValue, FALSE, TRUE, Map, Set, Record, Annotations}; use super::constants::Tag; use super::super::reader::{ @@ -262,6 +262,10 @@ impl<'de, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, S> { self.demand_next(read_annotations)? } } + Tag::Pointer => { + let v = self.demand_next(read_annotations)?; + Value::Domain(IOValue::from_preserves(v)?).wrap() + } Tag::SmallInteger(v) => { // TODO: prebuild these in value.rs Value::from(v).wrap() @@ -358,6 +362,14 @@ impl<'de, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, S> { Ok(self.peekend()?) } + fn open_pointer(&mut self) -> ReaderResult<()> { + self.next_compound(Tag::Pointer, ExpectedKind::Pointer) + } + + fn close_pointer(&mut self) -> ReaderResult<()> { + Ok(()) + } + fn next_boolean(&mut self) -> ReaderResult { match self.peek_next_nonannotation_tag()? { Tag::False => { self.skip()?; Ok(false) } diff --git a/implementations/rust/src/value/packed/writer.rs b/implementations/rust/src/value/packed/writer.rs index ba29736..9850571 100644 --- a/implementations/rust/src/value/packed/writer.rs +++ b/implementations/rust/src/value/packed/writer.rs @@ -215,6 +215,7 @@ impl Writer for BinaryOrderWriter { type AnnWriter = PackedWriter>; type SeqWriter = PackedWriter>; type SetWriter = BinaryOrderWriter; + type PointerWriter = PackedWriter>; binary_order_writer_method!(mut align(natural_chunksize: u64) -> Result<()>); @@ -271,6 +272,15 @@ impl Writer for BinaryOrderWriter { fn end_set(&mut self, set: Self::SetWriter) -> Result<()> { set.finish(self) } + + fn start_pointer(&mut self) -> Result { + self.write_tag(Tag::Pointer)?; + Ok(self.pop()) + } + fn end_pointer(&mut self, ptr: Self::PointerWriter) -> Result<()> { + self.push(ptr); + Ok(()) + } } macro_rules! fits_in_bytes { @@ -285,6 +295,7 @@ impl Writer for PackedWriter type AnnWriter = Self; type SeqWriter = Self; type SetWriter = BinaryOrderWriter; + type PointerWriter = Self; fn start_annotations(&mut self) -> Result { Ok(self.suspend()) @@ -478,4 +489,14 @@ impl Writer for PackedWriter fn end_set(&mut self, set: Self::SetWriter) -> Result<()> { set.finish(self) } + + fn start_pointer(&mut self) -> Result { + self.write_tag(Tag::Pointer)?; + Ok(self.suspend()) + } + + fn end_pointer(&mut self, ann: Self::PointerWriter) -> Result<()> { + self.resume(ann); + Ok(()) + } } diff --git a/implementations/rust/src/value/reader.rs b/implementations/rust/src/value/reader.rs index a314002..ebc1891 100644 --- a/implementations/rust/src/value/reader.rs +++ b/implementations/rust/src/value/reader.rs @@ -15,6 +15,8 @@ pub trait Reader<'de> { fn open_set(&mut self) -> ReaderResult<()>; fn open_dictionary(&mut self) -> ReaderResult<()>; fn close_compound(&mut self) -> ReaderResult; + fn open_pointer(&mut self) -> ReaderResult<()>; + fn close_pointer(&mut self) -> ReaderResult<()>; //--------------------------------------------------------------------------- @@ -142,6 +144,14 @@ impl<'r, 'de, R: Reader<'de>> Reader<'de> for &'r mut R { fn close_compound(&mut self) -> ReaderResult { (*self).close_compound() } + + fn open_pointer(&mut self) -> ReaderResult<()> { + (*self).open_pointer() + } + + fn close_pointer(&mut self) -> ReaderResult<()> { + (*self).close_pointer() + } } diff --git a/implementations/rust/src/value/value.rs b/implementations/rust/src/value/value.rs index a10f45e..32213e0 100644 --- a/implementations/rust/src/value/value.rs +++ b/implementations/rust/src/value/value.rs @@ -19,6 +19,12 @@ use super::signed_integer::SignedInteger; use crate::error::{Error, ExpectedKind, Received}; pub trait Domain: Sized + Debug + Clone + Eq + Hash + Ord { + fn from_preserves(v: IOValue) -> Result { + Err(std::io::Error::new(std::io::ErrorKind::InvalidData, + format!("Cannot Preserves-decode domain-specific value {:?}", + v))) + } + fn as_preserves(&self) -> Result { Err(std::io::Error::new(std::io::ErrorKind::InvalidData, format!("Cannot Preserves-encode domain-specific value {:?}", @@ -270,7 +276,7 @@ impl, D: Domain> Debug for Value { f.debug_set().entries(v.iter()).finish() } Value::Dictionary(ref v) => f.debug_map().entries(v.iter()).finish(), - Value::Domain(ref d) => write!(f, "{:?}", d), + Value::Domain(ref d) => write!(f, "#!{:?}", d), } } } @@ -1044,13 +1050,19 @@ impl<'de, Dom: Domain> serde::Deserialize<'de> for ArcValue { //--------------------------------------------------------------------------- -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum NullDomain {} -impl Domain for NullDomain {} - #[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct IOValue(Arc>); -pub type UnwrappedIOValue = Value; +pub struct IOValue(Arc>); +pub type UnwrappedIOValue = Value; + +impl Domain for IOValue { + fn from_preserves(v: IOValue) -> Result { + Ok(v) + } + + fn as_preserves(&self) -> Result { + Ok(self.clone()) + } +} lazy_static! { pub static ref FALSE: IOValue = IOValue(Arc::new(AnnotatedValue(Annotations::empty(), Value::Boolean(false)))); @@ -1058,27 +1070,27 @@ lazy_static! { pub static ref EMPTY_SEQ: IOValue = IOValue(Arc::new(AnnotatedValue(Annotations::empty(), Value::Sequence(Vec::new())))); } -impl NestedValue for IOValue { - fn wrap(anns: Annotations, v: Value) -> Self { +impl NestedValue for IOValue { + fn wrap(anns: Annotations, v: Value) -> Self { IOValue(Arc::new(AnnotatedValue::new(anns, v))) } - fn annotations(&self) -> &Annotations { + fn annotations(&self) -> &Annotations { &(self.0).0 } - fn value(&self) -> &Value { + fn value(&self) -> &Value { &(self.0).1 } - fn pieces(self) -> (Annotations, Value) { + fn pieces(self) -> (Annotations, Value) { match Arc::try_unwrap(self.0) { Ok(AnnotatedValue(anns, v)) => (anns, v), Err(r) => (r.0.clone(), r.1.clone()), } } - fn value_owned(self) -> Value { + fn value_owned(self) -> Value { match Arc::try_unwrap(self.0) { Ok(AnnotatedValue(_anns, v)) => v, Err(r) => r.1.clone(), diff --git a/implementations/rust/src/value/writer.rs b/implementations/rust/src/value/writer.rs index 7acf5b0..f74dd43 100644 --- a/implementations/rust/src/value/writer.rs +++ b/implementations/rust/src/value/writer.rs @@ -20,6 +20,7 @@ pub trait Writer: Sized { type AnnWriter: AnnotationWriter; type SeqWriter: CompoundWriter; type SetWriter: CompoundWriter; + type PointerWriter: Writer; fn align(&mut self, natural_chunksize: u64) -> Result<()>; @@ -55,6 +56,9 @@ pub trait Writer: Sized { fn start_dictionary(&mut self, entry_count: Option) -> Result; fn end_set(&mut self, set: Self::SetWriter) -> Result<()>; + fn start_pointer(&mut self) -> Result; + fn end_pointer(&mut self, ptr: Self::PointerWriter) -> Result<()>; + //--------------------------------------------------------------------------- fn write(&mut self, v: &IOValue) -> Result<()> { @@ -129,7 +133,11 @@ pub trait Writer: Sized { } self.end_set(c) } - Value::Domain(ref d) => self.write(&d.as_preserves()?) + Value::Domain(ref d) => { + let mut c = self.start_pointer()?; + c.write(&d.as_preserves()?)?; + self.end_pointer(c) + } } } } diff --git a/preserves.md b/preserves.md index feec74d..dca86c6 100644 --- a/preserves.md +++ b/preserves.md @@ -4,7 +4,7 @@ title: "Preserves: an Expressive Data Language" --- Tony Garnock-Jones -Jan 2021. Version 0.4.0. +Jan 2021. Version 0.5.0. [sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt [spki]: http://world.std.com/~cme/html/spki.html @@ -17,21 +17,17 @@ Jan 2021. Version 0.4.0. This document proposes a data model and serialization format called *Preserves*. -Preserves supports *records* with user-defined *labels*. This relieves -the confusion caused by encoding records as dictionaries, seen in most -data languages in use on the web. It also allows Preserves to easily -represent the *labelled sums of products* as seen in many functional -programming languages. - -Preserves also supports the usual suite of atomic and compound data -types, in particular including *binary* data as a distinct type from -text strings. Its *annotations* allow separation of data from metadata -such as [comments](conventions.html#comments), trace information, and +Preserves supports *records* with user-defined *labels*, embedded +*references*, and the usual suite of atomic and compound data types, +including *binary* data as a distinct type from text strings. Its +*annotations* allow separation of data from metadata such as +[comments](conventions.html#comments), trace information, and provenance information. -Finally, Preserves defines precisely how to *compare* two values. -Comparison is based on the data model, not on syntax or on data -structures of any particular implementation language. +Preserves departs from many other data languages in defining how to +*compare* two values. Comparison is based on the data model, not on +syntax or on data structures of any particular implementation +language. ## Starting with Semantics @@ -40,23 +36,25 @@ definition of the *values* that we want to work with and give them meaning independent of their syntax. Our `Value`s fall into two broad categories: *atomic* and *compound* -data. Every `Value` is finite and non-cyclic. +data. Every `Value` is finite and non-cyclic. References, called +`Pointer`s, are a third, special-case category. Value = Atom - | Compound + | Compound + | Pointer Atom = Boolean - | Float - | Double - | SignedInteger - | String - | ByteString - | Symbol + | Float + | Double + | SignedInteger + | String + | ByteString + | Symbol Compound = Record - | Sequence - | Set - | Dictionary + | Sequence + | Set + | Dictionary **Total order.** As we go, we will incrementally specify a total order over `Value`s. Two values of the @@ -65,7 +63,7 @@ values of different kinds is essentially arbitrary, but having a total order is convenient for many tasks, so we define it as follows: - (Values) Atom < Compound + (Values) Atom < Compound < Pointer (Compounds) Record < Sequence < Set < Dictionary @@ -162,6 +160,45 @@ pairwise distinct. Instances of `Dictionary` are compared by lexicographic comparison of the sequences resulting from ordering each `Dictionary`'s pairs in ascending order by key. +### Pointers. + +A `Pointer` embeds *domain-specific*, potentially *stateful* or +*located* data into a `Value`.[^pointer-rationale] `Pointer`s may be +used to denote stateful objects, network services, object +capabilities, file descriptors, Unix processes, or other +possibly-stateful things. Because each `Pointer` is a domain-specific +datum, comparison of two `Pointer`s is done according to +domain-specific rules. + + [^pointer-rationale]: **Rationale.** Why include `Pointer`s as a + special class, distinct from, say, a specially-labeled `Record`? + First, a `Record` can only hold other `Value`s: in order to embed + values such as live pointers to Java objects, some means of + "escaping" from the `Value` data type must be provided. Second, + `Pointer`s are meant to be able to denote stateful entities, for + which comparison by address is appropriate; however, we do not + wish to place restrictions on the *nature* of these entities: if + we had used `Record`s instead of distinct `Pointer`s, users would + have to invent an encoding of domain data into `Record`s that + reflected domain ordering into `Value` ordering. This is often + difficult and may not always be possible. Finally, because + `Pointer`s are intended to be able to represent network and memory + *locations*, they must be able to be rewritten at network and + process boundaries. Having a distinct class allows generic + `Pointer` rewriting without the quotation-related complications of + encoding references as, say, `Record`s. + +*Examples.* In a Java or Python implementation, a `Pointer` may denote +a reference to a Java or Python object; comparison would be done via +the language's own rules for equivalence and ordering. In a Unix +application, a `Pointer` may denote an open file descriptor or a +process ID. In an HTTP-based application, each `Pointer` might be a +URL, compared according to +[RFC 6943](https://tools.ietf.org/html/rfc6943#section-3.3). When a +`Value` is serialized for storage or transfer, embedded `Pointer`s +will usually be represented as ordinary `Value`s, in which case the +ordinary rules for comparing `Value`s will apply. + ## Textual Syntax Now we have discussed `Value`s and their meanings, we may turn to @@ -204,7 +241,7 @@ Standalone documents may have trailing whitespace. Any `Value` may be preceded by whitespace. - Value = ws (Record / Collection / Atom / Compact) + Value = ws (Record / Collection / Atom / Pointer / Compact) Collection = Sequence / Dictionary / Set Atom = Boolean / Float / Double / SignedInteger / String / ByteString / Symbol @@ -364,6 +401,11 @@ double quote mark. definition of “token representation”, and with the [R6RS definition of identifiers](http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-7.html#node_sec_4.2.4). +A `Pointer` is written as a `Value` chosen to represent the denoted +object, prefixed with `#!`. + + Pointer = "#!" Value + Finally, any `Value` may be represented by escaping from the textual syntax to the [compact binary syntax](#compact-binary-syntax) by prefixing a `ByteString` containing the binary representation of the @@ -467,11 +509,11 @@ write `varint(m)` for the varint-encoding of `m`. Quoting the The following table illustrates varint-encoding. -| Number, `m` | `m` in binary, grouped into 7-bit chunks | `varint(m)` bytes | -| ------ | ------------------- | ------------ | -| 15 | `0001111` | 15 | -| 300 | `0000010 0101100` | 172 2 | -| 1000000000 | `0000011 1011100 1101011 0010100 0000000` | 128 148 235 220 3 | +| Number, `m` | `m` in binary, grouped into 7-bit chunks | `varint(m)` bytes | +| ------ | ------------------- | ------------ | +| 15 | `0001111` | 15 | +| 300 | `0000010 0101100` | 172 2 | +| 1000000000 | `0000011 1011100 1101011 0010100 0000000` | 128 148 235 220 3 | It is an error for a varint-encoded `m` in a `Repr` to be anything other than the unique shortest encoding for that `m`. That is, a @@ -579,6 +621,13 @@ contained within the `Value` unmodified. The functions `binary32(F)` and `binary64(D)` yield big-endian 4- and 8-byte IEEE 754 binary representations of `F` and `D`, respectively. +### Pointers. + +The `Repr` of a `Pointer` is the `Repr` of a `Value` chosen to +represent the denoted object, prefixed with `[0x86]`. + + «#!V» = [0x86] ++ «V» + ### Annotations. To annotate a `Repr` `r` with some `Value` `v`, prepend `r` with @@ -596,8 +645,8 @@ syntax `@a@b[]`, i.e. an empty sequence annotated with two symbols, The total ordering specified [above](#total-order) means that the following statements are true: - "bzz" < "c" < "caa" - #t < 3.0f < 3.0 < 3 < "3" < |3| < [] + "bzz" < "c" < "caa" < #!"a" + #t < 3.0f < 3.0 < 3 < "3" < |3| < [] < #!#t ### Simple examples. @@ -774,10 +823,6 @@ the same `Value` to yield different binary `Repr`s. ## Acknowledgements -The use of the low-order bits in certain SignedInteger tags for the -length of the following data is inspired by a similar feature of -[CBOR](http://cbor.io/). - The treatment of commas as whitespace in the text syntax is inspired by the same feature of [EDN](https://github.com/edn-format/edn). @@ -810,7 +855,8 @@ a binary-syntax document; otherwise, it should be interpreted as text. 83 - Double 84 - End marker 85 - Annotation - (8x) RESERVED 86-8F + 86 - Pointer + (8x) RESERVED 87-8F 9x - Small integers 0..12,-3..-1 An - Small integers, (n+1) bytes long diff --git a/tests/samples.bin b/tests/samples.bin index 7fadda1..a832fc3 100644 Binary files a/tests/samples.bin and b/tests/samples.bin differ diff --git a/tests/samples.txt b/tests/samples.txt index 5a434e1..a7c21c5 100644 --- a/tests/samples.txt +++ b/tests/samples.txt @@ -111,6 +111,9 @@ list9: @"Unexpected close bracket" list10: @"Missing end byte" noinput0: @"No input at all" + pointer0: + pointer1: + pointer2: record1: >> record2: , >>>> record3: "Dr">>