Introduce pointers

This commit is contained in:
Tony Garnock-Jones 2021-01-29 12:03:28 +01:00
parent 6bf49874b7
commit 532e811894
31 changed files with 630 additions and 247 deletions

View File

@ -1,6 +1,6 @@
{ {
"name": "preserves", "name": "preserves",
"version": "0.4.0", "version": "0.5.0",
"description": "Experimental data serialization format", "description": "Experimental data serialization format",
"homepage": "https://gitlab.com/preserves/preserves", "homepage": "https://gitlab.com/preserves/preserves",
"license": "Apache-2.0", "license": "Apache-2.0",

View File

@ -14,6 +14,17 @@ import { PreserveOn } from './symbols';
export type ErrorType = 'DecodeError' | 'EncodeError' | 'ShortPacket'; export type ErrorType = 'DecodeError' | 'EncodeError' | 'ShortPacket';
export const ErrorType = Symbol.for('ErrorType'); export const ErrorType = Symbol.for('ErrorType');
/**
 * The set of inputs accepted by `Encoder.push` and `encode`: a plain
 * `Value<T>`, an object that serializes itself (`Preservable<T>`), any
 * iterable of values, or a raw `ArrayBufferView`.
 */
export type Encodable<T extends object> =
Value<T> | Preservable<T> | Iterable<Value<T>> | ArrayBufferView;
/**
 * Contract for objects that write their own encoding. When `Encoder.push`
 * is handed such an object it calls the `[PreserveOn]` method, passing the
 * encoder, rather than applying its built-in per-type encoding rules.
 */
export interface Preservable<T extends object> {
// Emit this object's representation into `encoder`.
[PreserveOn](encoder: Encoder<T>): void;
}
/**
 * Type guard for `Preservable<T>`.
 *
 * @param v - any value
 * @returns `true` only when `v` is a non-null object whose `[PreserveOn]`
 *          member is callable; primitives, `null` and functions yield `false`.
 */
export function isPreservable<T extends object>(v: any): v is Preservable<T> {
    // `typeof null` is 'object', so null has to be excluded explicitly.
    if (v === null || typeof v !== 'object') {
        return false;
    }
    return typeof v[PreserveOn] === 'function';
}
export abstract class PreservesCodecError { export abstract class PreservesCodecError {
abstract get [ErrorType](): ErrorType; abstract get [ErrorType](): ErrorType;
@ -53,16 +64,17 @@ export class ShortPacket extends DecodeError {
} }
} }
export interface DecoderOptions { export interface DecoderOptions<T extends object> {
includeAnnotations?: boolean; includeAnnotations?: boolean;
decodePointer?: (v: Value<T>) => T;
} }
export class Decoder { export class Decoder<T extends object> {
packet: Uint8Array; packet: Uint8Array;
index: number; index: number;
options: DecoderOptions; options: DecoderOptions<T>;
constructor(packet: BytesLike = new Uint8Array(0), options: DecoderOptions = {}) { constructor(packet: BytesLike = new Uint8Array(0), options: DecoderOptions<T> = {}) {
this.packet = underlying(packet); this.packet = underlying(packet);
this.index = 0; this.index = 0;
this.options = options; this.options = options;
@ -104,7 +116,7 @@ export class Decoder {
return matched; return matched;
} }
nextvalues(): Value[] { nextvalues(): Value<T>[] {
const result = []; const result = [];
while (!this.peekend()) result.push(this.next()); while (!this.peekend()) result.push(this.next());
return result; return result;
@ -119,12 +131,12 @@ export class Decoder {
return acc; return acc;
} }
wrap(v: Value): Value { wrap(v: Value<T>): Value<T> {
return this.includeAnnotations ? new Annotated(v) : v; return this.includeAnnotations ? new Annotated(v) : v;
} }
static dictionaryFromArray(vs: Value[]): Dictionary<Value> { static dictionaryFromArray<T extends object>(vs: Value<T>[]): Dictionary<T, Value<T>> {
const d = new Dictionary<Value>(); const d = new Dictionary<T, Value<T>>();
if (vs.length % 2) throw new DecodeError("Missing dictionary value"); if (vs.length % 2) throw new DecodeError("Missing dictionary value");
for (let i = 0; i < vs.length; i += 2) { for (let i = 0; i < vs.length; i += 2) {
d.set(vs[i], vs[i+1]); d.set(vs[i], vs[i+1]);
@ -132,14 +144,14 @@ export class Decoder {
return d; return d;
} }
unshiftAnnotation(a: Value, v: Annotated) { unshiftAnnotation(a: Value<T>, v: Annotated<T>) {
if (this.includeAnnotations) { if (this.includeAnnotations) {
v.annotations.unshift(a); v.annotations.unshift(a);
} }
return v; return v;
} }
next(): Value { next(): Value<T> {
const tag = this.nextbyte(); const tag = this.nextbyte();
switch (tag) { switch (tag) {
case Tag.False: return this.wrap(false); case Tag.False: return this.wrap(false);
@ -149,9 +161,16 @@ export class Decoder {
case Tag.End: throw new DecodeError("Unexpected Compound end marker"); case Tag.End: throw new DecodeError("Unexpected Compound end marker");
case Tag.Annotation: { case Tag.Annotation: {
const a = this.next(); const a = this.next();
const v = this.next() as Annotated; const v = this.next() as Annotated<T>;
return this.unshiftAnnotation(a, v); return this.unshiftAnnotation(a, v);
} }
case Tag.Pointer: {
const d = this.options.decodePointer;
if (d === void 0) {
throw new DecodeError("No decodePointer function supplied");
}
return this.wrap(d(this.next()));
}
case Tag.SignedInteger: return this.wrap(this.nextint(this.varint())); case Tag.SignedInteger: return this.wrap(this.nextint(this.varint()));
case Tag.String: return this.wrap(Bytes.from(this.nextbytes(this.varint())).fromUtf8()); case Tag.String: return this.wrap(Bytes.from(this.nextbytes(this.varint())).fromUtf8());
case Tag.ByteString: return this.wrap(Bytes.from(this.nextbytes(this.varint()))); case Tag.ByteString: return this.wrap(Bytes.from(this.nextbytes(this.varint())));
@ -192,30 +211,35 @@ export class Decoder {
} }
} }
export function decode(bs: BytesLike, options?: DecoderOptions) { export function decode<T extends object>(bs: BytesLike, options?: DecoderOptions<T>) {
return new Decoder(bs, options).next(); return new Decoder(bs, options).next();
} }
export function decodeWithAnnotations(bs: BytesLike, options: DecoderOptions = {}): Annotated { export function decodeWithAnnotations<T extends object>(bs: BytesLike, options: DecoderOptions<T> = {}): Annotated<T> {
return decode(bs, { ... options, includeAnnotations: true }) as Annotated; return decode(bs, { ... options, includeAnnotations: true }) as Annotated<T>;
} }
export interface EncoderOptions { export interface EncoderOptions<T extends object> {
canonical?: boolean; canonical?: boolean;
includeAnnotations?: boolean; includeAnnotations?: boolean;
encodePointer?: (v: T) => Value<T>;
} }
function chunkStr(bs: Uint8Array): string { function chunkStr(bs: Uint8Array): string {
return String.fromCharCode.apply(null, bs as any as number[]); return String.fromCharCode.apply(null, bs as any as number[]);
} }
export class Encoder { function isIterable<T>(v: any): v is Iterable<T> {
return typeof v === 'object' && v !== null && typeof v[Symbol.iterator] === 'function';
}
export class Encoder<T extends object> {
chunks: Array<Uint8Array>; chunks: Array<Uint8Array>;
view: DataView; view: DataView;
index: number; index: number;
options: EncoderOptions; options: EncoderOptions<T>;
constructor(options: EncoderOptions = {}) { constructor(options: EncoderOptions<T> = {}) {
this.chunks = []; this.chunks = [];
this.view = new DataView(new ArrayBuffer(256)); this.view = new DataView(new ArrayBuffer(256));
this.index = 0; this.index = 0;
@ -310,7 +334,7 @@ export class Encoder {
this.emitbytes(bs); this.emitbytes(bs);
} }
encodevalues(tag: Tag, items: Iterable<Value>) { encodevalues(tag: Tag, items: Iterable<Value<T>>) {
this.emitbyte(tag); this.emitbyte(tag);
for (let i of items) { this.push(i); } for (let i of items) { this.push(i); }
this.emitbyte(Tag.End); this.emitbyte(Tag.End);
@ -322,8 +346,11 @@ export class Encoder {
this.emitbyte(Tag.End); this.emitbyte(Tag.End);
} }
push(v: any) { push(v: Encodable<T>) {
if (typeof v?.[PreserveOn] === 'function') { if (isPreservable<never>(v)) {
v[PreserveOn](this as unknown as Encoder<never>);
}
else if (isPreservable<T>(v)) {
v[PreserveOn](this); v[PreserveOn](this);
} }
else if (typeof v === 'boolean') { else if (typeof v === 'boolean') {
@ -355,23 +382,36 @@ export class Encoder {
else if (Array.isArray(v)) { else if (Array.isArray(v)) {
this.encodevalues(Tag.Sequence, v); this.encodevalues(Tag.Sequence, v);
} }
else if (typeof v?.[Symbol.iterator] === 'function') { else if (isIterable<Value<T>>(v)) {
this.encodevalues(Tag.Sequence, v as Iterable<Value>); this.encodevalues(Tag.Sequence, v as Iterable<Value<T>>);
} }
else { else {
throw new EncodeError("Cannot encode", v); const e = this.options.encodePointer ?? pointerId;
this.emitbyte(Tag.Pointer);
this.push(e(v));
} }
return this; // for chaining return this; // for chaining
} }
} }
export function encode(v: any, options?: EncoderOptions): Bytes { export function encode<T extends object>(v: Encodable<T>, options?: EncoderOptions<T>): Bytes {
return new Encoder(options).push(v).contents(); return new Encoder(options).push(v).contents();
} }
// Next identifier to hand out; only ever increases.
let _nextId = 0;
// Identity-keyed table of identifiers already assigned. A WeakMap is used so
// that remembering an object's id does not by itself keep the object alive.
const _registry = new WeakMap<object, number>();

/**
 * Returns a number identifying `v` by object identity.
 *
 * The first call for a given object assigns it the next counter value
 * (starting at 0); every later call with the same object returns that same
 * number. `Encoder.push` uses this as the fallback pointer encoding when no
 * `encodePointer` option is supplied.
 *
 * @param v - the object to identify
 * @returns the identifier associated with `v`
 */
export function pointerId(v: object): number {
    const known = _registry.get(v);
    if (known !== undefined) {
        return known;
    }
    const fresh = _nextId;
    _nextId += 1;
    _registry.set(v, fresh);
    return fresh;
}
const _canonicalEncoder = new Encoder({ canonical: true }); const _canonicalEncoder = new Encoder({ canonical: true });
let _usingCanonicalEncoder = false; let _usingCanonicalEncoder = false;
export function canonicalEncode(v: any, options?: EncoderOptions): Bytes { export function canonicalEncode(v: Encodable<any>, options?: EncoderOptions<any>): Bytes {
if (options === void 0 && !_usingCanonicalEncoder) { if (options === void 0 && !_usingCanonicalEncoder) {
_usingCanonicalEncoder = true; _usingCanonicalEncoder = true;
const bs = _canonicalEncoder.push(v).contents(); const bs = _canonicalEncoder.push(v).contents();
@ -382,10 +422,10 @@ export function canonicalEncode(v: any, options?: EncoderOptions): Bytes {
} }
} }
export function canonicalString(v: any): string { export function canonicalString(v: Encodable<any>): string {
return _canonicalEncoder.push(v).contentsString(); return _canonicalEncoder.push(v).contentsString();
} }
export function encodeWithAnnotations(v: any, options: EncoderOptions = {}): Bytes { export function encodeWithAnnotations<T extends object>(v: Encodable<T>, options: EncoderOptions<T> = {}): Bytes {
return encode(v, { ... options, includeAnnotations: true }); return encode(v, { ... options, includeAnnotations: true });
} }

View File

@ -5,6 +5,7 @@ export enum Tag {
Double, Double,
End, End,
Annotation, Annotation,
Pointer,
SmallInteger_lo = 0x90, SmallInteger_lo = 0x90,
MediumInteger_lo = 0xa0, MediumInteger_lo = 0xa0,

View File

@ -12,7 +12,7 @@ export function stringify(x: any): string {
} }
} }
export function preserves(pieces: TemplateStringsArray, ...values: Value[]): string { export function preserves(pieces: TemplateStringsArray, ...values: Value<any>[]): string {
const result = [pieces[0]]; const result = [pieces[0]];
values.forEach((v, i) => { values.forEach((v, i) => {
result.push(stringify(v)); result.push(stringify(v));

View File

@ -2,22 +2,22 @@
import { PreserveOn, AsPreserve } from './symbols'; import { PreserveOn, AsPreserve } from './symbols';
import { Tag } from './constants'; import { Tag } from './constants';
import { Encoder, canonicalEncode, canonicalString } from './codec'; import { Encoder, canonicalEncode, canonicalString, Preservable } from './codec';
import { stringify } from './text'; import { stringify } from './text';
import { _iterMap, FlexMap, FlexSet } from './flex'; import { _iterMap, FlexMap, FlexSet } from './flex';
const textEncoder = new TextEncoder(); const textEncoder = new TextEncoder();
const textDecoder = new TextDecoder(); const textDecoder = new TextDecoder();
export type Value = Atom | Compound | Annotated; export type Value<T extends object> = Atom | Compound<T> | T | Annotated<T>;
export type Atom = boolean | Single | Double | number | string | Bytes | symbol; export type Atom = boolean | Single | Double | number | string | Bytes | symbol;
export type Compound = Record | Array<Value> | Set | Dictionary<Value>; export type Compound<T extends object> = Record<T> | Array<Value<T>> | Set<T> | Dictionary<T, Value<T>>;
export const IsPreservesRecord = Symbol.for('IsPreservesRecord'); export const IsPreservesRecord = Symbol.for('IsPreservesRecord');
export const IsPreservesBytes = Symbol.for('IsPreservesBytes'); export const IsPreservesBytes = Symbol.for('IsPreservesBytes');
export const IsPreservesAnnotated = Symbol.for('IsPreservesAnnotated'); export const IsPreservesAnnotated = Symbol.for('IsPreservesAnnotated');
export function fromJS(x: any): Value { export function fromJS<T extends object>(x: any): Value<T> {
switch (typeof x) { switch (typeof x) {
case 'number': case 'number':
if (!Number.isInteger(x)) { if (!Number.isInteger(x)) {
@ -32,6 +32,7 @@ export function fromJS(x: any): Value {
case 'undefined': case 'undefined':
case 'function': case 'function':
case 'bigint':
break; break;
case 'object': case 'object':
@ -41,16 +42,20 @@ export function fromJS(x: any): Value {
if (typeof x[AsPreserve] === 'function') { if (typeof x[AsPreserve] === 'function') {
return x[AsPreserve](); return x[AsPreserve]();
} }
if (Record.isRecord(x)) { if (Record.isRecord<T>(x)) {
return x; return x;
} }
if (Array.isArray(x)) { if (Array.isArray(x)) {
return x.map(fromJS); return (x as Array<Value<T>>).map<Value<T>>(fromJS);
} }
if (ArrayBuffer.isView(x) || x instanceof ArrayBuffer) { if (ArrayBuffer.isView(x) || x instanceof ArrayBuffer) {
return Bytes.from(x); return Bytes.from(x);
} }
return Dictionary.fromJS(x); // Just... assume it's a T.
return (x as T);
default:
break;
} }
throw new TypeError("Cannot represent JavaScript value as Preserves: " + x); throw new TypeError("Cannot represent JavaScript value as Preserves: " + x);
@ -89,12 +94,12 @@ export abstract class Float {
static isDouble = (x: any): x is Double => Float.isFloat(x, 'Double'); static isDouble = (x: any): x is Double => Float.isFloat(x, 'Double');
} }
export class Single extends Float { export class Single extends Float implements Preservable<never> {
[AsPreserve](): Value { [AsPreserve]<T extends object>(): Value<T> {
return this; return this;
} }
[PreserveOn](encoder: Encoder) { [PreserveOn](encoder: Encoder<never>) {
encoder.emitbyte(Tag.Float); encoder.emitbyte(Tag.Float);
encoder.makeroom(4); encoder.makeroom(4);
encoder.view.setFloat32(encoder.index, this.value, false); encoder.view.setFloat32(encoder.index, this.value, false);
@ -110,12 +115,12 @@ export class Single extends Float {
} }
} }
export class Double extends Float { export class Double extends Float implements Preservable<never> {
[AsPreserve](): Value { [AsPreserve]<T extends object>(): Value<T> {
return this; return this;
} }
[PreserveOn](encoder: Encoder) { [PreserveOn](encoder: Encoder<never>) {
encoder.emitbyte(Tag.Double); encoder.emitbyte(Tag.Double);
encoder.makeroom(8); encoder.makeroom(8);
encoder.view.setFloat64(encoder.index, this.value, false); encoder.view.setFloat64(encoder.index, this.value, false);
@ -133,7 +138,7 @@ export class Double extends Float {
export type BytesLike = Bytes | Uint8Array; export type BytesLike = Bytes | Uint8Array;
export class Bytes { export class Bytes implements Preservable<never> {
readonly _view: Uint8Array; readonly _view: Uint8Array;
constructor(maybeByteIterable: any = new Uint8Array()) { constructor(maybeByteIterable: any = new Uint8Array()) {
@ -249,7 +254,7 @@ export class Bytes {
return this.asPreservesText(); return this.asPreservesText();
} }
[AsPreserve](): Value { [AsPreserve]<T extends object>(): Value<T> {
return this; return this;
} }
@ -282,7 +287,7 @@ export class Bytes {
return nibbles.join(''); return nibbles.join('');
} }
[PreserveOn](encoder: Encoder) { [PreserveOn](encoder: Encoder<never>) {
encoder.emitbyte(Tag.ByteString); encoder.emitbyte(Tag.ByteString);
encoder.varint(this.length); encoder.varint(this.length);
encoder.emitbytes(this._view); encoder.emitbytes(this._view);
@ -313,13 +318,15 @@ export function underlying(b: Bytes | Uint8Array): Uint8Array {
} }
declare global { declare global {
interface Boolean { asPreservesText(): string; } interface Object { asPreservesText(): string; }
interface Number { asPreservesText(): string; }
interface String { asPreservesText(): string; }
interface Symbol { asPreservesText(): string; }
interface Array<T> { asPreservesText(): string; }
} }
// Fallback text rendering inherited by every object that does not supply its
// own `asPreservesText`: the literal prefix '#!' followed by the object's
// JSON serialization.
// NOTE(review): JSON.stringify throws on cyclic structures and BigInt values,
// so this fallback can throw for such objects -- confirm that is acceptable.
Object.defineProperty(Object.prototype, 'asPreservesText', {
// Non-enumerable so the method does not show up in `for...in` loops over
// ordinary objects.
enumerable: false,
// Writable so that the plain assignments to Boolean.prototype,
// Array.prototype, etc. can shadow it; assigning over an inherited
// non-writable property would be rejected.
writable: true,
value: function(): string { return '#!' + JSON.stringify(this); }
});
Boolean.prototype.asPreservesText = function (): string { Boolean.prototype.asPreservesText = function (): string {
return this ? '#t' : '#f'; return this ? '#t' : '#f';
}; };
@ -338,7 +345,7 @@ Symbol.prototype.asPreservesText = function (): string {
}; };
Array.prototype.asPreservesText = function (): string { Array.prototype.asPreservesText = function (): string {
return '[' + this.map((i: Value) => i.asPreservesText()).join(', ') + ']'; return '[' + this.map((i: Value<any>) => i.asPreservesText()).join(', ') + ']';
}; };
// Uint8Array / TypedArray methods // Uint8Array / TypedArray methods
@ -409,10 +416,10 @@ keys lastIndexOf reduce reduceRight some toLocaleString values`.split(/\s+/))
Bytes.prototype[Symbol.iterator] = function () { return this._view[Symbol.iterator](); }; Bytes.prototype[Symbol.iterator] = function () { return this._view[Symbol.iterator](); };
})(); })();
export class Record extends Array<Value> { export class Record<T extends object> extends Array<Value<T>> {
readonly label: Value; readonly label: Value<T>;
constructor(label: Value, fieldsJS: any[]) { constructor(label: Value<T>, fieldsJS: any[]) {
if (arguments.length === 1) { if (arguments.length === 1) {
// Using things like someRecord.map() involves the runtime // Using things like someRecord.map() involves the runtime
// apparently instantiating instances of this.constructor // apparently instantiating instances of this.constructor
@ -430,15 +437,15 @@ export class Record extends Array<Value> {
Object.freeze(this); Object.freeze(this);
} }
get(index: number, defaultValue?: Value): Value | undefined { get(index: number, defaultValue?: Value<T>): Value<T> | undefined {
return (index < this.length) ? this[index] : defaultValue; return (index < this.length) ? this[index] : defaultValue;
} }
set(index: number, newValue: Value): Record { set(index: number, newValue: Value<T>): Record<T> {
return new Record(this.label, this.map((f, i) => (i === index) ? newValue : f)); return new Record(this.label, this.map((f, i) => (i === index) ? newValue : f));
} }
getConstructorInfo(): RecordConstructorInfo { getConstructorInfo(): RecordConstructorInfo<T> {
return { label: this.label, arity: this.length }; return { label: this.label, arity: this.length };
} }
@ -448,13 +455,13 @@ export class Record extends Array<Value> {
this.every((f, i) => is(f, other.get(i))); this.every((f, i) => is(f, other.get(i)));
} }
hashCode(): number { // hashCode(): number {
let h = hash(this.label); // let h = hash(this.label);
this.forEach((f) => h = ((31 * h) + hash(f)) | 0); // this.forEach((f) => h = ((31 * h) + hash(f)) | 0);
return h; // return h;
} // }
static fallbackToString: (f: Value) => string = (_f) => '<unprintable_preserves_field_value>'; static fallbackToString: (f: Value<any>) => string = (_f) => '<unprintable_preserves_field_value>';
toString(): string { toString(): string {
return this.asPreservesText(); return this.asPreservesText();
@ -475,26 +482,26 @@ export class Record extends Array<Value> {
}).join(', ') + ')'; }).join(', ') + ')';
} }
static makeConstructor(labelSymbolText: string, fieldNames: string[]) { static makeConstructor<T extends object>(labelSymbolText: string, fieldNames: string[]): RecordConstructor<T> {
return Record.makeBasicConstructor(Symbol.for(labelSymbolText), fieldNames); return Record.makeBasicConstructor(Symbol.for(labelSymbolText), fieldNames);
} }
static makeBasicConstructor(label0: any, fieldNames: string[]): RecordConstructor { static makeBasicConstructor<T extends object>(label0: any, fieldNames: string[]): RecordConstructor<T> {
const label = fromJS(label0); const label = fromJS<T>(label0);
const arity = fieldNames.length; const arity = fieldNames.length;
const ctor: RecordConstructor = (...fields: any[]) => { const ctor: RecordConstructor<T> = (...fields: any[]): Record<T> => {
if (fields.length !== arity) { if (fields.length !== arity) {
throw new Error("Record: cannot instantiate " + (label && label.toString()) + throw new Error("Record: cannot instantiate " + (label && label.toString()) +
" expecting " + arity + " fields with " + fields.length + " fields"); " expecting " + arity + " fields with " + fields.length + " fields");
} }
return new Record(label, fields); return new Record<T>(label, fields);
}; };
const constructorInfo = { label, arity }; const constructorInfo = { label, arity };
ctor.constructorInfo = constructorInfo; ctor.constructorInfo = constructorInfo;
ctor.isClassOf = (v: any): v is Record => Record.isClassOf(constructorInfo, v); ctor.isClassOf = (v: any): v is Record<T> => Record.isClassOf(constructorInfo, v);
ctor._ = {}; ctor._ = {};
fieldNames.forEach((name, i) => { fieldNames.forEach((name, i) => {
ctor._[name] = function (r: any): Value | undefined { ctor._[name] = function (r: any): Value<T> | undefined {
if (!ctor.isClassOf(r)) { if (!ctor.isClassOf(r)) {
throw new Error("Record: attempt to retrieve field "+label.toString()+"."+name+ throw new Error("Record: attempt to retrieve field "+label.toString()+"."+name+
" from non-"+label.toString()+": "+(r && r.toString())); " from non-"+label.toString()+": "+(r && r.toString()));
@ -505,7 +512,7 @@ export class Record extends Array<Value> {
return ctor; return ctor;
} }
[PreserveOn](encoder: Encoder) { [PreserveOn](encoder: Encoder<T>) {
encoder.emitbyte(Tag.Record); encoder.emitbyte(Tag.Record);
encoder.push(this.label); encoder.push(this.label);
this.forEach((f) => encoder.push(f)); this.forEach((f) => encoder.push(f));
@ -516,24 +523,24 @@ export class Record extends Array<Value> {
return true; return true;
} }
static isRecord(x: any): x is Record { static isRecord<T extends object>(x: any): x is Record<T> {
return !!x?.[IsPreservesRecord]; return !!x?.[IsPreservesRecord];
} }
static isClassOf(ci: RecordConstructorInfo, v: any): v is Record { static isClassOf<T extends object>(ci: RecordConstructorInfo<T>, v: any): v is Record<T> {
return (Record.isRecord(v)) && is(ci.label, v.label) && (ci.arity === v.length); return (Record.isRecord(v)) && is(ci.label, v.label) && (ci.arity === v.length);
} }
} }
export interface RecordConstructor { export interface RecordConstructor<T extends object> {
(...fields: any[]): Record; (...fields: any[]): Record<T>;
constructorInfo: RecordConstructorInfo; constructorInfo: RecordConstructorInfo<T>;
isClassOf(v: any): v is Record; isClassOf(v: any): v is Record<T>;
_: { [getter: string]: (r: any) => Value | undefined }; _: { [getter: string]: (r: any) => Value<T> | undefined };
} }
export interface RecordConstructorInfo { export interface RecordConstructorInfo<T extends object> {
label: Value; label: Value<T>;
arity: number; arity: number;
} }
@ -544,7 +551,7 @@ export function is(a: any, b: any): boolean {
if (typeof a !== typeof b) return false; if (typeof a !== typeof b) return false;
if (typeof a === 'object') { if (typeof a === 'object') {
if (a === null || b === null) return false; if (a === null || b === null) return false;
if ('equals' in a) return a.equals(b, is); if ('equals' in a && typeof a.equals === 'function') return a.equals(b, is);
if (Array.isArray(a) && Array.isArray(b)) { if (Array.isArray(a) && Array.isArray(b)) {
if (a.length !== b.length) return false; if (a.length !== b.length) return false;
for (let i = 0; i < a.length; i++) if (!is(a[i], b[i])) return false; for (let i = 0; i < a.length; i++) if (!is(a[i], b[i])) return false;
@ -554,36 +561,32 @@ export function is(a: any, b: any): boolean {
return false; return false;
} }
export function hash(a: Value): number {
throw new Error("shouldBeImplemented"); // TODO
}
export type DictionaryType = 'Dictionary' | 'Set'; export type DictionaryType = 'Dictionary' | 'Set';
export const DictionaryType = Symbol.for('DictionaryType'); export const DictionaryType = Symbol.for('DictionaryType');
export class Dictionary<T> extends FlexMap<Value, T> { export class Dictionary<T extends object, V> extends FlexMap<Value<T>, V> {
get [DictionaryType](): DictionaryType { get [DictionaryType](): DictionaryType {
return 'Dictionary'; return 'Dictionary';
} }
static isDictionary<T>(x: any): x is Dictionary<T> { static isDictionary<T extends object, V>(x: any): x is Dictionary<T, V> {
return x?.[DictionaryType] === 'Dictionary'; return x?.[DictionaryType] === 'Dictionary';
} }
static fromJS(x: object): Dictionary<Value> { static fromJS<T extends object, V extends object>(x: object): Dictionary<T, Value<V>> {
if (Dictionary.isDictionary(x)) return x as Dictionary<Value>; if (Dictionary.isDictionary<T, V>(x)) return x as Dictionary<T, Value<V>>;
const d = new Dictionary<Value>(); const d = new Dictionary<T, Value<V>>();
Object.entries(x).forEach(([key, value]) => d.set(key, fromJS(value))); Object.entries(x).forEach(([key, value]) => d.set(key, fromJS(value)));
return d; return d;
} }
constructor(items?: Iterable<readonly [any, T]>) { constructor(items?: Iterable<readonly [any, V]>) {
const iter = items?.[Symbol.iterator](); const iter = items?.[Symbol.iterator]();
super(canonicalString, iter === void 0 ? void 0 : _iterMap(iter, ([k,v]) => [fromJS(k), v])); super(canonicalString, iter === void 0 ? void 0 : _iterMap(iter, ([k,v]) => [fromJS(k), v]));
} }
mapEntries<R>(f: (entry: [Value, T]) => [Value, R]): Dictionary<R> { mapEntries<R extends object, W>(f: (entry: [Value<T>, V]) => [Value<R>, W]): Dictionary<R, W> {
const result = new Dictionary<R>(); const result = new Dictionary<R, W>();
for (let oldEntry of this.entries()) { for (let oldEntry of this.entries()) {
const newEntry = f(oldEntry); const newEntry = f(oldEntry);
result.set(newEntry[0], newEntry[1]) result.set(newEntry[0], newEntry[1])
@ -598,7 +601,7 @@ export class Dictionary<T> extends FlexMap<Value, T> {
'}'; '}';
} }
clone(): Dictionary<T> { clone(): Dictionary<T, V> {
return new Dictionary(this); return new Dictionary(this);
} }
@ -608,7 +611,7 @@ export class Dictionary<T> extends FlexMap<Value, T> {
get [Symbol.toStringTag]() { return 'Dictionary'; } get [Symbol.toStringTag]() { return 'Dictionary'; }
[PreserveOn](encoder: Encoder) { [PreserveOn](encoder: Encoder<T>) {
if (encoder.canonical) { if (encoder.canonical) {
const pieces = Array.from(this).map(([k, v]) => const pieces = Array.from(this).map(([k, v]) =>
Bytes.concat([canonicalEncode(k), canonicalEncode(v)])); Bytes.concat([canonicalEncode(k), canonicalEncode(v)]));
@ -618,33 +621,33 @@ export class Dictionary<T> extends FlexMap<Value, T> {
encoder.emitbyte(Tag.Dictionary); encoder.emitbyte(Tag.Dictionary);
this.forEach((v, k) => { this.forEach((v, k) => {
encoder.push(k); encoder.push(k);
encoder.push(v); encoder.push(v as unknown as Value<T>); // Suuuuuuuper unsound
}); });
encoder.emitbyte(Tag.End); encoder.emitbyte(Tag.End);
} }
} }
} }
export class Set extends FlexSet<Value> { export class Set<T extends object> extends FlexSet<Value<T>> {
get [DictionaryType](): DictionaryType { get [DictionaryType](): DictionaryType {
return 'Set'; return 'Set';
} }
static isSet(x: any): x is Set { static isSet<T extends object>(x: any): x is Set<T> {
return x?.[DictionaryType] === 'Set'; return x?.[DictionaryType] === 'Set';
} }
constructor(items?: Iterable<any>) { constructor(items?: Iterable<any>) {
const iter = items?.[Symbol.iterator](); const iter = items?.[Symbol.iterator]();
super(canonicalString, iter === void 0 ? void 0 : _iterMap(iter, fromJS)); super(canonicalString, iter === void 0 ? void 0 : _iterMap<any, Value<T>>(iter, fromJS));
} }
map(f: (value: Value) => Value): Set { map<R extends object>(f: (value: Value<T>) => Value<R>): Set<R> {
return new Set(_iterMap(this[Symbol.iterator](), f)); return new Set(_iterMap(this[Symbol.iterator](), f));
} }
filter(f: (value: Value) => boolean): Set { filter(f: (value: Value<T>) => boolean): Set<T> {
const result = new Set(); const result = new Set<T>();
for (let k of this) if (f(k)) result.add(k); for (let k of this) if (f(k)) result.add(k);
return result; return result;
} }
@ -659,13 +662,13 @@ export class Set extends FlexSet<Value> {
'}'; '}';
} }
clone(): Set { clone(): Set<T> {
return new Set(this); return new Set(this);
} }
get [Symbol.toStringTag]() { return 'Set'; } get [Symbol.toStringTag]() { return 'Set'; }
[PreserveOn](encoder: Encoder) { [PreserveOn](encoder: Encoder<T>) {
if (encoder.canonical) { if (encoder.canonical) {
const pieces = Array.from(this).map(k => canonicalEncode(k)); const pieces = Array.from(this).map(k => canonicalEncode(k));
pieces.sort(Bytes.compare); pieces.sort(Bytes.compare);
@ -676,20 +679,20 @@ export class Set extends FlexSet<Value> {
} }
} }
export class Annotated { export class Annotated<T extends object> {
readonly annotations: Array<Value>; readonly annotations: Array<Value<T>>;
readonly item: Value; readonly item: Value<T>;
constructor(item: Value) { constructor(item: Value<T>) {
this.annotations = []; this.annotations = [];
this.item = item; this.item = item;
} }
[AsPreserve](): Value { [AsPreserve](): Value<T> {
return this; return this;
} }
[PreserveOn](encoder: Encoder) { [PreserveOn](encoder: Encoder<T>) {
if (encoder.includeAnnotations) { if (encoder.includeAnnotations) {
for (const a of this.annotations) { for (const a of this.annotations) {
encoder.emitbyte(Tag.Annotation); encoder.emitbyte(Tag.Annotation);
@ -703,9 +706,9 @@ export class Annotated {
return is(this.item, Annotated.isAnnotated(other) ? other.item : other); return is(this.item, Annotated.isAnnotated(other) ? other.item : other);
} }
hashCode(): number { // hashCode(): number {
return hash(this.item); // return hash(this.item);
} // }
toString(): string { toString(): string {
return this.asPreservesText(); return this.asPreservesText();
@ -720,30 +723,30 @@ export class Annotated {
return true; return true;
} }
static isAnnotated(x: any): x is Annotated { static isAnnotated<T extends object>(x: any): x is Annotated<T> {
return !!x?.[IsPreservesAnnotated]; return !!x?.[IsPreservesAnnotated];
} }
} }
export function peel(v: Value): Value { export function peel<T extends object>(v: Value<T>): Value<T> {
return strip(v, 1); return strip(v, 1);
} }
export function strip(v: Value, depth: number = Infinity) { export function strip<T extends object>(v: Value<T>, depth: number = Infinity): Value<T> {
function step(v: Value, depth: number): Value { function step(v: Value<T>, depth: number): Value<T> {
if (depth === 0) return v; if (depth === 0) return v;
if (!Annotated.isAnnotated(v)) return v; if (!Annotated.isAnnotated<T>(v)) return v;
const nextDepth = depth - 1; const nextDepth = depth - 1;
function walk(v: Value) { return step(v, nextDepth); } function walk(v: Value<T>): Value<T> { return step(v, nextDepth); }
if (Record.isRecord(v.item)) { if (Record.isRecord<T>(v.item)) {
return new Record(step(v.item.label, depth), v.item.map(walk)); return new Record(step(v.item.label, depth), v.item.map(walk));
} else if (Array.isArray(v.item)) { } else if (Array.isArray(v.item)) {
return v.item.map(walk); return v.item.map(walk);
} else if (Set.isSet(v.item)) { } else if (Set.isSet<T>(v.item)) {
return v.item.map(walk); return v.item.map(walk);
} else if (Dictionary.isDictionary(v.item)) { } else if (Dictionary.isDictionary<T, Value<T>>(v.item)) {
return v.item.mapEntries((e) => [walk(e[0]), walk(e[1])]); return v.item.mapEntries((e) => [walk(e[0]), walk(e[1])]);
} else if (Annotated.isAnnotated(v.item)) { } else if (Annotated.isAnnotated(v.item)) {
throw new Error("Improper annotation structure"); throw new Error("Improper annotation structure");
@ -754,8 +757,8 @@ export function strip(v: Value, depth: number = Infinity) {
return step(v, depth); return step(v, depth);
} }
export function annotate(v0: Value, ...anns: Value[]) { export function annotate<T extends object>(v0: Value<T>, ...anns: Value<T>[]): Annotated<T> {
const v = Annotated.isAnnotated(v0) ? v0 : new Annotated(v0); const v = Annotated.isAnnotated<T>(v0) ? v0 : new Annotated(v0);
anns.forEach((a) => v.annotations.push(a)); anns.forEach((a) => v.annotations.push(a));
return v; return v;
} }

View File

@ -1,21 +1,43 @@
import { import {
Value, Value,
Dictionary, Dictionary,
decode, decodeWithAnnotations, encodeWithAnnotations, canonicalEncode, decode, decodeWithAnnotations, encode, encodeWithAnnotations, canonicalEncode,
DecodeError, ShortPacket, DecodeError, ShortPacket,
Bytes, Record, Bytes, Record,
annotate, annotate,
strip, peel, strip, peel,
preserves, preserves,
fromJS, fromJS,
Constants,
} from '../src/index'; } from '../src/index';
const { Tag } = Constants;
import './test-utils'; import './test-utils';
import * as fs from 'fs'; import * as fs from 'fs';
const Discard = Record.makeConstructor('discard', []); class Pointer {
const Capture = Record.makeConstructor('capture', ['pattern']); v: Value<Pointer>;
const Observe = Record.makeConstructor('observe', ['pattern']);
constructor(v: Value<Pointer>) {
this.v = v;
}
equals(other: any, is: (a: any, b: any) => boolean) {
return Object.is(other.constructor, this.constructor) && is(this.v, other.v);
}
}
/**
 * Pointer decoder used by this test suite: passes the decoded value
 * through `strip` and wraps the result in a fresh `Pointer`.
 */
function decodePointer(v: Value<Pointer>): Pointer {
    const stripped = strip(v);
    return new Pointer(stripped);
}
/**
 * Pointer encoder used by this test suite: a `Pointer` is written out
 * as the value stored in its `v` field.
 */
function encodePointer(w: Pointer): Value<Pointer> {
    const { v: carried } = w;
    return carried;
}
const Discard = Record.makeConstructor<Pointer>('discard', []);
const Capture = Record.makeConstructor<Pointer>('capture', ['pattern']);
const Observe = Record.makeConstructor<Pointer>('observe', ['pattern']);
describe('record constructors', () => { describe('record constructors', () => {
it('should have constructorInfo', () => { it('should have constructorInfo', () => {
@ -61,7 +83,7 @@ describe('parsing from subarray', () => {
describe('reusing buffer space', () => { describe('reusing buffer space', () => {
it('should be done safely, even with nested dictionaries', () => { it('should be done safely, even with nested dictionaries', () => {
expect(canonicalEncode(fromJS(['aaa', {a: 1}, 'zzz'])).toHex()).is( expect(canonicalEncode(fromJS(['aaa', Dictionary.fromJS({a: 1}), 'zzz'])).toHex()).is(
`b5 `b5
b103616161 b103616161
b7 b7
@ -72,46 +94,115 @@ describe('reusing buffer space', () => {
}); });
}); });
// Tests for how plain JavaScript objects are encoded to, and decoded from,
// the binary Pointer form, with and without user-supplied callbacks.
describe('encoding and decoding pointers', () => {
// With no encodePointer callback: every encoding must start with the Pointer
// tag, be stable for the same object, and differ between two distinct objects
// even though they have identical contents.
it('should encode using pointerId when no function has been supplied', () => {
const A1 = ({a: 1});
const A2 = ({a: 1});
const bs1 = canonicalEncode(A1);
const bs2 = canonicalEncode(A2);
const bs3 = canonicalEncode(A1);
expect(bs1.get(0)).toBe(Tag.Pointer);
expect(bs2.get(0)).toBe(Tag.Pointer);
expect(bs3.get(0)).toBe(Tag.Pointer);
// Can't really check the value assigned to the object. But we
// can check that it's different to a similar object!
expect(bs1).not.is(bs2);
expect(bs1).is(bs3);
});
// With no decodePointer callback: a Pointer tag in the input must make
// decode throw the message checked below.
it('should refuse to decode pointers when no function has been supplied', () => {
expect(() => decode(Bytes.from([Tag.Pointer, Tag.SmallInteger_lo])))
.toThrow('No decodePointer function supplied');
});
// With an encodePointer callback: the callback records each object it is
// given and returns that object's index in `objects`; the expected output is
// a sequence of two Pointers.
// NOTE(review): SmallInteger_lo and SmallInteger_lo + 1 presumably encode the
// indices 0 and 1 -- confirm against the Tag definitions.
it('should encode properly', () => {
const objects: object[] = [];
const A = {a: 1};
const B = {b: 2};
expect(encode(
[A, B],
{
encodePointer(v: object): Value<object> {
objects.push(v);
return objects.length - 1;
}
})).is(Bytes.from([Tag.Sequence,
Tag.Pointer, Tag.SmallInteger_lo,
Tag.Pointer, Tag.SmallInteger_lo + 1,
Tag.End]));
// The callback must have been invoked once per object, in sequence order.
expect(objects).is([A, B]);
});
// With a decodePointer callback: the callback treats the decoded pointer
// value as an index into `objects` and rejects anything that is not an
// in-range number.
it('should decode properly', () => {
const X = {x: 123};
const Y = {y: 456};
const objects: object[] = [X, Y];
expect(decode(Bytes.from([
Tag.Sequence,
Tag.Pointer, Tag.SmallInteger_lo,
Tag.Pointer, Tag.SmallInteger_lo + 1,
Tag.End
]), {
decodePointer(v: Value<object>): object {
if (typeof v !== 'number' || v < 0 || v >= objects.length) {
throw new Error("Unknown pointer target");
}
return objects[v];
}
})).is([X, Y]);
});
// Dictionary keys that contain objects: lookups are expected to distinguish
// A1 from A2 (and from a fresh look-alike object) despite equal contents.
it('should store pointers embedded in map keys correctly', () => {
const A1 = ({a: 1});
const A2 = ({a: 1});
const m = new Dictionary<object, Value<object>>();
m.set([A1], 1);
m.set([A2], 2);
// The bare object is not a key; only the one-element sequences are.
expect(m.get(A1)).toBeUndefined();
expect(m.get([A1])).toBe(1);
expect(m.get([A2])).toBe(2);
expect(m.get([{a: 1}])).toBeUndefined();
// Changing A1's contents after insertion must not change which entry it finds.
A1.a = 3;
expect(m.get([A1])).toBe(1);
});
});
describe('common test suite', () => { describe('common test suite', () => {
const samples_bin = fs.readFileSync(__dirname + '/../../../tests/samples.bin'); const samples_bin = fs.readFileSync(__dirname + '/../../../tests/samples.bin');
const samples = decodeWithAnnotations(samples_bin); const samples = decodeWithAnnotations(samples_bin, { decodePointer });
const TestCases = Record.makeConstructor('TestCases', ['cases']); const TestCases = Record.makeConstructor('TestCases', ['cases']);
function DS(bs: Bytes) { function DS(bs: Bytes) {
return decode(bs); return decode(bs, { decodePointer });
} }
function D(bs: Bytes) { function D(bs: Bytes) {
return decodeWithAnnotations(bs); return decodeWithAnnotations(bs, { decodePointer });
} }
function E(v: Value) { function E(v: Value<Pointer>) {
return encodeWithAnnotations(v); return encodeWithAnnotations(v, { encodePointer });
} }
interface ExpectedValues { interface ExpectedValues {
[testName: string]: { value: Value } | { forward: Value, back: Value }; [testName: string]: { value: Value<Pointer> } | { forward: Value<Pointer>, back: Value<Pointer> };
} }
const expectedValues: ExpectedValues = { const expectedValues: ExpectedValues = {
annotation1: { forward: annotate(9, "abc"), annotation1: { forward: annotate<Pointer>(9, "abc"),
back: 9 }, back: 9 },
annotation2: { forward: annotate([[], annotate([], "x")], "abc", "def"), annotation2: { forward: annotate<Pointer>([[], annotate<Pointer>([], "x")], "abc", "def"),
back: [[], []] }, back: [[], []] },
annotation3: { forward: annotate(5, annotation3: { forward: annotate<Pointer>(5,
annotate(2, 1), annotate<Pointer>(2, 1),
annotate(4, 3)), annotate<Pointer>(4, 3)),
back: 5 }, back: 5 },
annotation5: { forward: annotate(new Record(Symbol.for('R'), annotation5: { forward: annotate(new Record<Pointer>(Symbol.for('R'),
[annotate(Symbol.for('f'),
Symbol.for('af'))]),
Symbol.for('ar')),
back: new Record<Pointer>(Symbol.for('R'), [Symbol.for('f')]) },
annotation6: { forward: new Record<Pointer>(annotate<Pointer>(Symbol.for('R'),
Symbol.for('ar')),
[annotate(Symbol.for('f'), [annotate(Symbol.for('f'),
Symbol.for('af'))]), Symbol.for('af'))]),
Symbol.for('ar')), back: new Record<Pointer>(Symbol.for('R'), [Symbol.for('f')]) },
back: new Record(Symbol.for('R'), [Symbol.for('f')]) }, annotation7: { forward: annotate<Pointer>([], Symbol.for('a'), Symbol.for('b'), Symbol.for('c')),
annotation6: { forward: new Record(annotate(Symbol.for('R'),
Symbol.for('ar')),
[annotate(Symbol.for('f'),
Symbol.for('af'))]),
back: new Record(Symbol.for('R'), [Symbol.for('f')]) },
annotation7: { forward: annotate([], Symbol.for('a'), Symbol.for('b'), Symbol.for('c')),
back: [] }, back: [] },
list1: { forward: [1, 2, 3, 4], list1: { forward: [1, 2, 3, 4],
back: [1, 2, 3, 4] }, back: [1, 2, 3, 4] },
@ -123,7 +214,7 @@ describe('common test suite', () => {
type Variety = 'normal' | 'nondeterministic' | 'decode'; type Variety = 'normal' | 'nondeterministic' | 'decode';
function runTestCase(variety: Variety, tName: string, binaryForm: Bytes, annotatedTextForm: Value) { function runTestCase(variety: Variety, tName: string, binaryForm: Bytes, annotatedTextForm: Value<Pointer>) {
describe(tName, () => { describe(tName, () => {
const textForm = strip(annotatedTextForm); const textForm = strip(annotatedTextForm);
const {forward, back} = (function () { const {forward, back} = (function () {
@ -150,10 +241,10 @@ describe('common test suite', () => {
}); });
} }
const tests = peel(TestCases._.cases(peel(samples))!) as Dictionary<Value>; const tests = peel(TestCases._.cases(peel(samples))!) as Dictionary<Pointer, Value<Pointer>>;
tests.forEach((t0: Value, tName0: Value) => { tests.forEach((t0: Value<Pointer>, tName0: Value<Pointer>) => {
const tName = Symbol.keyFor(strip(tName0) as symbol)!; const tName = Symbol.keyFor(strip(tName0) as symbol)!;
const t = peel(t0) as Record; const t = peel(t0) as Record<Pointer>;
switch (t.label) { switch (t.label) {
case Symbol.for('Test'): case Symbol.for('Test'):
runTestCase('normal', tName, strip(t[0]) as Bytes, t[1]); runTestCase('normal', tName, strip(t[0]) as Bytes, t[1]);

View File

@ -4,7 +4,7 @@ import '../src/node_support';
declare global { declare global {
namespace jest { namespace jest {
interface Matchers<R> { interface Matchers<R> {
is(expected: Value): R; is<T extends object>(expected: Value<T>): R;
toThrowFilter(f: (e: Error) => boolean): R; toThrowFilter(f: (e: Error) => boolean): R;
} }
} }

View File

@ -257,11 +257,12 @@ def annotate(v, *anns):
return v return v
class Decoder(Codec): class Decoder(Codec):
def __init__(self, packet=b'', include_annotations=False): def __init__(self, packet=b'', include_annotations=False, decode_pointer=None):
super(Decoder, self).__init__() super(Decoder, self).__init__()
self.packet = packet self.packet = packet
self.index = 0 self.index = 0
self.include_annotations = include_annotations self.include_annotations = include_annotations
self.decode_pointer = decode_pointer
def extend(self, data): def extend(self, data):
self.packet = self.packet[self.index:] + data self.packet = self.packet[self.index:] + data
@ -327,6 +328,10 @@ class Decoder(Codec):
a = self.next() a = self.next()
v = self.next() v = self.next()
return self.unshift_annotation(a, v) return self.unshift_annotation(a, v)
if tag == 0x86:
if self.decode_pointer is None:
raise DecodeError('No decode_pointer function supplied')
return self.wrap(self.decode_pointer(self.next()))
if tag >= 0x90 and tag <= 0x9f: return self.wrap(tag - (0xa0 if tag > 0x9c else 0x90)) if tag >= 0x90 and tag <= 0x9f: return self.wrap(tag - (0xa0 if tag > 0x9c else 0x90))
if tag >= 0xa0 and tag <= 0xaf: return self.wrap(self.nextint(tag - 0xa0 + 1)) if tag >= 0xa0 and tag <= 0xaf: return self.wrap(self.nextint(tag - 0xa0 + 1))
if tag == 0xb0: return self.wrap(self.nextint(self.varint())) if tag == 0xb0: return self.wrap(self.nextint(self.varint()))
@ -350,16 +355,17 @@ class Decoder(Codec):
self.index = start self.index = start
return None return None
def decode(bs): def decode(bs, **kwargs):
return Decoder(packet=bs).next() return Decoder(packet=bs, **kwargs).next()
def decode_with_annotations(bs): def decode_with_annotations(bs, **kwargs):
return Decoder(packet=bs, include_annotations=True).next() return Decoder(packet=bs, include_annotations=True, **kwargs).next()
class Encoder(Codec): class Encoder(Codec):
def __init__(self): def __init__(self, encode_pointer=id):
super(Encoder, self).__init__() super(Encoder, self).__init__()
self.buffer = bytearray() self.buffer = bytearray()
self.encode_pointer = encode_pointer
def contents(self): def contents(self):
return bytes(self.buffer) return bytes(self.buffer)
@ -428,10 +434,12 @@ class Encoder(Codec):
try: try:
i = iter(v) i = iter(v)
except TypeError: except TypeError:
raise EncodeError('Cannot encode %r' % (v,)) self.buffer.append(0x86)
self.append(self.encode_pointer(v))
return
self.encodevalues(5, i) self.encodevalues(5, i)
def encode(v): def encode(v, **kwargs):
e = Encoder() e = Encoder(**kwargs)
e.append(v) e.append(v)
return e.contents() return e.contents()

View File

@ -157,6 +157,37 @@ class CodecTests(unittest.TestCase):
self._roundtrip((False,) * 100, _buf(0xb5, b'\x80' * 100, 0x84)) self._roundtrip((False,) * 100, _buf(0xb5, b'\x80' * 100, 0x84))
self._roundtrip((False,) * 200, _buf(0xb5, b'\x80' * 200, 0x84)) self._roundtrip((False,) * 200, _buf(0xb5, b'\x80' * 200, 0x84))
def test_pointer_id(self):
    """Instances of an arbitrary class encode with leading byte 0x86; the
    encoding is stable for one instance and differs between two instances
    that have equal attributes."""
    from .preserves import _ord

    class Opaque:
        def __init__(self, a):
            self.a = a

    first, second = Opaque(1), Opaque(1)
    self.assertNotEqual(_e(first), _e(second))
    self.assertEqual(_e(first), _e(first))
    for instance in (first, second):
        self.assertEqual(_ord(_e(instance)[0]), 0x86)
def test_decode_pointer_absent(self):
    """Decoding a packet that starts with 0x86 without passing a
    decode_pointer callback must raise DecodeError."""
    pointer_packet = b'\x86\xa0\xff'
    self.assertRaises(DecodeError, decode, pointer_packet)
def test_encode_pointer(self):
    """A caller-supplied encode_pointer callback decides what is written
    after each 0x86 byte."""
    seen = []

    def index_of(target):
        # Record the object and hand back its position in `seen`.
        seen.append(target)
        return len(seen) - 1

    encoded = encode([object(), object()], encode_pointer=index_of)
    self.assertEqual(encoded, b'\xb5\x86\x90\x86\x91\x84')
def test_decode_pointer(self):
    """A caller-supplied decode_pointer callback maps each decoded pointer
    value (here used as a list index) to the resulting object."""
    targets = [123, 234]
    decoded = decode(b'\xb5\x86\x90\x86\x91\x84',
                     decode_pointer=lambda index: targets[index])
    self.assertEqual(decoded, (123, 234))
def add_method(d, tName, fn): def add_method(d, tName, fn):
if hasattr(fn, 'func_name'): if hasattr(fn, 'func_name'):
# python2 # python2
@ -223,11 +254,26 @@ def install_exn_test(d, tName, bs, check_proc):
self.fail('did not fail as expected') self.fail('did not fail as expected')
add_method(d, tName, test_exn) add_method(d, tName, test_exn)
class Pointer:
    """Test stand-in for a decoded pointer.

    The class itself is passed as the ``decode_pointer`` callback and
    ``Pointer.value`` as the ``encode_pointer`` callback, so decoding wraps
    the pointed-to value and encoding unwraps it again.
    """

    def __init__(self, v):
        # Store the value as returned by strip_annotations.
        self.v = strip_annotations(v)

    @staticmethod
    def value(i):
        """Return the value wrapped by the Pointer ``i``."""
        return i.v

    def __eq__(self, other):
        """Pointers are equal when they are of the same class and wrap equal values."""
        if other.__class__ is self.__class__:
            return self.v == other.v
        # Previously this fell through and returned None; NotImplemented lets
        # Python try the reflected comparison and produce a real boolean.
        return NotImplemented

    def __hash__(self):
        # Consistent with __eq__: equal wrapped values give equal hashes.
        return hash(self.v)
class CommonTestSuite(unittest.TestCase): class CommonTestSuite(unittest.TestCase):
import os import os
with open(os.path.join(os.path.dirname(__file__), with open(os.path.join(os.path.dirname(__file__),
'../../../tests/samples.bin'), 'rb') as f: '../../../tests/samples.bin'), 'rb') as f:
samples = Decoder(f.read(), include_annotations=True).next() samples = Decoder(f.read(), include_annotations=True, decode_pointer=Pointer).next()
TestCases = Record.makeConstructor('TestCases', 'cases') TestCases = Record.makeConstructor('TestCases', 'cases')
@ -257,13 +303,13 @@ class CommonTestSuite(unittest.TestCase):
raise Exception('Unsupported test kind', t.key) raise Exception('Unsupported test kind', t.key)
def DS(self, bs): def DS(self, bs):
return decode(bs) return decode(bs, decode_pointer=Pointer)
def D(self, bs): def D(self, bs):
return decode_with_annotations(bs) return decode_with_annotations(bs, decode_pointer=Pointer)
def E(self, v): def E(self, v):
return encode(v) return encode(v, encode_pointer=Pointer.value)
class RecordTests(unittest.TestCase): class RecordTests(unittest.TestCase):
def test_getters(self): def test_getters(self):

View File

@ -5,7 +5,7 @@ except ImportError:
setup( setup(
name="preserves", name="preserves",
version="0.4.0", version="0.5.0",
author="Tony Garnock-Jones", author="Tony Garnock-Jones",
author_email="tonyg@leastfixedpoint.com", author_email="tonyg@leastfixedpoint.com",
license="Apache Software License", license="Apache Software License",

View File

@ -11,6 +11,7 @@
(struct record (label fields) #:transparent) (struct record (label fields) #:transparent)
(struct float (value) #:transparent) ;; a marker for single-precision I/O (struct float (value) #:transparent) ;; a marker for single-precision I/O
(struct annotated (annotations item) #:transparent) (struct annotated (annotations item) #:transparent)
(struct pointer (value) #:transparent)
;;--------------------------------------------------------------------------- ;;---------------------------------------------------------------------------
;; Reader ;; Reader
@ -29,6 +30,7 @@
(match (next) (match (next)
[(annotated as i) (annotated (cons a as) i)] [(annotated as i) (annotated (cons a as) i)]
[i (annotated (list a) i)]))] [i (annotated (list a) i)]))]
[#x86 (pointer (next))]
[(? (between #x90 #x9C) v) (- v #x90)] [(? (between #x90 #x9C) v) (- v #x90)]
[(? (between #x9D #x9F) v) (- v #xA0)] [(? (between #x9D #x9F) v) (- v #xA0)]
[(? (between #xA0 #xAF) v) (next-integer (- v #xA0 -1))] [(? (between #xA0 #xAF) v) (next-integer (- v #xA0 -1))]
@ -85,6 +87,8 @@
(for [(a (in-list as))] (write-byte #x85 out-port) (output a)) (for [(a (in-list as))] (write-byte #x85 out-port) (output a))
(output v)] (output v)]
[(pointer v) (write-byte #x86 out-port) (output v)]
[(? integer?) [(? integer?)
(cond [(<= -3 v -1) (write-byte (+ v #xA0) out-port)] (cond [(<= -3 v -1) (write-byte (+ v #xA0) out-port)]
[(<= 0 v 12) (write-byte (+ v #x90) out-port)] [(<= 0 v 12) (write-byte (+ v #x90) out-port)]

View File

@ -25,8 +25,14 @@
(define (read-preserve [in-port (current-input-port)] (define (read-preserve [in-port (current-input-port)]
#:read-syntax? [read-syntax? #f] #:read-syntax? [read-syntax? #f]
#:decode-pointer [decode-pointer #f]
#:source [source (object-name in-port)]) #:source [source (object-name in-port)])
(define b (peek-byte in-port)) (define b (peek-byte in-port))
(cond [(eof-object? b) b] (cond [(eof-object? b) b]
[(<= #x80 b #xBF) (read-preserve/binary in-port #:read-syntax? read-syntax?)] [(<= #x80 b #xBF) (read-preserve/binary in-port
[else (read-preserve/text in-port #:read-syntax? read-syntax? #:source source)])) #:read-syntax? read-syntax?
#:decode-pointer decode-pointer)]
[else (read-preserve/text in-port
#:read-syntax? read-syntax?
#:decode-pointer decode-pointer
#:source source)]))

View File

@ -0,0 +1,9 @@
#lang racket/base
;; Hands out integer identifiers for arbitrary values, keyed by `eq?` identity.

(provide object-id)

;; Weak `eq?`-keyed table mapping each value seen so far to the id issued for it.
(define ids (make-weak-hasheq))

;; The id that will be issued to the next previously-unseen value.
(define next-id 0)

;; Returns the id recorded for `x`. The first time `x` is seen, a fresh id
;; (counting up from 0) is issued and stored in the table.
(define (object-id x)
  (hash-ref! ids x
             (lambda ()
               (begin0 next-id
                 (set! next-id (+ next-id 1))))))

View File

@ -32,7 +32,7 @@
[(? list?) 8] [(? list?) 8]
[(? set?) 9] [(? set?) 9]
[(? dict?) 10] [(? dict?) 10]
[_ (error 'preserve-order "Cannot compare value ~v" v)])) [_ 11]))
(define-syntax chain-order (define-syntax chain-order
(syntax-rules () (syntax-rules ()

View File

@ -13,9 +13,12 @@
(define (default-on-short) (error 'read-preserve/binary "Short Preserves binary")) (define (default-on-short) (error 'read-preserve/binary "Short Preserves binary"))
(define (default-on-fail message . args) (error 'read-preserve/binary (apply format message args))) (define (default-on-fail message . args) (error 'read-preserve/binary (apply format message args)))
;; Fallback pointer decoder for the binary reader: ignores its argument and
;; always signals an error saying that no decode-pointer function was supplied.
(define default-decode-pointer
  (lambda (_v)
    (error 'read-preserve/binary "No decode-pointer function supplied")))
(define (bytes->preserve bs (define (bytes->preserve bs
#:read-syntax? [read-syntax? #f] #:read-syntax? [read-syntax? #f]
#:decode-pointer [decode-pointer #f]
#:on-short [on-short default-on-short] #:on-short [on-short default-on-short]
[on-fail default-on-fail]) [on-fail default-on-fail])
(call-with-input-bytes (call-with-input-bytes
@ -23,6 +26,7 @@
(lambda (p) (lambda (p)
(match (read-preserve/binary p (match (read-preserve/binary p
#:read-syntax? read-syntax? #:read-syntax? read-syntax?
#:decode-pointer decode-pointer
#:on-short on-short #:on-short on-short
on-fail) on-fail)
[(? eof-object?) (on-short)] [(? eof-object?) (on-short)]
@ -32,9 +36,11 @@
(define (read-preserve/binary [in-port (current-input-port)] (define (read-preserve/binary [in-port (current-input-port)]
#:read-syntax? [read-syntax? #f] #:read-syntax? [read-syntax? #f]
#:decode-pointer [decode-pointer0 #f]
#:on-short [on-short default-on-short] #:on-short [on-short default-on-short]
[on-fail default-on-fail]) [on-fail default-on-fail])
(define read-annotations? read-syntax?) (define read-annotations? read-syntax?)
(define decode-pointer (or decode-pointer0 default-decode-pointer))
(let/ec return (let/ec return
(define (next) (wrap (pos) (next* (next-byte)))) (define (next) (wrap (pos) (next* (next-byte))))
@ -70,6 +76,7 @@
(if read-annotations? (if read-annotations?
(annotate (next) a) (annotate (next) a)
(next)))] (next)))]
[#x86 (decode-pointer (next))]
[(? (between #x90 #x9C) v) (- v #x90)] [(? (between #x90 #x9C) v) (- v #x90)]
[(? (between #x9D #x9F) v) (- v #xA0)] [(? (between #x9D #x9F) v) (- v #xA0)]
[(? (between #xA0 #xAF) v) (next-integer (- v #xA0 -1))] [(? (between #xA0 #xAF) v) (next-integer (- v #xA0 -1))]

View File

@ -24,13 +24,18 @@
pos pos
#f)) #f))
;; Fallback pointer decoder for the text reader: ignores its argument and
;; always signals an error saying that no decode-pointer function was supplied.
(define default-decode-pointer
  (lambda (_v)
    (error 'read-preserve/text "No decode-pointer function supplied")))
(define (string->preserve s (define (string->preserve s
#:read-syntax? [read-syntax? #f] #:read-syntax? [read-syntax? #f]
#:decode-pointer [decode-pointer #f]
#:source [source "<string>"]) #:source [source "<string>"])
(define p (open-input-string s)) (define p (open-input-string s))
(when read-syntax? (port-count-lines! p)) (when read-syntax? (port-count-lines! p))
(define v (read-preserve/text p (define v (read-preserve/text p
#:read-syntax? read-syntax? #:read-syntax? read-syntax?
#:decode-pointer decode-pointer
#:source source)) #:source source))
(when (eof-object? v) (when (eof-object? v)
(parse-error* #:raise-proc raise-read-eof-error p source "Unexpected end of input")) (parse-error* #:raise-proc raise-read-eof-error p source "Unexpected end of input"))
@ -48,8 +53,10 @@
(define (read-preserve/text [in-port (current-input-port)] (define (read-preserve/text [in-port (current-input-port)]
#:read-syntax? [read-syntax? #f] #:read-syntax? [read-syntax? #f]
#:decode-pointer [decode-pointer0 #f]
#:source [source (object-name in-port)]) #:source [source (object-name in-port)])
(define read-annotations? read-syntax?) (define read-annotations? read-syntax?)
(define decode-pointer (or decode-pointer0 default-decode-pointer))
;;--------------------------------------------------------------------------- ;;---------------------------------------------------------------------------
;; Core of parser ;; Core of parser
@ -89,6 +96,7 @@
(apply parse-error (string-append "Embedded binary value: " message) args)) (apply parse-error (string-append "Embedded binary value: " message) args))
#:read-syntax? read-syntax? #:read-syntax? read-syntax?
#:on-short (lambda () (parse-error "Incomplete embedded binary value")))] #:on-short (lambda () (parse-error "Incomplete embedded binary value")))]
[#\! (decode-pointer (next))]
[c (parse-error "Invalid # syntax: ~v" c)])] [c (parse-error "Invalid # syntax: ~v" c)])]
[#\< (match (read-sequence #\>) [#\< (match (read-sequence #\>)

View File

@ -9,14 +9,22 @@
(require racket/runtime-path) (require racket/runtime-path)
(require syntax/srcloc) (require syntax/srcloc)
;; Test-local wrapper for a decoded pointer; transparent so that
;; `equal?`-based checks compare the wrapped values.
(struct pointer (value) #:transparent)

;; Decoder callback used by these tests: wraps `v` in a `pointer` after
;; passing it through `strip-annotations`.
(define (pointer/no-annotations v)
  (let ([bare (strip-annotations v)])
    (pointer bare)))
(define (d bs #:allow-invalid-prefix? [allow-invalid-prefix? #f]) (define (d bs #:allow-invalid-prefix? [allow-invalid-prefix? #f])
(for [(i (in-range 1 (- (bytes-length bs) 1)))] (for [(i (in-range 1 (- (bytes-length bs) 1)))]
(define result (bytes->preserve (subbytes bs 0 i) #:on-short (lambda () 'short) void)) (define result (bytes->preserve (subbytes bs 0 i)
#:decode-pointer pointer/no-annotations
#:on-short (lambda () 'short) void))
(when (and (not (eq? result 'short)) (when (and (not (eq? result 'short))
(not (and allow-invalid-prefix? (void? result)))) (not (and allow-invalid-prefix? (void? result))))
(error 'd "~a-byte prefix of ~v does not read as short; result: ~v" i bs result))) (error 'd "~a-byte prefix of ~v does not read as short; result: ~v" i bs result)))
(bytes->preserve bs (bytes->preserve bs
#:read-syntax? #t #:read-syntax? #t
#:decode-pointer pointer/no-annotations
#:on-short (lambda () 'short) #:on-short (lambda () 'short)
void)) void))
@ -125,16 +133,31 @@
(match (hash-ref samples-txt-expected t-name text-form) (match (hash-ref samples-txt-expected t-name text-form)
[(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations [(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations
[v (values v v #t)])) [v (values v v #t)]))
(check-equal? text-form back loc) ;; expectation 1 (check-equal? text-form back loc) ;; expectation 1
(check-equal? (d-strip (preserve->bytes text-form)) back loc) ;; expectation 2 (check-equal? (d-strip (preserve->bytes #:encode-pointer pointer-value text-form))
(check-equal? (d-strip (preserve->bytes forward)) back loc) ;; expectation 3 back
(check-equal? (d-strip binary-form) back loc) ;; expectation 4 loc) ;; expectation 2
(check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5 (check-equal? (d-strip (preserve->bytes #:encode-pointer pointer-value forward))
(check-equal? (d (preserve->bytes annotated-text-form)) annotated-text-form loc) ;; expectation 6 back
(check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7 loc) ;; expectation 3
(check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8 (check-equal? (d-strip binary-form) back loc) ;; expectation 4
(check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5
(check-equal? (d (preserve->bytes #:encode-pointer pointer-value annotated-text-form))
annotated-text-form
loc) ;; expectation 6
(check-equal? (string->preserve #:decode-pointer pointer/no-annotations
(preserve->string #:encode-pointer pointer-value text-form))
back
loc) ;; expectation 7
(check-equal? (string->preserve #:decode-pointer pointer/no-annotations
(preserve->string #:encode-pointer pointer-value forward))
back
loc) ;; expectation 8
;; similar to 8: ;; similar to 8:
(check-equal? (string->preserve (preserve->string annotated-text-form) #:read-syntax? #t) (check-equal? (string->preserve #:decode-pointer pointer/no-annotations
(preserve->string #:encode-pointer pointer-value
annotated-text-form)
#:read-syntax? #t)
annotated-text-form annotated-text-form
loc) loc)
(when (and (not (memq variety '(decode))) (when (and (not (memq variety '(decode)))
@ -142,13 +165,16 @@
(and can-execute-nondet-with-canonicalization?))) (and can-execute-nondet-with-canonicalization?)))
;; expectations 9 and 10 ;; expectations 9 and 10
(check-equal? (preserve->bytes forward (check-equal? (preserve->bytes forward
#:encode-pointer pointer-value
#:canonicalizing? #t #:canonicalizing? #t
#:write-annotations? #t) #:write-annotations? #t)
binary-form binary-form
loc)) loc))
(unless (memq variety '(decode nondeterministic)) (unless (memq variety '(decode nondeterministic))
;; expectation 11 ;; expectation 11
(check-equal? (preserve->bytes annotated-text-form #:write-annotations? #t) (check-equal? (preserve->bytes annotated-text-form
#:encode-pointer pointer-value
#:write-annotations? #t)
binary-form binary-form
loc))) loc)))
@ -157,7 +183,10 @@
(testfile (call-with-input-file path (testfile (call-with-input-file path
(lambda (p) (lambda (p)
(port-count-lines! p) (port-count-lines! p)
(read-preserve p #:read-syntax? #t #:source path))))) (read-preserve p
#:read-syntax? #t
#:decode-pointer pointer/no-annotations
#:source path)))))
(match-define (peel-annotations `#s(TestCases ,tests)) testfile) (match-define (peel-annotations `#s(TestCases ,tests)) testfile)
(for [((t-name* t*) (in-hash (annotated-item tests)))] (for [((t-name* t*) (in-hash (annotated-item tests)))]
(define t-name (strip-annotations t-name*)) (define t-name (strip-annotations t-name*))

View File

@ -43,19 +43,21 @@
["--no-annotations" "Strip annotations" ["--no-annotations" "Strip annotations"
(set! annotations? #f)]) (set! annotations? #f)])
(struct pointer (value) #:transparent)
(let loop ((count count)) (let loop ((count count))
(when (positive? count) (when (positive? count)
(define v ((if annotations? values strip-annotations) (define v ((if annotations? values strip-annotations)
(match input-format (match input-format
['any (read-preserve #:read-syntax? #t #:source "<stdin>")] ['any (read-preserve #:read-syntax? #t #:decode-pointer pointer #:source "<stdin>")]
['text (read-preserve/text #:read-syntax? #t #:source "<stdin>")] ['text (read-preserve/text #:read-syntax? #t #:decode-pointer pointer #:source "<stdin>")]
['binary (read-preserve/binary #:read-syntax? #t)]))) ['binary (read-preserve/binary #:decode-pointer pointer #:read-syntax? #t)])))
(when (not (eof-object? v)) (when (not (eof-object? v))
(void (match output-format (void (match output-format
['text ['text
(write-preserve/text v #:indent indent?) (write-preserve/text v #:indent indent? #:encode-pointer pointer-value)
(newline)] (newline)]
['binary ['binary
(write-preserve/binary v #:write-annotations? #t)])) (write-preserve/binary v #:encode-pointer pointer-value #:write-annotations? #t)]))
(flush-output) (flush-output)
(loop (- count 1)))))) (loop (- count 1))))))

View File

@ -9,21 +9,26 @@
(require "float.rkt") (require "float.rkt")
(require "annotation.rkt") (require "annotation.rkt")
(require "varint.rkt") (require "varint.rkt")
(require "object-id.rkt")
(require racket/set) (require racket/set)
(require racket/dict) (require racket/dict)
(require (only-in racket/list flatten)) (require (only-in racket/list flatten))
(define (preserve->bytes v (define (preserve->bytes v
#:canonicalizing? [canonicalizing? #t] #:canonicalizing? [canonicalizing? #t]
#:encode-pointer [encode-pointer #f]
#:write-annotations? [write-annotations? (not canonicalizing?)]) #:write-annotations? [write-annotations? (not canonicalizing?)])
(call-with-output-bytes (call-with-output-bytes
(lambda (p) (write-preserve/binary v p (lambda (p) (write-preserve/binary v p
#:canonicalizing? canonicalizing? #:canonicalizing? canonicalizing?
#:encode-pointer encode-pointer
#:write-annotations? write-annotations?)))) #:write-annotations? write-annotations?))))
(define (write-preserve/binary v [out-port (current-output-port)] (define (write-preserve/binary v [out-port (current-output-port)]
#:canonicalizing? [canonicalizing? #t] #:canonicalizing? [canonicalizing? #t]
#:encode-pointer [encode-pointer0 #f]
#:write-annotations? [write-annotations? (not canonicalizing?)]) #:write-annotations? [write-annotations? (not canonicalizing?)])
(define encode-pointer (or encode-pointer0 object-id))
(define (output-byte b) (define (output-byte b)
(write-byte b out-port)) (write-byte b out-port))
@ -115,6 +120,8 @@
[(? set?) (with-seq 6 (output-set v))] [(? set?) (with-seq 6 (output-set v))]
[(? dict?) (with-seq 7 (output-dict v))] [(? dict?) (with-seq 7 (output-dict v))]
[_ (error 'write-preserve/binary "Invalid value: ~v" v)])) [other
(output-byte #x86)
(output (encode-pointer other))]))
(output v)) (output v))

View File

@ -12,6 +12,7 @@
(require "annotation.rkt") (require "annotation.rkt")
(require "float.rkt") (require "float.rkt")
(require "record.rkt") (require "record.rkt")
(require "object-id.rkt")
(require racket/dict) (require racket/dict)
(require racket/set) (require racket/set)
(require (only-in racket/port with-output-to-string)) (require (only-in racket/port with-output-to-string))
@ -24,7 +25,9 @@
(define (write-preserve/text v0 [o (current-output-port)] (define (write-preserve/text v0 [o (current-output-port)]
#:indent [indent-amount0 #f] #:indent [indent-amount0 #f]
#:encode-pointer [encode-pointer0 #f]
#:write-annotations? [write-annotations? #t]) #:write-annotations? [write-annotations? #t])
(define encode-pointer (or encode-pointer0 object-id))
(define indent-amount (match indent-amount0 (define indent-amount (match indent-amount0
[#f 0] [#f 0]
[#t 2] ;; a default [#t 2] ;; a default
@ -164,15 +167,18 @@
[(? list?) (write-sequence distance "[" "," "]" write-value v)] [(? list?) (write-sequence distance "[" "," "]" write-value v)]
[(? set?) (write-sequence distance "#{" "," "}" write-value (set->list v))] [(? set?) (write-sequence distance "#{" "," "}" write-value (set->list v))]
[(? dict?) (write-sequence distance "{" "," "}" write-key-value (dict->list v))] [(? dict?) (write-sequence distance "{" "," "}" write-key-value (dict->list v))]
[other
[_ (error 'write-preserve/text "Cannot encode value ~v" v)])) (! "#!")
(write-value distance (encode-pointer other))]))
(write-value 0 v0)) (write-value 0 v0))
(define (preserve->string v0 (define (preserve->string v0
#:indent [indent-amount #f] #:indent [indent-amount #f]
#:encode-pointer [encode-pointer #f]
#:write-annotations? [write-annotations? #t]) #:write-annotations? [write-annotations? #t])
(with-output-to-string (with-output-to-string
(lambda () (write-preserve/text v0 (lambda () (write-preserve/text v0
#:indent indent-amount #:indent indent-amount
#:encode-pointer encode-pointer
#:write-annotations? write-annotations?)))) #:write-annotations? write-annotations?))))

View File

@ -41,6 +41,8 @@ pub enum ExpectedKind {
Set, Set,
Dictionary, Dictionary,
Pointer,
SequenceOrSet, // Because of hacking up serde's data model: see open_sequence_or_set etc. SequenceOrSet, // Because of hacking up serde's data model: see open_sequence_or_set etc.
Option, Option,

View File

@ -21,7 +21,6 @@ pub use value::Domain;
pub use value::IOValue; pub use value::IOValue;
pub use value::Map; pub use value::Map;
pub use value::NestedValue; pub use value::NestedValue;
pub use value::NullDomain;
pub use value::PlainValue; pub use value::PlainValue;
pub use value::RcValue; pub use value::RcValue;
pub use value::Set; pub use value::Set;

View File

@ -8,6 +8,7 @@ pub enum Tag {
Double, Double,
End, End,
Annotation, Annotation,
Pointer,
SmallInteger(i8), SmallInteger(i8),
MediumInteger(u8), MediumInteger(u8),
SignedInteger, SignedInteger,
@ -46,6 +47,7 @@ impl TryFrom<u8> for Tag {
0x83 => Ok(Self::Double), 0x83 => Ok(Self::Double),
0x84 => Ok(Self::End), 0x84 => Ok(Self::End),
0x85 => Ok(Self::Annotation), 0x85 => Ok(Self::Annotation),
0x86 => Ok(Self::Pointer),
0x90..=0x9c => Ok(Self::SmallInteger((v - 0x90) as i8)), 0x90..=0x9c => Ok(Self::SmallInteger((v - 0x90) as i8)),
0x9d..=0x9f => Ok(Self::SmallInteger((v - 0x90) as i8 - 16)), 0x9d..=0x9f => Ok(Self::SmallInteger((v - 0x90) as i8 - 16)),
0xa0..=0xaf => Ok(Self::MediumInteger(v - 0xa0 + 1)), 0xa0..=0xaf => Ok(Self::MediumInteger(v - 0xa0 + 1)),
@ -71,6 +73,7 @@ impl From<Tag> for u8 {
Tag::Double => 0x83, Tag::Double => 0x83,
Tag::End => 0x84, Tag::End => 0x84,
Tag::Annotation => 0x85, Tag::Annotation => 0x85,
Tag::Pointer => 0x86,
Tag::SmallInteger(v) => if v < 0 { (v + 16) as u8 + 0x90 } else { v as u8 + 0x90 }, Tag::SmallInteger(v) => if v < 0 { (v + 16) as u8 + 0x90 } else { v as u8 + 0x90 },
Tag::MediumInteger(count) => count - 1 + 0xa0, Tag::MediumInteger(count) => count - 1 + 0xa0,
Tag::SignedInteger => 0xb0, Tag::SignedInteger => 0xb0,

View File

@ -5,7 +5,7 @@ use std::convert::TryFrom;
use std::convert::TryInto; use std::convert::TryInto;
use std::marker::PhantomData; use std::marker::PhantomData;
use super::super::signed_integer::SignedInteger; use super::super::signed_integer::SignedInteger;
use super::super::value::{Value, NestedValue, IOValue, FALSE, TRUE, Map, Set, Record, Annotations}; use super::super::value::{Value, NestedValue, Domain, IOValue, FALSE, TRUE, Map, Set, Record, Annotations};
use super::constants::Tag; use super::constants::Tag;
use super::super::reader::{ use super::super::reader::{
@ -262,6 +262,10 @@ impl<'de, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, S> {
self.demand_next(read_annotations)? self.demand_next(read_annotations)?
} }
} }
Tag::Pointer => {
let v = self.demand_next(read_annotations)?;
Value::Domain(IOValue::from_preserves(v)?).wrap()
}
Tag::SmallInteger(v) => { Tag::SmallInteger(v) => {
// TODO: prebuild these in value.rs // TODO: prebuild these in value.rs
Value::from(v).wrap() Value::from(v).wrap()
@ -358,6 +362,14 @@ impl<'de, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, S> {
Ok(self.peekend()?) Ok(self.peekend()?)
} }
/// Begins reading a pointer by delegating to `next_compound`, passing
/// `Tag::Pointer` together with `ExpectedKind::Pointer`.
fn open_pointer(&mut self) -> ReaderResult<()> {
    let wanted_tag = Tag::Pointer;
    let wanted_kind = ExpectedKind::Pointer;
    self.next_compound(wanted_tag, wanted_kind)
}
/// Finishes reading a pointer. This implementation does no work and
/// always returns `Ok(())`.
fn close_pointer(&mut self) -> ReaderResult<()> {
Ok(())
}
fn next_boolean(&mut self) -> ReaderResult<bool> { fn next_boolean(&mut self) -> ReaderResult<bool> {
match self.peek_next_nonannotation_tag()? { match self.peek_next_nonannotation_tag()? {
Tag::False => { self.skip()?; Ok(false) } Tag::False => { self.skip()?; Ok(false) }

View File

@ -215,6 +215,7 @@ impl Writer for BinaryOrderWriter {
type AnnWriter = PackedWriter<Vec<u8>>; type AnnWriter = PackedWriter<Vec<u8>>;
type SeqWriter = PackedWriter<Vec<u8>>; type SeqWriter = PackedWriter<Vec<u8>>;
type SetWriter = BinaryOrderWriter; type SetWriter = BinaryOrderWriter;
type PointerWriter = PackedWriter<Vec<u8>>;
binary_order_writer_method!(mut align(natural_chunksize: u64) -> Result<()>); binary_order_writer_method!(mut align(natural_chunksize: u64) -> Result<()>);
@ -271,6 +272,15 @@ impl Writer for BinaryOrderWriter {
fn end_set(&mut self, set: Self::SetWriter) -> Result<()> { fn end_set(&mut self, set: Self::SetWriter) -> Result<()> {
set.finish(self) set.finish(self)
} }
/// Starts a pointer: writes `Tag::Pointer`, then returns the writer
/// obtained from `pop` for the pointer's contents.
fn start_pointer(&mut self) -> Result<Self::PointerWriter> {
    self.write_tag(Tag::Pointer)?;
    let pointer_writer = self.pop();
    Ok(pointer_writer)
}
/// Finishes a pointer: passes the pointer writer `ptr` to `push` and
/// then returns `Ok(())`.
fn end_pointer(&mut self, ptr: Self::PointerWriter) -> Result<()> {
self.push(ptr);
Ok(())
}
} }
macro_rules! fits_in_bytes { macro_rules! fits_in_bytes {
@ -285,6 +295,7 @@ impl<W: std::io::Write> Writer for PackedWriter<W>
type AnnWriter = Self; type AnnWriter = Self;
type SeqWriter = Self; type SeqWriter = Self;
type SetWriter = BinaryOrderWriter; type SetWriter = BinaryOrderWriter;
type PointerWriter = Self;
fn start_annotations(&mut self) -> Result<Self::AnnWriter> { fn start_annotations(&mut self) -> Result<Self::AnnWriter> {
Ok(self.suspend()) Ok(self.suspend())
@ -478,4 +489,14 @@ impl<W: std::io::Write> Writer for PackedWriter<W>
fn end_set(&mut self, set: Self::SetWriter) -> Result<()> { fn end_set(&mut self, set: Self::SetWriter) -> Result<()> {
set.finish(self) set.finish(self)
} }
/// Starts a pointer: writes `Tag::Pointer`, then returns the writer
/// obtained from `suspend` for the pointer's contents.
fn start_pointer(&mut self) -> Result<Self::PointerWriter> {
    self.write_tag(Tag::Pointer)?;
    let pointer_writer = self.suspend();
    Ok(pointer_writer)
}
/// Finishes a pointer: passes the pointer writer `ptr` to `resume` and
/// then returns `Ok(())`.
fn end_pointer(&mut self, ptr: Self::PointerWriter) -> Result<()> {
    self.resume(ptr);
    Ok(())
}
} }

View File

@ -15,6 +15,8 @@ pub trait Reader<'de> {
fn open_set(&mut self) -> ReaderResult<()>; fn open_set(&mut self) -> ReaderResult<()>;
fn open_dictionary(&mut self) -> ReaderResult<()>; fn open_dictionary(&mut self) -> ReaderResult<()>;
fn close_compound(&mut self) -> ReaderResult<bool>; fn close_compound(&mut self) -> ReaderResult<bool>;
fn open_pointer(&mut self) -> ReaderResult<()>;
fn close_pointer(&mut self) -> ReaderResult<()>;
//--------------------------------------------------------------------------- //---------------------------------------------------------------------------
@ -142,6 +144,14 @@ impl<'r, 'de, R: Reader<'de>> Reader<'de> for &'r mut R {
fn close_compound(&mut self) -> ReaderResult<bool> { fn close_compound(&mut self) -> ReaderResult<bool> {
(*self).close_compound() (*self).close_compound()
} }
fn open_pointer(&mut self) -> ReaderResult<()> {
(*self).open_pointer()
}
fn close_pointer(&mut self) -> ReaderResult<()> {
(*self).close_pointer()
}
} }

View File

@ -19,6 +19,12 @@ use super::signed_integer::SignedInteger;
use crate::error::{Error, ExpectedKind, Received}; use crate::error::{Error, ExpectedKind, Received};
pub trait Domain: Sized + Debug + Clone + Eq + Hash + Ord { pub trait Domain: Sized + Debug + Clone + Eq + Hash + Ord {
/// Default decoder for a domain-specific value: always fails with an
/// `InvalidData` I/O error whose message includes the debug rendering
/// of `v`.
fn from_preserves(v: IOValue) -> Result<Self, std::io::Error> {
    let message = format!("Cannot Preserves-decode domain-specific value {:?}", v);
    let failure = std::io::Error::new(std::io::ErrorKind::InvalidData, message);
    Err(failure)
}
fn as_preserves(&self) -> Result<IOValue, std::io::Error> { fn as_preserves(&self) -> Result<IOValue, std::io::Error> {
Err(std::io::Error::new(std::io::ErrorKind::InvalidData, Err(std::io::Error::new(std::io::ErrorKind::InvalidData,
format!("Cannot Preserves-encode domain-specific value {:?}", format!("Cannot Preserves-encode domain-specific value {:?}",
@ -270,7 +276,7 @@ impl<N: NestedValue<D>, D: Domain> Debug for Value<N, D> {
f.debug_set().entries(v.iter()).finish() f.debug_set().entries(v.iter()).finish()
} }
Value::Dictionary(ref v) => f.debug_map().entries(v.iter()).finish(), Value::Dictionary(ref v) => f.debug_map().entries(v.iter()).finish(),
Value::Domain(ref d) => write!(f, "{:?}", d), Value::Domain(ref d) => write!(f, "#!{:?}", d),
} }
} }
} }
@ -1044,13 +1050,19 @@ impl<'de, Dom: Domain> serde::Deserialize<'de> for ArcValue<Dom> {
//--------------------------------------------------------------------------- //---------------------------------------------------------------------------
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum NullDomain {}
impl Domain for NullDomain {}
#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] #[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct IOValue(Arc<AnnotatedValue<IOValue, NullDomain>>); pub struct IOValue(Arc<AnnotatedValue<IOValue, IOValue>>);
pub type UnwrappedIOValue = Value<IOValue, NullDomain>; pub type UnwrappedIOValue = Value<IOValue, IOValue>;
impl Domain for IOValue {
fn from_preserves(v: IOValue) -> Result<Self, std::io::Error> {
Ok(v)
}
fn as_preserves(&self) -> Result<IOValue, std::io::Error> {
Ok(self.clone())
}
}
lazy_static! { lazy_static! {
pub static ref FALSE: IOValue = IOValue(Arc::new(AnnotatedValue(Annotations::empty(), Value::Boolean(false)))); pub static ref FALSE: IOValue = IOValue(Arc::new(AnnotatedValue(Annotations::empty(), Value::Boolean(false))));
@ -1058,27 +1070,27 @@ lazy_static! {
pub static ref EMPTY_SEQ: IOValue = IOValue(Arc::new(AnnotatedValue(Annotations::empty(), Value::Sequence(Vec::new())))); pub static ref EMPTY_SEQ: IOValue = IOValue(Arc::new(AnnotatedValue(Annotations::empty(), Value::Sequence(Vec::new()))));
} }
impl NestedValue<NullDomain> for IOValue { impl NestedValue<IOValue> for IOValue {
fn wrap(anns: Annotations<Self, NullDomain>, v: Value<Self, NullDomain>) -> Self { fn wrap(anns: Annotations<Self, IOValue>, v: Value<Self, IOValue>) -> Self {
IOValue(Arc::new(AnnotatedValue::new(anns, v))) IOValue(Arc::new(AnnotatedValue::new(anns, v)))
} }
fn annotations(&self) -> &Annotations<Self, NullDomain> { fn annotations(&self) -> &Annotations<Self, IOValue> {
&(self.0).0 &(self.0).0
} }
fn value(&self) -> &Value<Self, NullDomain> { fn value(&self) -> &Value<Self, IOValue> {
&(self.0).1 &(self.0).1
} }
fn pieces(self) -> (Annotations<Self, NullDomain>, Value<Self, NullDomain>) { fn pieces(self) -> (Annotations<Self, IOValue>, Value<Self, IOValue>) {
match Arc::try_unwrap(self.0) { match Arc::try_unwrap(self.0) {
Ok(AnnotatedValue(anns, v)) => (anns, v), Ok(AnnotatedValue(anns, v)) => (anns, v),
Err(r) => (r.0.clone(), r.1.clone()), Err(r) => (r.0.clone(), r.1.clone()),
} }
} }
fn value_owned(self) -> Value<Self, NullDomain> { fn value_owned(self) -> Value<Self, IOValue> {
match Arc::try_unwrap(self.0) { match Arc::try_unwrap(self.0) {
Ok(AnnotatedValue(_anns, v)) => v, Ok(AnnotatedValue(_anns, v)) => v,
Err(r) => r.1.clone(), Err(r) => r.1.clone(),

View File

@ -20,6 +20,7 @@ pub trait Writer: Sized {
type AnnWriter: AnnotationWriter; type AnnWriter: AnnotationWriter;
type SeqWriter: CompoundWriter; type SeqWriter: CompoundWriter;
type SetWriter: CompoundWriter; type SetWriter: CompoundWriter;
type PointerWriter: Writer;
fn align(&mut self, natural_chunksize: u64) -> Result<()>; fn align(&mut self, natural_chunksize: u64) -> Result<()>;
@ -55,6 +56,9 @@ pub trait Writer: Sized {
fn start_dictionary(&mut self, entry_count: Option<usize>) -> Result<Self::SetWriter>; fn start_dictionary(&mut self, entry_count: Option<usize>) -> Result<Self::SetWriter>;
fn end_set(&mut self, set: Self::SetWriter) -> Result<()>; fn end_set(&mut self, set: Self::SetWriter) -> Result<()>;
fn start_pointer(&mut self) -> Result<Self::PointerWriter>;
fn end_pointer(&mut self, ptr: Self::PointerWriter) -> Result<()>;
//--------------------------------------------------------------------------- //---------------------------------------------------------------------------
fn write(&mut self, v: &IOValue) -> Result<()> { fn write(&mut self, v: &IOValue) -> Result<()> {
@ -129,7 +133,11 @@ pub trait Writer: Sized {
} }
self.end_set(c) self.end_set(c)
} }
Value::Domain(ref d) => self.write(&d.as_preserves()?) Value::Domain(ref d) => {
let mut c = self.start_pointer()?;
c.write(&d.as_preserves()?)?;
self.end_pointer(c)
}
} }
} }
} }

View File

@ -4,7 +4,7 @@ title: "Preserves: an Expressive Data Language"
--- ---
Tony Garnock-Jones <tonyg@leastfixedpoint.com> Tony Garnock-Jones <tonyg@leastfixedpoint.com>
Jan 2021. Version 0.4.0. Jan 2021. Version 0.5.0.
[sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt [sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt
[spki]: http://world.std.com/~cme/html/spki.html [spki]: http://world.std.com/~cme/html/spki.html
@ -17,21 +17,17 @@ Jan 2021. Version 0.4.0.
This document proposes a data model and serialization format called This document proposes a data model and serialization format called
*Preserves*. *Preserves*.
Preserves supports *records* with user-defined *labels*. This relieves Preserves supports *records* with user-defined *labels*, embedded
the confusion caused by encoding records as dictionaries, seen in most *references*, and the usual suite of atomic and compound data types,
data languages in use on the web. It also allows Preserves to easily including *binary* data as a distinct type from text strings. Its
represent the *labelled sums of products* as seen in many functional *annotations* allow separation of data from metadata such as
programming languages. [comments](conventions.html#comments), trace information, and
Preserves also supports the usual suite of atomic and compound data
types, in particular including *binary* data as a distinct type from
text strings. Its *annotations* allow separation of data from metadata
such as [comments](conventions.html#comments), trace information, and
provenance information. provenance information.
Finally, Preserves defines precisely how to *compare* two values. Preserves departs from many other data languages in defining how to
Comparison is based on the data model, not on syntax or on data *compare* two values. Comparison is based on the data model, not on
structures of any particular implementation language. syntax or on data structures of any particular implementation
language.
## Starting with Semantics ## Starting with Semantics
@ -40,23 +36,25 @@ definition of the *values* that we want to work with and give them
meaning independent of their syntax. meaning independent of their syntax.
Our `Value`s fall into two broad categories: *atomic* and *compound* Our `Value`s fall into two broad categories: *atomic* and *compound*
data. Every `Value` is finite and non-cyclic. data. Every `Value` is finite and non-cyclic. References, called
`Pointer`s, are a third, special-case category.
Value = Atom Value = Atom
| Compound | Compound
| Pointer
Atom = Boolean Atom = Boolean
| Float | Float
| Double | Double
| SignedInteger | SignedInteger
| String | String
| ByteString | ByteString
| Symbol | Symbol
Compound = Record Compound = Record
| Sequence | Sequence
| Set | Set
| Dictionary | Dictionary
**Total order.**<a name="total-order"></a> As we go, we will **Total order.**<a name="total-order"></a> As we go, we will
incrementally specify a total order over `Value`s. Two values of the incrementally specify a total order over `Value`s. Two values of the
@ -65,7 +63,7 @@ values of different kinds is essentially arbitrary, but having a total
order is convenient for many tasks, so we define it as order is convenient for many tasks, so we define it as
follows: follows:
(Values) Atom < Compound (Values) Atom < Compound < Pointer
(Compounds) Record < Sequence < Set < Dictionary (Compounds) Record < Sequence < Set < Dictionary
@ -162,6 +160,45 @@ pairwise distinct. Instances of `Dictionary` are compared by
lexicographic comparison of the sequences resulting from ordering each lexicographic comparison of the sequences resulting from ordering each
`Dictionary`'s pairs in ascending order by key. `Dictionary`'s pairs in ascending order by key.
### Pointers.
A `Pointer` embeds *domain-specific*, potentially *stateful* or
*located* data into a `Value`.[^pointer-rationale] `Pointer`s may be
used to denote stateful objects, network services, object
capabilities, file descriptors, Unix processes, or other
possibly-stateful things. Because each `Pointer` is a domain-specific
datum, comparison of two `Pointer`s is done according to
domain-specific rules.
[^pointer-rationale]: **Rationale.** Why include `Pointer`s as a
special class, distinct from, say, a specially-labeled `Record`?
First, a `Record` can only hold other `Value`s: in order to embed
values such as live pointers to Java objects, some means of
"escaping" from the `Value` data type must be provided. Second,
`Pointer`s are meant to be able to denote stateful entities, for
which comparison by address is appropriate; however, we do not
wish to place restrictions on the *nature* of these entities: if
we had used `Record`s instead of distinct `Pointer`s, users would
have to invent an encoding of domain data into `Record`s that
reflected domain ordering into `Value` ordering. This is often
difficult and may not always be possible. Finally, because
`Pointer`s are intended to be able to represent network and memory
*locations*, they must be able to be rewritten at network and
process boundaries. Having a distinct class allows generic
`Pointer` rewriting without the quotation-related complications of
encoding references as, say, `Record`s.
*Examples.* In a Java or Python implementation, a `Pointer` may denote
a reference to a Java or Python object; comparison would be done via
the language's own rules for equivalence and ordering. In a Unix
application, a `Pointer` may denote an open file descriptor or a
process ID. In an HTTP-based application, each `Pointer` might be a
URL, compared according to
[RFC 6943](https://tools.ietf.org/html/rfc6943#section-3.3). When a
`Value` is serialized for storage or transfer, embedded `Pointer`s
will usually be represented as ordinary `Value`s, in which case the
ordinary rules for comparing `Value`s will apply.
## Textual Syntax ## Textual Syntax
Now we have discussed `Value`s and their meanings, we may turn to Now we have discussed `Value`s and their meanings, we may turn to
@ -204,7 +241,7 @@ Standalone documents may have trailing whitespace.
Any `Value` may be preceded by whitespace. Any `Value` may be preceded by whitespace.
Value = ws (Record / Collection / Atom / Compact) Value = ws (Record / Collection / Atom / Pointer / Compact)
Collection = Sequence / Dictionary / Set Collection = Sequence / Dictionary / Set
Atom = Boolean / Float / Double / SignedInteger / Atom = Boolean / Float / Double / SignedInteger /
String / ByteString / Symbol String / ByteString / Symbol
@ -364,6 +401,11 @@ double quote mark.
definition of “token representation”, and with the definition of “token representation”, and with the
[R6RS definition of identifiers](http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-7.html#node_sec_4.2.4). [R6RS definition of identifiers](http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-7.html#node_sec_4.2.4).
A `Pointer` is written as a `Value` chosen to represent the denoted
object, prefixed with `#!`.
Pointer = "#!" Value
Finally, any `Value` may be represented by escaping from the textual Finally, any `Value` may be represented by escaping from the textual
syntax to the [compact binary syntax](#compact-binary-syntax) by syntax to the [compact binary syntax](#compact-binary-syntax) by
prefixing a `ByteString` containing the binary representation of the prefixing a `ByteString` containing the binary representation of the
@ -467,11 +509,11 @@ write `varint(m)` for the varint-encoding of `m`. Quoting the
The following table illustrates varint-encoding. The following table illustrates varint-encoding.
| Number, `m` | `m` in binary, grouped into 7-bit chunks | `varint(m)` bytes | | Number, `m` | `m` in binary, grouped into 7-bit chunks | `varint(m)` bytes |
| ------ | ------------------- | ------------ | | ------ | ------------------- | ------------ |
| 15 | `0001111` | 15 | | 15 | `0001111` | 15 |
| 300 | `0000010 0101100` | 172 2 | | 300 | `0000010 0101100` | 172 2 |
| 1000000000 | `0000011 1011100 1101011 0010100 0000000` | 128 148 235 220 3 | | 1000000000 | `0000011 1011100 1101011 0010100 0000000` | 128 148 235 220 3 |
It is an error for a varint-encoded `m` in a `Repr` to be anything It is an error for a varint-encoded `m` in a `Repr` to be anything
other than the unique shortest encoding for that `m`. That is, a other than the unique shortest encoding for that `m`. That is, a
@ -579,6 +621,13 @@ contained within the `Value` unmodified.
The functions `binary32(F)` and `binary64(D)` yield big-endian 4- and The functions `binary32(F)` and `binary64(D)` yield big-endian 4- and
8-byte IEEE 754 binary representations of `F` and `D`, respectively. 8-byte IEEE 754 binary representations of `F` and `D`, respectively.
### Pointers.
The `Repr` of a `Pointer` is the `Repr` of a `Value` chosen to
represent the denoted object, prefixed with `[0x86]`.
«#!V» = [0x86] ++ «V»
### Annotations. ### Annotations.
To annotate a `Repr` `r` with some `Value` `v`, prepend `r` with To annotate a `Repr` `r` with some `Value` `v`, prepend `r` with
@ -596,8 +645,8 @@ syntax `@a@b[]`, i.e. an empty sequence annotated with two symbols,
The total ordering specified [above](#total-order) means that the following statements are true: The total ordering specified [above](#total-order) means that the following statements are true:
"bzz" < "c" < "caa" "bzz" < "c" < "caa" < #!"a"
#t < 3.0f < 3.0 < 3 < "3" < |3| < [] #t < 3.0f < 3.0 < 3 < "3" < |3| < [] < #!#t
### Simple examples. ### Simple examples.
@ -774,10 +823,6 @@ the same `Value` to yield different binary `Repr`s.
## Acknowledgements ## Acknowledgements
The use of the low-order bits in certain SignedInteger tags for the
length of the following data is inspired by a similar feature of
[CBOR](http://cbor.io/).
The treatment of commas as whitespace in the text syntax is inspired The treatment of commas as whitespace in the text syntax is inspired
by the same feature of [EDN](https://github.com/edn-format/edn). by the same feature of [EDN](https://github.com/edn-format/edn).
@ -810,7 +855,8 @@ a binary-syntax document; otherwise, it should be interpreted as text.
83 - Double 83 - Double
84 - End marker 84 - End marker
85 - Annotation 85 - Annotation
(8x) RESERVED 86-8F 86 - Pointer
(8x) RESERVED 87-8F
9x - Small integers 0..12,-3..-1 9x - Small integers 0..12,-3..-1
An - Small integers, (n+1) bytes long An - Small integers, (n+1) bytes long

Binary file not shown.

View File

@ -111,6 +111,9 @@
list9: @"Unexpected close bracket" <ParseError "]"> list9: @"Unexpected close bracket" <ParseError "]">
list10: @"Missing end byte" <DecodeShort #x"b58080"> list10: @"Missing end byte" <DecodeShort #x"b58080">
noinput0: @"No input at all" <DecodeEOF #x""> noinput0: @"No input at all" <DecodeEOF #x"">
pointer0: <Test #x"8690" #!0>
pointer1: <Test #x"868690" #!#!0>
pointer2: <Test #x"b5869086b10568656c6c6f84" [#!0 #!"hello"]>
record1: <Test #x"b4 b30763617074757265 b4 b30764697363617264 84 84" <capture <discard>>> record1: <Test #x"b4 b30763617074757265 b4 b30764697363617264 84 84" <capture <discard>>>
record2: <Test #x"b4 b3076f627365727665 b4 b305737065616b b4 b30764697363617264 84 b4 b30763617074757265 b4 b30764697363617264 84 84 84 84" <observe <speak <discard>, <capture <discard>>>>> record2: <Test #x"b4 b3076f627365727665 b4 b305737065616b b4 b30764697363617264 84 b4 b30763617074757265 b4 b30764697363617264 84 84 84 84" <observe <speak <discard>, <capture <discard>>>>>
record3: <Test #x"b4 b5 b3067469746c6564 b306706572736f6e 92 b3057468696e67 91 84 a065 b109426c61636b77656c6c b4 b30464617465 a1071d 92 93 84 b1024472 84" <[titled person 2 thing 1] 101 "Blackwell" <date 1821 2 3> "Dr">> record3: <Test #x"b4 b5 b3067469746c6564 b306706572736f6e 92 b3057468696e67 91 84 a065 b109426c61636b77656c6c b4 b30464617465 a1071d 92 93 84 b1024472 84" <[titled person 2 thing 1] 101 "Blackwell" <date 1821 2 3> "Dr">>