Introduce pointers

This commit is contained in:
Tony Garnock-Jones 2021-01-29 12:03:28 +01:00
parent 6bf49874b7
commit 532e811894
31 changed files with 630 additions and 247 deletions

View File

@ -1,6 +1,6 @@
{
"name": "preserves",
"version": "0.4.0",
"version": "0.5.0",
"description": "Experimental data serialization format",
"homepage": "https://gitlab.com/preserves/preserves",
"license": "Apache-2.0",

View File

@ -14,6 +14,17 @@ import { PreserveOn } from './symbols';
export type ErrorType = 'DecodeError' | 'EncodeError' | 'ShortPacket';
export const ErrorType = Symbol.for('ErrorType');
export type Encodable<T extends object> =
Value<T> | Preservable<T> | Iterable<Value<T>> | ArrayBufferView;
export interface Preservable<T extends object> {
[PreserveOn](encoder: Encoder<T>): void;
}
/**
 * Type guard for values that know how to write themselves to an encoder.
 *
 * A value qualifies when it is a non-null object whose `[PreserveOn]`
 * member is callable. Functions and primitives are rejected because
 * `typeof` does not report them as 'object'.
 */
export function isPreservable<T extends object>(v: any): v is Preservable<T> {
    if (v === null || typeof v !== 'object') {
        return false;
    }
    const hook = v[PreserveOn];
    return typeof hook === 'function';
}
export abstract class PreservesCodecError {
abstract get [ErrorType](): ErrorType;
@ -53,16 +64,17 @@ export class ShortPacket extends DecodeError {
}
}
export interface DecoderOptions {
export interface DecoderOptions<T extends object> {
includeAnnotations?: boolean;
decodePointer?: (v: Value<T>) => T;
}
export class Decoder {
export class Decoder<T extends object> {
packet: Uint8Array;
index: number;
options: DecoderOptions;
options: DecoderOptions<T>;
constructor(packet: BytesLike = new Uint8Array(0), options: DecoderOptions = {}) {
constructor(packet: BytesLike = new Uint8Array(0), options: DecoderOptions<T> = {}) {
this.packet = underlying(packet);
this.index = 0;
this.options = options;
@ -104,7 +116,7 @@ export class Decoder {
return matched;
}
nextvalues(): Value[] {
nextvalues(): Value<T>[] {
const result = [];
while (!this.peekend()) result.push(this.next());
return result;
@ -119,12 +131,12 @@ export class Decoder {
return acc;
}
wrap(v: Value): Value {
wrap(v: Value<T>): Value<T> {
return this.includeAnnotations ? new Annotated(v) : v;
}
static dictionaryFromArray(vs: Value[]): Dictionary<Value> {
const d = new Dictionary<Value>();
static dictionaryFromArray<T extends object>(vs: Value<T>[]): Dictionary<T, Value<T>> {
const d = new Dictionary<T, Value<T>>();
if (vs.length % 2) throw new DecodeError("Missing dictionary value");
for (let i = 0; i < vs.length; i += 2) {
d.set(vs[i], vs[i+1]);
@ -132,14 +144,14 @@ export class Decoder {
return d;
}
unshiftAnnotation(a: Value, v: Annotated) {
unshiftAnnotation(a: Value<T>, v: Annotated<T>) {
if (this.includeAnnotations) {
v.annotations.unshift(a);
}
return v;
}
next(): Value {
next(): Value<T> {
const tag = this.nextbyte();
switch (tag) {
case Tag.False: return this.wrap(false);
@ -149,9 +161,16 @@ export class Decoder {
case Tag.End: throw new DecodeError("Unexpected Compound end marker");
case Tag.Annotation: {
const a = this.next();
const v = this.next() as Annotated;
const v = this.next() as Annotated<T>;
return this.unshiftAnnotation(a, v);
}
case Tag.Pointer: {
const d = this.options.decodePointer;
if (d === void 0) {
throw new DecodeError("No decodePointer function supplied");
}
return this.wrap(d(this.next()));
}
case Tag.SignedInteger: return this.wrap(this.nextint(this.varint()));
case Tag.String: return this.wrap(Bytes.from(this.nextbytes(this.varint())).fromUtf8());
case Tag.ByteString: return this.wrap(Bytes.from(this.nextbytes(this.varint())));
@ -192,30 +211,35 @@ export class Decoder {
}
}
export function decode(bs: BytesLike, options?: DecoderOptions) {
export function decode<T extends object>(bs: BytesLike, options?: DecoderOptions<T>) {
return new Decoder(bs, options).next();
}
export function decodeWithAnnotations(bs: BytesLike, options: DecoderOptions = {}): Annotated {
return decode(bs, { ... options, includeAnnotations: true }) as Annotated;
export function decodeWithAnnotations<T extends object>(bs: BytesLike, options: DecoderOptions<T> = {}): Annotated<T> {
return decode(bs, { ... options, includeAnnotations: true }) as Annotated<T>;
}
export interface EncoderOptions {
export interface EncoderOptions<T extends object> {
canonical?: boolean;
includeAnnotations?: boolean;
encodePointer?: (v: T) => Value<T>;
}
/**
 * Converts a byte buffer to a string holding one UTF-16 code unit per byte
 * (each byte value becomes the character with that code).
 *
 * `String.fromCharCode.apply` passes every byte as a separate call argument,
 * and engines cap the number of arguments a call may take; a large enough
 * buffer makes the single-call form throw a RangeError. Inputs above the
 * slice size are therefore converted piecewise and joined.
 *
 * @param bs bytes to convert; may be empty
 * @returns a string of length `bs.length`
 */
function chunkStr(bs: Uint8Array): string {
    // Comfortably below the argument-count limits of common engines.
    const SLICE = 0x8000;
    if (bs.length <= SLICE) {
        return String.fromCharCode.apply(null, bs as any as number[]);
    }
    const pieces: string[] = [];
    for (let start = 0; start < bs.length; start += SLICE) {
        // subarray() is a view, so no bytes are copied here.
        const part = bs.subarray(start, start + SLICE);
        pieces.push(String.fromCharCode.apply(null, part as any as number[]));
    }
    return pieces.join('');
}
export class Encoder {
/**
 * Type guard: true when `v` is a non-null object with a callable
 * `Symbol.iterator` member. Primitive strings are rejected, since
 * `typeof` reports them as 'string' rather than 'object'.
 */
function isIterable<T>(v: any): v is Iterable<T> {
    if (v === null || typeof v !== 'object') {
        return false;
    }
    const iteratorMethod = v[Symbol.iterator];
    return typeof iteratorMethod === 'function';
}
export class Encoder<T extends object> {
chunks: Array<Uint8Array>;
view: DataView;
index: number;
options: EncoderOptions;
options: EncoderOptions<T>;
constructor(options: EncoderOptions = {}) {
constructor(options: EncoderOptions<T> = {}) {
this.chunks = [];
this.view = new DataView(new ArrayBuffer(256));
this.index = 0;
@ -310,7 +334,7 @@ export class Encoder {
this.emitbytes(bs);
}
encodevalues(tag: Tag, items: Iterable<Value>) {
encodevalues(tag: Tag, items: Iterable<Value<T>>) {
this.emitbyte(tag);
for (let i of items) { this.push(i); }
this.emitbyte(Tag.End);
@ -322,8 +346,11 @@ export class Encoder {
this.emitbyte(Tag.End);
}
push(v: any) {
if (typeof v?.[PreserveOn] === 'function') {
push(v: Encodable<T>) {
if (isPreservable<never>(v)) {
v[PreserveOn](this as unknown as Encoder<never>);
}
else if (isPreservable<T>(v)) {
v[PreserveOn](this);
}
else if (typeof v === 'boolean') {
@ -355,23 +382,36 @@ export class Encoder {
else if (Array.isArray(v)) {
this.encodevalues(Tag.Sequence, v);
}
else if (typeof v?.[Symbol.iterator] === 'function') {
this.encodevalues(Tag.Sequence, v as Iterable<Value>);
else if (isIterable<Value<T>>(v)) {
this.encodevalues(Tag.Sequence, v as Iterable<Value<T>>);
}
else {
throw new EncodeError("Cannot encode", v);
const e = this.options.encodePointer ?? pointerId;
this.emitbyte(Tag.Pointer);
this.push(e(v));
}
return this; // for chaining
}
}
export function encode(v: any, options?: EncoderOptions): Bytes {
export function encode<T extends object>(v: Encodable<T>, options?: EncoderOptions<T>): Bytes {
return new Encoder(options).push(v).contents();
}
// Next id to hand out, and the ids already handed out. The registry is a
// WeakMap, so holding an id does not keep the object itself alive.
let _nextId = 0;
const _registry = new WeakMap<object, number>();

/**
 * Returns a numeric id for `v` that is stable for the lifetime of the module:
 * the first call for a given object allocates the next counter value, and
 * every later call with the same object returns that same value.
 *
 * Ids are assigned by object identity, so two structurally equal but
 * distinct objects receive different ids.
 */
export function pointerId(v: object): number {
    const known = _registry.get(v);
    if (known !== void 0) {
        return known;
    }
    const fresh = _nextId++;
    _registry.set(v, fresh);
    return fresh;
}
const _canonicalEncoder = new Encoder({ canonical: true });
let _usingCanonicalEncoder = false;
export function canonicalEncode(v: any, options?: EncoderOptions): Bytes {
export function canonicalEncode(v: Encodable<any>, options?: EncoderOptions<any>): Bytes {
if (options === void 0 && !_usingCanonicalEncoder) {
_usingCanonicalEncoder = true;
const bs = _canonicalEncoder.push(v).contents();
@ -382,10 +422,10 @@ export function canonicalEncode(v: any, options?: EncoderOptions): Bytes {
}
}
export function canonicalString(v: any): string {
export function canonicalString(v: Encodable<any>): string {
return _canonicalEncoder.push(v).contentsString();
}
export function encodeWithAnnotations(v: any, options: EncoderOptions = {}): Bytes {
export function encodeWithAnnotations<T extends object>(v: Encodable<T>, options: EncoderOptions<T> = {}): Bytes {
return encode(v, { ... options, includeAnnotations: true });
}

View File

@ -5,6 +5,7 @@ export enum Tag {
Double,
End,
Annotation,
Pointer,
SmallInteger_lo = 0x90,
MediumInteger_lo = 0xa0,

View File

@ -12,7 +12,7 @@ export function stringify(x: any): string {
}
}
export function preserves(pieces: TemplateStringsArray, ...values: Value[]): string {
export function preserves(pieces: TemplateStringsArray, ...values: Value<any>[]): string {
const result = [pieces[0]];
values.forEach((v, i) => {
result.push(stringify(v));

View File

@ -2,22 +2,22 @@
import { PreserveOn, AsPreserve } from './symbols';
import { Tag } from './constants';
import { Encoder, canonicalEncode, canonicalString } from './codec';
import { Encoder, canonicalEncode, canonicalString, Preservable } from './codec';
import { stringify } from './text';
import { _iterMap, FlexMap, FlexSet } from './flex';
const textEncoder = new TextEncoder();
const textDecoder = new TextDecoder();
export type Value = Atom | Compound | Annotated;
export type Value<T extends object> = Atom | Compound<T> | T | Annotated<T>;
export type Atom = boolean | Single | Double | number | string | Bytes | symbol;
export type Compound = Record | Array<Value> | Set | Dictionary<Value>;
export type Compound<T extends object> = Record<T> | Array<Value<T>> | Set<T> | Dictionary<T, Value<T>>;
export const IsPreservesRecord = Symbol.for('IsPreservesRecord');
export const IsPreservesBytes = Symbol.for('IsPreservesBytes');
export const IsPreservesAnnotated = Symbol.for('IsPreservesAnnotated');
export function fromJS(x: any): Value {
export function fromJS<T extends object>(x: any): Value<T> {
switch (typeof x) {
case 'number':
if (!Number.isInteger(x)) {
@ -32,6 +32,7 @@ export function fromJS(x: any): Value {
case 'undefined':
case 'function':
case 'bigint':
break;
case 'object':
@ -41,16 +42,20 @@ export function fromJS(x: any): Value {
if (typeof x[AsPreserve] === 'function') {
return x[AsPreserve]();
}
if (Record.isRecord(x)) {
if (Record.isRecord<T>(x)) {
return x;
}
if (Array.isArray(x)) {
return x.map(fromJS);
return (x as Array<Value<T>>).map<Value<T>>(fromJS);
}
if (ArrayBuffer.isView(x) || x instanceof ArrayBuffer) {
return Bytes.from(x);
}
return Dictionary.fromJS(x);
// Just... assume it's a T.
return (x as T);
default:
break;
}
throw new TypeError("Cannot represent JavaScript value as Preserves: " + x);
@ -89,12 +94,12 @@ export abstract class Float {
static isDouble = (x: any): x is Double => Float.isFloat(x, 'Double');
}
export class Single extends Float {
[AsPreserve](): Value {
export class Single extends Float implements Preservable<never> {
[AsPreserve]<T extends object>(): Value<T> {
return this;
}
[PreserveOn](encoder: Encoder) {
[PreserveOn](encoder: Encoder<never>) {
encoder.emitbyte(Tag.Float);
encoder.makeroom(4);
encoder.view.setFloat32(encoder.index, this.value, false);
@ -110,12 +115,12 @@ export class Single extends Float {
}
}
export class Double extends Float {
[AsPreserve](): Value {
export class Double extends Float implements Preservable<never> {
[AsPreserve]<T extends object>(): Value<T> {
return this;
}
[PreserveOn](encoder: Encoder) {
[PreserveOn](encoder: Encoder<never>) {
encoder.emitbyte(Tag.Double);
encoder.makeroom(8);
encoder.view.setFloat64(encoder.index, this.value, false);
@ -133,7 +138,7 @@ export class Double extends Float {
export type BytesLike = Bytes | Uint8Array;
export class Bytes {
export class Bytes implements Preservable<never> {
readonly _view: Uint8Array;
constructor(maybeByteIterable: any = new Uint8Array()) {
@ -249,7 +254,7 @@ export class Bytes {
return this.asPreservesText();
}
[AsPreserve](): Value {
[AsPreserve]<T extends object>(): Value<T> {
return this;
}
@ -282,7 +287,7 @@ export class Bytes {
return nibbles.join('');
}
[PreserveOn](encoder: Encoder) {
[PreserveOn](encoder: Encoder<never>) {
encoder.emitbyte(Tag.ByteString);
encoder.varint(this.length);
encoder.emitbytes(this._view);
@ -313,13 +318,15 @@ export function underlying(b: Bytes | Uint8Array): Uint8Array {
}
declare global {
interface Boolean { asPreservesText(): string; }
interface Number { asPreservesText(): string; }
interface String { asPreservesText(): string; }
interface Symbol { asPreservesText(): string; }
interface Array<T> { asPreservesText(): string; }
interface Object { asPreservesText(): string; }
}
Object.defineProperty(Object.prototype, 'asPreservesText', {
enumerable: false,
writable: true,
value: function(): string { return '#!' + JSON.stringify(this); }
});
Boolean.prototype.asPreservesText = function (): string {
return this ? '#t' : '#f';
};
@ -338,7 +345,7 @@ Symbol.prototype.asPreservesText = function (): string {
};
Array.prototype.asPreservesText = function (): string {
return '[' + this.map((i: Value) => i.asPreservesText()).join(', ') + ']';
return '[' + this.map((i: Value<any>) => i.asPreservesText()).join(', ') + ']';
};
// Uint8Array / TypedArray methods
@ -409,10 +416,10 @@ keys lastIndexOf reduce reduceRight some toLocaleString values`.split(/\s+/))
Bytes.prototype[Symbol.iterator] = function () { return this._view[Symbol.iterator](); };
})();
export class Record extends Array<Value> {
readonly label: Value;
export class Record<T extends object> extends Array<Value<T>> {
readonly label: Value<T>;
constructor(label: Value, fieldsJS: any[]) {
constructor(label: Value<T>, fieldsJS: any[]) {
if (arguments.length === 1) {
// Using things like someRecord.map() involves the runtime
// apparently instantiating instances of this.constructor
@ -430,15 +437,15 @@ export class Record extends Array<Value> {
Object.freeze(this);
}
get(index: number, defaultValue?: Value): Value | undefined {
get(index: number, defaultValue?: Value<T>): Value<T> | undefined {
return (index < this.length) ? this[index] : defaultValue;
}
set(index: number, newValue: Value): Record {
set(index: number, newValue: Value<T>): Record<T> {
return new Record(this.label, this.map((f, i) => (i === index) ? newValue : f));
}
getConstructorInfo(): RecordConstructorInfo {
getConstructorInfo(): RecordConstructorInfo<T> {
return { label: this.label, arity: this.length };
}
@ -448,13 +455,13 @@ export class Record extends Array<Value> {
this.every((f, i) => is(f, other.get(i)));
}
hashCode(): number {
let h = hash(this.label);
this.forEach((f) => h = ((31 * h) + hash(f)) | 0);
return h;
}
// hashCode(): number {
// let h = hash(this.label);
// this.forEach((f) => h = ((31 * h) + hash(f)) | 0);
// return h;
// }
static fallbackToString: (f: Value) => string = (_f) => '<unprintable_preserves_field_value>';
static fallbackToString: (f: Value<any>) => string = (_f) => '<unprintable_preserves_field_value>';
toString(): string {
return this.asPreservesText();
@ -475,26 +482,26 @@ export class Record extends Array<Value> {
}).join(', ') + ')';
}
static makeConstructor(labelSymbolText: string, fieldNames: string[]) {
static makeConstructor<T extends object>(labelSymbolText: string, fieldNames: string[]): RecordConstructor<T> {
return Record.makeBasicConstructor(Symbol.for(labelSymbolText), fieldNames);
}
static makeBasicConstructor(label0: any, fieldNames: string[]): RecordConstructor {
const label = fromJS(label0);
static makeBasicConstructor<T extends object>(label0: any, fieldNames: string[]): RecordConstructor<T> {
const label = fromJS<T>(label0);
const arity = fieldNames.length;
const ctor: RecordConstructor = (...fields: any[]) => {
const ctor: RecordConstructor<T> = (...fields: any[]): Record<T> => {
if (fields.length !== arity) {
throw new Error("Record: cannot instantiate " + (label && label.toString()) +
" expecting " + arity + " fields with " + fields.length + " fields");
}
return new Record(label, fields);
return new Record<T>(label, fields);
};
const constructorInfo = { label, arity };
ctor.constructorInfo = constructorInfo;
ctor.isClassOf = (v: any): v is Record => Record.isClassOf(constructorInfo, v);
ctor.isClassOf = (v: any): v is Record<T> => Record.isClassOf(constructorInfo, v);
ctor._ = {};
fieldNames.forEach((name, i) => {
ctor._[name] = function (r: any): Value | undefined {
ctor._[name] = function (r: any): Value<T> | undefined {
if (!ctor.isClassOf(r)) {
throw new Error("Record: attempt to retrieve field "+label.toString()+"."+name+
" from non-"+label.toString()+": "+(r && r.toString()));
@ -505,7 +512,7 @@ export class Record extends Array<Value> {
return ctor;
}
[PreserveOn](encoder: Encoder) {
[PreserveOn](encoder: Encoder<T>) {
encoder.emitbyte(Tag.Record);
encoder.push(this.label);
this.forEach((f) => encoder.push(f));
@ -516,24 +523,24 @@ export class Record extends Array<Value> {
return true;
}
static isRecord(x: any): x is Record {
static isRecord<T extends object>(x: any): x is Record<T> {
return !!x?.[IsPreservesRecord];
}
static isClassOf(ci: RecordConstructorInfo, v: any): v is Record {
static isClassOf<T extends object>(ci: RecordConstructorInfo<T>, v: any): v is Record<T> {
return (Record.isRecord(v)) && is(ci.label, v.label) && (ci.arity === v.length);
}
}
export interface RecordConstructor {
(...fields: any[]): Record;
constructorInfo: RecordConstructorInfo;
isClassOf(v: any): v is Record;
_: { [getter: string]: (r: any) => Value | undefined };
export interface RecordConstructor<T extends object> {
(...fields: any[]): Record<T>;
constructorInfo: RecordConstructorInfo<T>;
isClassOf(v: any): v is Record<T>;
_: { [getter: string]: (r: any) => Value<T> | undefined };
}
export interface RecordConstructorInfo {
label: Value;
export interface RecordConstructorInfo<T extends object> {
label: Value<T>;
arity: number;
}
@ -544,7 +551,7 @@ export function is(a: any, b: any): boolean {
if (typeof a !== typeof b) return false;
if (typeof a === 'object') {
if (a === null || b === null) return false;
if ('equals' in a) return a.equals(b, is);
if ('equals' in a && typeof a.equals === 'function') return a.equals(b, is);
if (Array.isArray(a) && Array.isArray(b)) {
if (a.length !== b.length) return false;
for (let i = 0; i < a.length; i++) if (!is(a[i], b[i])) return false;
@ -554,36 +561,32 @@ export function is(a: any, b: any): boolean {
return false;
}
export function hash(a: Value): number {
throw new Error("shouldBeImplemented"); // TODO
}
export type DictionaryType = 'Dictionary' | 'Set';
export const DictionaryType = Symbol.for('DictionaryType');
export class Dictionary<T> extends FlexMap<Value, T> {
export class Dictionary<T extends object, V> extends FlexMap<Value<T>, V> {
get [DictionaryType](): DictionaryType {
return 'Dictionary';
}
static isDictionary<T>(x: any): x is Dictionary<T> {
static isDictionary<T extends object, V>(x: any): x is Dictionary<T, V> {
return x?.[DictionaryType] === 'Dictionary';
}
static fromJS(x: object): Dictionary<Value> {
if (Dictionary.isDictionary(x)) return x as Dictionary<Value>;
const d = new Dictionary<Value>();
static fromJS<T extends object, V extends object>(x: object): Dictionary<T, Value<V>> {
if (Dictionary.isDictionary<T, V>(x)) return x as Dictionary<T, Value<V>>;
const d = new Dictionary<T, Value<V>>();
Object.entries(x).forEach(([key, value]) => d.set(key, fromJS(value)));
return d;
}
constructor(items?: Iterable<readonly [any, T]>) {
constructor(items?: Iterable<readonly [any, V]>) {
const iter = items?.[Symbol.iterator]();
super(canonicalString, iter === void 0 ? void 0 : _iterMap(iter, ([k,v]) => [fromJS(k), v]));
}
mapEntries<R>(f: (entry: [Value, T]) => [Value, R]): Dictionary<R> {
const result = new Dictionary<R>();
mapEntries<R extends object, W>(f: (entry: [Value<T>, V]) => [Value<R>, W]): Dictionary<R, W> {
const result = new Dictionary<R, W>();
for (let oldEntry of this.entries()) {
const newEntry = f(oldEntry);
result.set(newEntry[0], newEntry[1])
@ -598,7 +601,7 @@ export class Dictionary<T> extends FlexMap<Value, T> {
'}';
}
clone(): Dictionary<T> {
clone(): Dictionary<T, V> {
return new Dictionary(this);
}
@ -608,7 +611,7 @@ export class Dictionary<T> extends FlexMap<Value, T> {
get [Symbol.toStringTag]() { return 'Dictionary'; }
[PreserveOn](encoder: Encoder) {
[PreserveOn](encoder: Encoder<T>) {
if (encoder.canonical) {
const pieces = Array.from(this).map(([k, v]) =>
Bytes.concat([canonicalEncode(k), canonicalEncode(v)]));
@ -618,33 +621,33 @@ export class Dictionary<T> extends FlexMap<Value, T> {
encoder.emitbyte(Tag.Dictionary);
this.forEach((v, k) => {
encoder.push(k);
encoder.push(v);
encoder.push(v as unknown as Value<T>); // Suuuuuuuper unsound
});
encoder.emitbyte(Tag.End);
}
}
}
export class Set extends FlexSet<Value> {
export class Set<T extends object> extends FlexSet<Value<T>> {
get [DictionaryType](): DictionaryType {
return 'Set';
}
static isSet(x: any): x is Set {
static isSet<T extends object>(x: any): x is Set<T> {
return x?.[DictionaryType] === 'Set';
}
constructor(items?: Iterable<any>) {
const iter = items?.[Symbol.iterator]();
super(canonicalString, iter === void 0 ? void 0 : _iterMap(iter, fromJS));
super(canonicalString, iter === void 0 ? void 0 : _iterMap<any, Value<T>>(iter, fromJS));
}
map(f: (value: Value) => Value): Set {
map<R extends object>(f: (value: Value<T>) => Value<R>): Set<R> {
return new Set(_iterMap(this[Symbol.iterator](), f));
}
filter(f: (value: Value) => boolean): Set {
const result = new Set();
filter(f: (value: Value<T>) => boolean): Set<T> {
const result = new Set<T>();
for (let k of this) if (f(k)) result.add(k);
return result;
}
@ -659,13 +662,13 @@ export class Set extends FlexSet<Value> {
'}';
}
clone(): Set {
clone(): Set<T> {
return new Set(this);
}
get [Symbol.toStringTag]() { return 'Set'; }
[PreserveOn](encoder: Encoder) {
[PreserveOn](encoder: Encoder<T>) {
if (encoder.canonical) {
const pieces = Array.from(this).map(k => canonicalEncode(k));
pieces.sort(Bytes.compare);
@ -676,20 +679,20 @@ export class Set extends FlexSet<Value> {
}
}
export class Annotated {
readonly annotations: Array<Value>;
readonly item: Value;
export class Annotated<T extends object> {
readonly annotations: Array<Value<T>>;
readonly item: Value<T>;
constructor(item: Value) {
constructor(item: Value<T>) {
this.annotations = [];
this.item = item;
}
[AsPreserve](): Value {
[AsPreserve](): Value<T> {
return this;
}
[PreserveOn](encoder: Encoder) {
[PreserveOn](encoder: Encoder<T>) {
if (encoder.includeAnnotations) {
for (const a of this.annotations) {
encoder.emitbyte(Tag.Annotation);
@ -703,9 +706,9 @@ export class Annotated {
return is(this.item, Annotated.isAnnotated(other) ? other.item : other);
}
hashCode(): number {
return hash(this.item);
}
// hashCode(): number {
// return hash(this.item);
// }
toString(): string {
return this.asPreservesText();
@ -720,30 +723,30 @@ export class Annotated {
return true;
}
static isAnnotated(x: any): x is Annotated {
static isAnnotated<T extends object>(x: any): x is Annotated<T> {
return !!x?.[IsPreservesAnnotated];
}
}
export function peel(v: Value): Value {
export function peel<T extends object>(v: Value<T>): Value<T> {
return strip(v, 1);
}
export function strip(v: Value, depth: number = Infinity) {
function step(v: Value, depth: number): Value {
export function strip<T extends object>(v: Value<T>, depth: number = Infinity): Value<T> {
function step(v: Value<T>, depth: number): Value<T> {
if (depth === 0) return v;
if (!Annotated.isAnnotated(v)) return v;
if (!Annotated.isAnnotated<T>(v)) return v;
const nextDepth = depth - 1;
function walk(v: Value) { return step(v, nextDepth); }
function walk(v: Value<T>): Value<T> { return step(v, nextDepth); }
if (Record.isRecord(v.item)) {
if (Record.isRecord<T>(v.item)) {
return new Record(step(v.item.label, depth), v.item.map(walk));
} else if (Array.isArray(v.item)) {
return v.item.map(walk);
} else if (Set.isSet(v.item)) {
} else if (Set.isSet<T>(v.item)) {
return v.item.map(walk);
} else if (Dictionary.isDictionary(v.item)) {
} else if (Dictionary.isDictionary<T, Value<T>>(v.item)) {
return v.item.mapEntries((e) => [walk(e[0]), walk(e[1])]);
} else if (Annotated.isAnnotated(v.item)) {
throw new Error("Improper annotation structure");
@ -754,8 +757,8 @@ export function strip(v: Value, depth: number = Infinity) {
return step(v, depth);
}
export function annotate(v0: Value, ...anns: Value[]) {
const v = Annotated.isAnnotated(v0) ? v0 : new Annotated(v0);
export function annotate<T extends object>(v0: Value<T>, ...anns: Value<T>[]): Annotated<T> {
const v = Annotated.isAnnotated<T>(v0) ? v0 : new Annotated(v0);
anns.forEach((a) => v.annotations.push(a));
return v;
}

View File

@ -1,21 +1,43 @@
import {
Value,
Dictionary,
decode, decodeWithAnnotations, encodeWithAnnotations, canonicalEncode,
decode, decodeWithAnnotations, encode, encodeWithAnnotations, canonicalEncode,
DecodeError, ShortPacket,
Bytes, Record,
annotate,
strip, peel,
preserves,
fromJS,
Constants,
} from '../src/index';
const { Tag } = Constants;
import './test-utils';
import * as fs from 'fs';
const Discard = Record.makeConstructor('discard', []);
const Capture = Record.makeConstructor('capture', ['pattern']);
const Observe = Record.makeConstructor('observe', ['pattern']);
/**
 * Test stand-in for an embedded pointer: wraps a single Preserves value.
 */
class Pointer {
    v: Value<Pointer>;

    constructor(v: Value<Pointer>) {
        this.v = v;
    }

    /**
     * Two pointers are equal when they share a constructor and their wrapped
     * values are equal according to the supplied `is` comparison.
     */
    equals(other: any, is: (a: any, b: any) => boolean) {
        if (!Object.is(other.constructor, this.constructor)) {
            return false;
        }
        return is(this.v, other.v);
    }
}
/**
 * Pointer decoder used by the tests: wraps the decoded value in a `Pointer`
 * after passing it through `strip`.
 */
function decodePointer(v: Value<Pointer>): Pointer {
    const bare = strip(v);
    return new Pointer(bare);
}
/**
 * Pointer encoder used by the tests: a `Pointer` is written as the value
 * it wraps.
 */
function encodePointer(w: Pointer): Value<Pointer> {
    const { v } = w;
    return v;
}
const Discard = Record.makeConstructor<Pointer>('discard', []);
const Capture = Record.makeConstructor<Pointer>('capture', ['pattern']);
const Observe = Record.makeConstructor<Pointer>('observe', ['pattern']);
describe('record constructors', () => {
it('should have constructorInfo', () => {
@ -61,7 +83,7 @@ describe('parsing from subarray', () => {
describe('reusing buffer space', () => {
it('should be done safely, even with nested dictionaries', () => {
expect(canonicalEncode(fromJS(['aaa', {a: 1}, 'zzz'])).toHex()).is(
expect(canonicalEncode(fromJS(['aaa', Dictionary.fromJS({a: 1}), 'zzz'])).toHex()).is(
`b5
b103616161
b7
@ -72,46 +94,115 @@ describe('reusing buffer space', () => {
});
});
// Exercises the Pointer tag on both the encode and decode paths, with and
// without user-supplied encodePointer/decodePointer callbacks.
describe('encoding and decoding pointers', () => {
it('should encode using pointerId when no function has been supplied', () => {
// Two structurally identical but distinct plain objects.
const A1 = ({a: 1});
const A2 = ({a: 1});
const bs1 = canonicalEncode(A1);
const bs2 = canonicalEncode(A2);
const bs3 = canonicalEncode(A1);
// Every encoding must start with the Pointer tag byte.
expect(bs1.get(0)).toBe(Tag.Pointer);
expect(bs2.get(0)).toBe(Tag.Pointer);
expect(bs3.get(0)).toBe(Tag.Pointer);
// Can't really check the value assigned to the object. But we
// can check that it's different to a similar object!
expect(bs1).not.is(bs2);
// Encoding the same object twice must give the same bytes.
expect(bs1).is(bs3);
});
it('should refuse to decode pointers when no function has been supplied', () => {
// A Pointer tag followed by a small integer, decoded with no options.
expect(() => decode(Bytes.from([Tag.Pointer, Tag.SmallInteger_lo])))
.toThrow('No decodePointer function supplied');
});
it('should encode properly', () => {
// The callback records each object it is given and returns its index in
// `objects`, so the expected wire form is two pointers wrapping 0 and 1.
const objects: object[] = [];
const A = {a: 1};
const B = {b: 2};
expect(encode(
[A, B],
{
encodePointer(v: object): Value<object> {
objects.push(v);
return objects.length - 1;
}
})).is(Bytes.from([Tag.Sequence,
Tag.Pointer, Tag.SmallInteger_lo,
Tag.Pointer, Tag.SmallInteger_lo + 1,
Tag.End]));
// The callback must have been handed A and then B, in that order.
expect(objects).is([A, B]);
});
it('should decode properly', () => {
// Mirror image of the encoding test: the decoded integer is used as an
// index into `objects`.
const X = {x: 123};
const Y = {y: 456};
const objects: object[] = [X, Y];
expect(decode(Bytes.from([
Tag.Sequence,
Tag.Pointer, Tag.SmallInteger_lo,
Tag.Pointer, Tag.SmallInteger_lo + 1,
Tag.End
]), {
decodePointer(v: Value<object>): object {
// Reject anything that is not a valid index into `objects`.
if (typeof v !== 'number' || v < 0 || v >= objects.length) {
throw new Error("Unknown pointer target");
}
return objects[v];
}
})).is([X, Y]);
});
it('should store pointers embedded in map keys correctly', () => {
// NOTE(review): presumably Dictionary keys are matched via their canonical
// encoding, in which each distinct object gets its own pointer id — confirm
// against FlexMap.
const A1 = ({a: 1});
const A2 = ({a: 1});
const m = new Dictionary<object, Value<object>>();
m.set([A1], 1);
m.set([A2], 2);
// The bare object was never used as a key; only the one-element arrays were.
expect(m.get(A1)).toBeUndefined();
expect(m.get([A1])).toBe(1);
expect(m.get([A2])).toBe(2);
// A fresh look-alike object is a different pointer, so it must not match.
expect(m.get([{a: 1}])).toBeUndefined();
// Mutating the object's contents must not change which entry it finds.
A1.a = 3;
expect(m.get([A1])).toBe(1);
});
});
describe('common test suite', () => {
const samples_bin = fs.readFileSync(__dirname + '/../../../tests/samples.bin');
const samples = decodeWithAnnotations(samples_bin);
const samples = decodeWithAnnotations(samples_bin, { decodePointer });
const TestCases = Record.makeConstructor('TestCases', ['cases']);
function DS(bs: Bytes) {
return decode(bs);
return decode(bs, { decodePointer });
}
function D(bs: Bytes) {
return decodeWithAnnotations(bs);
return decodeWithAnnotations(bs, { decodePointer });
}
function E(v: Value) {
return encodeWithAnnotations(v);
function E(v: Value<Pointer>) {
return encodeWithAnnotations(v, { encodePointer });
}
interface ExpectedValues {
[testName: string]: { value: Value } | { forward: Value, back: Value };
[testName: string]: { value: Value<Pointer> } | { forward: Value<Pointer>, back: Value<Pointer> };
}
const expectedValues: ExpectedValues = {
annotation1: { forward: annotate(9, "abc"),
annotation1: { forward: annotate<Pointer>(9, "abc"),
back: 9 },
annotation2: { forward: annotate([[], annotate([], "x")], "abc", "def"),
annotation2: { forward: annotate<Pointer>([[], annotate<Pointer>([], "x")], "abc", "def"),
back: [[], []] },
annotation3: { forward: annotate(5,
annotate(2, 1),
annotate(4, 3)),
annotation3: { forward: annotate<Pointer>(5,
annotate<Pointer>(2, 1),
annotate<Pointer>(4, 3)),
back: 5 },
annotation5: { forward: annotate(new Record(Symbol.for('R'),
annotation5: { forward: annotate(new Record<Pointer>(Symbol.for('R'),
[annotate(Symbol.for('f'),
Symbol.for('af'))]),
Symbol.for('ar')),
back: new Record<Pointer>(Symbol.for('R'), [Symbol.for('f')]) },
annotation6: { forward: new Record<Pointer>(annotate<Pointer>(Symbol.for('R'),
Symbol.for('ar')),
[annotate(Symbol.for('f'),
Symbol.for('af'))]),
Symbol.for('ar')),
back: new Record(Symbol.for('R'), [Symbol.for('f')]) },
annotation6: { forward: new Record(annotate(Symbol.for('R'),
Symbol.for('ar')),
[annotate(Symbol.for('f'),
Symbol.for('af'))]),
back: new Record(Symbol.for('R'), [Symbol.for('f')]) },
annotation7: { forward: annotate([], Symbol.for('a'), Symbol.for('b'), Symbol.for('c')),
back: new Record<Pointer>(Symbol.for('R'), [Symbol.for('f')]) },
annotation7: { forward: annotate<Pointer>([], Symbol.for('a'), Symbol.for('b'), Symbol.for('c')),
back: [] },
list1: { forward: [1, 2, 3, 4],
back: [1, 2, 3, 4] },
@ -123,7 +214,7 @@ describe('common test suite', () => {
type Variety = 'normal' | 'nondeterministic' | 'decode';
function runTestCase(variety: Variety, tName: string, binaryForm: Bytes, annotatedTextForm: Value) {
function runTestCase(variety: Variety, tName: string, binaryForm: Bytes, annotatedTextForm: Value<Pointer>) {
describe(tName, () => {
const textForm = strip(annotatedTextForm);
const {forward, back} = (function () {
@ -150,10 +241,10 @@ describe('common test suite', () => {
});
}
const tests = peel(TestCases._.cases(peel(samples))!) as Dictionary<Value>;
tests.forEach((t0: Value, tName0: Value) => {
const tests = peel(TestCases._.cases(peel(samples))!) as Dictionary<Pointer, Value<Pointer>>;
tests.forEach((t0: Value<Pointer>, tName0: Value<Pointer>) => {
const tName = Symbol.keyFor(strip(tName0) as symbol)!;
const t = peel(t0) as Record;
const t = peel(t0) as Record<Pointer>;
switch (t.label) {
case Symbol.for('Test'):
runTestCase('normal', tName, strip(t[0]) as Bytes, t[1]);

View File

@ -4,7 +4,7 @@ import '../src/node_support';
declare global {
namespace jest {
interface Matchers<R> {
is(expected: Value): R;
is<T extends object>(expected: Value<T>): R;
toThrowFilter(f: (e: Error) => boolean): R;
}
}

View File

@ -257,11 +257,12 @@ def annotate(v, *anns):
return v
class Decoder(Codec):
def __init__(self, packet=b'', include_annotations=False):
def __init__(self, packet=b'', include_annotations=False, decode_pointer=None):
super(Decoder, self).__init__()
self.packet = packet
self.index = 0
self.include_annotations = include_annotations
self.decode_pointer = decode_pointer
def extend(self, data):
self.packet = self.packet[self.index:] + data
@ -327,6 +328,10 @@ class Decoder(Codec):
a = self.next()
v = self.next()
return self.unshift_annotation(a, v)
if tag == 0x86:
if self.decode_pointer is None:
raise DecodeError('No decode_pointer function supplied')
return self.wrap(self.decode_pointer(self.next()))
if tag >= 0x90 and tag <= 0x9f: return self.wrap(tag - (0xa0 if tag > 0x9c else 0x90))
if tag >= 0xa0 and tag <= 0xaf: return self.wrap(self.nextint(tag - 0xa0 + 1))
if tag == 0xb0: return self.wrap(self.nextint(self.varint()))
@ -350,16 +355,17 @@ class Decoder(Codec):
self.index = start
return None
def decode(bs):
return Decoder(packet=bs).next()
def decode(bs, **kwargs):
return Decoder(packet=bs, **kwargs).next()
def decode_with_annotations(bs):
return Decoder(packet=bs, include_annotations=True).next()
def decode_with_annotations(bs, **kwargs):
return Decoder(packet=bs, include_annotations=True, **kwargs).next()
class Encoder(Codec):
def __init__(self):
def __init__(self, encode_pointer=id):
super(Encoder, self).__init__()
self.buffer = bytearray()
self.encode_pointer = encode_pointer
def contents(self):
return bytes(self.buffer)
@ -428,10 +434,12 @@ class Encoder(Codec):
try:
i = iter(v)
except TypeError:
raise EncodeError('Cannot encode %r' % (v,))
self.buffer.append(0x86)
self.append(self.encode_pointer(v))
return
self.encodevalues(5, i)
def encode(v):
e = Encoder()
def encode(v, **kwargs):
e = Encoder(**kwargs)
e.append(v)
return e.contents()

View File

@ -157,6 +157,37 @@ class CodecTests(unittest.TestCase):
self._roundtrip((False,) * 100, _buf(0xb5, b'\x80' * 100, 0x84))
self._roundtrip((False,) * 200, _buf(0xb5, b'\x80' * 200, 0x84))
def test_pointer_id(self):
    """Encoding plain objects yields pointer encodings keyed on identity.

    Two distinct instances must encode to different bytes, the same
    instance must encode reproducibly, and each encoding must start with
    the pointer tag byte 0x86.
    """
    # NOTE(review): `_e` is a helper defined elsewhere in this test module;
    # presumably it encodes with the default `encode_pointer` -- confirm.
    class Opaque:
        def __init__(self, a):
            self.a = a

    first, second = Opaque(1), Opaque(1)
    self.assertNotEqual(_e(first), _e(second))
    self.assertEqual(_e(first), _e(first))

    from .preserves import _ord
    for instance in (first, second):
        self.assertEqual(_ord(_e(instance)[0]), 0x86)
def test_decode_pointer_absent(self):
    """Decoding a packet that starts with tag 0x86 without a decode_pointer callback raises DecodeError."""
    pointer_packet = b'\x86\xa0\xff'
    self.assertRaises(DecodeError, decode, pointer_packet)
def test_encode_pointer(self):
    """A caller-supplied encode_pointer decides what value follows each 0x86 tag."""
    seen = []

    def enc(p):
        # Hand out consecutive indices: 0 for the first object, 1 for the next, ...
        index = len(seen)
        seen.append(p)
        return index

    encoded = encode([object(), object()], encode_pointer=enc)
    self.assertEqual(encoded, b'\xb5\x86\x90\x86\x91\x84')
def test_decode_pointer(self):
    """A caller-supplied decode_pointer maps each value following 0x86 back to an object."""
    objects = [123, 234]
    packet = b'\xb5\x86\x90\x86\x91\x84'
    # The packet carries pointers to 0 and 1; resolve them by list index.
    decoded = decode(packet, decode_pointer=objects.__getitem__)
    self.assertEqual(decoded, (123, 234))
def add_method(d, tName, fn):
if hasattr(fn, 'func_name'):
# python2
@ -223,11 +254,26 @@ def install_exn_test(d, tName, bs, check_proc):
self.fail('did not fail as expected')
add_method(d, tName, test_exn)
class Pointer:
    """Test stand-in for a decoded pointer.

    Wraps the value that followed the pointer tag, with annotations
    stripped, so that two pointers compare and hash by that value.
    """

    def __init__(self, v):
        # Strip annotations so equality and hashing ignore them.
        self.v = strip_annotations(v)

    @staticmethod
    def value(i):
        """Return the value wrapped by pointer `i` (used as an `encode_pointer` callback)."""
        return i.v

    def __eq__(self, other):
        if other.__class__ is self.__class__:
            return self.v == other.v
        # Previously fell off the end and returned None for foreign types;
        # NotImplemented lets Python fall back to its default comparison,
        # so `pointer == something_else` evaluates to a real boolean.
        return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive `!=` from `__eq__`, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self.v)
class CommonTestSuite(unittest.TestCase):
import os
with open(os.path.join(os.path.dirname(__file__),
'../../../tests/samples.bin'), 'rb') as f:
samples = Decoder(f.read(), include_annotations=True).next()
samples = Decoder(f.read(), include_annotations=True, decode_pointer=Pointer).next()
TestCases = Record.makeConstructor('TestCases', 'cases')
@ -257,13 +303,13 @@ class CommonTestSuite(unittest.TestCase):
raise Exception('Unsupported test kind', t.key)
def DS(self, bs):
return decode(bs)
return decode(bs, decode_pointer=Pointer)
def D(self, bs):
return decode_with_annotations(bs)
return decode_with_annotations(bs, decode_pointer=Pointer)
def E(self, v):
return encode(v)
return encode(v, encode_pointer=Pointer.value)
class RecordTests(unittest.TestCase):
def test_getters(self):

View File

@ -5,7 +5,7 @@ except ImportError:
setup(
name="preserves",
version="0.4.0",
version="0.5.0",
author="Tony Garnock-Jones",
author_email="tonyg@leastfixedpoint.com",
license="Apache Software License",

View File

@ -11,6 +11,7 @@
(struct record (label fields) #:transparent)
(struct float (value) #:transparent) ;; a marker for single-precision I/O
(struct annotated (annotations item) #:transparent)
(struct pointer (value) #:transparent)
;;---------------------------------------------------------------------------
;; Reader
@ -29,6 +30,7 @@
(match (next)
[(annotated as i) (annotated (cons a as) i)]
[i (annotated (list a) i)]))]
[#x86 (pointer (next))]
[(? (between #x90 #x9C) v) (- v #x90)]
[(? (between #x9D #x9F) v) (- v #xA0)]
[(? (between #xA0 #xAF) v) (next-integer (- v #xA0 -1))]
@ -85,6 +87,8 @@
(for [(a (in-list as))] (write-byte #x85 out-port) (output a))
(output v)]
[(pointer v) (write-byte #x86 out-port) (output v)]
[(? integer?)
(cond [(<= -3 v -1) (write-byte (+ v #xA0) out-port)]
[(<= 0 v 12) (write-byte (+ v #x90) out-port)]

View File

@ -25,8 +25,14 @@
(define (read-preserve [in-port (current-input-port)]
#:read-syntax? [read-syntax? #f]
#:decode-pointer [decode-pointer #f]
#:source [source (object-name in-port)])
(define b (peek-byte in-port))
(cond [(eof-object? b) b]
[(<= #x80 b #xBF) (read-preserve/binary in-port #:read-syntax? read-syntax?)]
[else (read-preserve/text in-port #:read-syntax? read-syntax? #:source source)]))
[(<= #x80 b #xBF) (read-preserve/binary in-port
#:read-syntax? read-syntax?
#:decode-pointer decode-pointer)]
[else (read-preserve/text in-port
#:read-syntax? read-syntax?
#:decode-pointer decode-pointer
#:source source)]))

View File

@ -0,0 +1,9 @@
#lang racket/base
(provide object-id)
(define table (make-weak-hasheq))
(define next 0)
(define (object-id x)
(hash-ref! table x (lambda () (let ((v next)) (set! next (+ v 1)) v))))

View File

@ -32,7 +32,7 @@
[(? list?) 8]
[(? set?) 9]
[(? dict?) 10]
[_ (error 'preserve-order "Cannot compare value ~v" v)]))
[_ 11]))
(define-syntax chain-order
(syntax-rules ()

View File

@ -13,9 +13,12 @@
(define (default-on-short) (error 'read-preserve/binary "Short Preserves binary"))
(define (default-on-fail message . args) (error 'read-preserve/binary (apply format message args)))
;; Fallback pointer decoder for the binary reader: ignores `v` and raises
;; an error. `read-preserve/binary` substitutes it when no #:decode-pointer
;; procedure is supplied, so input containing a pointer fails with a clear
;; message instead of an attempt to apply #f.
(define (default-decode-pointer v)
(error 'read-preserve/binary "No decode-pointer function supplied"))
(define (bytes->preserve bs
#:read-syntax? [read-syntax? #f]
#:decode-pointer [decode-pointer #f]
#:on-short [on-short default-on-short]
[on-fail default-on-fail])
(call-with-input-bytes
@ -23,6 +26,7 @@
(lambda (p)
(match (read-preserve/binary p
#:read-syntax? read-syntax?
#:decode-pointer decode-pointer
#:on-short on-short
on-fail)
[(? eof-object?) (on-short)]
@ -32,9 +36,11 @@
(define (read-preserve/binary [in-port (current-input-port)]
#:read-syntax? [read-syntax? #f]
#:decode-pointer [decode-pointer0 #f]
#:on-short [on-short default-on-short]
[on-fail default-on-fail])
(define read-annotations? read-syntax?)
(define decode-pointer (or decode-pointer0 default-decode-pointer))
(let/ec return
(define (next) (wrap (pos) (next* (next-byte))))
@ -70,6 +76,7 @@
(if read-annotations?
(annotate (next) a)
(next)))]
[#x86 (decode-pointer (next))]
[(? (between #x90 #x9C) v) (- v #x90)]
[(? (between #x9D #x9F) v) (- v #xA0)]
[(? (between #xA0 #xAF) v) (next-integer (- v #xA0 -1))]

View File

@ -24,13 +24,18 @@
pos
#f))
;; Fallback pointer decoder for the text reader: ignores `v` and raises an
;; error. `read-preserve/text` substitutes it when no #:decode-pointer
;; procedure is supplied, so a `#!` form in the input fails with a clear
;; message instead of an attempt to apply #f.
(define (default-decode-pointer v)
(error 'read-preserve/text "No decode-pointer function supplied"))
(define (string->preserve s
#:read-syntax? [read-syntax? #f]
#:decode-pointer [decode-pointer #f]
#:source [source "<string>"])
(define p (open-input-string s))
(when read-syntax? (port-count-lines! p))
(define v (read-preserve/text p
#:read-syntax? read-syntax?
#:decode-pointer decode-pointer
#:source source))
(when (eof-object? v)
(parse-error* #:raise-proc raise-read-eof-error p source "Unexpected end of input"))
@ -48,8 +53,10 @@
(define (read-preserve/text [in-port (current-input-port)]
#:read-syntax? [read-syntax? #f]
#:decode-pointer [decode-pointer0 #f]
#:source [source (object-name in-port)])
(define read-annotations? read-syntax?)
(define decode-pointer (or decode-pointer0 default-decode-pointer))
;;---------------------------------------------------------------------------
;; Core of parser
@ -89,6 +96,7 @@
(apply parse-error (string-append "Embedded binary value: " message) args))
#:read-syntax? read-syntax?
#:on-short (lambda () (parse-error "Incomplete embedded binary value")))]
[#\! (decode-pointer (next))]
[c (parse-error "Invalid # syntax: ~v" c)])]
[#\< (match (read-sequence #\>)

View File

@ -9,14 +9,22 @@
(require racket/runtime-path)
(require syntax/srcloc)
;; Test-local representation of a decoded pointer: a transparent struct
;; wrapping a single value.
(struct pointer (value) #:transparent)
;; Builds a `pointer` around `v` after passing it through
;; `strip-annotations`. Supplied as the #:decode-pointer argument to the
;; readers exercised in this test file.
(define (pointer/no-annotations v)
(pointer (strip-annotations v)))
(define (d bs #:allow-invalid-prefix? [allow-invalid-prefix? #f])
(for [(i (in-range 1 (- (bytes-length bs) 1)))]
(define result (bytes->preserve (subbytes bs 0 i) #:on-short (lambda () 'short) void))
(define result (bytes->preserve (subbytes bs 0 i)
#:decode-pointer pointer/no-annotations
#:on-short (lambda () 'short) void))
(when (and (not (eq? result 'short))
(not (and allow-invalid-prefix? (void? result))))
(error 'd "~a-byte prefix of ~v does not read as short; result: ~v" i bs result)))
(bytes->preserve bs
#:read-syntax? #t
#:decode-pointer pointer/no-annotations
#:on-short (lambda () 'short)
void))
@ -125,16 +133,31 @@
(match (hash-ref samples-txt-expected t-name text-form)
[(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations
[v (values v v #t)]))
(check-equal? text-form back loc) ;; expectation 1
(check-equal? (d-strip (preserve->bytes text-form)) back loc) ;; expectation 2
(check-equal? (d-strip (preserve->bytes forward)) back loc) ;; expectation 3
(check-equal? (d-strip binary-form) back loc) ;; expectation 4
(check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5
(check-equal? (d (preserve->bytes annotated-text-form)) annotated-text-form loc) ;; expectation 6
(check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7
(check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8
(check-equal? text-form back loc) ;; expectation 1
(check-equal? (d-strip (preserve->bytes #:encode-pointer pointer-value text-form))
back
loc) ;; expectation 2
(check-equal? (d-strip (preserve->bytes #:encode-pointer pointer-value forward))
back
loc) ;; expectation 3
(check-equal? (d-strip binary-form) back loc) ;; expectation 4
(check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5
(check-equal? (d (preserve->bytes #:encode-pointer pointer-value annotated-text-form))
annotated-text-form
loc) ;; expectation 6
(check-equal? (string->preserve #:decode-pointer pointer/no-annotations
(preserve->string #:encode-pointer pointer-value text-form))
back
loc) ;; expectation 7
(check-equal? (string->preserve #:decode-pointer pointer/no-annotations
(preserve->string #:encode-pointer pointer-value forward))
back
loc) ;; expectation 8
;; similar to 8:
(check-equal? (string->preserve (preserve->string annotated-text-form) #:read-syntax? #t)
(check-equal? (string->preserve #:decode-pointer pointer/no-annotations
(preserve->string #:encode-pointer pointer-value
annotated-text-form)
#:read-syntax? #t)
annotated-text-form
loc)
(when (and (not (memq variety '(decode)))
@ -142,13 +165,16 @@
(and can-execute-nondet-with-canonicalization?)))
;; expectations 9 and 10
(check-equal? (preserve->bytes forward
#:encode-pointer pointer-value
#:canonicalizing? #t
#:write-annotations? #t)
binary-form
loc))
(unless (memq variety '(decode nondeterministic))
;; expectation 11
(check-equal? (preserve->bytes annotated-text-form #:write-annotations? #t)
(check-equal? (preserve->bytes annotated-text-form
#:encode-pointer pointer-value
#:write-annotations? #t)
binary-form
loc)))
@ -157,7 +183,10 @@
(testfile (call-with-input-file path
(lambda (p)
(port-count-lines! p)
(read-preserve p #:read-syntax? #t #:source path)))))
(read-preserve p
#:read-syntax? #t
#:decode-pointer pointer/no-annotations
#:source path)))))
(match-define (peel-annotations `#s(TestCases ,tests)) testfile)
(for [((t-name* t*) (in-hash (annotated-item tests)))]
(define t-name (strip-annotations t-name*))

View File

@ -43,19 +43,21 @@
["--no-annotations" "Strip annotations"
(set! annotations? #f)])
(struct pointer (value) #:transparent)
(let loop ((count count))
(when (positive? count)
(define v ((if annotations? values strip-annotations)
(match input-format
['any (read-preserve #:read-syntax? #t #:source "<stdin>")]
['text (read-preserve/text #:read-syntax? #t #:source "<stdin>")]
['binary (read-preserve/binary #:read-syntax? #t)])))
['any (read-preserve #:read-syntax? #t #:decode-pointer pointer #:source "<stdin>")]
['text (read-preserve/text #:read-syntax? #t #:decode-pointer pointer #:source "<stdin>")]
['binary (read-preserve/binary #:decode-pointer pointer #:read-syntax? #t)])))
(when (not (eof-object? v))
(void (match output-format
['text
(write-preserve/text v #:indent indent?)
(write-preserve/text v #:indent indent? #:encode-pointer pointer-value)
(newline)]
['binary
(write-preserve/binary v #:write-annotations? #t)]))
(write-preserve/binary v #:encode-pointer pointer-value #:write-annotations? #t)]))
(flush-output)
(loop (- count 1))))))

View File

@ -9,21 +9,26 @@
(require "float.rkt")
(require "annotation.rkt")
(require "varint.rkt")
(require "object-id.rkt")
(require racket/set)
(require racket/dict)
(require (only-in racket/list flatten))
(define (preserve->bytes v
#:canonicalizing? [canonicalizing? #t]
#:encode-pointer [encode-pointer #f]
#:write-annotations? [write-annotations? (not canonicalizing?)])
(call-with-output-bytes
(lambda (p) (write-preserve/binary v p
#:canonicalizing? canonicalizing?
#:encode-pointer encode-pointer
#:write-annotations? write-annotations?))))
(define (write-preserve/binary v [out-port (current-output-port)]
#:canonicalizing? [canonicalizing? #t]
#:encode-pointer [encode-pointer0 #f]
#:write-annotations? [write-annotations? (not canonicalizing?)])
(define encode-pointer (or encode-pointer0 object-id))
(define (output-byte b)
(write-byte b out-port))
@ -115,6 +120,8 @@
[(? set?) (with-seq 6 (output-set v))]
[(? dict?) (with-seq 7 (output-dict v))]
[_ (error 'write-preserve/binary "Invalid value: ~v" v)]))
[other
(output-byte #x86)
(output (encode-pointer other))]))
(output v))

View File

@ -12,6 +12,7 @@
(require "annotation.rkt")
(require "float.rkt")
(require "record.rkt")
(require "object-id.rkt")
(require racket/dict)
(require racket/set)
(require (only-in racket/port with-output-to-string))
@ -24,7 +25,9 @@
(define (write-preserve/text v0 [o (current-output-port)]
#:indent [indent-amount0 #f]
#:encode-pointer [encode-pointer0 #f]
#:write-annotations? [write-annotations? #t])
(define encode-pointer (or encode-pointer0 object-id))
(define indent-amount (match indent-amount0
[#f 0]
[#t 2] ;; a default
@ -164,15 +167,18 @@
[(? list?) (write-sequence distance "[" "," "]" write-value v)]
[(? set?) (write-sequence distance "#{" "," "}" write-value (set->list v))]
[(? dict?) (write-sequence distance "{" "," "}" write-key-value (dict->list v))]
[_ (error 'write-preserve/text "Cannot encode value ~v" v)]))
[other
(! "#!")
(write-value distance (encode-pointer other))]))
(write-value 0 v0))
(define (preserve->string v0
#:indent [indent-amount #f]
#:encode-pointer [encode-pointer #f]
#:write-annotations? [write-annotations? #t])
(with-output-to-string
(lambda () (write-preserve/text v0
#:indent indent-amount
#:encode-pointer encode-pointer
#:write-annotations? write-annotations?))))

View File

@ -41,6 +41,8 @@ pub enum ExpectedKind {
Set,
Dictionary,
Pointer,
SequenceOrSet, // Because of hacking up serde's data model: see open_sequence_or_set etc.
Option,

View File

@ -21,7 +21,6 @@ pub use value::Domain;
pub use value::IOValue;
pub use value::Map;
pub use value::NestedValue;
pub use value::NullDomain;
pub use value::PlainValue;
pub use value::RcValue;
pub use value::Set;

View File

@ -8,6 +8,7 @@ pub enum Tag {
Double,
End,
Annotation,
Pointer,
SmallInteger(i8),
MediumInteger(u8),
SignedInteger,
@ -46,6 +47,7 @@ impl TryFrom<u8> for Tag {
0x83 => Ok(Self::Double),
0x84 => Ok(Self::End),
0x85 => Ok(Self::Annotation),
0x86 => Ok(Self::Pointer),
0x90..=0x9c => Ok(Self::SmallInteger((v - 0x90) as i8)),
0x9d..=0x9f => Ok(Self::SmallInteger((v - 0x90) as i8 - 16)),
0xa0..=0xaf => Ok(Self::MediumInteger(v - 0xa0 + 1)),
@ -71,6 +73,7 @@ impl From<Tag> for u8 {
Tag::Double => 0x83,
Tag::End => 0x84,
Tag::Annotation => 0x85,
Tag::Pointer => 0x86,
Tag::SmallInteger(v) => if v < 0 { (v + 16) as u8 + 0x90 } else { v as u8 + 0x90 },
Tag::MediumInteger(count) => count - 1 + 0xa0,
Tag::SignedInteger => 0xb0,

View File

@ -5,7 +5,7 @@ use std::convert::TryFrom;
use std::convert::TryInto;
use std::marker::PhantomData;
use super::super::signed_integer::SignedInteger;
use super::super::value::{Value, NestedValue, IOValue, FALSE, TRUE, Map, Set, Record, Annotations};
use super::super::value::{Value, NestedValue, Domain, IOValue, FALSE, TRUE, Map, Set, Record, Annotations};
use super::constants::Tag;
use super::super::reader::{
@ -262,6 +262,10 @@ impl<'de, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, S> {
self.demand_next(read_annotations)?
}
}
Tag::Pointer => {
let v = self.demand_next(read_annotations)?;
Value::Domain(IOValue::from_preserves(v)?).wrap()
}
Tag::SmallInteger(v) => {
// TODO: prebuild these in value.rs
Value::from(v).wrap()
@ -358,6 +362,14 @@ impl<'de, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, S> {
Ok(self.peekend()?)
}
/// Begins reading a pointer by delegating to `next_compound` with
/// `Tag::Pointer` and `ExpectedKind::Pointer`.
// NOTE(review): `next_compound` is defined outside this view; presumably it
// consumes the tag and reports `ExpectedKind::Pointer` on a mismatch -- confirm.
fn open_pointer(&mut self) -> ReaderResult<()> {
self.next_compound(Tag::Pointer, ExpectedKind::Pointer)
}
/// Finishes reading a pointer. This reader consumes nothing here and
/// always returns `Ok(())`.
fn close_pointer(&mut self) -> ReaderResult<()> {
Ok(())
}
fn next_boolean(&mut self) -> ReaderResult<bool> {
match self.peek_next_nonannotation_tag()? {
Tag::False => { self.skip()?; Ok(false) }

View File

@ -215,6 +215,7 @@ impl Writer for BinaryOrderWriter {
type AnnWriter = PackedWriter<Vec<u8>>;
type SeqWriter = PackedWriter<Vec<u8>>;
type SetWriter = BinaryOrderWriter;
type PointerWriter = PackedWriter<Vec<u8>>;
binary_order_writer_method!(mut align(natural_chunksize: u64) -> Result<()>);
@ -271,6 +272,15 @@ impl Writer for BinaryOrderWriter {
fn end_set(&mut self, set: Self::SetWriter) -> Result<()> {
set.finish(self)
}
/// Writes the `Tag::Pointer` tag and returns the writer obtained from
/// `self.pop()`, into which the pointer's content is then written.
// NOTE(review): `pop` is defined outside this view; presumably it detaches
// the current buffer as a `PackedWriter<Vec<u8>>` -- confirm.
fn start_pointer(&mut self) -> Result<Self::PointerWriter> {
self.write_tag(Tag::Pointer)?;
Ok(self.pop())
}
/// Completes a pointer begun with `start_pointer`: hands the pointer
/// writer `ptr` back via `self.push` and returns `Ok(())`.
fn end_pointer(&mut self, ptr: Self::PointerWriter) -> Result<()> {
self.push(ptr);
Ok(())
}
}
macro_rules! fits_in_bytes {
@ -285,6 +295,7 @@ impl<W: std::io::Write> Writer for PackedWriter<W>
type AnnWriter = Self;
type SeqWriter = Self;
type SetWriter = BinaryOrderWriter;
type PointerWriter = Self;
fn start_annotations(&mut self) -> Result<Self::AnnWriter> {
Ok(self.suspend())
@ -478,4 +489,14 @@ impl<W: std::io::Write> Writer for PackedWriter<W>
fn end_set(&mut self, set: Self::SetWriter) -> Result<()> {
set.finish(self)
}
/// Writes the `Tag::Pointer` tag and returns the writer obtained from
/// `self.suspend()`, into which the pointer's content is then written.
// NOTE(review): `suspend` is defined outside this view; it is the same call
// `start_annotations` uses to obtain its nested writer.
fn start_pointer(&mut self) -> Result<Self::PointerWriter> {
self.write_tag(Tag::Pointer)?;
Ok(self.suspend())
}
/// Completes a pointer begun with `start_pointer`: passes the pointer
/// writer back to `self.resume` and reports success.
fn end_pointer(&mut self, ptr: Self::PointerWriter) -> Result<()> {
    // Named `ptr` (not `ann`) to match the `Writer` trait declaration.
    self.resume(ptr);
    Ok(())
}
}

View File

@ -15,6 +15,8 @@ pub trait Reader<'de> {
fn open_set(&mut self) -> ReaderResult<()>;
fn open_dictionary(&mut self) -> ReaderResult<()>;
fn close_compound(&mut self) -> ReaderResult<bool>;
fn open_pointer(&mut self) -> ReaderResult<()>;
fn close_pointer(&mut self) -> ReaderResult<()>;
//---------------------------------------------------------------------------
@ -142,6 +144,14 @@ impl<'r, 'de, R: Reader<'de>> Reader<'de> for &'r mut R {
fn close_compound(&mut self) -> ReaderResult<bool> {
(*self).close_compound()
}
/// Forwards to the `open_pointer` of the reader behind this `&mut` reference.
fn open_pointer(&mut self) -> ReaderResult<()> {
(*self).open_pointer()
}
/// Forwards to the `close_pointer` of the reader behind this `&mut` reference.
fn close_pointer(&mut self) -> ReaderResult<()> {
(*self).close_pointer()
}
}

View File

@ -19,6 +19,12 @@ use super::signed_integer::SignedInteger;
use crate::error::{Error, ExpectedKind, Received};
pub trait Domain: Sized + Debug + Clone + Eq + Hash + Ord {
fn from_preserves(v: IOValue) -> Result<Self, std::io::Error> {
Err(std::io::Error::new(std::io::ErrorKind::InvalidData,
format!("Cannot Preserves-decode domain-specific value {:?}",
v)))
}
fn as_preserves(&self) -> Result<IOValue, std::io::Error> {
Err(std::io::Error::new(std::io::ErrorKind::InvalidData,
format!("Cannot Preserves-encode domain-specific value {:?}",
@ -270,7 +276,7 @@ impl<N: NestedValue<D>, D: Domain> Debug for Value<N, D> {
f.debug_set().entries(v.iter()).finish()
}
Value::Dictionary(ref v) => f.debug_map().entries(v.iter()).finish(),
Value::Domain(ref d) => write!(f, "{:?}", d),
Value::Domain(ref d) => write!(f, "#!{:?}", d),
}
}
}
@ -1044,13 +1050,19 @@ impl<'de, Dom: Domain> serde::Deserialize<'de> for ArcValue<Dom> {
//---------------------------------------------------------------------------
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum NullDomain {}
impl Domain for NullDomain {}
#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct IOValue(Arc<AnnotatedValue<IOValue, NullDomain>>);
pub type UnwrappedIOValue = Value<IOValue, NullDomain>;
pub struct IOValue(Arc<AnnotatedValue<IOValue, IOValue>>);
pub type UnwrappedIOValue = Value<IOValue, IOValue>;
// `IOValue` serves as its own pointer domain: a pointer inside an `IOValue`
// holds another `IOValue`, so both conversions are the identity and never fail.
impl Domain for IOValue {
/// Returns `v` unchanged.
fn from_preserves(v: IOValue) -> Result<Self, std::io::Error> {
Ok(v)
}
/// Returns a clone of `self`.
fn as_preserves(&self) -> Result<IOValue, std::io::Error> {
Ok(self.clone())
}
}
lazy_static! {
pub static ref FALSE: IOValue = IOValue(Arc::new(AnnotatedValue(Annotations::empty(), Value::Boolean(false))));
@ -1058,27 +1070,27 @@ lazy_static! {
pub static ref EMPTY_SEQ: IOValue = IOValue(Arc::new(AnnotatedValue(Annotations::empty(), Value::Sequence(Vec::new()))));
}
impl NestedValue<NullDomain> for IOValue {
fn wrap(anns: Annotations<Self, NullDomain>, v: Value<Self, NullDomain>) -> Self {
impl NestedValue<IOValue> for IOValue {
fn wrap(anns: Annotations<Self, IOValue>, v: Value<Self, IOValue>) -> Self {
IOValue(Arc::new(AnnotatedValue::new(anns, v)))
}
fn annotations(&self) -> &Annotations<Self, NullDomain> {
fn annotations(&self) -> &Annotations<Self, IOValue> {
&(self.0).0
}
fn value(&self) -> &Value<Self, NullDomain> {
fn value(&self) -> &Value<Self, IOValue> {
&(self.0).1
}
fn pieces(self) -> (Annotations<Self, NullDomain>, Value<Self, NullDomain>) {
fn pieces(self) -> (Annotations<Self, IOValue>, Value<Self, IOValue>) {
match Arc::try_unwrap(self.0) {
Ok(AnnotatedValue(anns, v)) => (anns, v),
Err(r) => (r.0.clone(), r.1.clone()),
}
}
fn value_owned(self) -> Value<Self, NullDomain> {
fn value_owned(self) -> Value<Self, IOValue> {
match Arc::try_unwrap(self.0) {
Ok(AnnotatedValue(_anns, v)) => v,
Err(r) => r.1.clone(),

View File

@ -20,6 +20,7 @@ pub trait Writer: Sized {
type AnnWriter: AnnotationWriter;
type SeqWriter: CompoundWriter;
type SetWriter: CompoundWriter;
type PointerWriter: Writer;
fn align(&mut self, natural_chunksize: u64) -> Result<()>;
@ -55,6 +56,9 @@ pub trait Writer: Sized {
fn start_dictionary(&mut self, entry_count: Option<usize>) -> Result<Self::SetWriter>;
fn end_set(&mut self, set: Self::SetWriter) -> Result<()>;
fn start_pointer(&mut self) -> Result<Self::PointerWriter>;
fn end_pointer(&mut self, ptr: Self::PointerWriter) -> Result<()>;
//---------------------------------------------------------------------------
fn write(&mut self, v: &IOValue) -> Result<()> {
@ -129,7 +133,11 @@ pub trait Writer: Sized {
}
self.end_set(c)
}
Value::Domain(ref d) => self.write(&d.as_preserves()?)
Value::Domain(ref d) => {
let mut c = self.start_pointer()?;
c.write(&d.as_preserves()?)?;
self.end_pointer(c)
}
}
}
}

View File

@ -4,7 +4,7 @@ title: "Preserves: an Expressive Data Language"
---
Tony Garnock-Jones <tonyg@leastfixedpoint.com>
Jan 2021. Version 0.4.0.
Jan 2021. Version 0.5.0.
[sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt
[spki]: http://world.std.com/~cme/html/spki.html
@ -17,21 +17,17 @@ Jan 2021. Version 0.4.0.
This document proposes a data model and serialization format called
*Preserves*.
Preserves supports *records* with user-defined *labels*. This relieves
the confusion caused by encoding records as dictionaries, seen in most
data languages in use on the web. It also allows Preserves to easily
represent the *labelled sums of products* as seen in many functional
programming languages.
Preserves also supports the usual suite of atomic and compound data
types, in particular including *binary* data as a distinct type from
text strings. Its *annotations* allow separation of data from metadata
such as [comments](conventions.html#comments), trace information, and
Preserves supports *records* with user-defined *labels*, embedded
*references*, and the usual suite of atomic and compound data types,
including *binary* data as a distinct type from text strings. Its
*annotations* allow separation of data from metadata such as
[comments](conventions.html#comments), trace information, and
provenance information.
Finally, Preserves defines precisely how to *compare* two values.
Comparison is based on the data model, not on syntax or on data
structures of any particular implementation language.
Preserves departs from many other data languages in defining how to
*compare* two values. Comparison is based on the data model, not on
syntax or on data structures of any particular implementation
language.
## Starting with Semantics
@ -40,23 +36,25 @@ definition of the *values* that we want to work with and give them
meaning independent of their syntax.
Our `Value`s fall into two broad categories: *atomic* and *compound*
data. Every `Value` is finite and non-cyclic.
data. Every `Value` is finite and non-cyclic. References, called
`Pointer`s, are a third, special-case category.
Value = Atom
| Compound
| Compound
| Pointer
Atom = Boolean
| Float
| Double
| SignedInteger
| String
| ByteString
| Symbol
| Float
| Double
| SignedInteger
| String
| ByteString
| Symbol
Compound = Record
| Sequence
| Set
| Dictionary
| Sequence
| Set
| Dictionary
**Total order.**<a name="total-order"></a> As we go, we will
incrementally specify a total order over `Value`s. Two values of the
@ -65,7 +63,7 @@ values of different kinds is essentially arbitrary, but having a total
order is convenient for many tasks, so we define it as
follows:
(Values) Atom < Compound
(Values) Atom < Compound < Pointer
(Compounds) Record < Sequence < Set < Dictionary
@ -162,6 +160,45 @@ pairwise distinct. Instances of `Dictionary` are compared by
lexicographic comparison of the sequences resulting from ordering each
`Dictionary`'s pairs in ascending order by key.
### Pointers.
A `Pointer` embeds *domain-specific*, potentially *stateful* or
*located* data into a `Value`.[^pointer-rationale] `Pointer`s may be
used to denote stateful objects, network services, object
capabilities, file descriptors, Unix processes, or other
possibly-stateful things. Because each `Pointer` is a domain-specific
datum, comparison of two `Pointer`s is done according to
domain-specific rules.
[^pointer-rationale]: **Rationale.** Why include `Pointer`s as a
special class, distinct from, say, a specially-labeled `Record`?
First, a `Record` can only hold other `Value`s: in order to embed
values such as live pointers to Java objects, some means of
"escaping" from the `Value` data type must be provided. Second,
`Pointer`s are meant to be able to denote stateful entities, for
which comparison by address is appropriate; however, we do not
wish to place restrictions on the *nature* of these entities: if
we had used `Record`s instead of distinct `Pointer`s, users would
have to invent an encoding of domain data into `Record`s that
reflected domain ordering into `Value` ordering. This is often
difficult and may not always be possible. Finally, because
`Pointer`s are intended to be able to represent network and memory
*locations*, they must be able to be rewritten at network and
process boundaries. Having a distinct class allows generic
`Pointer` rewriting without the quotation-related complications of
encoding references as, say, `Record`s.
*Examples.* In a Java or Python implementation, a `Pointer` may denote
a reference to a Java or Python object; comparison would be done via
the language's own rules for equivalence and ordering. In a Unix
application, a `Pointer` may denote an open file descriptor or a
process ID. In an HTTP-based application, each `Pointer` might be a
URL, compared according to
[RFC 6943](https://tools.ietf.org/html/rfc6943#section-3.3). When a
`Value` is serialized for storage or transfer, embedded `Pointer`s
will usually be represented as ordinary `Value`s, in which case the
ordinary rules for comparing `Value`s will apply.
## Textual Syntax
Now we have discussed `Value`s and their meanings, we may turn to
@ -204,7 +241,7 @@ Standalone documents may have trailing whitespace.
Any `Value` may be preceded by whitespace.
Value = ws (Record / Collection / Atom / Compact)
Value = ws (Record / Collection / Atom / Pointer / Compact)
Collection = Sequence / Dictionary / Set
Atom = Boolean / Float / Double / SignedInteger /
String / ByteString / Symbol
@ -364,6 +401,11 @@ double quote mark.
definition of “token representation”, and with the
[R6RS definition of identifiers](http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-7.html#node_sec_4.2.4).
A `Pointer` is written as a `Value` chosen to represent the denoted
object, prefixed with `#!`.
Pointer = "#!" Value
Finally, any `Value` may be represented by escaping from the textual
syntax to the [compact binary syntax](#compact-binary-syntax) by
prefixing a `ByteString` containing the binary representation of the
@ -467,11 +509,11 @@ write `varint(m)` for the varint-encoding of `m`. Quoting the
The following table illustrates varint-encoding.
| Number, `m` | `m` in binary, grouped into 7-bit chunks | `varint(m)` bytes |
| ------ | ------------------- | ------------ |
| 15 | `0001111` | 15 |
| 300 | `0000010 0101100` | 172 2 |
| 1000000000 | `0000011 1011100 1101011 0010100 0000000` | 128 148 235 220 3 |
| Number, `m` | `m` in binary, grouped into 7-bit chunks | `varint(m)` bytes |
| ------ | ------------------- | ------------ |
| 15 | `0001111` | 15 |
| 300 | `0000010 0101100` | 172 2 |
| 1000000000 | `0000011 1011100 1101011 0010100 0000000` | 128 148 235 220 3 |
It is an error for a varint-encoded `m` in a `Repr` to be anything
other than the unique shortest encoding for that `m`. That is, a
@ -579,6 +621,13 @@ contained within the `Value` unmodified.
The functions `binary32(F)` and `binary64(D)` yield big-endian 4- and
8-byte IEEE 754 binary representations of `F` and `D`, respectively.
### Pointers.
The `Repr` of a `Pointer` is the `Repr` of a `Value` chosen to
represent the denoted object, prefixed with `[0x86]`.
«#!V» = [0x86] ++ «V»
### Annotations.
To annotate a `Repr` `r` with some `Value` `v`, prepend `r` with
@ -596,8 +645,8 @@ syntax `@a@b[]`, i.e. an empty sequence annotated with two symbols,
The total ordering specified [above](#total-order) means that the following statements are true:
"bzz" < "c" < "caa"
#t < 3.0f < 3.0 < 3 < "3" < |3| < []
"bzz" < "c" < "caa" < #!"a"
#t < 3.0f < 3.0 < 3 < "3" < |3| < [] < #!#t
### Simple examples.
@ -774,10 +823,6 @@ the same `Value` to yield different binary `Repr`s.
## Acknowledgements
The use of the low-order bits in certain SignedInteger tags for the
length of the following data is inspired by a similar feature of
[CBOR](http://cbor.io/).
The treatment of commas as whitespace in the text syntax is inspired
by the same feature of [EDN](https://github.com/edn-format/edn).
@ -810,7 +855,8 @@ a binary-syntax document; otherwise, it should be interpreted as text.
83 - Double
84 - End marker
85 - Annotation
(8x) RESERVED 86-8F
86 - Pointer
(8x) RESERVED 87-8F
9x - Small integers 0..12,-3..-1
An - Small integers, (n+1) bytes long

Binary file not shown.

View File

@ -111,6 +111,9 @@
list9: @"Unexpected close bracket" <ParseError "]">
list10: @"Missing end byte" <DecodeShort #x"b58080">
noinput0: @"No input at all" <DecodeEOF #x"">
pointer0: <Test #x"8690" #!0>
pointer1: <Test #x"868690" #!#!0>
pointer2: <Test #x"b5869086b10568656c6c6f84" [#!0 #!"hello"]>
record1: <Test #x"b4 b30763617074757265 b4 b30764697363617264 84 84" <capture <discard>>>
record2: <Test #x"b4 b3076f627365727665 b4 b305737065616b b4 b30764697363617264 84 b4 b30763617074757265 b4 b30764697363617264 84 84 84 84" <observe <speak <discard>, <capture <discard>>>>>
record3: <Test #x"b4 b5 b3067469746c6564 b306706572736f6e 92 b3057468696e67 91 84 a065 b109426c61636b77656c6c b4 b30464617465 a1071d 92 93 84 b1024472 84" <[titled person 2 thing 1] 101 "Blackwell" <date 1821 2 3> "Dr">>