diff --git a/implementations/javascript/packages/core/src/annotated.ts b/implementations/javascript/packages/core/src/annotated.ts index 1555152..5eecb8f 100644 --- a/implementations/javascript/packages/core/src/annotated.ts +++ b/implementations/javascript/packages/core/src/annotated.ts @@ -4,6 +4,7 @@ import type { GenericEmbedded } from "./embedded"; import type { Value } from "./values"; import type { Encoder, Preservable } from "./encoder"; import type { Writer, PreserveWritable } from "./writer"; +import * as IO from "./iolist"; export interface Position { line?: number; @@ -71,14 +72,12 @@ export class Annotated implements Preservable, PreserveW return isAnnotated(v) ? v : void 0; } - __preserve_on__(encoder: Encoder): void { - if (encoder.includeAnnotations) { - for (const a of this.annotations) { - encoder.state.emitbyte(Tag.Annotation); - encoder.push(a); - } + __preserve_on__(encoder: Encoder): IO.IOList { + if (encoder.includeAnnotations && this.annotations.length > 0) { + return [Tag.Annotation, encoder._encodevalues([this.item, ... this.annotations])]; + } else { + return encoder._encode(this.item); } - encoder.push(this.item); } __preserve_text_on__(w: Writer): void { diff --git a/implementations/javascript/packages/core/src/bytes.ts b/implementations/javascript/packages/core/src/bytes.ts index 40cae0b..c9a069e 100644 --- a/implementations/javascript/packages/core/src/bytes.ts +++ b/implementations/javascript/packages/core/src/bytes.ts @@ -3,6 +3,7 @@ import { GenericEmbedded } from './embedded'; import { Encoder, Preservable } from './encoder'; import { Value } from './values'; import { Writer, PreserveWritable } from './writer'; +import * as IO from './iolist'; const textEncoder = new TextEncoder(); const textDecoder = new TextDecoder(); @@ -145,10 +146,8 @@ export class Bytes implements Preservable, PreserveWritable { return this.toHex(); } - __preserve_on__(encoder: Encoder) { - encoder.state.emitbyte(Tag.ByteString); - encoder.state.varint(this.length); - encoder.state.emitbytes(this._view); + __preserve_on__(_encoder: Encoder): IO.IOList { + return [Tag.ByteString, this._view]; } __preserve_text_on__(w: Writer) { diff --git a/implementations/javascript/packages/core/src/constants.ts b/implementations/javascript/packages/core/src/constants.ts index d583885..72fb4f2 100644 --- a/implementations/javascript/packages/core/src/constants.ts +++ b/implementations/javascript/packages/core/src/constants.ts @@ -1,16 +1,8 @@ export enum Tag { - False = 0x80, + False = 0xa0, True, Float, - Double, - End, - Annotation, - Embedded, - - SmallInteger_lo = 0x90, - MediumInteger_lo = 0xa0, - - SignedInteger = 0xb0, + SignedInteger, String, ByteString, Symbol, @@ -18,4 +10,7 @@ export enum Tag { Sequence, Set, Dictionary, + Embedded, + + Annotation = 0xbf, } diff --git a/implementations/javascript/packages/core/src/decoder.ts b/implementations/javascript/packages/core/src/decoder.ts index f5316c1..7ba03c9 100644 --- a/implementations/javascript/packages/core/src/decoder.ts +++ b/implementations/javascript/packages/core/src/decoder.ts @@ -6,7 +6,6 @@ import { DoubleFloat, SingleFloat } from "./float"; import { Record } from "./record"; import { Bytes, BytesLike, underlying } from "./bytes"; import { Value } from "./values"; -import { is } from "./is"; import { embed, GenericEmbedded, Embedded, EmbeddedTypeDecode } from "./embedded"; import { ReaderStateOptions } from "reader"; @@ -47,21 +46,29 @@ export interface TypedDecoder { closeCompound(): boolean; } -export function asLiteral, Annotated>>( - actual: Value, - expected: E): E | undefined -{ - return is(actual, expected) ? expected : void 0; -} +type DecoderStateMark = { + index: number; + inSequence: boolean; +}; export class DecoderState { - packet: Uint8Array; - index = 0; options: DecoderOptions; + packet: Uint8Array; + count: number | null; + index = 0; + inSequence = false; constructor(packet: BytesLike, options: DecoderOptions) { - this.packet = underlying(packet); this.options = options; + this.packet = underlying(packet); + this.count = null; + } + + setExpectedCount(expectedCount: number) { + if (this.count !== null) { + throw new Error(`Attempt to setExpectedCount to ${expectedCount} when count already ${this.count}`); + } + this.count = expectedCount; } get includeAnnotations(): boolean { @@ -78,15 +85,23 @@ export class DecoderState { } atEnd(): boolean { - return this.index >= this.packet.length; + if (this.count === null) { // toplevel + return this.index >= this.packet.length; + } else { // nested + return this.count <= 0; + } } - mark(): number { - return this.index; + mark(): DecoderStateMark { + return { + index: this.index, + inSequence: this.inSequence, + }; } - restoreMark(m: number): void { - this.index = m; + restoreMark(m: DecoderStateMark): void { + this.index = m.index; + this.inSequence = m.inSequence; } shortGuard(body: () => R, short: () => R): R { @@ -107,59 +122,110 @@ export class DecoderState { nextbyte(): number { if (this.atEnd()) throw new ShortPacket("Short packet"); + if (this.count !== null) this.count--; return this.packet[this.index++]; } - nextbytes(n: number): DataView { - const start = this.index; + _rewind(): undefined { + this.index--; + if (this.count !== null) this.count++; + return void 0; + } + + error(message: string, offset = 0): never { + throw new DecodeError(message, { pos: this.index + offset }); + } + + _ensureCounted(): number { + if (this.count === null) { + this.error("Attempt to retrieve sized object in uncounted context"); + } + return this.count; + } + + nextbytes(): DataView { + const n = this._ensureCounted(); + const c = new DataView(this.packet.buffer, this.packet.byteOffset + this.index, n); this.index += n; - if (this.index > this.packet.length) throw new ShortPacket("Short packet"); - // ^ NOTE: greater-than, not greater-than-or-equal-to - this makes atEnd() inappropriate - return new DataView(this.packet.buffer, this.packet.byteOffset + start, n); + this.count = 0; + return c; + } + + _checkLengthInfo(toConsume: number) { + if (this.count === null) { + if (this.index + toConsume > this.packet.length) { + throw new ShortPacket("Short packet"); + } + } else { + if (toConsume > this.count) { + this.error(`Attempt to read ${toConsume} bytes when only ${this.count} are available`); + } + } + } + + skip(byteCount: number) { + this._checkLengthInfo(byteCount); + this.index += byteCount; + if (this.count !== null) this.count -= byteCount; } varint(): number { // TODO: Bignums :-/ - const v = this.nextbyte(); - if (v < 128) return v; - return (this.varint() << 7) + (v - 128); + let v = this.nextbyte(); + { + let redundantLeadingZeroCount = 0; + while (true) { + if (v !== 0) break; + redundantLeadingZeroCount++; + if (redundantLeadingZeroCount >= 8) { + this.error("Excessively overlong varint", -1); + } + v = this.nextbyte(); + } + } + let acc = 0; + while (true) { + if (v >= 128) return (acc << 7) + v - 128; + acc = (acc << 7) + v; + v = this.nextbyte(); + } } - peekend(): boolean { - return (this.nextbyte() === Tag.End) || (this.index--, false); - } - - nextint(n: number): number { + nextint(): number { // TODO: Bignums :-/ - if (n === 0) return 0; + this._ensureCounted(); + if (this.count! === 0) return 0; let acc = this.nextbyte(); if (acc & 0x80) acc -= 256; - for (let i = 1; i < n; i++) acc = (acc * 256) + this.nextbyte(); + while (this.count! > 0) acc = (acc * 256) + this.nextbyte(); return acc; } - nextSmallOrMediumInteger(tag: number): number | undefined { - if (tag >= Tag.SmallInteger_lo && tag <= Tag.SmallInteger_lo + 15) { - const v = tag - Tag.SmallInteger_lo; - return v > 12 ? v - 16 : v; - } - if (tag >= Tag.MediumInteger_lo && tag <= Tag.MediumInteger_lo + 15) { - const n = tag - Tag.MediumInteger_lo; - return this.nextint(n + 1); - } - return void 0; - } - wrap(v: Value): Value { return this.includeAnnotations ? new Annotated(v) : v; } - unshiftAnnotation(a: Value, v: Annotated): Annotated { + unshiftAnnotation(anns: Value[], v: Annotated): Annotated { if (this.includeAnnotations) { - v.annotations.unshift(a); + v.annotations.unshift(... anns); } return v; } + + _withCount(newCount: number, f: () => R): R { + this._checkLengthInfo(newCount); + const nextCount = this.count === null ? null : this.count - newCount; + const savedInSequence = this.inSequence; + this.count = newCount; + this.inSequence = false; + try { + return f(); + } finally { + this.index += this.count; + this.count = nextCount; + this.inSequence = savedInSequence; + } + } } export const neverEmbeddedTypeDecode: EmbeddedTypeDecode = { @@ -172,22 +238,42 @@ export const neverEmbeddedTypeDecode: EmbeddedTypeDecode = { }, }; +function chopNul(bs: Bytes): Bytes { + if (bs.get(bs.length - 1) !== 0) throw new DecodeError("Missing mandatory NUL byte in string"); + return bs.slice(0, bs.length - 1); +} + export class Decoder implements TypedDecoder { state: DecoderState; embeddedDecode: EmbeddedTypeDecode; - constructor(state: DecoderState, embeddedDecode?: EmbeddedTypeDecode); - constructor(packet?: BytesLike, options?: DecoderEmbeddedOptions); + /* (A) */ constructor() + /* (B) */ constructor(state: DecoderState, embeddedDecode?: EmbeddedTypeDecode); + /* (C) */ constructor(options: DecoderEmbeddedOptions); + /* (D) */ constructor(packet: BytesLike, options?: DecoderEmbeddedOptions); constructor( - packet_or_state: (DecoderState | BytesLike) = new Uint8Array(0), + packet_or_state_or_options?: (DecoderState | BytesLike | DecoderEmbeddedOptions), options_or_embeddedDecode?: (DecoderEmbeddedOptions | EmbeddedTypeDecode)) { - if (packet_or_state instanceof DecoderState) { - this.state = packet_or_state; + if (packet_or_state_or_options === void 0) { + // (A) + this.state = new DecoderState(new Uint8Array(0), {}); + this.embeddedDecode = neverEmbeddedTypeDecode; + } else if (packet_or_state_or_options instanceof DecoderState) { + // (B) + this.state = packet_or_state_or_options; this.embeddedDecode = (options_or_embeddedDecode as EmbeddedTypeDecode) ?? neverEmbeddedTypeDecode; - } else { + } else if ('length' in packet_or_state_or_options) { + // (D) + const packet = packet_or_state_or_options; const options = (options_or_embeddedDecode as DecoderEmbeddedOptions) ?? {}; - this.state = new DecoderState(packet_or_state, options); + this.state = new DecoderState(packet, options); + this.state.setExpectedCount(packet.length); + this.embeddedDecode = options.embeddedDecode ?? neverEmbeddedTypeDecode; + } else { + // (C) + const options = packet_or_state_or_options; + this.state = new DecoderState(new Uint8Array(0), options); this.embeddedDecode = options.embeddedDecode ?? neverEmbeddedTypeDecode; } } @@ -198,13 +284,17 @@ export class Decoder implements TypedDecoder { nextvalues(): Value[] { const result = []; - while (!this.state.peekend()) result.push(this.next()); + this.state.inSequence = true; + while (!this.state.atEnd()) result.push(this.next()); + this.state.inSequence = false; return result; } static dictionaryFromArray(vs: Value[]): Dictionary { const d = new Dictionary(); - if (vs.length % 2) throw new DecodeError("Missing dictionary value"); + if (vs.length % 2) { + throw new DecodeError("Missing dictionary value"); + } for (let i = 0; i < vs.length; i += 2) { d.set(vs[i], vs[i+1]); } @@ -212,38 +302,47 @@ export class Decoder implements TypedDecoder { } next(): Value { + if (this.state.inSequence) { + return this.state._withCount(this.state.varint(), () => this._next()); + } else { + return this._next(); + } + } + + _next(): Value { const tag = this.state.nextbyte(); switch (tag) { case Tag.False: return this.state.wrap(false); case Tag.True: return this.state.wrap(true); - case Tag.Float: return this.state.wrap(new SingleFloat(this.state.nextbytes(4).getFloat32(0, false))); - case Tag.Double: return this.state.wrap(new DoubleFloat(this.state.nextbytes(8).getFloat64(0, false))); - case Tag.End: throw new DecodeError("Unexpected Compound end marker"); - case Tag.Annotation: { - const a = this.next(); - const v = this.next() as Annotated; - return this.state.unshiftAnnotation(a, v); + case Tag.Float: switch (this.state.count) { + case 4: return this.state.wrap(new SingleFloat(this.state.nextbytes().getFloat32(0, false))); + case 8: return this.state.wrap(new DoubleFloat(this.state.nextbytes().getFloat64(0, false))); + default: this.state.error("Bad floating-point value length " + this.state.count); } - case Tag.Embedded: return this.state.wrap(embed(this.embeddedDecode.decode(this.state))); - case Tag.SignedInteger: return this.state.wrap(this.state.nextint(this.state.varint())); - case Tag.String: return this.state.wrap(Bytes.from(this.state.nextbytes(this.state.varint())).fromUtf8()); - case Tag.ByteString: return this.state.wrap(Bytes.from(this.state.nextbytes(this.state.varint()))); - case Tag.Symbol: return this.state.wrap(Symbol.for(Bytes.from(this.state.nextbytes(this.state.varint())).fromUtf8())); + case Tag.SignedInteger: return this.state.wrap(this.state.nextint()); + case Tag.String: return this.state.wrap(chopNul(Bytes.from(this.state.nextbytes())).fromUtf8()); + case Tag.ByteString: return this.state.wrap(Bytes.from(this.state.nextbytes())); + case Tag.Symbol: return this.state.wrap(Symbol.for(Bytes.from(this.state.nextbytes()).fromUtf8())); case Tag.Record: { const vs = this.nextvalues(); - if (vs.length === 0) throw new DecodeError("Too few elements in encoded record"); + if (vs.length === 0) this.state.error("Too few elements in encoded record"); return this.state.wrap(Record(vs[0], vs.slice(1))); } case Tag.Sequence: return this.state.wrap(this.nextvalues()); case Tag.Set: return this.state.wrap(new Set(this.nextvalues())); case Tag.Dictionary: return this.state.wrap(Decoder.dictionaryFromArray(this.nextvalues())); - default: { - const v = this.state.nextSmallOrMediumInteger(tag); - if (v === void 0) { - throw new DecodeError("Unsupported Preserves tag: " + tag); - } - return this.state.wrap(v); + + case Tag.Embedded: return this.state.wrap(embed(this.embeddedDecode.decode(this.state))); + + case Tag.Annotation: { + const vs = this.nextvalues(); + if (vs.length === 0) this.state.error("Missing value in encoded annotation"); + const anns = vs.slice(1); + const v = vs[0] as Annotated; + return this.state.unshiftAnnotation(anns, v); } + + default: this.state.error("Unsupported Preserves tag: " + tag, -1); } } @@ -264,8 +363,11 @@ export class Decoder implements TypedDecoder { } skip(): void { - // TODO: be more efficient - this.next(); + if (this.state.inSequence) { + this.state.skip(this.state.varint()); + } else { + this.next(); + } } withEmbeddedDecode( @@ -278,7 +380,12 @@ export class Decoder implements TypedDecoder { skipAnnotations(): void { if (!this.state.atEnd() && this.state.packet[this.state.index] === Tag.Annotation) { this.state.index++; - this.skip(); + const valueLen = this.state.varint(); + this.state._checkLengthInfo(valueLen); + this.state.count = valueLen; + if (!this.state.atEnd() && this.state.packet[this.state.index] === Tag.Annotation) { + this.state.error("Immediately-nested Annotation detected"); + } } } @@ -287,91 +394,87 @@ export class Decoder implements TypedDecoder { switch (this.state.nextbyte()) { case Tag.False: return false; case Tag.True: return true; - default: return void 0; + default: return this.state._rewind(); } } nextFloat(): SingleFloat | undefined { this.skipAnnotations(); - switch (this.state.nextbyte()) { - case Tag.Float: return new SingleFloat(this.state.nextbytes(4).getFloat32(0, false)); - default: return void 0; + if (this.state.nextbyte() !== Tag.Float || this.state.count !== 4) { + return this.state._rewind(); } + return new SingleFloat(this.state.nextbytes().getFloat32(0, false)); } nextDouble(): DoubleFloat | undefined { this.skipAnnotations(); - switch (this.state.nextbyte()) { - case Tag.Double: return new DoubleFloat(this.state.nextbytes(8).getFloat64(0, false)); - default: return void 0; + if (this.state.nextbyte() !== Tag.Float || this.state.count !== 8) { + return this.state._rewind(); } + return new DoubleFloat(this.state.nextbytes().getFloat64(0, false)); } nextEmbedded(): Embedded | undefined { this.skipAnnotations(); - switch (this.state.nextbyte()) { - case Tag.Embedded: return embed(this.embeddedDecode.decode(this.state)); - default: return void 0; - } + if (this.state.nextbyte() !== Tag.Embedded) return this.state._rewind(); + return embed(this.embeddedDecode.decode(this.state)); } nextSignedInteger(): number | undefined { this.skipAnnotations(); - const b = this.state.nextbyte(); - switch (b) { - case Tag.SignedInteger: return this.state.nextint(this.state.varint()); - default: return this.state.nextSmallOrMediumInteger(b); - } + if (this.state.nextbyte() !== Tag.SignedInteger) return this.state._rewind(); + return this.state.nextint(); } nextString(): string | undefined { this.skipAnnotations(); - switch (this.state.nextbyte()) { - case Tag.String: return Bytes.from(this.state.nextbytes(this.state.varint())).fromUtf8(); - default: return void 0; - } + if (this.state.nextbyte() !== Tag.String) return this.state._rewind(); + return Bytes.from(this.state.nextbytes()).fromUtf8(); } nextByteString(): Bytes | undefined { this.skipAnnotations(); - switch (this.state.nextbyte()) { - case Tag.ByteString: return Bytes.from(this.state.nextbytes(this.state.varint())); - default: return void 0; - } + if (this.state.nextbyte() !== Tag.ByteString) return this.state._rewind(); + return Bytes.from(this.state.nextbytes()); } nextSymbol(): symbol | undefined { this.skipAnnotations(); - switch (this.state.nextbyte()) { - case Tag.Symbol: - return Symbol.for(Bytes.from(this.state.nextbytes(this.state.varint())).fromUtf8()); - default: - return void 0; + if (this.state.nextbyte() !== Tag.Symbol) return this.state._rewind(); + return Symbol.for(Bytes.from(this.state.nextbytes()).fromUtf8()); + } + + _openSequencelike(expectedTag: number): boolean { + this.skipAnnotations(); + if (this.state.nextbyte() !== expectedTag) { + this.state._rewind(); + return false; + } else { + this.state.inSequence = true; + return true; } } openRecord(): boolean { - this.skipAnnotations(); - return (this.state.nextbyte() === Tag.Record) || (this.state.index--, false); + return this._openSequencelike(Tag.Record); } openSequence(): boolean { - this.skipAnnotations(); - return (this.state.nextbyte() === Tag.Sequence) || (this.state.index--, false); + return this._openSequencelike(Tag.Sequence); } openSet(): boolean { - this.skipAnnotations(); - return (this.state.nextbyte() === Tag.Set) || (this.state.index--, false); + return this._openSequencelike(Tag.Set); } openDictionary(): boolean { - this.skipAnnotations(); - return (this.state.nextbyte() === Tag.Dictionary) || (this.state.index--, false); + return this._openSequencelike(Tag.Dictionary); } closeCompound(): boolean { - return this.state.peekend(); + const r = this.state.atEnd(); + if (r) this.state.inSequence = false; + return r; } } diff --git a/implementations/javascript/packages/core/src/dictionary.ts b/implementations/javascript/packages/core/src/dictionary.ts index a2db039..ac18011 100644 --- a/implementations/javascript/packages/core/src/dictionary.ts +++ b/implementations/javascript/packages/core/src/dictionary.ts @@ -7,6 +7,7 @@ import { GenericEmbedded } from "./embedded"; import type { Preservable } from "./encoder"; import type { Writer, PreserveWritable } from "./writer"; import { annotations, Annotated } from "./annotated"; +import * as IO from "./iolist"; export type DictionaryType = 'Dictionary' | 'Set'; export const DictionaryType = Symbol.for('DictionaryType'); @@ -43,25 +44,20 @@ export class KeyedDictionary, V, T = GenericEmbedded> extends get [Symbol.toStringTag]() { return 'Dictionary'; } - __preserve_on__(encoder: Encoder) { + __preserve_on__(encoder: Encoder): IO.IOList { if (encoder.canonical) { const entries = Array.from(this); const pieces = entries.map<[Bytes, number]>(([k, _v], i) => [canonicalEncode(k), i]); pieces.sort((a, b) => Bytes.compare(a[0], b[0])); - encoder.state.emitbyte(Tag.Dictionary); - pieces.forEach(([_encodedKey, i]) => { - const [k, v] = entries[i]; - encoder.push(k); - encoder.push(v as unknown as Value); // Suuuuuuuper unsound - }); - encoder.state.emitbyte(Tag.End); + return [Tag.Dictionary, encoder._encodevalues(pieces.flatMap( + ([_encodedKey, i]) => entries[i] as Value[]))]; // Suuuuuuuper unsound } else { - encoder.state.emitbyte(Tag.Dictionary); + const r: IO.IOList = [Tag.Dictionary]; this.forEach((v, k) => { - encoder.push(k); - encoder.push(v as unknown as Value); // Suuuuuuuper unsound + r.push(encoder._encode(k)); + r.push(encoder._encode(v as unknown as Value)); // Suuuuuuuper unsound }); - encoder.state.emitbyte(Tag.End); + return r; } } @@ -123,13 +119,13 @@ export class KeyedSet, T = GenericEmbedded> extends FlexSet) { + __preserve_on__(encoder: Encoder): IO.IOList { if (encoder.canonical) { const pieces = Array.from(this).map<[Bytes, K]>(k => [canonicalEncode(k), k]); pieces.sort((a, b) => Bytes.compare(a[0], b[0])); - encoder.encodevalues(Tag.Set, pieces.map(e => e[1])); + return [Tag.Set, encoder._encodevalues(pieces.map(e => e[1]))]; } else { - encoder.encodevalues(Tag.Set, this); + return [Tag.Set, encoder._encodevalues(this)]; } } diff --git a/implementations/javascript/packages/core/src/embedded.ts b/implementations/javascript/packages/core/src/embedded.ts index 1df670f..0f75762 100644 --- a/implementations/javascript/packages/core/src/embedded.ts +++ b/implementations/javascript/packages/core/src/embedded.ts @@ -1,10 +1,11 @@ import type { DecoderState } from "./decoder"; -import type { EncoderState } from "./encoder"; +import type { EncoderOptions } from "./encoder"; import type { Value } from "./values"; import { ReaderStateOptions } from "./reader"; +import * as IO from "./iolist"; export type EmbeddedTypeEncode = { - encode(s: EncoderState, v: T): void; + encode(s: EncoderOptions, v: T): IO.IOList; } export type EmbeddedTypeDecode = { diff --git a/implementations/javascript/packages/core/src/encoder.ts b/implementations/javascript/packages/core/src/encoder.ts index 011fa56..60cd43b 100644 --- a/implementations/javascript/packages/core/src/encoder.ts +++ b/implementations/javascript/packages/core/src/encoder.ts @@ -3,14 +3,15 @@ import { Bytes } from "./bytes"; import { Value } from "./values"; import { EncodeError } from "./codec"; import { Record, Tuple } from "./record"; -import { GenericEmbedded, EmbeddedTypeEncode } from "./embedded"; +import { EmbeddedTypeEncode } from "./embedded"; import type { Embedded } from "./embedded"; +import * as IO from "./iolist"; export type Encodable = Value | Preservable | Iterable> | ArrayBufferView; export interface Preservable { - __preserve_on__(encoder: Encoder): void; + __preserve_on__(encoder: Encoder): IO.IOList; } export function isPreservable(v: any): v is Preservable { @@ -46,22 +47,54 @@ export function embeddedId(v: any): number { } export const identityEmbeddedTypeEncode: EmbeddedTypeEncode = { - encode(s: EncoderState, v: any): void { - new Encoder(s, this).push(embeddedId(v)); + encode(s: EncoderOptions, v: any): IO.IOList { + return new Encoder(s, this)._encode(embeddedId(v)); } }; -export class EncoderState { - chunks: Array; - view: DataView; - index: number; - options: EncoderOptions; +export function encodeVarint(v: number): IO.IOList { + function wr(v: number, d: number): IO.IOList { + if (v < 128) { + return v + d; + } else { + return [wr(Math.floor(v / 128), 0), (v & 127) + d]; + } + } + return wr(v, 128); +} - constructor(options: EncoderOptions) { - this.chunks = []; - this.view = new DataView(new ArrayBuffer(256)); - this.index = 0; +export function encodeInt(v: number): IO.IOList { + // TODO: Bignums :-/ + + if (v === 0) return false; + if (v === -1) return 255; + + const plain_bitcount = Math.floor(Math.log2(v > 0 ? v : -(1 + v))) + 1; + const signed_bitcount = plain_bitcount + 1; + const bytecount = (signed_bitcount + 7) >> 3; + + function enc(n: number, x: number): IO.IOList { + return (n > 0) && [enc(n - 1, Math.floor(x / 256)), x & 255]; + }; + + return enc(bytecount, v); +} + +export class Encoder { + options: EncoderOptions; + embeddedEncode: EmbeddedTypeEncode; + + constructor(options: EncoderEmbeddedOptions); + constructor(options: EncoderOptions, embeddedEncode?: EmbeddedTypeEncode); + constructor(options: (EncoderOptions | EncoderEmbeddedOptions) = {}, + embeddedEncode?: EmbeddedTypeEncode) + { this.options = options; + if ('embeddedEncode' in options) { + this.embeddedEncode = options.embeddedEncode ?? identityEmbeddedTypeEncode; + } else { + this.embeddedEncode = embeddedEncode ?? identityEmbeddedTypeEncode; + } } get canonical(): boolean { @@ -72,189 +105,47 @@ export class EncoderState { return this.options.includeAnnotations ?? !this.canonical; } - contents(): Bytes { - if (this.chunks.length === 0) { - const resultLength = this.index; - this.index = 0; - return new Bytes(this.view.buffer.slice(0, resultLength)); - } else { - this.rotatebuffer(4096); - const chunks = this.chunks; - this.chunks = []; - return Bytes.concat(chunks); + encode(v: Encodable): Bytes { + return IO.ioListBytes(this._encode(v)); + } + + encodeString(v: Encodable): string { + return asLatin1(this.encode(v)._view); + } + + _encodevalues(items: Iterable>): IO.IOList { + const ios: IO.IOList = []; + for (let i of items) { + const c = IO.countIOList(this._encode(i)); + ios.push(encodeVarint(c.length)); + ios.push(c); } + return ios; } - /* Like contents(), but hands back a string containing binary data "encoded" via latin-1 */ - contentsString(): string { - if (this.chunks.length === 0) { - const s = asLatin1(new Uint8Array(this.view.buffer, 0, this.index)); - this.index = 0; - return s; - } else { - this.rotatebuffer(4096); - const chunks = this.chunks; - this.chunks = []; - return chunks.map(asLatin1).join(''); - } - } - - rotatebuffer(size: number) { - this.chunks.push(new Uint8Array(this.view.buffer, 0, this.index)); - this.view = new DataView(new ArrayBuffer(size)); - this.index = 0; - } - - makeroom(amount: number) { - if (this.index + amount > this.view.byteLength) { - this.rotatebuffer(amount + 4096); - } - } - - emitbyte(b: number) { - this.makeroom(1); - this.view.setUint8(this.index++, b); - } - - emitbytes(bs: Uint8Array) { - this.makeroom(bs.length); - (new Uint8Array(this.view.buffer)).set(bs, this.index); - this.index += bs.length; - } - - varint(v: number) { - while (v >= 128) { - this.emitbyte((v % 128) + 128); - v = Math.floor(v / 128); - } - this.emitbyte(v); - } - - encodeint(v: number) { - // TODO: Bignums :-/ - const plain_bitcount = Math.floor(Math.log2(v > 0 ? v : -(1 + v))) + 1; - const signed_bitcount = plain_bitcount + 1; - const bytecount = (signed_bitcount + 7) >> 3; - if (bytecount <= 16) { - this.emitbyte(Tag.MediumInteger_lo + bytecount - 1); - } else { - this.emitbyte(Tag.SignedInteger); - this.varint(bytecount); - } - const enc = (n: number, x: number) => { - if (n > 0) { - enc(n - 1, Math.floor(x / 256)); - this.emitbyte(x & 255); - } - }; - enc(bytecount, v); - } - - encodebytes(tag: Tag, bs: Uint8Array) { - this.emitbyte(tag); - this.varint(bs.length); - this.emitbytes(bs); - } -} - -export class Encoder { - state: EncoderState; - embeddedEncode: EmbeddedTypeEncode; - - constructor(options: EncoderEmbeddedOptions); - constructor(state: EncoderState, embeddedEncode?: EmbeddedTypeEncode); - constructor( - state_or_options: (EncoderState | EncoderEmbeddedOptions) = {}, - embeddedEncode?: EmbeddedTypeEncode) - { - if (state_or_options instanceof EncoderState) { - this.state = state_or_options; - this.embeddedEncode = embeddedEncode ?? identityEmbeddedTypeEncode; - } else { - this.state = new EncoderState(state_or_options); - this.embeddedEncode = state_or_options.embeddedEncode ?? identityEmbeddedTypeEncode; - } - } - - withEmbeddedEncode( - embeddedEncode: EmbeddedTypeEncode, - body: (e: Encoder) => void): this - { - body(new Encoder(this.state, embeddedEncode)); - return this; - } - - get canonical(): boolean { - return this.state.canonical; - } - - get includeAnnotations(): boolean { - return this.state.includeAnnotations; - } - - contents(): Bytes { - return this.state.contents(); - } - - contentsString(): string { - return this.state.contentsString(); - } - - encodevalues(tag: Tag, items: Iterable>) { - this.state.emitbyte(tag); - for (let i of items) { this.push(i); } - this.state.emitbyte(Tag.End); - } - - push(v: Encodable) { - if (isPreservable(v)) { - v.__preserve_on__(this); - } - else if (isPreservable(v)) { - v.__preserve_on__(this); - } - else if (typeof v === 'boolean') { - this.state.emitbyte(v ? Tag.True : Tag.False); - } - else if (typeof v === 'number') { - if (v >= -3 && v <= 12) { - this.state.emitbyte(Tag.SmallInteger_lo + ((v + 16) & 0xf)); - } else { - this.state.encodeint(v); - } - } - else if (typeof v === 'string') { - this.state.encodebytes(Tag.String, new Bytes(v)._view); - } - else if (typeof v === 'symbol') { + _encode(v: Encodable): IO.IOList { + if (isPreservable(v)) return v.__preserve_on__(this); + if (typeof v === 'boolean') return v ? Tag.True : Tag.False; + if (typeof v === 'number') return [Tag.SignedInteger, encodeInt(v)]; + if (typeof v === 'string') return [Tag.String, new Bytes(v)._view, 0]; + if (typeof v === 'symbol') { const key = Symbol.keyFor(v); if (key === void 0) throw new EncodeError("Cannot preserve non-global Symbol", v); - this.state.encodebytes(Tag.Symbol, new Bytes(key)._view); + return [Tag.Symbol, new Bytes(key)._view]; } - else if (ArrayBuffer.isView(v)) { + if (ArrayBuffer.isView(v)) { if (v instanceof Uint8Array) { - this.state.encodebytes(Tag.ByteString, v); + return [Tag.ByteString, v]; } else { - const bs = new Uint8Array(v.buffer, v.byteOffset, v.byteLength); - this.state.encodebytes(Tag.ByteString, bs); + return [Tag.ByteString, new Uint8Array(v.buffer, v.byteOffset, v.byteLength)]; } } - else if (Record.isRecord, Tuple>, T>(v)) { - this.state.emitbyte(Tag.Record); - this.push(v.label); - for (let i of v) { this.push(i); } - this.state.emitbyte(Tag.End); + if (Record.isRecord, Tuple>, T>(v)) { + return [Tag.Record, this._encodevalues([v.label, ... v])]; } - else if (isIterable>(v)) { - this.encodevalues(Tag.Sequence, v); - } - else { - ((v: Embedded) => { - this.state.emitbyte(Tag.Embedded); - this.embeddedEncode.encode(this.state, v.embeddedValue); - })(v); - } - return this; // for chaining + if (isIterable>(v)) return [Tag.Sequence, this._encodevalues(v)]; + return ((v: Embedded) => + [Tag.Embedded, this.embeddedEncode.encode(this.options, v.embeddedValue)])(v); } } @@ -262,34 +153,23 @@ export function encode( v: Encodable, options: EncoderEmbeddedOptions = {}): Bytes { - return new Encoder(options).push(v).contents(); + return new Encoder(options).encode(v); } const _canonicalEncoder = new Encoder({ canonical: true }); -let _usingCanonicalEncoder = false; export function canonicalEncode(v: Encodable, options?: EncoderEmbeddedOptions): Bytes; export function canonicalEncode(v: Encodable, options?: EncoderEmbeddedOptions): Bytes; export function canonicalEncode(v: any, options?: EncoderEmbeddedOptions): Bytes { - if (options === void 0 && !_usingCanonicalEncoder) { - _usingCanonicalEncoder = true; - const bs = _canonicalEncoder.push(v).contents(); - _usingCanonicalEncoder = false; - return bs; + if (options === void 0) { + return _canonicalEncoder.encode(v); } else { return encode(v, { ... options, canonical: true }); } } export function canonicalString(v: Encodable): string { - if (!_usingCanonicalEncoder) { - _usingCanonicalEncoder = true; - const s = _canonicalEncoder.push(v).contentsString(); - _usingCanonicalEncoder = false; - return s; - } else { - return new Encoder({ canonical: true }).push(v).contentsString(); - } + return _canonicalEncoder.encodeString(v); } export function encodeWithAnnotations(v: Encodable, diff --git a/implementations/javascript/packages/core/src/float.ts b/implementations/javascript/packages/core/src/float.ts index a831a44..7277378 100644 --- a/implementations/javascript/packages/core/src/float.ts +++ b/implementations/javascript/packages/core/src/float.ts @@ -4,6 +4,7 @@ import { Value } from "./values"; import type { GenericEmbedded } from "./embedded"; import type { Encoder, Preservable } from "./encoder"; import type { Writer, PreserveWritable } from "./writer"; +import * as IO from "./iolist"; export type FloatType = 'Single' | 'Double'; export const FloatType = Symbol.for('FloatType'); @@ -53,11 +54,10 @@ export class SingleFloat extends Float implements Preservable, PreserveWrit return Float.isSingle(v) ? v : void 0; } - __preserve_on__(encoder: Encoder) { - encoder.state.emitbyte(Tag.Float); - encoder.state.makeroom(4); - encoder.state.view.setFloat32(encoder.state.index, this.value, false); - encoder.state.index += 4; + __preserve_on__(_encoder: Encoder): IO.IOList { + const bs = new Uint8Array(4); + new DataView(bs.buffer).setFloat32(0, this.value, false); + return [Tag.Float, bs]; } __preserve_text_on__(w: Writer) { @@ -82,11 +82,10 @@ export class DoubleFloat extends Float implements Preservable, PreserveWrit return Float.isDouble(v) ? v : void 0; } - __preserve_on__(encoder: Encoder) { - encoder.state.emitbyte(Tag.Double); - encoder.state.makeroom(8); - encoder.state.view.setFloat64(encoder.state.index, this.value, false); - encoder.state.index += 8; + __preserve_on__(_encoder: Encoder): IO.IOList { + const bs = new Uint8Array(8); + new DataView(bs.buffer).setFloat64(0, this.value, false); + return [Tag.Float, bs]; } __preserve_text_on__(w: Writer) { diff --git a/implementations/javascript/packages/core/src/iolist.ts b/implementations/javascript/packages/core/src/iolist.ts new file mode 100644 index 0000000..342e64e --- /dev/null +++ b/implementations/javascript/packages/core/src/iolist.ts @@ -0,0 +1,67 @@ +import { Bytes, BytesLike, underlying } from "./bytes"; + +export type IOList = number | BytesLike | IOList[] | false | CountedIOList; + +class CountedIOList { + value: IOList; + length: number; + + constructor(i: IOList) { + this.value = i; + this.length = iolistLength(i); + } +} + +export function pushByte(i: IOList, b: number): IOList { + if (Array.isArray(i)) { + i.push(b); + return i; + } else { + return [i, b]; + } +} + +export function append(i: IOList, j: IOList): IOList { + if (i === false) return j; + if (j === false) return i; + return [i, j]; +} + +export function iolistLength(i: IOList, acc = 0): number { + if (typeof(i) === 'number') return acc + 1; + if (i === false) return acc; + if (Array.isArray(i)) return i.reduce((acc, j) => iolistLength(j, acc), acc); + if (i instanceof CountedIOList) return acc + i.length; + return acc + i.length; +} + +export function countIOList(i: IOList): CountedIOList { + if (i instanceof CountedIOList) return i; + return new CountedIOList(i); +} + +export function ioListBytes(i: IOList): Bytes { + if (i instanceof Bytes) return i; + const buffer = new Bytes(iolistLength(i)); + + function fill(i: IOList, offset: number): number { + while (i instanceof CountedIOList) i = i.value; + if (typeof(i) === 'number') { + buffer._view[offset] = i; + return offset + 1; + } + if (i === false) { + return offset; + } + if (Array.isArray(i)) { + i.forEach(j => offset = fill(j, offset)); + return offset; + } + const bs = underlying(i); + buffer._view.set(bs, offset); + return offset + bs.length; + } + + fill(i, 0); + return buffer; +} diff --git a/implementations/javascript/packages/core/src/runtime.ts b/implementations/javascript/packages/core/src/runtime.ts index 4b9909e..930898b 100644 --- a/implementations/javascript/packages/core/src/runtime.ts +++ b/implementations/javascript/packages/core/src/runtime.ts @@ -19,3 +19,5 @@ export * from './strip'; export * from './text'; export * from './values'; export * from './writer'; + +export * as IO from './iolist'; diff --git a/implementations/javascript/packages/core/src/writer.ts b/implementations/javascript/packages/core/src/writer.ts index be55d34..725529d 100644 --- a/implementations/javascript/packages/core/src/writer.ts +++ b/implementations/javascript/packages/core/src/writer.ts @@ -1,8 +1,9 @@ import { isAnnotated } from './is'; import { Record, Tuple } from "./record"; import type { GenericEmbedded, Embedded, EmbeddedTypeEncode } from "./embedded"; -import { Encoder, EncoderState } from "./encoder"; +import { Encoder, EncoderOptions } from "./encoder"; import type { Value } from "./values"; +import * as IO from "./iolist"; export type Writable = Value | PreserveWritable | Iterable> | ArrayBufferView; @@ -23,8 +24,8 @@ export type EmbeddedWriter = { write(s: WriterState, v: T): void } | { toValue(v: T): Value }; export const genericEmbeddedTypeEncode: EmbeddedTypeEncode & EmbeddedWriter = { - encode(s: EncoderState, v: GenericEmbedded): void { - new Encoder(s, this).push(v.generic); + encode(s: EncoderOptions, v: GenericEmbedded): IO.IOList { + return new Encoder(s, this).encode(v.generic); }, toValue(v: GenericEmbedded): Value { @@ -33,7 +34,7 @@ export const genericEmbeddedTypeEncode: EmbeddedTypeEncode & Em }; export const neverEmbeddedTypeEncode: EmbeddedTypeEncode & EmbeddedWriter = { - encode(_s: EncoderState, _v: never): void { + encode(_s: EncoderOptions, _v: never): IO.IOList { throw new Error("Embeddeds not permitted encoding Preserves document"); }, diff --git a/implementations/javascript/packages/core/test/codec.test.ts b/implementations/javascript/packages/core/test/codec.test.ts index fe99f9c..2295663 100644 --- a/implementations/javascript/packages/core/test/codec.test.ts +++ b/implementations/javascript/packages/core/test/codec.test.ts @@ -11,7 +11,7 @@ import { Constants, Encoder, GenericEmbedded, - EncoderState, + EncoderOptions, EmbeddedType, DecoderState, Decoder, @@ -19,6 +19,7 @@ import { embed, genericEmbeddedTypeDecode, genericEmbeddedTypeEncode, + IO, } from '../src/index'; const { Tag } = Constants; import './test-utils'; @@ -68,7 +69,7 @@ describe('records', () => { describe('parsing from subarray', () => { it('should maintain alignment of nextbytes', () => { - const u = Uint8Array.of(1, 1, 1, 1, 0xb1, 0x03, 0x33, 0x33, 0x33); + const u = Uint8Array.of(1, 1, 1, 1, 0xa4, 0x33, 0x33, 0x33, 0x00); const bs = Bytes.from(u.subarray(4)); expect(decode(bs)).is("333"); }); @@ -77,13 +78,11 @@ describe('parsing from subarray', () => { describe('reusing buffer space', () => { it('should be done safely, even with nested dictionaries', () => { expect(canonicalEncode(fromJS(['aaa', Dictionary.fromJS({a: 1}), 'zzz'])).toHex()).is( - `b5 - b103616161 - b7 - b10161 91 - 84 - b1037a7a7a - 84`.replace(/\s+/g, '')); + `a8 + 85a461616100 + 88aa + 83a46100 82a301 + 85a47a7a7a00`.replace(/\s+/g, '')); }); }); @@ -99,8 +98,8 @@ describe('encoding and decoding embeddeds', () => { return this.fromValue(new Decoder(d).next()); } - encode(e: EncoderState, v: object): void { - new Encoder(e).push(this.toValue(v)); + encode(e: EncoderOptions, v: object): IO.IOList { + return new Encoder(e)._encode(this.toValue(v)); } equals(a: object, b: object): boolean { @@ -137,7 +136,7 @@ describe('encoding and decoding embeddeds', () => { expect(bs1).is(bs3); }); it('should refuse to decode embeddeds when no function has been supplied', () => { - expect(() => decode(Bytes.from([Tag.Embedded, Tag.SmallInteger_lo]))) + expect(() => decode(Bytes.from([Tag.Embedded, Tag.SignedInteger]))) .toThrow("Embeddeds not permitted at this point in Preserves document"); }); it('should encode properly', () => { @@ -147,9 +146,8 @@ describe('encoding and decoding embeddeds', () => { const B = embed({b: 2}); expect(encode([A, B], { embeddedEncode: pt })).is( Bytes.from([Tag.Sequence, - Tag.Embedded, Tag.SmallInteger_lo, - Tag.Embedded, Tag.SmallInteger_lo + 1, - Tag.End])); + 0x82, Tag.Embedded, Tag.SignedInteger, + 0x83, Tag.Embedded, Tag.SignedInteger, 1])); expect(objects).toEqual([A.embeddedValue, B.embeddedValue]); }); it('should decode properly', () => { @@ -161,9 +159,8 @@ describe('encoding and decoding embeddeds', () => { objects.push(Y.embeddedValue); expect(decode(Bytes.from([ Tag.Sequence, - Tag.Embedded, Tag.SmallInteger_lo, - Tag.Embedded, Tag.SmallInteger_lo + 1, - Tag.End + 0x82, Tag.Embedded, Tag.SignedInteger, + 0x83, Tag.Embedded, Tag.SignedInteger, 1, ]), { embeddedDecode: pt })).is([X, Y]); }); it('should store embeddeds embedded in map keys correctly', () => { @@ -315,8 +312,14 @@ describe('common test suite', () => { case Symbol.for('DecodeShort'): describe(tName, () => { it('should fail with ShortPacket', () => { - expect(() => D(strip(t[0]) as Bytes)) - .toThrowFilter(e => ShortPacket.isShortPacket(e)); + expect(() => { + const d = new Decoder({ + includeAnnotations: true, + embeddedDecode: genericEmbeddedTypeDecode, + }); + d.write(strip(t[0]) as Bytes); + return d.next(); + }).toThrowFilter(e => ShortPacket.isShortPacket(e)); }); }); break;