// preserves/implementations/javascript/packages/core/src/decoder.ts
//
// 489 lines
// 16 KiB
// TypeScript

import { Annotated } from "./annotated";
import { DecodeError, ShortPacket } from "./codec";
import { Tag } from "./constants";
import { Set, Dictionary } from "./dictionary";
import { DoubleFloat, SingleFloat } from "./float";
import { Record } from "./record";
import { Bytes, BytesLike, underlying } from "./bytes";
import { Value } from "./values";
import { embed, GenericEmbedded, Embedded, EmbeddedTypeDecode } from "./embedded";
import { ReaderStateOptions } from "reader";
/**
 * Options controlling how decoded values are produced.
 */
export interface DecoderOptions {
/**
 * When true, `DecoderState.wrap` wraps each decoded value in `Annotated`.
 * `DecoderState` treats an omitted value as `false`.
 */
includeAnnotations?: boolean;
}
/**
 * `DecoderOptions` extended with an optional decoder for embedded values.
 */
export interface DecoderEmbeddedOptions<T> extends DecoderOptions {
/**
 * Decoder used for embedded values. When omitted, the `Decoder` constructor
 * falls back to `neverEmbeddedTypeDecode`.
 */
embeddedDecode?: EmbeddedTypeDecode<T>;
}
/**
 * Pull-style decoding interface, implemented in this file by `Decoder`.
 *
 * The behavioural notes below describe what the `Decoder` implementation
 * does; other implementations are presumably expected to match.
 */
export interface TypedDecoder<T> {
/** True when no further input is available at the current level. */
atEnd(): boolean;
/** Captures the current position as an opaque token for `restoreMark`. */
mark(): any;
/** Returns to a position previously captured by `mark`. */
restoreMark(m: any): void;
/** Advances past the next value without returning it. */
skip(): void;
/** Decodes and returns the next value. */
next(): Value<T>;
/**
 * Invokes `body` with a decoder that uses `embeddedDecode` for embedded
 * values, and returns whatever `body` returns. `Decoder` builds the
 * temporary decoder over the same underlying state.
 */
withEmbeddedDecode<S, R>(
embeddedDecode: EmbeddedTypeDecode<S>,
body: (d: TypedDecoder<S>) => R): R;
// The `next*` readers below return the decoded value when the next tag is
// of the named kind. Otherwise `Decoder` un-reads the tag byte and returns
// `undefined`.
nextBoolean(): boolean | undefined;
nextFloat(): SingleFloat | undefined;
nextDouble(): DoubleFloat | undefined;
nextEmbedded(): Embedded<T> | undefined;
nextSignedInteger(): number | undefined;
nextString(): string | undefined;
nextByteString(): Bytes | undefined;
nextSymbol(): symbol | undefined;
// The `open*` methods return true (and enter the compound) when the next
// tag is of the named kind; otherwise `Decoder` un-reads the tag byte and
// returns false.
openRecord(): boolean;
openSequence(): boolean;
openSet(): boolean;
openDictionary(): boolean;
/**
 * Returns true when the current compound has no more content; `Decoder`
 * then also leaves sequence mode. Returns false otherwise.
 */
closeCompound(): boolean;
}
/**
 * Snapshot of a `DecoderState` position, produced by `DecoderState.mark()`
 * and reapplied by `DecoderState.restoreMark()`.
 */
type DecoderStateMark = {
// Value of `DecoderState.index` (offset into `packet`) when the mark was taken.
index: number;
// Value of `DecoderState.inSequence` when the mark was taken.
inSequence: boolean;
};
/**
 * Mutable read cursor over a packet of encoded bytes.
 *
 * Two pieces of state move together as bytes are consumed:
 *  - `index`: offset of the next unread byte in `packet`;
 *  - `count`: number of bytes still available in the current length-delimited
 *    region, or `null` at top level where no length is known.
 *
 * `inSequence` records whether the decoder is currently positioned inside a
 * compound whose members are each preceded by a varint length.
 */
export class DecoderState {
    options: DecoderOptions;
    packet: Uint8Array;
    count: number | null;
    index = 0;
    inSequence = false;

    /**
     * @param packet  initial bytes to decode
     * @param options decoding options (see `includeAnnotations`)
     */
    constructor(packet: BytesLike, options: DecoderOptions) {
        this.options = options;
        this.packet = underlying(packet);
        this.count = null;
    }

    /**
     * Sets the byte budget for a state that does not yet have one.
     *
     * @throws Error if a count has already been set.
     */
    setExpectedCount(expectedCount: number) {
        if (this.count !== null) {
            throw new Error(`Attempt to setExpectedCount to ${expectedCount} when count already ${this.count}`);
        }
        this.count = expectedCount;
    }

    /** Whether decoded values should be wrapped in `Annotated` (default false). */
    get includeAnnotations(): boolean {
        return this.options.includeAnnotations ?? false;
    }

    /**
     * Supplies more input. If everything so far has been consumed the packet
     * is replaced; otherwise the unread tail is kept and `data` is appended.
     * In both cases `index` is reset to 0. `count` is left untouched.
     */
    write(data: BytesLike) {
        if (this.index === this.packet.length) {
            this.packet = underlying(data);
        } else {
            this.packet = Bytes.concat([this.packet.slice(this.index), data])._view;
        }
        this.index = 0;
    }

    /**
     * True when no more bytes are available: at top level (`count === null`)
     * when `index` has reached the end of `packet`, otherwise when the
     * current region's byte budget is exhausted.
     */
    atEnd(): boolean {
        if (this.count === null) { // toplevel
            return this.index >= this.packet.length;
        } else { // nested
            return this.count <= 0;
        }
    }

    /**
     * Captures the current position.
     *
     * Besides the declared `index` and `inSequence` fields, the returned
     * object also carries the current `count` as an extra property, so that
     * `restoreMark` can put `index` and `count` back together.
     */
    mark(): DecoderStateMark {
        const snapshot = {
            index: this.index,
            inSequence: this.inSequence,
            count: this.count,
        };
        return snapshot;
    }

    /**
     * Returns to a position captured by `mark`.
     *
     * `count` is restored only if the mark carries one, so marks built by
     * hand with just `index` and `inSequence` keep working (and leave `count`
     * as it is).
     */
    restoreMark(m: DecoderStateMark): void {
        this.index = m.index;
        this.inSequence = m.inSequence;
        const savedCount = (m as DecoderStateMark & { count?: number | null }).count;
        if (savedCount !== undefined) this.count = savedCount;
    }

    /**
     * Runs `body`; if it fails with a `ShortPacket`, rewinds to the starting
     * position and returns `short()` instead. Any other exception propagates.
     */
    shortGuard<R>(body: () => R, short: () => R): R {
        if (this.atEnd()) return short();
        // ^ important somewhat-common case optimization - avoid the exception
        const start = this.mark();
        try {
            return body();
        } catch (e) {
            if (ShortPacket.isShortPacket(e)) {
                this.restoreMark(start);
                return short();
            }
            throw e;
        }
    }

    /**
     * Consumes and returns one byte.
     *
     * @throws ShortPacket if `atEnd()`.
     */
    nextbyte(): number {
        if (this.atEnd()) throw new ShortPacket("Short packet");
        if (this.count !== null) this.count--;
        return this.packet[this.index++];
    }

    /**
     * Un-reads one byte (the inverse of `nextbyte`). Returns `undefined` so
     * that callers can write `return this.state._rewind()`.
     */
    _rewind(): undefined {
        this.index--;
        if (this.count !== null) this.count++;
        return void 0;
    }

    /**
     * Throws a `DecodeError` positioned at `index + offset`.
     */
    error(message: string, offset = 0): never {
        throw new DecodeError(message, { pos: this.index + offset });
    }

    /**
     * Returns the current byte budget.
     *
     * @throws DecodeError when called at top level, where no budget exists.
     */
    _ensureCounted(): number {
        if (this.count === null) {
            this.error("Attempt to retrieve sized object in uncounted context");
        }
        return this.count;
    }

    /**
     * Consumes every remaining byte of the current region and returns them as
     * a `DataView` over the packet's buffer (no copy is made here).
     */
    nextbytes(): DataView {
        const n = this._ensureCounted();
        const c = new DataView(this.packet.buffer, this.packet.byteOffset + this.index, n);
        this.index += n;
        this.count = 0;
        return c;
    }

    /**
     * Verifies that `toConsume` bytes are available.
     *
     * @throws ShortPacket at top level when the packet is too short
     * @throws DecodeError in a counted region whose budget is too small
     */
    _checkLengthInfo(toConsume: number) {
        if (this.count === null) {
            if (this.index + toConsume > this.packet.length) {
                throw new ShortPacket("Short packet");
            }
        } else {
            if (toConsume > this.count) {
                this.error(`Attempt to read ${toConsume} bytes when only ${this.count} are available`);
            }
        }
    }

    /** Advances past `byteCount` bytes after checking they are available. */
    skip(byteCount: number) {
        this._checkLengthInfo(byteCount);
        this.index += byteCount;
        if (this.count !== null) this.count -= byteCount;
    }

    /**
     * Reads a variable-length unsigned integer: big-endian groups of seven
     * bits, where the final byte is the one with its high bit set.
     *
     * Up to seven redundant leading zero bytes are tolerated.
     *
     * Accumulation uses multiplication rather than `<<`: the shift operator
     * truncates to 32-bit signed integers, which silently wrapped values of
     * 2^31 or more (possibly to negative numbers, which callers then used as
     * lengths). Values that cannot be represented exactly as a JavaScript
     * number are rejected.
     *
     * @throws DecodeError on an overlong encoding or a value above
     *         `Number.MAX_SAFE_INTEGER`
     * @throws ShortPacket if the input ends mid-varint
     */
    varint(): number {
        // TODO: Bignums :-/
        let v = this.nextbyte();
        let redundantLeadingZeroCount = 0;
        while (v === 0) {
            redundantLeadingZeroCount++;
            if (redundantLeadingZeroCount >= 8) {
                this.error("Excessively overlong varint", -1);
            }
            v = this.nextbyte();
        }
        let acc = 0;
        while (v < 128) {
            acc = (acc * 128) + v;
            if (!Number.isSafeInteger(acc)) {
                this.error("Varint too large", -1);
            }
            v = this.nextbyte();
        }
        const result = (acc * 128) + (v - 128);
        if (!Number.isSafeInteger(result)) {
            this.error("Varint too large", -1);
        }
        return result;
    }

    /**
     * Reads all remaining bytes of the current region as a big-endian
     * two's-complement signed integer. An empty region decodes as 0.
     *
     * @throws DecodeError when called at top level (no byte budget)
     */
    nextint(): number {
        // TODO: Bignums :-/
        if (this._ensureCounted() === 0) return 0;
        let acc = this.nextbyte();
        if (acc & 0x80) acc -= 256; // sign-extend the leading byte
        while (this.count! > 0) acc = (acc * 256) + this.nextbyte();
        return acc;
    }

    /** Wraps `v` in `Annotated` when `includeAnnotations` is set; otherwise returns it as-is. */
    wrap<T>(v: Value<T>): Value<T> {
        return this.includeAnnotations ? new Annotated(v) : v;
    }

    /**
     * Prepends `anns` to `v.annotations` when `includeAnnotations` is set.
     * Returns `v` either way.
     */
    unshiftAnnotation<T>(anns: Value<T>[], v: Annotated<T>): Annotated<T> {
        if (this.includeAnnotations) {
            v.annotations.unshift(... anns);
        }
        return v;
    }

    /**
     * Runs `f` inside a nested region of exactly `newCount` bytes.
     *
     * While `f` runs, `count` is `newCount` and `inSequence` is false.
     * Afterwards (also when `f` throws) any bytes of the region that `f` did
     * not consume are skipped, the outer budget is restored minus the region's
     * size, and `inSequence` is restored.
     */
    _withCount<R>(newCount: number, f: () => R): R {
        this._checkLengthInfo(newCount);
        const nextCount = this.count === null ? null : this.count - newCount;
        const savedInSequence = this.inSequence;
        this.count = newCount;
        this.inSequence = false;
        try {
            return f();
        } finally {
            this.index += this.count; // skip whatever `f` left unread
            this.count = nextCount;
            this.inSequence = savedInSequence;
        }
    }
}
/**
 * Embedded-value decoder for contexts where embedded values are not allowed:
 * both entry points throw unconditionally.
 */
export const neverEmbeddedTypeDecode: EmbeddedTypeDecode<never> = {
    /** Always throws; binary input must not contain embedded values here. */
    decode: (_s: DecoderState): never => {
        throw new Error("Embeddeds not permitted at this point in Preserves document");
    },

    /** Always throws; an already-read value must not be converted to an embedded here. */
    fromValue: (_v: Value<GenericEmbedded>, _options: ReaderStateOptions): never => {
        throw new Error("Embeddeds not permitted at this point in Preserves document");
    },
};
/**
 * Strips the trailing NUL byte from `bs`.
 *
 * @returns the bytes of `bs` without its final byte
 * @throws DecodeError if the final byte of `bs` is not 0 (this includes the
 *         case where `bs.get` yields no byte at all for that position)
 */
function chopNul(bs: Bytes): Bytes {
    const lastIndex = bs.length - 1;
    if (bs.get(lastIndex) === 0) {
        return bs.slice(0, lastIndex);
    }
    throw new DecodeError("Missing mandatory NUL byte in string");
}
/**
 * Decoder for binary-encoded values, offering both whole-value decoding
 * (`next`, `try_next`) and the pull-style `TypedDecoder` interface
 * (`nextBoolean`, `openRecord`, ...).
 *
 * All position bookkeeping lives in `state`; `embeddedDecode` is consulted
 * whenever an embedded value is encountered.
 */
export class Decoder<T = never> implements TypedDecoder<T> {
    state: DecoderState;
    embeddedDecode: EmbeddedTypeDecode<T>;

    /* (A) */ constructor();
    /* (B) */ constructor(state: DecoderState, embeddedDecode?: EmbeddedTypeDecode<T>);
    /* (C) */ constructor(options: DecoderEmbeddedOptions<T>);
    /* (D) */ constructor(packet: BytesLike, options?: DecoderEmbeddedOptions<T>);
    /**
     * Four call shapes are supported:
     *  - (A) no arguments: empty input, default options;
     *  - (B) an existing `DecoderState` to share, plus an optional embedded decoder;
     *  - (C) options only: empty input, to be fed later via `write`;
     *  - (D) a packet plus optional options; the state's byte budget is set to
     *        the packet's length.
     *
     * Wherever no embedded decoder is supplied, `neverEmbeddedTypeDecode` is used.
     */
    constructor(
        packet_or_state_or_options?: (DecoderState | BytesLike | DecoderEmbeddedOptions<T>),
        options_or_embeddedDecode?: (DecoderEmbeddedOptions<T> | EmbeddedTypeDecode<T>))
    {
        if (packet_or_state_or_options === void 0) {
            // (A)
            this.state = new DecoderState(new Uint8Array(0), {});
            this.embeddedDecode = neverEmbeddedTypeDecode;
        } else if (packet_or_state_or_options instanceof DecoderState) {
            // (B)
            this.state = packet_or_state_or_options;
            this.embeddedDecode = (options_or_embeddedDecode as EmbeddedTypeDecode<T>) ?? neverEmbeddedTypeDecode;
        } else if ('length' in packet_or_state_or_options) {
            // (D)
            const packet = packet_or_state_or_options;
            const options = (options_or_embeddedDecode as DecoderEmbeddedOptions<T>) ?? {};
            this.state = new DecoderState(packet, options);
            this.state.setExpectedCount(packet.length);
            this.embeddedDecode = options.embeddedDecode ?? neverEmbeddedTypeDecode;
        } else {
            // (C)
            const options = packet_or_state_or_options;
            this.state = new DecoderState(new Uint8Array(0), options);
            this.embeddedDecode = options.embeddedDecode ?? neverEmbeddedTypeDecode;
        }
    }

    /** Supplies more input bytes to the underlying state. */
    write(data: BytesLike) {
        this.state.write(data);
    }

    /**
     * Decodes every remaining value of the current region as a sequence of
     * length-prefixed members. Sequence mode is switched on for the duration
     * and switched off again afterwards.
     */
    nextvalues(): Value<T>[] {
        const result: Value<T>[] = [];
        this.state.inSequence = true;
        while (!this.state.atEnd()) result.push(this.next());
        this.state.inSequence = false;
        return result;
    }

    /**
     * Builds a dictionary from a flat `[key, value, key, value, ...]` array.
     *
     * @throws DecodeError if the array has an odd number of elements.
     */
    static dictionaryFromArray<T>(vs: Value<T>[]): Dictionary<T> {
        const d = new Dictionary<T>();
        if (vs.length % 2) {
            throw new DecodeError("Missing dictionary value");
        }
        for (let i = 0; i < vs.length; i += 2) {
            d.set(vs[i], vs[i+1]);
        }
        return d;
    }

    /**
     * Decodes and returns the next value. In sequence mode the value is
     * preceded by a varint giving its length, and decoding is confined to
     * that many bytes.
     */
    next(): Value<T> {
        if (this.state.inSequence) {
            return this.state._withCount(this.state.varint(), () => this._next());
        } else {
            return this._next();
        }
    }

    /**
     * Decodes one value starting at its tag byte. For every tag except
     * boolean, the value's payload is whatever remains of the current region.
     *
     * @throws DecodeError on an unknown tag or malformed payload.
     */
    _next(): Value<T> {
        const tag = this.state.nextbyte();
        switch (tag) {
            case Tag.False: return this.state.wrap<T>(false);
            case Tag.True: return this.state.wrap<T>(true);
            case Tag.Float: switch (this.state.count) {
                // The payload length distinguishes single from double precision.
                case 4: return this.state.wrap<T>(new SingleFloat(this.state.nextbytes().getFloat32(0, false)));
                case 8: return this.state.wrap<T>(new DoubleFloat(this.state.nextbytes().getFloat64(0, false)));
                default: this.state.error("Bad floating-point value length " + this.state.count);
            }
            case Tag.SignedInteger: return this.state.wrap<T>(this.state.nextint());
            case Tag.String: return this.state.wrap<T>(chopNul(Bytes.from(this.state.nextbytes())).fromUtf8());
            case Tag.ByteString: return this.state.wrap<T>(Bytes.from(this.state.nextbytes()));
            case Tag.Symbol: return this.state.wrap<T>(Symbol.for(Bytes.from(this.state.nextbytes()).fromUtf8()));
            case Tag.Record: {
                // First member is the label, the rest are the fields.
                const vs = this.nextvalues();
                if (vs.length === 0) this.state.error("Too few elements in encoded record");
                return this.state.wrap<T>(Record(vs[0], vs.slice(1)));
            }
            case Tag.Sequence: return this.state.wrap<T>(this.nextvalues());
            case Tag.Set: return this.state.wrap<T>(new Set(this.nextvalues()));
            case Tag.Dictionary: return this.state.wrap<T>(Decoder.dictionaryFromArray(this.nextvalues()));
            case Tag.Embedded: return this.state.wrap<T>(embed(this.embeddedDecode.decode(this.state)));
            case Tag.Annotation: {
                // First member is the annotated value, the rest are its annotations.
                const vs = this.nextvalues();
                if (vs.length === 0) this.state.error("Missing value in encoded annotation");
                const anns = vs.slice(1);
                const v = vs[0] as Annotated<T>;
                return this.state.unshiftAnnotation(anns, v);
            }
            default: this.state.error("Unsupported Preserves tag: " + tag, -1);
        }
    }

    /**
     * Like `next`, but returns `undefined` (with the position rewound) when
     * the input is exhausted or ends part-way through a value.
     */
    try_next(): Value<T> | undefined {
        return this.state.shortGuard(() => this.next(), () => void 0);
    }

    /** True when no further input is available at the current level. */
    atEnd(): boolean {
        return this.state.atEnd();
    }

    /** Captures the current position for a later `restoreMark`. */
    mark(): any {
        return this.state.mark();
    }

    /** Returns to a position captured by `mark`. */
    restoreMark(m: any): void {
        this.state.restoreMark(m);
    }

    /**
     * Advances past the next value. In sequence mode the length prefix is
     * read and that many bytes are skipped without decoding; otherwise the
     * value is decoded and discarded.
     */
    skip(): void {
        if (this.state.inSequence) {
            this.state.skip(this.state.varint());
        } else {
            this.next();
        }
    }

    /**
     * Runs `body` with a temporary decoder that shares this decoder's state
     * but uses `embeddedDecode` for embedded values.
     */
    withEmbeddedDecode<S, R>(
        embeddedDecode: EmbeddedTypeDecode<S>,
        body: (d: TypedDecoder<S>) => R): R
    {
        return body(new Decoder(this.state, embeddedDecode));
    }

    /**
     * If the next byte is an annotation tag, steps over the tag and the
     * annotated value's length prefix so that the cursor sits on the
     * annotated value itself.
     *
     * NOTE(review): this overwrites `state.count` with the annotated value's
     * length without saving the previous count, and steps over the tag byte
     * without adjusting `count`. Nothing here skips the annotations that
     * follow the value. Confirm this is intended before relying on the outer
     * byte budget after a `next*` call on annotated input.
     *
     * @throws DecodeError if the annotated value is itself an annotation.
     */
    skipAnnotations(): void {
        if (!this.state.atEnd() && this.state.packet[this.state.index] === Tag.Annotation) {
            this.state.index++;
            const valueLen = this.state.varint();
            this.state._checkLengthInfo(valueLen);
            this.state.count = valueLen;
            if (!this.state.atEnd() && this.state.packet[this.state.index] === Tag.Annotation) {
                this.state.error("Immediately-nested Annotation detected");
            }
        }
    }

    /** Reads a boolean, or un-reads the tag and returns `undefined` if the next value is not one. */
    nextBoolean(): boolean | undefined {
        this.skipAnnotations();
        switch (this.state.nextbyte()) {
            case Tag.False: return false;
            case Tag.True: return true;
            default: return this.state._rewind();
        }
    }

    /** Reads a float with a 4-byte payload, or un-reads the tag and returns `undefined`. */
    nextFloat(): SingleFloat | undefined {
        this.skipAnnotations();
        if (this.state.nextbyte() !== Tag.Float || this.state.count !== 4) {
            return this.state._rewind();
        }
        return new SingleFloat(this.state.nextbytes().getFloat32(0, false));
    }

    /** Reads a float with an 8-byte payload, or un-reads the tag and returns `undefined`. */
    nextDouble(): DoubleFloat | undefined {
        this.skipAnnotations();
        if (this.state.nextbyte() !== Tag.Float || this.state.count !== 8) {
            return this.state._rewind();
        }
        return new DoubleFloat(this.state.nextbytes().getFloat64(0, false));
    }

    /** Reads an embedded value via `embeddedDecode`, or un-reads the tag and returns `undefined`. */
    nextEmbedded(): Embedded<T> | undefined {
        this.skipAnnotations();
        if (this.state.nextbyte() !== Tag.Embedded) return this.state._rewind();
        return embed(this.embeddedDecode.decode(this.state));
    }

    /** Reads a signed integer, or un-reads the tag and returns `undefined`. */
    nextSignedInteger(): number | undefined {
        this.skipAnnotations();
        if (this.state.nextbyte() !== Tag.SignedInteger) return this.state._rewind();
        return this.state.nextint();
    }

    /**
     * Reads a string, or un-reads the tag and returns `undefined`.
     *
     * The mandatory trailing NUL byte is removed before UTF-8 decoding, the
     * same as `_next` does for the string tag.
     *
     * @throws DecodeError if the string payload lacks its trailing NUL.
     */
    nextString(): string | undefined {
        this.skipAnnotations();
        if (this.state.nextbyte() !== Tag.String) return this.state._rewind();
        return chopNul(Bytes.from(this.state.nextbytes())).fromUtf8();
    }

    /** Reads a byte string, or un-reads the tag and returns `undefined`. */
    nextByteString(): Bytes | undefined {
        this.skipAnnotations();
        if (this.state.nextbyte() !== Tag.ByteString) return this.state._rewind();
        return Bytes.from(this.state.nextbytes());
    }

    /** Reads a symbol (interned with `Symbol.for`), or un-reads the tag and returns `undefined`. */
    nextSymbol(): symbol | undefined {
        this.skipAnnotations();
        if (this.state.nextbyte() !== Tag.Symbol) return this.state._rewind();
        return Symbol.for(Bytes.from(this.state.nextbytes()).fromUtf8());
    }

    /**
     * Shared implementation of the `open*` methods: if the next tag equals
     * `expectedTag`, consumes it, enters sequence mode and returns true;
     * otherwise un-reads the tag and returns false.
     */
    _openSequencelike(expectedTag: number): boolean {
        this.skipAnnotations();
        if (this.state.nextbyte() !== expectedTag) {
            this.state._rewind();
            return false;
        } else {
            this.state.inSequence = true;
            return true;
        }
    }

    /** Enters a record if one is next; see `_openSequencelike`. */
    openRecord(): boolean {
        return this._openSequencelike(Tag.Record);
    }

    /** Enters a sequence if one is next; see `_openSequencelike`. */
    openSequence(): boolean {
        return this._openSequencelike(Tag.Sequence);
    }

    /** Enters a set if one is next; see `_openSequencelike`. */
    openSet(): boolean {
        return this._openSequencelike(Tag.Set);
    }

    /** Enters a dictionary if one is next; see `_openSequencelike`. */
    openDictionary(): boolean {
        return this._openSequencelike(Tag.Dictionary);
    }

    /**
     * Returns true (and leaves sequence mode) when the current compound has
     * no more content; returns false, changing nothing, otherwise.
     */
    closeCompound(): boolean {
        const r = this.state.atEnd();
        if (r) this.state.inSequence = false;
        return r;
    }
}
/**
 * Decodes a single value from `bs`.
 *
 * @param bs      the encoded bytes
 * @param options decoder options; defaults to an empty options object
 * @returns the first value decoded from `bs`
 */
export function decode<T>(bs: BytesLike, options: DecoderEmbeddedOptions<T> = {}): Value<T> {
    const decoder = new Decoder(bs, options);
    return decoder.next();
}
/**
 * Decodes a single value from `bs` with annotations included: behaves like
 * `decode` with `includeAnnotations` forced to `true`, whatever `options`
 * says. `options` itself is not modified.
 *
 * @param bs      the encoded bytes
 * @param options decoder options; defaults to an empty options object
 * @returns the decoded value, typed as `Annotated<T>`
 */
export function decodeWithAnnotations<T>(bs: BytesLike,
                                         options: DecoderEmbeddedOptions<T> = {}): Annotated<T> {
    const annotatedOptions = { ... options, includeAnnotations: true };
    const decoded = decode(bs, annotatedOptions);
    return decoded as Annotated<T>;
}