// Source: preserves/implementations/javascript/src/codec.ts
// (432 lines, 14 KiB, TypeScript)

// Preserves Binary codec.
import {
underlying,
Annotated,
Dictionary, Set, Bytes, Record, Single, Double,
BytesLike,
Value,
} from './values';
import { Tag } from './constants';
import { PreserveOn } from './symbols';
/** Names of the codec error kinds distinguished by this module. */
export type ErrorType = 'DecodeError' | 'EncodeError' | 'ShortPacket';
/**
 * Global-registry symbol used as a property key: the error classes in this
 * file expose their kind through a getter stored under this symbol.
 */
export const ErrorType = Symbol.for('ErrorType');
/**
 * Anything accepted by `Encoder.push`: a plain value, an object that knows
 * how to write itself (`Preservable`), an iterable of values, or a typed
 * array / `DataView`.
 */
export type Encodable<T extends object> =
Value<T> | Preservable<T> | Iterable<Value<T>> | ArrayBufferView;
/**
 * Contract for objects that serialize themselves: the method stored under the
 * `PreserveOn` symbol receives the encoder it should write to.
 */
export interface Preservable<T extends object> {
[PreserveOn](encoder: Encoder<T>): void;
}
/**
 * Type guard: true when `v` is a non-null object carrying a function under
 * the `PreserveOn` symbol.
 */
export function isPreservable<T extends object>(v: any): v is Preservable<T> {
    if (v === null || typeof v !== 'object') {
        return false;
    }
    const hook = v[PreserveOn];
    return typeof hook === 'function';
}
/**
 * Shape shared by codec errors: each one reports its kind through a getter
 * stored under the `ErrorType` symbol.
 */
export abstract class PreservesCodecError {
    /**
     * Tests whether `e` reports kind `t` under the `ErrorType` symbol.
     * Tolerates `null` / `undefined` for `e`.
     */
    static isCodecError(e: any, t: ErrorType): e is PreservesCodecError {
        const reportedKind = e?.[ErrorType];
        return reportedKind === t;
    }

    abstract get [ErrorType](): ErrorType;
}
/** Error thrown by the decoder when the input is malformed. */
export class DecodeError extends Error {
    /** True when `e` reports the kind 'DecodeError' under the `ErrorType` symbol. */
    static isDecodeError(e: any): e is DecodeError {
        const kindMatches = PreservesCodecError.isCodecError(e, 'DecodeError');
        return kindMatches;
    }

    get [ErrorType](): ErrorType {
        return 'DecodeError';
    }
}
/** Error thrown by the encoder; `irritant` holds the value that could not be encoded. */
export class EncodeError extends Error {
    /** True when `e` reports the kind 'EncodeError' under the `ErrorType` symbol. */
    static isEncodeError(e: any): e is EncodeError {
        const kindMatches = PreservesCodecError.isCodecError(e, 'EncodeError');
        return kindMatches;
    }

    /**
     * @param message  human-readable description of the failure
     * @param irritant the offending value, stored on the instance
     */
    constructor(message: string, readonly irritant: any) {
        super(message);
    }

    get [ErrorType](): ErrorType {
        return 'EncodeError';
    }
}
/**
 * Thrown when the decoder runs out of input. Note that it reports the kind
 * 'ShortPacket', so `DecodeError.isDecodeError` does not match it even though
 * it subclasses `DecodeError`.
 */
export class ShortPacket extends DecodeError {
    /** True when `e` reports the kind 'ShortPacket' under the `ErrorType` symbol. */
    static isShortPacket(e: any): e is ShortPacket {
        const kindMatches = PreservesCodecError.isCodecError(e, 'ShortPacket');
        return kindMatches;
    }

    get [ErrorType](): ErrorType {
        return 'ShortPacket';
    }
}
/** Options accepted by `Decoder`. */
export interface DecoderOptions<T extends object> {
// When true, decoded values are wrapped in `Annotated`; treated as false when omitted.
includeAnnotations?: boolean;
// Converts the value following a pointer tag into a `T`; decoding a pointer
// without it throws a DecodeError.
decodePointer?: (v: Value<T>) => T;
}
/**
 * Incremental decoder over a byte buffer.
 *
 * `packet` holds the bytes, `index` is the read cursor. More input can be
 * appended with `write()`, and `try_next()` lets a caller retry once more
 * bytes have arrived.
 */
export class Decoder<T extends object> {
    packet: Uint8Array;
    index: number;
    options: DecoderOptions<T>;

    /**
     * @param packet  initial input (defaults to an empty buffer)
     * @param options decoding options
     */
    constructor(packet: BytesLike = new Uint8Array(0), options: DecoderOptions<T> = {}) {
        this.packet = underlying(packet);
        this.index = 0;
        this.options = options;
    }

    /** Whether decoded values are wrapped in `Annotated` (false unless requested). */
    get includeAnnotations(): boolean {
        return this.options.includeAnnotations ?? false;
    }

    /** Appends `data` to the unread remainder of the buffer and rewinds the cursor to it. */
    write(data: BytesLike) {
        this.packet = Bytes.concat([this.packet.slice(this.index), data])._view;
        this.index = 0;
    }

    /**
     * Reads one byte and advances the cursor.
     * @throws ShortPacket when no bytes remain
     */
    nextbyte(): number {
        if (this.index >= this.packet.length) throw new ShortPacket("Short packet");
        // ^ NOTE: greater-than-or-equal-to, not greater-than.
        return this.packet[this.index++];
    }

    /**
     * Returns a view over the next `n` bytes and advances the cursor past them.
     * The cursor is left untouched when fewer than `n` bytes remain.
     * @throws ShortPacket when fewer than `n` bytes remain
     */
    nextbytes(n: number): DataView {
        const start = this.index;
        const end = start + n;
        // Check before moving the cursor so a failed read does not corrupt state.
        if (end > this.packet.length) throw new ShortPacket("Short packet");
        // ^ NOTE: greater-than, not greater-than-or-equal-to.
        this.index = end;
        return new DataView(this.packet.buffer, this.packet.byteOffset + start, n);
    }

    /**
     * Reads a little-endian base-128 varint (high bit set = more bytes follow).
     * Uses multiplication rather than `<<` so values of 2^31 and above do not
     * wrap around at 32 bits.
     * @throws DecodeError when the value exceeds the safe-integer range
     * @throws ShortPacket when the input ends mid-varint
     */
    varint(): number {
        // TODO: Bignums :-/
        let acc = 0;
        let scale = 1;
        for (;;) {
            const b = this.nextbyte();
            acc += (b & 0x7f) * scale;
            if (!Number.isSafeInteger(acc)) throw new DecodeError("Varint too large");
            if (b < 128) return acc;
            scale *= 128;
        }
    }

    /**
     * Consumes the next byte if it is the end marker and returns true;
     * otherwise leaves the cursor where it was and returns false.
     */
    peekend(): boolean {
        const matched = this.nextbyte() === Tag.End;
        if (!matched) this.index--;
        return matched;
    }

    /** Decodes values until the end marker, which is consumed. */
    nextvalues(): Value<T>[] {
        const result: Value<T>[] = [];
        while (!this.peekend()) result.push(this.next());
        return result;
    }

    /**
     * Reads an `n`-byte big-endian two's-complement integer.
     * Accumulates arithmetically, so integers wider than 4 bytes no longer
     * wrap at 32 bits; values beyond 2^53 still lose precision.
     */
    nextint(n: number): number {
        // TODO: Bignums :-/
        if (n === 0) return 0;
        let acc = this.nextbyte();
        if (acc & 0x80) acc -= 256; // sign-extend the leading byte
        for (let i = 1; i < n; i++) acc = acc * 256 + this.nextbyte();
        return acc;
    }

    /** Wraps `v` in `Annotated` when annotations are being kept; otherwise returns it as is. */
    wrap(v: Value<T>): Value<T> {
        return this.includeAnnotations ? new Annotated(v) : v;
    }

    /**
     * Builds a dictionary from a flat `[k0, v0, k1, v1, ...]` array.
     * @throws DecodeError when the array has odd length
     */
    static dictionaryFromArray<T extends object>(vs: Value<T>[]): Dictionary<T, Value<T>> {
        const d = new Dictionary<T, Value<T>>();
        if (vs.length % 2) throw new DecodeError("Missing dictionary value");
        for (let i = 0; i < vs.length; i += 2) {
            d.set(vs[i], vs[i+1]);
        }
        return d;
    }

    /**
     * Prepends annotation `a` to `v` when annotations are being kept; `v` is
     * returned either way.
     */
    unshiftAnnotation(a: Value<T>, v: Annotated<T>) {
        if (this.includeAnnotations) {
            v.annotations.unshift(a);
        }
        return v;
    }

    /**
     * Decodes one complete value starting at the cursor.
     * @throws ShortPacket when the input ends before the value is complete
     * @throws DecodeError on malformed input or an unsupported tag
     */
    next(): Value<T> {
        const tag = this.nextbyte();
        switch (tag) {
            case Tag.False: return this.wrap(false);
            case Tag.True: return this.wrap(true);
            case Tag.Float: return this.wrap(new Single(this.nextbytes(4).getFloat32(0, false)));
            case Tag.Double: return this.wrap(new Double(this.nextbytes(8).getFloat64(0, false)));
            case Tag.End: throw new DecodeError("Unexpected Compound end marker");
            case Tag.Annotation: {
                // The annotation comes first, then the value it annotates.
                const a = this.next();
                const v = this.next() as Annotated<T>;
                return this.unshiftAnnotation(a, v);
            }
            case Tag.Pointer: {
                const d = this.options.decodePointer;
                if (d === void 0) {
                    throw new DecodeError("No decodePointer function supplied");
                }
                return this.wrap(d(this.next()));
            }
            case Tag.SignedInteger: return this.wrap(this.nextint(this.varint()));
            case Tag.String: return this.wrap(Bytes.from(this.nextbytes(this.varint())).fromUtf8());
            case Tag.ByteString: return this.wrap(Bytes.from(this.nextbytes(this.varint())));
            case Tag.Symbol: return this.wrap(Symbol.for(Bytes.from(this.nextbytes(this.varint())).fromUtf8()));
            case Tag.Record: {
                // The first element is the label, the rest are the fields.
                const vs = this.nextvalues();
                if (vs.length === 0) throw new DecodeError("Too few elements in encoded record");
                return this.wrap(new Record(vs[0], vs.slice(1)));
            }
            case Tag.Sequence: return this.wrap(this.nextvalues());
            case Tag.Set: return this.wrap(new Set(this.nextvalues()));
            case Tag.Dictionary: return this.wrap(Decoder.dictionaryFromArray(this.nextvalues()));
            default: {
                if (tag >= Tag.SmallInteger_lo && tag <= Tag.SmallInteger_lo + 15) {
                    // Offsets 0..12 are themselves; 13..15 stand for -3..-1.
                    const v = tag - Tag.SmallInteger_lo;
                    return this.wrap(v > 12 ? v - 16 : v);
                }
                if (tag >= Tag.MediumInteger_lo && tag <= Tag.MediumInteger_lo + 15) {
                    // The tag offset encodes (byte count - 1).
                    const n = tag - Tag.MediumInteger_lo;
                    return this.wrap(this.nextint(n + 1));
                }
                throw new DecodeError("Unsupported Preserves tag: " + tag);
            }
        }
    }

    /**
     * Like `next()`, but when the input is incomplete it restores the cursor
     * and returns `undefined` instead of throwing. Other errors propagate.
     */
    try_next(): Value<T> | undefined {
        const start = this.index;
        try {
            return this.next();
        } catch (e) {
            if (ShortPacket.isShortPacket(e)) {
                this.index = start;
                return void 0;
            }
            throw e;
        }
    }
}
/**
 * Decodes the first value found in `bs`.
 * Builds a one-shot `Decoder` over the bytes and returns its next value.
 */
export function decode<T extends object>(bs: BytesLike, options?: DecoderOptions<T>) {
    const decoder = new Decoder(bs, options);
    return decoder.next();
}
/**
 * Same as `decode`, but forces `includeAnnotations: true` on top of the
 * caller's options; the result is returned typed as `Annotated`.
 */
export function decodeWithAnnotations<T extends object>(bs: BytesLike, options: DecoderOptions<T> = {}): Annotated<T> {
    const annotatedOptions: DecoderOptions<T> = { ...options, includeAnnotations: true };
    const decoded = decode(bs, annotatedOptions);
    return decoded as Annotated<T>;
}
/** Options accepted by `Encoder`. */
export interface EncoderOptions<T extends object> {
// Treated as true when omitted (see `Encoder.canonical`).
canonical?: boolean;
// When omitted, defaults to the opposite of `canonical` (see `Encoder.includeAnnotations`).
includeAnnotations?: boolean;
// Maps a pointer object to the value written after the pointer tag;
// `pointerId` is used when omitted.
encodePointer?: (v: T) => Value<T>;
}
/**
 * Converts bytes to a string with one UTF-16 code unit per byte (latin-1 style).
 *
 * The conversion is done in bounded slices: passing a whole large array to
 * `String.fromCharCode.apply` makes every byte a call argument, which exceeds
 * the engine's argument limit and throws a RangeError.
 */
function chunkStr(bs: Uint8Array): string {
    const SLICE = 0x8000;
    if (bs.length <= SLICE) {
        return String.fromCharCode.apply(null, bs as any as number[]);
    }
    const parts: string[] = [];
    for (let start = 0; start < bs.length; start += SLICE) {
        // subarray() is a view, so no bytes are copied here.
        const piece = bs.subarray(start, start + SLICE);
        parts.push(String.fromCharCode.apply(null, piece as any as number[]));
    }
    return parts.join('');
}
/**
 * Type guard: true when `v` is a non-null object with a function under
 * `Symbol.iterator`. Primitive strings are therefore not matched.
 */
function isIterable<T>(v: any): v is Iterable<T> {
    if (v === null || typeof v !== 'object') {
        return false;
    }
    const iteratorFactory = v[Symbol.iterator];
    return typeof iteratorFactory === 'function';
}
/**
 * Binary encoder that accumulates output in a growable list of buffers.
 *
 * Bytes are written into `view` at `index`; when `view` is too small it is
 * moved onto `chunks` and replaced. `contents()` / `contentsString()` return
 * everything written so far and leave the encoder empty for reuse.
 */
export class Encoder<T extends object> {
    chunks: Array<Uint8Array>;
    view: DataView;
    index: number;
    options: EncoderOptions<T>;

    /** @param options encoding options */
    constructor(options: EncoderOptions<T> = {}) {
        this.chunks = [];
        this.view = new DataView(new ArrayBuffer(256));
        this.index = 0;
        this.options = options;
    }

    /** Canonical mode flag; true unless the options say otherwise. */
    get canonical(): boolean {
        return this.options.canonical ?? true;
    }

    /** Annotation flag; defaults to the opposite of `canonical`. */
    get includeAnnotations(): boolean {
        return this.options.includeAnnotations ?? !this.canonical;
    }

    /**
     * Retires the current buffer and hands back every finished chunk,
     * leaving `chunks` empty so later output does not repeat earlier output.
     */
    private takechunks(): Array<Uint8Array> {
        this.rotatebuffer(4096);
        const finished = this.chunks;
        this.chunks = [];
        return finished;
    }

    /** Returns all bytes written so far and resets the encoder. */
    contents(): Bytes {
        if (this.chunks.length === 0) {
            // Everything fits in the current buffer: copy out the used prefix.
            const resultLength = this.index;
            this.index = 0;
            return new Bytes(this.view.buffer.slice(0, resultLength));
        } else {
            return Bytes.concat(this.takechunks());
        }
    }

    /* Like contents(), but hands back a string containing binary data "encoded" via latin-1 */
    contentsString(): string {
        if (this.chunks.length === 0) {
            const s = chunkStr(new Uint8Array(this.view.buffer, 0, this.index));
            this.index = 0;
            return s;
        } else {
            return this.takechunks().map(chunkStr).join('');
        }
    }

    /** Moves the used part of the current buffer onto `chunks` and starts a fresh buffer of `size` bytes. */
    rotatebuffer(size: number) {
        this.chunks.push(new Uint8Array(this.view.buffer, 0, this.index));
        this.view = new DataView(new ArrayBuffer(size));
        this.index = 0;
    }

    /** Ensures `amount` more bytes fit in the current buffer, rotating if needed. */
    makeroom(amount: number) {
        if (this.index + amount > this.view.byteLength) {
            this.rotatebuffer(amount + 4096);
        }
    }

    /** Appends a single byte. */
    emitbyte(b: number) {
        this.makeroom(1);
        this.view.setUint8(this.index++, b);
    }

    /** Appends a run of bytes. */
    emitbytes(bs: Uint8Array) {
        this.makeroom(bs.length);
        (new Uint8Array(this.view.buffer)).set(bs, this.index);
        this.index += bs.length;
    }

    /** Writes `v` as a little-endian base-128 varint (high bit set = more bytes follow). */
    varint(v: number) {
        while (v >= 128) {
            this.emitbyte((v % 128) + 128);
            v = Math.floor(v / 128);
        }
        this.emitbyte(v);
    }

    /**
     * Writes `v` as a tagged big-endian two's-complement integer using the
     * fewest bytes that hold it. Up to 16 bytes the length is folded into the
     * tag; longer values get an explicit varint length.
     *
     * All arithmetic avoids 32-bit bitwise operators, so values outside the
     * int32 range are encoded correctly. Any fractional part is discarded.
     *
     * @throws EncodeError when `v` is NaN or infinite
     */
    encodeint(v: number) {
        // TODO: Bignums :-/
        if (!Number.isFinite(v)) {
            throw new EncodeError("Cannot encode non-finite number as integer", v);
        }
        v = Math.trunc(v);

        // For negatives, -(v + 1) is the arithmetic form of ~v.
        const magnitude = v >= 0 ? v : -(v + 1);
        // k bytes hold magnitudes below 2^(8k - 1); find the smallest such k.
        let bytecount = 1;
        for (let limit = 128; magnitude >= limit; limit *= 256) bytecount++;

        if (bytecount <= 16) {
            this.emitbyte(Tag.MediumInteger_lo + bytecount - 1);
        } else {
            this.emitbyte(Tag.SignedInteger);
            this.varint(bytecount);
        }

        // Peel off bytes from least to most significant, filling from the end.
        const bytes = new Uint8Array(bytecount);
        let rest = v;
        for (let i = bytecount - 1; i >= 0; i--) {
            const low = ((rest % 256) + 256) % 256; // non-negative remainder
            bytes[i] = low;
            rest = (rest - low) / 256;
        }
        this.emitbytes(bytes);
    }

    /** Writes `tag`, the varint length of `bs`, then `bs` itself. */
    encodebytes(tag: Tag, bs: Uint8Array) {
        this.emitbyte(tag);
        this.varint(bs.length);
        this.emitbytes(bs);
    }

    /** Writes `tag`, each item via `push`, then the end marker. */
    encodevalues(tag: Tag, items: Iterable<Value<T>>) {
        this.emitbyte(tag);
        for (const i of items) { this.push(i); }
        this.emitbyte(Tag.End);
    }

    /** Writes `tag`, the already-encoded bytes of each item, then the end marker. */
    encoderawvalues(tag: Tag, items: BytesLike[]) {
        this.emitbyte(tag);
        items.forEach((i) => this.emitbytes(underlying(i)));
        this.emitbyte(Tag.End);
    }

    /**
     * Encodes one value, dispatching on its runtime type, and returns the
     * encoder for chaining. Objects matched by no other case are written as
     * pointers via `encodePointer` (or `pointerId` when none is configured).
     *
     * @throws EncodeError for symbols that are not in the global registry
     */
    push(v: Encodable<T>) {
        if (isPreservable<never>(v)) {
            v[PreserveOn](this as unknown as Encoder<never>);
        }
        // NOTE(review): this test is the same at run time as the one above, so
        // the branch cannot be taken; presumably kept for type narrowing.
        else if (isPreservable<T>(v)) {
            v[PreserveOn](this);
        }
        else if (typeof v === 'boolean') {
            this.emitbyte(v ? Tag.True : Tag.False);
        }
        else if (typeof v === 'number') {
            if (v >= -3 && v <= 12) {
                // Small integers are folded into the tag byte itself.
                this.emitbyte(Tag.SmallInteger_lo + ((v + 16) & 0xf));
            } else {
                this.encodeint(v);
            }
        }
        else if (typeof v === 'string') {
            this.encodebytes(Tag.String, new Bytes(v)._view);
        }
        else if (typeof v === 'symbol') {
            const key = Symbol.keyFor(v);
            if (key === void 0) throw new EncodeError("Cannot preserve non-global Symbol", v);
            this.encodebytes(Tag.Symbol, new Bytes(key)._view);
        }
        else if (ArrayBuffer.isView(v)) {
            if (v instanceof Uint8Array) {
                this.encodebytes(Tag.ByteString, v);
            } else {
                // Reinterpret other views as raw bytes over the same memory.
                const bs = new Uint8Array(v.buffer, v.byteOffset, v.byteLength);
                this.encodebytes(Tag.ByteString, bs);
            }
        }
        else if (Array.isArray(v)) {
            this.encodevalues(Tag.Sequence, v);
        }
        else if (isIterable<Value<T>>(v)) {
            this.encodevalues(Tag.Sequence, v as Iterable<Value<T>>);
        }
        else {
            const e = this.options.encodePointer ?? pointerId;
            this.emitbyte(Tag.Pointer);
            this.push(e(v));
        }
        return this; // for chaining
    }
}
/**
 * Encodes a single value to bytes using a fresh `Encoder` configured with
 * `options`.
 */
export function encode<T extends object>(v: Encodable<T>, options?: EncoderOptions<T>): Bytes {
    const loaded = new Encoder(options).push(v);
    return loaded.contents();
}
// Counter for the next identifier to hand out.
let _nextId = 0;
// Remembers the identifier assigned to each object; weak, so entries do not
// keep the objects alive.
const _registry = new WeakMap<object, number>();

/**
 * Returns a numeric identifier for `v`: the same number for the same object
 * on every call, and a new sequential number for an object not seen before.
 */
export function pointerId(v: object): number {
    const known = _registry.get(v);
    if (known !== void 0) {
        return known;
    }
    const fresh = _nextId++;
    _registry.set(v, fresh);
    return fresh;
}
// Shared encoder reused across calls to avoid allocating one per value.
const _canonicalEncoder = new Encoder({ canonical: true });
// True while the shared encoder is in the middle of an encoding, so that a
// nested call (made while a value is encoding itself) uses its own encoder.
let _usingCanonicalEncoder = false;

/**
 * Runs `body` against the shared canonical encoder with the busy flag set.
 * The flag is always cleared afterwards; if `body` throws, any partial
 * output is discarded so it cannot leak into a later result.
 */
function _withCanonicalEncoder<R>(body: (e: Encoder<any>) => R): R {
    _usingCanonicalEncoder = true;
    try {
        return body(_canonicalEncoder);
    } catch (e) {
        _canonicalEncoder.chunks = [];
        _canonicalEncoder.index = 0;
        throw e;
    } finally {
        _usingCanonicalEncoder = false;
    }
}

/**
 * Encodes `v` in canonical form. Uses the shared encoder when no options
 * are given and it is free; otherwise falls back to a fresh encoder with
 * `canonical: true` forced on top of `options`.
 */
export function canonicalEncode(v: Encodable<any>, options?: EncoderOptions<any>): Bytes {
    if (options !== void 0 || _usingCanonicalEncoder) {
        return encode(v, { ... options, canonical: true });
    }
    return _withCanonicalEncoder((e) => e.push(v).contents());
}

/**
 * Canonical encoding of `v` as a string (see `Encoder.contentsString`).
 * Uses the shared encoder only when it is free; a nested call gets a
 * private encoder so it cannot drain the outer call's partial output.
 */
export function canonicalString(v: Encodable<any>): string {
    if (_usingCanonicalEncoder) {
        return new Encoder<any>({ canonical: true }).push(v).contentsString();
    }
    return _withCanonicalEncoder((e) => e.push(v).contentsString());
}
/**
 * Same as `encode`, but forces `includeAnnotations: true` on top of the
 * caller's options.
 */
export function encodeWithAnnotations<T extends object>(v: Encodable<T>, options: EncoderOptions<T> = {}): Bytes {
    const annotatedOptions: EncoderOptions<T> = { ...options, includeAnnotations: true };
    return encode(v, annotatedOptions);
}