// Text syntax reader.
//
// NOTE(review): the type-argument lists in this file (`<T>`, `<E, R>`, `<S>`,
// `<GenericEmbedded>`, `Array<Value<T>>`) were reconstructed from dangling `>`
// tokens and undeclared type names. Confirm the arity of `Value`, `Annotated`,
// `Dictionary`, `Set` and `EmbeddedTypeDecode` against their declarations.

import type { Value } from './values';
import { DecodeError, ShortPacket } from './codec';
import { Dictionary, Set } from './dictionary';
import { strip } from './strip';
import { Bytes, unhexDigit } from './bytes';
import { Decoder, DecoderState, neverEmbeddedTypeDecode } from './decoder';
import { Record } from './record';
import { Annotated, newPosition, Position, updatePosition } from './annotated';
import { Double, DoubleFloat, FloatType, Single, SingleFloat } from './float';
import { stringify } from './text';
import { embed, GenericEmbedded, EmbeddedTypeDecode } from './embedded';

/** Options controlling a {@link ReaderState}. */
export interface ReaderStateOptions {
    /** When true, values produced by {@link Reader} are wrapped in `Annotated`. */
    includeAnnotations?: boolean;
    /** Either a name passed to `newPosition`, or a `Position` that is copied as the start position. */
    name?: string | Position;
}

/** Options accepted by the buffer-taking {@link Reader} constructor. */
export interface ReaderOptions<T> extends ReaderStateOptions {
    /** Decoder used for `#!` embedded values; defaults to `neverEmbeddedTypeDecode`. */
    embeddedDecode?: EmbeddedTypeDecode<T>;
}

const MAX_SAFE_INTEGERn = BigInt(Number.MAX_SAFE_INTEGER);
const MIN_SAFE_INTEGERn = BigInt(Number.MIN_SAFE_INTEGER);

/**
 * Recognises integer and floating-point literals.
 *
 * Capture groups used by the reader:
 *  - 1: sign and integer part
 *  - 2: decimal part, exponent and Float marker (absent for plain integers)
 *  - 3: decimal part and exponent
 *  - 7: Float marker (`f`/`F`, or the empty string)
 */
export const NUMBER_RE: RegExp = /^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$/;

/**
 * Character-level cursor over a text buffer, with position tracking and the
 * lexical helpers (hex, base64, strings, symbols, numbers) used by {@link Reader}.
 */
export class ReaderState {
    /** Text not yet discarded; `index` points at the next unread character. */
    buffer: string;
    /** Current position; updated via `updatePosition` on every `advance()`. */
    pos: Position;
    /** Offset of the next unread character within `buffer`. */
    index: number;
    /** Running total of `index` values dropped by `write()`. */
    discarded = 0;
    options: ReaderStateOptions;

    constructor(buffer: string, options: ReaderStateOptions) {
        this.buffer = buffer;
        switch (typeof options.name) {
            case 'undefined': this.pos = newPosition(); break;
            case 'string': this.pos = newPosition(options.name); break;
            case 'object': this.pos = { ... options.name }; break;
        }
        this.index = 0;
        this.options = options;
    }

    /**
     * Throws a `DecodeError` carrying a copy of `pos`.
     * @throws DecodeError always
     */
    error(message: string, pos: Position): never {
        throw new DecodeError(message, { ... pos });
    }

    /** Whether annotations were requested; false when the option is unset. */
    get includeAnnotations(): boolean {
        return this.options.includeAnnotations ?? false;
    }

    /** Returns a shallow copy of the current position. */
    copyPos(): Position {
        return { ... this.pos };
    }

    /**
     * Supplies more input. Already-consumed text is dropped from the buffer,
     * its length is added to `discarded`, and `index` is reset to zero.
     */
    write(data: string): void {
        if (this.atEnd()) {
            this.buffer = data;
        } else {
            this.buffer = this.buffer.substring(this.index) + data;
        }
        this.discarded += this.index;
        this.index = 0;
    }

    /** True when no unread characters remain in the buffer. */
    atEnd(): boolean {
        return (this.index >= this.buffer.length);
    }

    /**
     * Returns the next character without consuming it.
     * @throws ShortPacket if the buffer is exhausted
     */
    peek(): string {
        // Snapshot the position: `this.pos` is mutated in place by advance().
        if (this.atEnd()) throw new ShortPacket("Short term", this.copyPos());
        return this.buffer[this.index];
    }

    /**
     * Consumes one character, updating `pos`, and returns the index it was at.
     * Does not check for end of input; callers check `atEnd()` first.
     */
    advance(): number {
        const n = this.index++;
        updatePosition(this.pos, this.buffer[n]);
        return n;
    }

    /**
     * Consumes and returns the next character.
     * @throws ShortPacket if the buffer is exhausted
     */
    nextchar(): string {
        if (this.atEnd()) throw new ShortPacket("Short term", this.copyPos());
        return this.buffer[this.advance()];
    }

    /**
     * Consumes the next character and returns its UTF-16 code unit.
     * @throws ShortPacket if the buffer is exhausted
     */
    nextcharcode(): number {
        if (this.atEnd()) throw new ShortPacket("Short term", this.copyPos());
        return this.buffer.charCodeAt(this.advance());
    }

    /**
     * Skips spaces, tabs, newlines and carriage returns, and also commas when
     * `skipCommas` is true. Stops quietly at end of input.
     */
    skipws(skipCommas = false): void {
        while (!this.atEnd()) {
            const c = this.peek();
            if (!(isSpace(c) || (skipCommas && c === ','))) break;
            this.advance();
        }
    }

    /**
     * Reads two hex digits as one byte value.
     * Presumably `unhexDigit` throws the supplied `DecodeError` class on a
     * non-hex character; verify against `./bytes`.
     */
    readHex2(): number {
        const x1 = unhexDigit(this.nextcharcode(), DecodeError);
        const x2 = unhexDigit(this.nextcharcode(), DecodeError);
        return (x1 << 4) | x2;
    }

    /** Reads four hex digits as one 16-bit value. */
    readHex4(): number {
        const x1 = unhexDigit(this.nextcharcode(), DecodeError);
        const x2 = unhexDigit(this.nextcharcode(), DecodeError);
        const x3 = unhexDigit(this.nextcharcode(), DecodeError);
        const x4 = unhexDigit(this.nextcharcode(), DecodeError);
        return (x1 << 12) | (x2 << 8) | (x3 << 4) | x4;
    }

    /**
     * Reads hex byte pairs, skipping whitespace before each pair, until a
     * closing double quote is consumed. The opening quote must already have
     * been consumed by the caller.
     */
    readHexBinary(): Bytes {
        const acc: number[] = [];
        while (true) {
            this.skipws();
            if (this.peek() === '"') {
                this.advance();
                return Bytes.from(acc);
            }
            acc.push(this.readHex2());
        }
    }

    /**
     * Reads a double-quoted hex string and decodes it as a float of the given
     * precision; exactly 4 bytes are required for `Single`, 8 for `Double`.
     * @throws DecodeError if the opening quote is missing or the byte count is wrong
     */
    readHexFloat(precision: FloatType): SingleFloat | DoubleFloat {
        const pos = this.copyPos();
        if (this.nextchar() !== '"') {
            this.error("Missing open-double-quote in hex-encoded floating-point number", pos);
        }
        const bs = this.readHexBinary();
        switch (precision) {
            case 'Single':
                if (bs.length !== 4) this.error("Incorrect number of bytes in hex-encoded Float", pos);
                return SingleFloat.fromBytes(bs);
            case 'Double':
                if (bs.length !== 8) this.error("Incorrect number of bytes in hex-encoded Double", pos);
                return DoubleFloat.fromBytes(bs);
        }
    }

    /**
     * Collects characters (skipping whitespace before each) up to a closing
     * `]`, then hands the collected text to `Bytes.fromBase64`.
     */
    readBase64Binary(): Bytes {
        let acc = '';
        while (true) {
            this.skipws();
            const c = this.nextchar();
            if (c === ']') break;
            acc = acc + c;
        }
        return Bytes.fromBase64(acc);
    }

    /**
     * Requires that the token just read is followed by a delimiter.
     * @param prefix text of the token, used only in the error message
     * @throws DecodeError if a non-delimiter character follows
     */
    requireDelimiter(prefix: string): void {
        if (this.delimiterFollows()) return;
        this.error(`Delimiter must follow ${prefix}`, this.pos);
    }

    /** True at end of input, or when the next character is punctuation or whitespace. */
    delimiterFollows(): boolean {
        if (this.atEnd()) return true;
        const ch = this.peek();
        return '(){}[]<>";,@#:|'.includes(ch) || isSpace(ch);
    }

    /**
     * Extends `acc` with characters up to the next delimiter and classifies the
     * resulting token.
     *
     * - Integer literal: a `number` when strictly between the safe-integer
     *   bounds, otherwise a `bigint`.
     * - Literal with a decimal part or exponent: `Double`, or `Single` when an
     *   `f`/`F` marker is present.
     * - Anything else: `Symbol.for(token)`.
     *
     * @param acc characters of the token that the caller has already consumed
     */
    readRawSymbolOrNumber<T>(acc: string): Value<T> {
        while (!this.delimiterFollows()) acc = acc + this.nextchar();
        const m = NUMBER_RE.exec(acc);
        if (m) {
            if (m[2] === undefined) {
                const v = BigInt(m[1]);
                if (v <= MIN_SAFE_INTEGERn || v >= MAX_SAFE_INTEGERn) {
                    return v;
                } else {
                    return Number(v);
                }
            } else if (m[7] === '') {
                return Double(parseFloat(m[1] + m[3]));
            } else {
                return Single(parseFloat(m[1] + m[3]));
            }
        } else {
            return Symbol.for(acc);
        }
    }

    /**
     * Shared scanner for quoted, backslash-escaped literals. The opening quote
     * must already have been consumed.
     *
     * @param xform maps each literal (or simple-escaped) character to an element
     * @param finish builds the result from the collected elements
     * @param terminator the closing quote character; also accepted after a backslash
     * @param hexescape the escape letter that introduces a hex escape
     * @param hex reads the hex escape's payload and returns its element
     * @throws DecodeError on an unrecognised escape
     */
    readStringlike<E, R>(xform: (ch: string) => E,
                         finish: (acc: E[]) => R,
                         terminator: string,
                         hexescape: string,
                         hex: () => E): R
    {
        const acc: E[] = [];
        while (true) {
            const ch = this.nextchar();
            switch (ch) {
                case terminator:
                    return finish(acc);

                case '\\': {
                    const esc = this.nextchar();
                    switch (esc) {
                        case hexescape: acc.push(hex()); break;

                        case terminator:
                        case '\\':
                        case '/':
                            acc.push(xform(esc)); break;

                        case 'b': acc.push(xform('\x08')); break;
                        case 'f': acc.push(xform('\x0c')); break;
                        case 'n': acc.push(xform('\x0a')); break;
                        case 'r': acc.push(xform('\x0d')); break;
                        case 't': acc.push(xform('\x09')); break;

                        default:
                            this.error(`Invalid escape code \\${esc}`, this.pos);
                    }
                    break;
                }

                default:
                    acc.push(xform(ch));
                    break;
            }
        }
    }

    /**
     * Reads the remainder of a string terminated by `terminator`, decoding
     * `\uXXXX` escapes. A code unit in the surrogate range must be a high
     * surrogate immediately followed by a `\u`-escaped low surrogate.
     * @throws DecodeError on an invalid surrogate pair
     */
    readString(terminator: string): string {
        return this.readStringlike(x => x, xs => xs.join(''), terminator, 'u', () => {
            const n1 = this.readHex4();
            if ((n1 >= 0xd800) && (n1 <= 0xdfff)) {
                if ((this.nextchar() === '\\') && (this.nextchar() === 'u')) {
                    const n2 = this.readHex4();
                    if ((n2 >= 0xdc00) && (n2 <= 0xdfff) && (n1 <= 0xdbff)) {
                        return String.fromCharCode(n1, n2);
                    }
                }
                this.error('Invalid surrogate pair', this.pos);
            }
            return String.fromCharCode(n1);
        });
    }

    /**
     * Reads the remainder of a `#"..."` literal binary: each character must
     * have a code below 256, and `\xHH` escapes yield a single byte.
     * @throws DecodeError if a character's code is 256 or greater
     */
    readLiteralBinary(): Bytes {
        return this.readStringlike(
            x => {
                const v = x.charCodeAt(0);
                if (v >= 256) this.error(`Invalid code point ${v} in literal binary`, this.pos);
                return v;
            },
            Bytes.from,
            '"',
            'x',
            () => this.readHex2());
    }
}

/**
 * Embedded-type codec that produces `GenericEmbedded` wrappers. When read from
 * text, the value is wrapped as-is if annotations are included, and otherwise
 * passed through `strip` first.
 */
export const genericEmbeddedTypeDecode: EmbeddedTypeDecode<GenericEmbedded> = {
    decode(s: DecoderState): GenericEmbedded {
        return new GenericEmbedded(new Decoder(s, this).next());
    },

    fromValue(v: Value<GenericEmbedded>, options: ReaderStateOptions): GenericEmbedded {
        return new GenericEmbedded(options.includeAnnotations ? v : strip(v));
    },
};

/**
 * Parser for the text syntax, built on a {@link ReaderState}.
 *
 * Construct it either from an existing state plus an embedded-type decoder, or
 * from a string buffer plus optional {@link ReaderOptions}.
 */
export class Reader<T> {
    state: ReaderState;
    embeddedType: EmbeddedTypeDecode<T>;

    constructor(state: ReaderState, embeddedType: EmbeddedTypeDecode<T>);
    constructor(buffer: string, options?: ReaderOptions<T>);
    constructor(
        state_or_buffer: (ReaderState | string) = '',
        embeddedType_or_options?: (EmbeddedTypeDecode<T> | ReaderOptions<T>))
    {
        if (state_or_buffer instanceof ReaderState) {
            this.state = state_or_buffer;
            this.embeddedType = embeddedType_or_options as EmbeddedTypeDecode<T>;
        } else {
            const options = (embeddedType_or_options as ReaderOptions<T>) ?? {};
            this.state = new ReaderState(state_or_buffer, options);
            this.embeddedType = options.embeddedDecode ?? neverEmbeddedTypeDecode;
        }
    }

    /** Supplies more input to the underlying state. */
    write(data: string): void {
        this.state.write(data);
    }

    /**
     * Reads up to (and consumes) the next `\n` or `\r`, returning the text
     * before it as a wrapped string.
     * @throws ShortPacket if input ends before a line terminator
     */
    readCommentLine(): Value<T> {
        const startPos = this.state.copyPos();
        let acc = '';
        while (true) {
            const c = this.state.nextchar();
            if (c === '\n' || c === '\r') {
                return this.wrap(acc, startPos);
            }
            acc = acc + c;
        }
    }

    /**
     * When annotations are enabled and `v` is not already annotated, wraps it
     * in an `Annotated` built from `v` and `pos`; otherwise returns `v` unchanged.
     */
    wrap(v: Value<T>, pos: Position): Value<T> {
        if (this.state.includeAnnotations && !Annotated.isAnnotated(v)) {
            v = new Annotated(v, pos);
        }
        return v;
    }

    /**
     * Reads the next value and, when annotations are enabled, prepends `v` to
     * its annotation list.
     * @throws DecodeError if only whitespace remains after the annotation
     */
    annotateNextWith(v: Value<T>): Value<T> {
        this.state.skipws();
        if (this.state.atEnd()) {
            // Snapshot the position so the error is not affected by later reads.
            throw new DecodeError("Trailing annotations and comments are not permitted",
                                  this.state.copyPos());
        }
        const u = this.next();
        if (this.state.includeAnnotations) (u as Annotated<T>).annotations.unshift(v);
        return u;
    }

    /** Reads values until only whitespace (or nothing) remains in the buffer. */
    readToEnd(): Array<Value<T>> {
        const acc: Array<Value<T>> = [];
        while (true) {
            this.state.skipws();
            if (this.state.atEnd()) return acc;
            acc.push(this.next());
        }
    }

    /**
     * Reads one complete value, dispatching on its first non-whitespace
     * character, and wraps it with its start position via {@link Reader.wrap}.
     * @throws ShortPacket if input ends mid-value
     * @throws DecodeError on malformed or reserved syntax
     */
    next(): Value<T> {
        this.state.skipws();
        const startPos = this.state.copyPos();
        const unwrapped = ((): Value<T> => {
            const c = this.state.nextchar();
            switch (c) {
                case '"':
                    return this.state.readString('"');
                case '|':
                    return Symbol.for(this.state.readString('|'));
                case ';':
                    this.state.error('Semicolon is reserved syntax', startPos);
                case '@':
                    return this.annotateNextWith(this.next());
                case ':':
                    this.state.error('Unexpected key/value separator between items', startPos);
                case '#': {
                    const c = this.state.nextchar();
                    switch (c) {
                        case ' ': case '\t': return this.annotateNextWith(this.readCommentLine());
                        case '\n': case '\r': return this.annotateNextWith('');
                        case 'f': this.state.requireDelimiter('#f'); return false;
                        case 't': this.state.requireDelimiter('#t'); return true;
                        case '{': return this.readSet();
                        case '"': return this.state.readLiteralBinary();
                        // Every arm of the inner switch returns or throws, so
                        // control never falls through into the `[` case.
                        case 'x': switch (this.state.nextchar()) {
                            case '"': return this.state.readHexBinary();
                            case 'f': return this.state.readHexFloat('Single');
                            case 'd': return this.state.readHexFloat('Double');
                            default: this.state.error('Invalid #x syntax', startPos);
                        }
                        case '[': return this.state.readBase64Binary();
                        // The embedded value is parsed with a generic reader
                        // sharing this state, then converted by `embeddedType`.
                        case '!': return embed(this.embeddedType.fromValue(
                            new Reader<GenericEmbedded>(this.state, genericEmbeddedTypeDecode).next(),
                            this.state.options));
                        default:
                            this.state.error(`Invalid # syntax: ${c}`, startPos);
                    }
                }
                case '<': {
                    const label = this.next();
                    const fields = this.readSequence('>', false);
                    return Record(label, fields);
                }
                case '[': return this.readSequence(']', true);
                case '{': return this.readDictionary();
                case '>': this.state.error('Unexpected >', startPos);
                case ']': this.state.error('Unexpected ]', startPos);
                case '}': this.state.error('Unexpected }', startPos);
                case ',': this.state.error('Unexpected ,', startPos);
                default:
                    return this.state.readRawSymbolOrNumber<T>(c);
            }
        })();
        return this.wrap(unwrapped, startPos);
    }

    /**
     * Reads values until the closing character `ch` is consumed, passing each
     * to `update` along with the accumulator.
     *
     * @param skipCommas whether commas between items are skipped like whitespace
     * @param acc accumulator handed to `update` and returned at the end
     * @param update called once per item read
     * @param ch closing delimiter
     */
    seq<S>(skipCommas: boolean, acc: S, update: (v: Value<T>, acc: S) => void, ch: string): S {
        while (true) {
            this.state.skipws(skipCommas);
            if (this.state.peek() === ch) {
                this.state.advance();
                return acc;
            }
            update(this.next(), acc);
        }
    }

    /** Reads items into an array until `ch` closes the sequence. */
    readSequence(ch: string, skipCommas: boolean): Array<Value<T>> {
        return this.seq(skipCommas, [] as Array<Value<T>>, (v, acc) => acc.push(v), ch);
    }

    /**
     * Reads `key: value` entries up to the closing `}`.
     * @throws DecodeError on a duplicate key or a missing `:` separator
     */
    readDictionary(): Dictionary<T> {
        return this.seq(true,
                        new Dictionary<T>(),
                        (k, acc) => {
                            this.state.skipws();
                            switch (this.state.peek()) {
                                case ':':
                                    if (acc.has(k)) this.state.error(
                                        `Duplicate key: ${stringify(k)}`, this.state.pos);
                                    this.state.advance();
                                    acc.set(k, this.next());
                                    break;

                                default:
                                    this.state.error('Missing key/value separator', this.state.pos);
                            }
                        },
                        '}');
    }

    /**
     * Reads set members up to the closing `}`.
     * @throws DecodeError on a duplicate member
     */
    readSet(): Set<T> {
        return this.seq(true,
                        new Set<T>(),
                        (v, acc) => {
                            if (acc.has(v)) this.state.error(
                                `Duplicate value in set: ${stringify(v)}`, this.state.pos);
                            acc.add(v);
                        },
                        '}');
    }
}

/** True when `s` is a space, tab, newline or carriage return. */
function isSpace(s: string): boolean {
    return ' \t\n\r'.includes(s);
}