// Text syntax reader.
//
// NOTE(review): this file had been collapsed onto a handful of physical lines
// and its generic parameter lists (`<...>`) had been stripped, leaving
// fragments such as `Array>` and undeclared `E`, `R`, `S`. The type
// parameters below were reconstructed from those fragments and from how the
// values are used; confirm their arity against the declarations in
// './values', './dictionary', './annotated', './decoder' and './embedded'.

import type { Value } from './values';
import { DecodeError, ShortPacket } from './codec';
import { Dictionary, Set } from './dictionary';
import { strip, unannotate } from './strip';
import { Bytes, unhexDigit } from './bytes';
import { decode, Decoder, DecoderState, neverEmbeddedTypeDecode } from './decoder';
import { Record } from './record';
import { Annotated, newPosition, Position, updatePosition } from './annotated';
import { Double, DoubleFloat, Single, SingleFloat } from './float';
import { stringify } from './text';
import { embed, GenericEmbedded, EmbeddedTypeDecode } from './embedded';

/** Options understood by {@link ReaderState}. */
export interface ReaderStateOptions {
    /** When true, values produced by the reader are wrapped in `Annotated`. Defaults to false. */
    includeAnnotations?: boolean;
    /** Either a name handed to `newPosition`, or an initial `Position` that is shallow-copied. */
    name?: string | Position;
}

/** Options understood by {@link Reader}. */
export interface ReaderOptions<T> extends ReaderStateOptions {
    /** Decoder for embedded values; `neverEmbeddedTypeDecode` is used when omitted. */
    embeddedDecode?: EmbeddedTypeDecode<T>;
}

type IntOrFloat = 'int' | 'float';
type Numeric = number | SingleFloat | DoubleFloat;
type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric;

/**
 * Character-level cursor over a string buffer, plus the lexical helpers
 * (numbers, strings, byte strings, bare symbols) used by {@link Reader}.
 *
 * `peek`, `nextchar` and `nextcharcode` throw `ShortPacket` when the buffer is
 * exhausted; more input can then be supplied with `write`.
 */
export class ReaderState {
    buffer: string;
    pos: Position;
    index: number;
    /** Running total of buffer characters dropped by `write`. */
    discarded = 0;
    options: ReaderStateOptions;

    constructor(buffer: string, options: ReaderStateOptions) {
        this.buffer = buffer;
        switch (typeof options.name) {
            case 'undefined': this.pos = newPosition(); break;
            case 'string': this.pos = newPosition(options.name); break;
            // Copy, so that advancing this reader does not mutate the caller's Position.
            case 'object': this.pos = { ... options.name }; break;
        }
        this.index = 0;
        this.options = options;
    }

    /**
     * Throws a `DecodeError` carrying `message` and a copy of `pos`.
     * @throws DecodeError always.
     */
    error(message: string, pos: Position): never {
        throw new DecodeError(message, { ... pos });
    }

    /** Whether annotations are being retained (false unless enabled in the options). */
    get includeAnnotations(): boolean {
        return this.options.includeAnnotations ?? false;
    }

    /** Returns a shallow copy of the current position. */
    copyPos(): Position {
        return { ... this.pos };
    }

    /**
     * Supplies more input. Characters before `index` are dropped (and counted
     * in `discarded`), the unread remainder is kept, and `data` is appended.
     */
    write(data: string) {
        if (this.atEnd()) {
            this.buffer = data;
        } else {
            // `slice` replaces the deprecated `substr`; identical for a non-negative index.
            this.buffer = this.buffer.slice(this.index) + data;
        }
        this.discarded += this.index;
        this.index = 0;
    }

    /** True when every character in the buffer has been consumed. */
    atEnd(): boolean {
        return (this.index >= this.buffer.length);
    }

    /**
     * Returns the next character without consuming it.
     * @throws ShortPacket if the buffer is exhausted.
     */
    peek(): string {
        if (this.atEnd()) throw new ShortPacket("Short term", this.pos);
        return this.buffer[this.index];
    }

    /**
     * Consumes one character, feeding it to `updatePosition`, and returns the
     * index it occupied. Does not check for end of buffer.
     */
    advance(): number {
        const n = this.index++;
        updatePosition(this.pos, this.buffer[n]);
        return n;
    }

    /**
     * Consumes and returns the next character.
     * @throws ShortPacket if the buffer is exhausted.
     */
    nextchar(): string {
        if (this.atEnd()) throw new ShortPacket("Short term", this.pos);
        return this.buffer[this.advance()];
    }

    /**
     * Consumes the next character and returns its UTF-16 code unit.
     * @throws ShortPacket if the buffer is exhausted.
     */
    nextcharcode(): number {
        if (this.atEnd()) throw new ShortPacket("Short term", this.pos);
        return this.buffer.charCodeAt(this.advance());
    }

    /** Consumes characters for which `isSpace` holds; stops quietly at end of buffer. */
    skipws() {
        while (true) {
            if (this.atEnd()) break;
            if (!isSpace(this.peek())) break;
            this.advance();
        }
    }

    /** Reads two characters through `unhexDigit` and combines them into one byte value. */
    readHex2(): number {
        const x1 = unhexDigit(this.nextcharcode());
        const x2 = unhexDigit(this.nextcharcode());
        return (x1 << 4) | x2;
    }

    /** Reads four characters through `unhexDigit` and combines them into one 16-bit value. */
    readHex4(): number {
        const x1 = unhexDigit(this.nextcharcode());
        const x2 = unhexDigit(this.nextcharcode());
        const x3 = unhexDigit(this.nextcharcode());
        const x4 = unhexDigit(this.nextcharcode());
        return (x1 << 12) | (x2 << 8) | (x3 << 4) | x4;
    }

    /**
     * Reads hex digit pairs, skipping whitespace between pairs, up to and
     * including a closing `"`. The caller has already consumed the opening
     * delimiter.
     */
    readHexBinary(): Bytes {
        const acc: number[] = [];
        while (true) {
            this.skipws();
            if (this.peek() === '"') {
                this.advance();
                return Bytes.from(acc);
            }
            acc.push(this.readHex2());
        }
    }

    /**
     * Collects characters, skipping whitespace, up to and including a closing
     * `]`, then decodes them with `decodeBase64`.
     */
    readBase64Binary(): Bytes {
        let acc = '';
        while (true) {
            this.skipws();
            const c = this.nextchar();
            if (c === ']') break;
            acc = acc + c;
        }
        return decodeBase64(acc);
    }

    /**
     * Reads the integer part of a number whose first digit `ch` has already
     * been consumed. `acc` holds the text so far (`''` or `'-'`). A leading
     * `0` is not followed by further integer digits.
     */
    readIntpart(acc: string, ch: string): Numeric {
        if (ch === '0') return this.readFracexp('int', acc + ch);
        return this.readDigit1('int', acc, (kind, acc) => this.readFracexp(kind, acc), ch);
    }

    /**
     * Requires at least one digit, taken from `ch` if supplied and otherwise
     * from the input, then reads any further digits and continues with `k`.
     * @throws DecodeError ('Incomplete number') if the character is not a digit.
     */
    readDigit1(kind: IntOrFloat, acc: string, k: IntContinuation, ch?: string): Numeric {
        if (ch === void 0) ch = this.nextchar();
        if (ch >= '0' && ch <= '9') return this.readDigit0(kind, acc + ch, k);
        this.error('Incomplete number', this.pos);
    }

    /**
     * Reads zero or more digits, then continues with `k`. Because it uses
     * `peek`, a run of digits that reaches the end of the buffer raises
     * `ShortPacket` rather than completing the number.
     */
    readDigit0(kind: IntOrFloat, acc: string, k: IntContinuation): Numeric {
        while (true) {
            const ch = this.peek();
            if (!(ch >= '0' && ch <= '9')) break;
            this.advance();
            acc = acc + ch;
        }
        return k(kind, acc);
    }

    /** Reads an optional `.digits` fraction (making the number a float), then the exponent. */
    readFracexp(kind: IntOrFloat, acc: string): Numeric {
        if (this.peek() === '.') {
            this.advance();
            return this.readDigit1('float', acc + '.', (kind, acc) => this.readExp(kind, acc));
        }
        return this.readExp(kind, acc);
    }

    /** Reads an optional `e`/`E` exponent; without one the number is finished as-is. */
    readExp(kind: IntOrFloat, acc: string): Numeric {
        const ch = this.peek();
        if (ch === 'e' || ch === 'E') {
            this.advance();
            return this.readSignAndExp(acc + ch);
        }
        return this.finishNumber(kind, acc);
    }

    /** Reads an optional exponent sign followed by at least one exponent digit. */
    readSignAndExp(acc: string): Numeric {
        const ch = this.peek();
        if (ch === '+' || ch === '-') {
            this.advance();
            return this.readDigit1('float', acc + ch, (kind, acc) => this.finishNumber(kind, acc));
        }
        return this.readDigit1('float', acc, (kind, acc) => this.finishNumber(kind, acc));
    }

    /**
     * Converts the accumulated text with `parseFloat`. An 'int' is returned as
     * a plain number; a 'float' becomes `Single` when followed by `f`/`F`
     * (which is consumed) and `Double` otherwise.
     */
    finishNumber(kind: IntOrFloat, acc: string): Numeric {
        const i = parseFloat(acc);
        if (kind === 'int') return i;
        const ch = this.peek();
        if (ch === 'f' || ch === 'F') {
            this.advance();
            return Single(i);
        } else {
            return Double(i);
        }
    }

    /**
     * Extends `acc` with characters up to (not including) the next delimiter,
     * whitespace character or end of buffer, and returns `Symbol.for` of the
     * result.
     */
    readRawSymbol<T>(acc: string): Value<T> {
        while (true) {
            if (this.atEnd()) break;
            const ch = this.peek();
            if (('(){}[]<>";,@#:|'.indexOf(ch) !== -1) || isSpace(ch)) break;
            this.advance();
            acc = acc + ch;
        }
        return Symbol.for(acc);
    }

    /**
     * Shared scanner for quoted literals; the opening quote has already been
     * consumed.
     *
     * @param xform converts one literal (or simple-escaped) character to an element
     * @param finish builds the result from the collected elements
     * @param terminator closing quote character; it may also appear escaped
     * @param hexescape letter that introduces a hex escape after a backslash
     * @param hex reads the payload of a hex escape and returns an element
     * @throws DecodeError on an unrecognised escape code.
     */
    readStringlike<E, R>(xform: (ch: string) => E,
                         finish: (acc: E[]) => R,
                         terminator: string,
                         hexescape: string,
                         hex: () => E): R
    {
        const acc: E[] = [];
        while (true) {
            const ch = this.nextchar();
            switch (ch) {
                case terminator:
                    return finish(acc);

                case '\\': {
                    const esc = this.nextchar();
                    switch (esc) {
                        case hexescape: acc.push(hex()); break;

                        case terminator:
                        case '\\':
                        case '/':
                            acc.push(xform(esc)); break;

                        case 'b': acc.push(xform('\x08')); break;
                        case 'f': acc.push(xform('\x0c')); break;
                        case 'n': acc.push(xform('\x0a')); break;
                        case 'r': acc.push(xform('\x0d')); break;
                        case 't': acc.push(xform('\x09')); break;

                        default:
                            this.error(`Invalid escape code \\${esc}`, this.pos);
                    }
                    break;
                }

                default:
                    acc.push(xform(ch));
                    break;
            }
        }
    }

    /**
     * Reads a string body up to `terminator`. `\uXXXX` escapes are supported;
     * a value in the surrogate range must be a high surrogate immediately
     * followed by a `\uXXXX` low surrogate.
     * @throws DecodeError ('Invalid surrogate pair') otherwise.
     */
    readString(terminator: string): string {
        return this.readStringlike(x => x, xs => xs.join(''), terminator, 'u', () => {
            const n1 = this.readHex4();
            if ((n1 >= 0xd800) && (n1 <= 0xdfff)) {
                if ((this.nextchar() === '\\') && (this.nextchar() === 'u')) {
                    const n2 = this.readHex4();
                    if ((n2 >= 0xdc00) && (n2 <= 0xdfff) && (n1 <= 0xdbff)) {
                        return String.fromCharCode(n1, n2);
                    }
                }
                this.error('Invalid surrogate pair', this.pos);
            }
            return String.fromCharCode(n1);
        });
    }

    /**
     * Reads a `"`-terminated literal byte string. Each character must have a
     * code unit below 256; `\xHH` escapes supply arbitrary byte values.
     * @throws DecodeError if a character's code unit is 256 or above.
     */
    readLiteralBinary(): Bytes {
        return this.readStringlike(
            x => {
                const v = x.charCodeAt(0);
                if (v >= 256) this.error(`Invalid code point ${v} in literal binary`, this.pos);
                return v;
            },
            Bytes.from,
            '"',
            'x',
            () => this.readHex2());
    }
}

/**
 * Embedded-value decoder producing `GenericEmbedded` wrappers. `fromValue`
 * strips annotations from the value unless `includeAnnotations` is set.
 */
export const genericEmbeddedTypeDecode: EmbeddedTypeDecode<GenericEmbedded> = {
    decode(s: DecoderState): GenericEmbedded {
        return new GenericEmbedded(new Decoder(s, this).next());
    },

    fromValue(v: Value<GenericEmbedded>, options: ReaderStateOptions): GenericEmbedded {
        return new GenericEmbedded(options.includeAnnotations ? v : strip(v));
    },
};

/**
 * Reads values from text, either from an existing {@link ReaderState} or from
 * a string buffer (for which a fresh state is created).
 */
export class Reader<T> {
    state: ReaderState;
    embeddedType: EmbeddedTypeDecode<T>;

    constructor(state: ReaderState, embeddedType: EmbeddedTypeDecode<T>);
    constructor(buffer: string, options?: ReaderOptions<T>);
    constructor(
        state_or_buffer: (ReaderState | string) = '',
        embeddedType_or_options?: (EmbeddedTypeDecode<T> | ReaderOptions<T>))
    {
        if (state_or_buffer instanceof ReaderState) {
            this.state = state_or_buffer;
            this.embeddedType = embeddedType_or_options as EmbeddedTypeDecode<T>;
        } else {
            const options = (embeddedType_or_options as ReaderOptions<T>) ?? {};
            this.state = new ReaderState(state_or_buffer, options);
            this.embeddedType = options.embeddedDecode ?? neverEmbeddedTypeDecode;
        }
    }

    /** Appends more input to the underlying state. */
    write(data: string) {
        this.state.write(data);
    }

    /**
     * Reads the rest of a comment line, up to and consuming the first `\n` or
     * `\r`, and returns its text (wrapped when annotations are retained). A
     * comment that reaches the end of the buffer raises `ShortPacket`.
     */
    readCommentLine(): Value<T> {
        const startPos = this.state.copyPos();
        let acc = '';
        while (true) {
            const c = this.state.nextchar();
            if (c === '\n' || c === '\r') {
                return this.wrap(acc, startPos);
            }
            acc = acc + c;
        }
    }

    /** Wraps `v` in `Annotated` at `pos` when annotations are retained and it is not already wrapped. */
    wrap(v: Value<T>, pos: Position): Value<T> {
        if (this.state.includeAnnotations && !Annotated.isAnnotated<T>(v)) {
            v = new Annotated(v, pos);
        }
        return v;
    }

    /**
     * Reads the next value and, when annotations are retained, prepends `v`
     * to its annotations.
     * @throws DecodeError if only whitespace remains in the buffer.
     */
    annotateNextWith(v: Value<T>): Value<T> {
        this.state.skipws();
        if (this.state.atEnd()) {
            throw new DecodeError("Trailing annotations and comments are not permitted",
                                  this.state.pos);
        }
        const u = this.next();
        // With includeAnnotations on, `next` returns its result through `wrap`,
        // so `u` is an Annotated here.
        if (this.state.includeAnnotations) (u as Annotated<T>).annotations.unshift(v);
        return u;
    }

    /** Reads values until only whitespace (or nothing) remains in the buffer. */
    readToEnd(): Array<Value<T>> {
        const acc: Array<Value<T>> = [];
        while (true) {
            this.state.skipws();
            if (this.state.atEnd()) return acc;
            acc.push(this.next());
        }
    }

    /**
     * Reads one value, dispatching on its first non-whitespace character.
     * Syntax errors detected here are reported at the position where the
     * value started.
     * @throws ShortPacket if the buffer runs out before the value is complete.
     * @throws DecodeError on invalid syntax.
     */
    next(): Value<T> {
        this.state.skipws();
        const startPos = this.state.copyPos();
        const unwrapped = ((): Value<T> => {
            const c = this.state.nextchar();
            switch (c) {
                case '-':
                    return this.state.readIntpart('-', this.state.nextchar());
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    return this.state.readIntpart('', c);
                case '"':
                    return this.state.readString('"');
                case '|':
                    return Symbol.for(this.state.readString('|'));
                case ';':
                    return this.annotateNextWith(this.readCommentLine());
                case '@':
                    return this.annotateNextWith(this.next());
                case ':':
                    this.state.error('Unexpected key/value separator between items', startPos);
                case '#': {
                    const c = this.state.nextchar();
                    switch (c) {
                        case 'f': return false;
                        case 't': return true;
                        case '{': return this.seq(new Set<T>(), (v, s) => s.add(v), '}');
                        case '"': return this.state.readLiteralBinary();
                        case 'x':
                            if (this.state.nextchar() !== '"') {
                                this.state.error('Expected open-quote at start of hex ByteString',
                                                 startPos);
                            }
                            return this.state.readHexBinary();
                        case '[': return this.state.readBase64Binary();
                        case '=': {
                            // `#=` is followed by a byte string that is handed to `decode`.
                            const bs = unannotate(this.next());
                            if (!Bytes.isBytes(bs)) this.state.error('ByteString must follow #=',
                                                                     startPos);
                            return decode<T>(bs, {
                                embeddedDecode: this.embeddedType,
                                includeAnnotations: this.state.options.includeAnnotations,
                            });
                        }
                        case '!':
                            // The embedded value's text is read with the generic decoder
                            // over the same state, then converted by `embeddedType`.
                            return embed(this.embeddedType.fromValue(
                                new Reader<GenericEmbedded>(this.state, genericEmbeddedTypeDecode).next(),
                                this.state.options));
                        default:
                            this.state.error(`Invalid # syntax: ${c}`, startPos);
                    }
                }
                case '<': {
                    const label = this.next();
                    const fields = this.readSequence('>');
                    return Record(label, fields);
                }
                case '[': return this.readSequence(']');
                case '{': return this.readDictionary();
                case '>': this.state.error('Unexpected >', startPos);
                case ']': this.state.error('Unexpected ]', startPos);
                case '}': this.state.error('Unexpected }', startPos);
                default:
                    return this.state.readRawSymbol(c);
            }
        })();
        return this.wrap(unwrapped, startPos);
    }

    /**
     * Reads values until the closing character `ch` (which is consumed),
     * passing each value to `update` together with `acc`; returns `acc`.
     */
    seq<S>(acc: S, update: (v: Value<T>, acc: S) => void, ch: string): S {
        while (true) {
            this.state.skipws();
            if (this.state.peek() === ch) {
                this.state.advance();
                return acc;
            }
            update(this.next(), acc);
        }
    }

    /** Reads values into an array until the closing character `ch`. */
    readSequence(ch: string): Array<Value<T>> {
        return this.seq([] as Array<Value<T>>, (v, acc) => acc.push(v), ch);
    }

    /**
     * Reads `key : value` entries until the closing `}`.
     * @throws DecodeError on a duplicate key or a missing `:` separator.
     */
    readDictionary(): Dictionary<T> {
        return this.seq(new Dictionary<T>(), (k, acc) => {
            this.state.skipws();
            switch (this.state.peek()) {
                case ':':
                    if (acc.has(k)) this.state.error(
                        `Duplicate key: ${stringify(k)}`, this.state.pos);
                    this.state.advance();
                    acc.set(k, this.next());
                    break;

                default:
                    this.state.error('Missing key/value separator', this.state.pos);
            }
        }, '}');
    }
}

// Character -> 6-bit value. Both the '+', '/' and the '-', '_' spellings of
// values 62 and 63 are accepted.
const BASE64: {[key: string]: number} = {};
[... 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'].forEach(
    (c, i) => BASE64[c] = i);
BASE64['+'] = BASE64['-'] = 62;
BASE64['/'] = BASE64['_'] = 63;

/**
 * Decodes base64 text, four characters at a time. Decoding stops at the first
 * group whose third or fourth character is not in the alphabet (for example
 * `=` padding, or a short final group). Characters outside the alphabet in the
 * first two positions are not rejected; they contribute zero bits.
 */
export function decodeBase64(s: string): Bytes {
    const bs = new Uint8Array(Math.floor(s.length * 3/4));
    let i = 0;
    let j = 0;
    while (i < s.length) {
        const v1 = BASE64[s[i++]];
        const v2 = BASE64[s[i++]];
        const v3 = BASE64[s[i++]];
        const v4 = BASE64[s[i++]];
        const v = (v1 << 18) | (v2 << 12) | (v3 << 6) | v4;
        bs[j++] = (v >> 16) & 255;
        if (v3 === void 0) break;
        bs[j++] = (v >> 8) & 255;
        if (v4 === void 0) break;
        bs[j++] = v & 255;
    }
    return Bytes.from(bs.subarray(0, j));
}

/** True for space, tab, newline, carriage return and comma (commas count as whitespace). */
function isSpace(s: string): boolean {
    return ' \t\n\r,'.indexOf(s) !== -1;
}