2021-03-05 20:16:14 +00:00
|
|
|
// Text syntax reader.
|
|
|
|
|
|
|
|
import type { Value } from './values';
|
|
|
|
import { DecodeError, ShortPacket } from './codec';
|
|
|
|
import { Dictionary, Set } from './dictionary';
|
|
|
|
import { unannotate } from './strip';
|
|
|
|
import { Bytes, unhexDigit } from './bytes';
|
|
|
|
import { decode } from './decoder';
|
|
|
|
import { Record } from './record';
|
2021-03-11 13:43:06 +00:00
|
|
|
import { Annotated, newPosition, Position, updatePosition } from './annotated';
|
2021-03-05 20:16:14 +00:00
|
|
|
import { Double, DoubleFloat, Single, SingleFloat } from './float';
|
2021-03-10 22:14:26 +00:00
|
|
|
import { stringify } from './text';
|
2021-03-05 20:16:14 +00:00
|
|
|
|
2021-03-10 22:14:26 +00:00
|
|
|
/**
 * Options accepted by the `Reader` constructor. Every field is optional.
 */
export interface ReaderOptions<T> {
    /**
     * Where the input starts: either a source name (handed to
     * `newPosition`) or a complete `Position`, which the reader copies
     * rather than adopts.
     */
    name?: Position | string;

    /**
     * Converts the term following `#!` into an embedded `T`. The reader
     * reports an error if `#!` is encountered and this is not supplied.
     */
    decodePointer?: (value: Value<T>) => T;

    /**
     * When true, terms are wrapped in `Annotated` and comments/annotations
     * are attached to them. The reader treats a missing value as `false`.
     */
    includeAnnotations?: boolean;
}
|
|
|
|
|
|
|
|
/** Tracks whether the numeric literal seen so far is integral or floating-point. */
type IntOrFloat = 'float' | 'int';

/** Result of reading a numeric literal: a plain number or a boxed float. */
type Numeric = DoubleFloat | SingleFloat | number;

/**
 * Continuation invoked once a run of digits has been consumed; receives the
 * current literal kind and the text accumulated so far.
 */
type IntContinuation = (numberKind: IntOrFloat, text: string) => Numeric;
|
|
|
|
|
2021-03-10 22:14:26 +00:00
|
|
|
/**
 * Reader for the text syntax: turns characters held in `buffer` into terms.
 *
 * Input can be given to the constructor and/or appended later with
 * `write()`. Reading past the end of the buffer throws `ShortPacket`;
 * malformed input throws `DecodeError`. Every thrown error carries a
 * snapshot of the position, never the reader's live `pos` object (which
 * `advance()` keeps updating in place).
 */
export class Reader<T> {
    /** Input text; `index` is the offset of the next unread character. */
    buffer: string;
    /** Position of the next unread character, handed to `updatePosition` on every `advance()`. */
    pos: Position;
    /** Offset into `buffer` of the next unread character. */
    index: number;
    /** Sum of the `index` values that were current at each `write()` call. */
    discarded = 0;
    options: ReaderOptions<T>;

    /**
     * @param buffer initial input text
     * @param options reader options; `options.name` seeds the start position
     */
    constructor(buffer: string = '', options: ReaderOptions<T> = {}) {
        this.buffer = buffer;
        // Exhaustive if/else (rather than a default-less switch on typeof)
        // so that `pos` is assigned on every path.
        const name = options.name;
        if (name === void 0) {
            this.pos = newPosition();
        } else if (typeof name === 'string') {
            this.pos = newPosition(name);
        } else {
            // Copy: the caller's Position must not be mutated by advance().
            this.pos = { ... name };
        }
        this.index = 0;
        this.options = options;
    }

    /** Whether terms are wrapped in `Annotated`; defaults to false. */
    get includeAnnotations(): boolean {
        return this.options.includeAnnotations ?? false;
    }

    /**
     * Appends more input, dropping everything already consumed from the
     * front of the buffer and accounting for it in `discarded`.
     */
    write(data: string): void {
        this.buffer = this.atEnd() ? data : this.buffer.slice(this.index) + data;
        this.discarded += this.index;
        this.index = 0;
    }

    /**
     * Throws a `DecodeError` carrying a copy of `pos`.
     * @throws DecodeError always
     */
    error(message: string, pos: Position): never {
        throw new DecodeError(message, { ... pos });
    }

    /** True when no unread characters remain in the buffer. */
    atEnd(): boolean {
        return this.index >= this.buffer.length;
    }

    /**
     * Returns the next character without consuming it.
     * @throws ShortPacket if the buffer is exhausted
     */
    peek(): string {
        if (this.atEnd()) throw new ShortPacket("Short term", { ... this.pos });
        return this.buffer[this.index];
    }

    /**
     * Consumes one character, updating `pos`, and returns the buffer offset
     * of the character just consumed. Does not check for end of input.
     */
    advance(): number {
        const n = this.index++;
        updatePosition(this.pos, this.buffer[n]);
        return n;
    }

    /**
     * Consumes and returns the next character.
     * @throws ShortPacket if the buffer is exhausted
     */
    nextchar(): string {
        if (this.atEnd()) throw new ShortPacket("Short term", { ... this.pos });
        return this.buffer[this.advance()];
    }

    /**
     * Consumes the next character and returns its UTF-16 code unit.
     * @throws ShortPacket if the buffer is exhausted
     */
    nextcharcode(): number {
        if (this.atEnd()) throw new ShortPacket("Short term", { ... this.pos });
        return this.buffer.charCodeAt(this.advance());
    }

    /** Consumes characters for which `isSpace` holds; stops quietly at end of input. */
    skipws(): void {
        while (!this.atEnd() && isSpace(this.peek())) this.advance();
    }

    /**
     * Reads the remainder of a `;` comment, up to and including the first
     * `\n` or `\r`, and returns its text (wrapped if annotations are on).
     * @throws ShortPacket if the buffer ends before a line terminator
     */
    readCommentLine(): Value<T> {
        const startPos = { ... this.pos };
        let acc = '';
        while (true) {
            const c = this.nextchar();
            if (c === '\n' || c === '\r') return this.wrap(acc, startPos);
            acc = acc + c;
        }
    }

    /**
     * When annotations are enabled and `v` is not already `Annotated`,
     * wraps it together with `pos`; otherwise returns `v` unchanged.
     */
    wrap(v: Value<T>, pos: Position): Value<T> {
        if (this.includeAnnotations && !Annotated.isAnnotated(v)) {
            v = new Annotated(v, pos);
        }
        return v;
    }

    /**
     * Reads the next term and, when annotations are enabled, prepends `v`
     * to that term's annotation list.
     * @throws DecodeError if only whitespace follows the annotation
     */
    annotateNextWith(v: Value<T>): Value<T> {
        this.skipws();
        if (this.atEnd()) {
            // Routed through error() so the reported position is a snapshot.
            return this.error("Trailing annotations and comments are not permitted", this.pos);
        }
        const u = this.next();
        // With annotations on, next() always returns an Annotated (see wrap()).
        if (this.includeAnnotations) (u as Annotated<T>).annotations.unshift(v);
        return u;
    }

    /** Reads terms until only whitespace remains and returns them in order. */
    readToEnd(): Array<Value<T>> {
        const acc: Array<Value<T>> = [];
        while (true) {
            this.skipws();
            if (this.atEnd()) return acc;
            acc.push(this.next());
        }
    }

    /**
     * Reads one complete term, dispatching on its first character.
     * @throws ShortPacket if the buffer ends part-way through the term
     * @throws DecodeError on malformed input
     */
    next(): Value<T> {
        this.skipws();
        const startPos = { ... this.pos };
        const unwrapped = (() => {
            const c = this.nextchar();
            switch (c) {
                case '-':
                    return this.readIntpart('-', this.nextchar());
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    return this.readIntpart('', c);
                case '"':
                    return this.readString('"');
                case '|':
                    return Symbol.for(this.readString('|'));
                case ';':
                    return this.annotateNextWith(this.readCommentLine());
                case '@':
                    return this.annotateNextWith(this.next());
                case ':':
                    return this.error('Unexpected key/value separator between items', startPos);
                case '#': {
                    const c = this.nextchar();
                    switch (c) {
                        case 'f': return false;
                        case 't': return true;
                        case '{': return this.seq(new Set<T>(), (v, s) => s.add(v), '}');
                        case '"': return this.readLiteralBinary();
                        case 'x':
                            if (this.nextchar() !== '"') {
                                return this.error('Expected open-quote at start of hex ByteString',
                                                  startPos);
                            }
                            return this.readHexBinary();
                        case '[': return this.readBase64Binary();
                        case '=': {
                            // `#=` is followed by a byte string that is handed to `decode`.
                            const bs = unannotate(this.next());
                            if (!Bytes.isBytes(bs)) {
                                return this.error('ByteString must follow #=', startPos);
                            }
                            return decode<T>(bs, {
                                decodePointer: d => this.options.decodePointer?.(d.next()),
                                includeAnnotations: this.options.includeAnnotations,
                            });
                        }
                        case '!': {
                            const d = this.options.decodePointer;
                            if (d === void 0) {
                                return this.error("No decodePointer function supplied", startPos);
                            }
                            return d(this.next());
                        }
                        default:
                            return this.error(`Invalid # syntax: ${c}`, startPos);
                    }
                }
                case '<': {
                    const label = this.next();
                    const fields = this.readSequence('>');
                    return Record(label, fields);
                }
                case '[': return this.readSequence(']');
                case '{': return this.readDictionary();
                case '>': return this.error('Unexpected >', startPos);
                case ']': return this.error('Unexpected ]', startPos);
                case '}': return this.error('Unexpected }', startPos);
                default:
                    return this.readRawSymbol(c);
            }
        })();
        return this.wrap(unwrapped, startPos);
    }

    /**
     * Reads terms until the closing character `ch` is seen (and consumed),
     * feeding each term to `update` together with the accumulator.
     * @returns the accumulator `acc`
     */
    seq<S>(acc: S, update: (v: Value<T>, acc: S) => void, ch: string): S {
        while (true) {
            this.skipws();
            if (this.peek() === ch) {
                this.advance();
                return acc;
            }
            update(this.next(), acc);
        }
    }

    /** Reads terms into an array until the closing character `ch`. */
    readSequence(ch: string): Array<Value<T>> {
        return this.seq([] as Array<Value<T>>, (v, acc) => acc.push(v), ch);
    }

    /**
     * Reads the body of a `#x"..."` literal: pairs of hex digits, optionally
     * separated by whitespace, up to the closing double quote.
     */
    readHexBinary(): Bytes {
        const acc: number[] = [];
        while (true) {
            this.skipws();
            if (this.peek() === '"') {
                this.advance();
                return Bytes.from(acc);
            }
            acc.push(this.readHex2());
        }
    }

    /**
     * Reads `key: value` pairs up to the closing `}`.
     * @throws DecodeError on a missing `:` or a repeated key
     */
    readDictionary(): Dictionary<Value<T>, T> {
        return this.seq(new Dictionary<Value<T>, T>(),
                        (k, acc) => {
                            this.skipws();
                            if (this.peek() !== ':') {
                                this.error('Missing key/value separator', this.pos);
                            }
                            // Checked before the value is read, so the error
                            // position points at the separator.
                            if (acc.has(k)) {
                                this.error(`Duplicate key: ${stringify(k)}`, this.pos);
                            }
                            this.advance();
                            acc.set(k, this.next());
                        },
                        '}');
    }

    /**
     * Reads the body of a `#[...]` literal: collects every non-whitespace
     * character up to `]` and passes the result to `decodeBase64`.
     */
    readBase64Binary(): Bytes {
        let acc = '';
        while (true) {
            this.skipws();
            const c = this.nextchar();
            if (c === ']') break;
            acc = acc + c;
        }
        return decodeBase64(acc);
    }

    /**
     * Reads the integer part of a number. `acc` holds the text so far (a
     * sign or nothing) and `ch` is the first, already consumed, character.
     * A leading `0` is not followed by further integer digits.
     */
    readIntpart(acc: string, ch: string): Numeric {
        if (ch === '0') return this.readFracexp('int', acc + ch);
        return this.readDigit1('int', acc, (kind, acc) => this.readFracexp(kind, acc), ch);
    }

    /**
     * Requires one digit (`ch`, or the next character if `ch` is omitted),
     * then reads any further digits and continues with `k`.
     * @throws DecodeError if the character is not a digit
     */
    readDigit1(kind: IntOrFloat, acc: string, k: IntContinuation, ch?: string): Numeric {
        if (ch === void 0) ch = this.nextchar();
        if (ch >= '0' && ch <= '9') return this.readDigit0(kind, acc + ch, k);
        return this.error('Incomplete number', this.pos);
    }

    /**
     * Reads zero or more digits onto `acc`, then continues with `k`.
     * Uses `peek()`, so reaching the end of the buffer throws `ShortPacket`.
     */
    readDigit0(kind: IntOrFloat, acc: string, k: IntContinuation): Numeric {
        while (true) {
            const ch = this.peek();
            if (!(ch >= '0' && ch <= '9')) break;
            this.advance();
            acc = acc + ch;
        }
        return k(kind, acc);
    }

    /** Reads an optional `.digits` fraction (making the number a float), then the exponent. */
    readFracexp(kind: IntOrFloat, acc: string): Numeric {
        if (this.peek() === '.') {
            this.advance();
            return this.readDigit1('float', acc + '.', (kind, acc) => this.readExp(kind, acc));
        }
        return this.readExp(kind, acc);
    }

    /** Reads an optional exponent introduced by `e` or `E`, then finishes the number. */
    readExp(kind: IntOrFloat, acc: string): Numeric {
        const ch = this.peek();
        if (ch === 'e' || ch === 'E') {
            this.advance();
            return this.readSignAndExp(acc + ch);
        }
        return this.finishNumber(kind, acc);
    }

    /** Reads an optional exponent sign followed by at least one digit; the result is a float. */
    readSignAndExp(acc: string): Numeric {
        const ch = this.peek();
        if (ch === '+' || ch === '-') {
            this.advance();
            return this.readDigit1('float', acc + ch, (kind, acc) => this.finishNumber(kind, acc));
        }
        return this.readDigit1('float', acc, (kind, acc) => this.finishNumber(kind, acc));
    }

    /**
     * Converts the accumulated text with `parseFloat`. Integers are returned
     * as plain numbers; floats become `Single` when followed by `f`/`F`
     * (which is consumed) and `Double` otherwise.
     */
    finishNumber(kind: IntOrFloat, acc: string): Numeric {
        const i = parseFloat(acc);
        if (kind === 'int') return i;
        const ch = this.peek();
        if (ch === 'f' || ch === 'F') {
            this.advance();
            return Single(i);
        } else {
            return Double(i);
        }
    }

    /**
     * Reads a bare symbol whose first character(s) are already in `acc`,
     * stopping at a delimiter, at whitespace, or at the end of the buffer.
     */
    readRawSymbol(acc: string): Value<T> {
        while (!this.atEnd()) {
            const ch = this.peek();
            if ('(){}[]<>";,@#:|'.includes(ch) || isSpace(ch)) break;
            this.advance();
            acc = acc + ch;
        }
        return Symbol.for(acc);
    }

    /**
     * Shared scanner for quoted, backslash-escaped literals.
     *
     * @param xform maps one source character to an element
     * @param finish builds the result from the collected elements
     * @param terminator closing quote character
     * @param hexescape escape letter that introduces a hex escape
     * @param hex reads the hex escape's digits and returns its element
     * @throws DecodeError on an unrecognised escape
     */
    readStringlike<E, R>(xform: (ch: string) => E,
                         finish: (acc: E[]) => R,
                         terminator: string,
                         hexescape: string,
                         hex: () => E): R
    {
        const acc: E[] = [];
        while (true) {
            const ch = this.nextchar();
            switch (ch) {
                case terminator:
                    return finish(acc);
                case '\\': {
                    const ch = this.nextchar();
                    switch (ch) {
                        case hexescape: acc.push(hex()); break;

                        case terminator:
                        case '\\':
                        case '/':
                            acc.push(xform(ch)); break;

                        case 'b': acc.push(xform('\x08')); break;
                        case 'f': acc.push(xform('\x0c')); break;
                        case 'n': acc.push(xform('\x0a')); break;
                        case 'r': acc.push(xform('\x0d')); break;
                        case 't': acc.push(xform('\x09')); break;

                        default:
                            return this.error(`Invalid escape code \\${ch}`, this.pos);
                    }
                    break;
                }
                default:
                    acc.push(xform(ch));
                    break;
            }
        }
    }

    /** Reads two hex digits and returns the byte they denote. */
    readHex2(): number {
        const x1 = unhexDigit(this.nextcharcode());
        const x2 = unhexDigit(this.nextcharcode());
        return (x1 << 4) | x2;
    }

    /** Reads four hex digits and returns the 16-bit value they denote. */
    readHex4(): number {
        const x1 = unhexDigit(this.nextcharcode());
        const x2 = unhexDigit(this.nextcharcode());
        const x3 = unhexDigit(this.nextcharcode());
        const x4 = unhexDigit(this.nextcharcode());
        return (x1 << 12) | (x2 << 8) | (x3 << 4) | x4;
    }

    /**
     * Reads a string literal up to `terminator`. `\uXXXX` escapes are
     * supported; a surrogate must appear as a high surrogate escape
     * immediately followed by a low surrogate escape.
     * @throws DecodeError on an unpaired or misordered surrogate
     */
    readString(terminator: string): string {
        return this.readStringlike(x => x, xs => xs.join(''), terminator, 'u', () => {
            const n1 = this.readHex4();
            if ((n1 >= 0xd800) && (n1 <= 0xdfff)) {
                if ((this.nextchar() === '\\') && (this.nextchar() === 'u')) {
                    const n2 = this.readHex4();
                    if ((n2 >= 0xdc00) && (n2 <= 0xdfff) && (n1 <= 0xdbff)) {
                        return String.fromCharCode(n1, n2);
                    }
                }
                return this.error('Invalid surrogate pair', this.pos);
            }
            return String.fromCharCode(n1);
        });
    }

    /**
     * Reads the body of a `#"..."` literal. Each character must have a code
     * below 256; `\xHH` escapes supply arbitrary bytes.
     * @throws DecodeError if a character's code is 256 or above
     */
    readLiteralBinary(): Bytes {
        return this.readStringlike(
            x => {
                const v = x.charCodeAt(0);
                if (v >= 256) this.error(`Invalid code point ${v} in literal binary`, this.pos);
                return v;
            },
            // Wrapped in an arrow so the call does not depend on how
            // `Bytes.from` treats an unbound `this`.
            xs => Bytes.from(xs),
            '"',
            'x',
            () => this.readHex2());
    }
}
|
|
|
|
|
|
|
|
/**
 * Maps each base64 alphabet character to its 6-bit value. Both the standard
 * (`+`, `/`) and the URL-safe (`-`, `_`) characters are accepted for 62/63.
 */
const BASE64: {[key: string]: number} = {};
[... 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'].forEach(
    (c, i) => { BASE64[c] = i; });
BASE64['+'] = BASE64['-'] = 62;
BASE64['/'] = BASE64['_'] = 63;

/**
 * Decodes base64 text (standard or URL-safe alphabet, padding optional).
 *
 * Input is consumed four characters at a time. Decoding is lenient: it
 * stops at the first group containing a character outside the alphabet
 * (including `=` padding), keeping whatever whole bytes precede it. The
 * caller is expected to have removed whitespace already.
 *
 * @param s base64 text
 * @returns the decoded bytes
 */
export function decodeBase64(s: string): Bytes {
    // Upper bound on the output size; the result is trimmed to `j` below.
    const bs = new Uint8Array(Math.floor(s.length * 3/4));
    let i = 0;
    let j = 0;
    while (i < s.length) {
        const v1 = BASE64[s[i++]];
        const v2 = BASE64[s[i++]];
        const v3 = BASE64[s[i++]];
        const v4 = BASE64[s[i++]];
        // One byte needs two symbols (12 bits). With fewer there is nothing
        // to emit; previously a lone symbol followed by padding, or a group
        // of pure padding, still produced a spurious byte.
        if (v1 === void 0 || v2 === void 0) break;
        // Missing trailing symbols are `undefined`, which the bitwise
        // operators treat as 0.
        const v = (v1 << 18) | (v2 << 12) | (v3 << 6) | v4;
        bs[j++] = (v >> 16) & 255;
        if (v3 === void 0) break;
        bs[j++] = (v >> 8) & 255;
        if (v4 === void 0) break;
        bs[j++] = v & 255;
    }
    return Bytes.from(bs.subarray(0, j));
}
|
|
|
|
|
|
|
|
/**
 * Tests whether `s` is a single whitespace character of the text syntax:
 * space, tab, line feed, carriage return, or comma.
 *
 * The length check matters: a bare substring search would also accept the
 * empty string and multi-character runs such as `" \t"`.
 */
function isSpace(s: string): boolean {
    return s.length === 1 && ' \t\n\r,'.includes(s);
}
|