Text syntax reader in the JavaScript implementation

This commit is contained in:
Tony Garnock-Jones 2021-03-05 21:16:14 +01:00
parent a05bf0cb7a
commit dc96f74075
9 changed files with 511 additions and 27 deletions

View File

@ -1,6 +1,6 @@
{
"name": "preserves",
"version": "0.6.4",
"version": "0.7.0",
"description": "Experimental data serialization format",
"homepage": "https://gitlab.com/preserves/preserves",
"license": "Apache-2.0",

View File

@ -9,6 +9,7 @@ export * from './float';
export * from './fold';
export * from './fromjs';
export * from './is';
export * from './reader';
export * from './record';
export * from './strip';
export * from './symbols';

View File

@ -0,0 +1,410 @@
// Text syntax reader.
import type { Value } from './values';
import { DecodeError, ShortPacket } from './codec';
import { Dictionary, Set } from './dictionary';
import { unannotate } from './strip';
import { Bytes, unhexDigit } from './bytes';
import { decode } from './decoder';
import { Record } from './record';
import { annotate, Annotated } from './annotated';
import { Double, DoubleFloat, Single, SingleFloat } from './float';
/**
 * Options accepted by `Reader`.
 */
export interface ReaderOptions<T extends object> {
/**
 * When true, the reader wraps the values it returns using `annotate` and
 * attaches annotations read from the text (`@` prefixes and `;` comment
 * lines) to the value that follows them. `Reader` treats an absent setting
 * as `false`.
 */
includeAnnotations?: boolean;
/**
 * Converts the value read after `#!` into a `T`. `Reader` reports an error
 * when it meets `#!` and this function was not supplied. It is also passed
 * through to `decode` when a `#=` form is read.
 */
decodePointer?: (v: Value<T>) => T;
}
// Tag carried through number parsing: 'int' until a fraction or exponent has
// been seen, 'float' afterwards. `finishNumber` returns a plain number for 'int'.
type IntOrFloat = 'int' | 'float';
// The result types that number parsing in `Reader` can produce.
type Numeric = number | SingleFloat | DoubleFloat;
// Continuation invoked by `readDigit0` with the tag and the numeric text
// accumulated so far, once a run of digits has been consumed.
type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric;
/**
 * Recursive-descent reader for the text syntax.
 *
 * The reader keeps a string `buffer` and a cursor `index` into it. Every
 * character-level read (`peek`, `nextchar`, `nextcharcode`) throws
 * `ShortPacket` when the cursor has reached the end of the buffer; syntax
 * problems are reported through `error`, which throws `DecodeError` with a
 * position relative to the start of everything ever given to the reader.
 */
export class Reader<T extends object> {
    /** Text currently held by the reader. */
    buffer: string;
    /** Offset in `buffer` of the next character to be read. */
    index: number;
    /**
     * Number of characters already consumed and dropped from the front of
     * `buffer` by `write`; added to offsets when reporting error positions.
     */
    discarded = 0;
    options: ReaderOptions<T>;

    /**
     * @param buffer initial text to read from
     * @param options reader options; see `ReaderOptions`
     */
    constructor(buffer: string = '', options: ReaderOptions<T> = {}) {
        this.buffer = buffer;
        this.index = 0;
        this.options = options;
    }

    /** The `includeAnnotations` option, defaulting to `false` when unset. */
    get includeAnnotations(): boolean {
        return this.options.includeAnnotations ?? false;
    }

    /**
     * Appends `data` to the unread portion of the buffer, dropping the part
     * that has already been consumed and resetting the cursor to zero.
     */
    write(data: string) {
        if (this.atEnd()) {
            this.buffer = data;
        } else {
            // `slice` replaces the deprecated `substr`; with a single
            // argument the two are equivalent.
            this.buffer = this.buffer.slice(this.index) + data;
        }
        this.discarded += this.index;
        this.index = 0;
    }

    /**
     * Throws a `DecodeError` carrying `message` and an absolute position.
     *
     * @param message description of the problem
     * @param index offset within the current buffer (defaults to the cursor)
     * @throws DecodeError always
     */
    error(message: string, index = this.index): never {
        throw new DecodeError(
            `${message} (position ${this.discarded + index})`);
    }

    /** True when the cursor is at or beyond the end of the buffer. */
    atEnd(): boolean {
        return (this.index >= this.buffer.length);
    }

    /**
     * Returns the character under the cursor without consuming it.
     * @throws ShortPacket if the buffer is exhausted
     */
    peek(): string {
        if (this.atEnd()) throw new ShortPacket("Short term");
        return this.buffer[this.index];
    }

    /**
     * Consumes and returns the character under the cursor.
     * @throws ShortPacket if the buffer is exhausted
     */
    nextchar(): string {
        if (this.atEnd()) throw new ShortPacket("Short term");
        return this.buffer[this.index++];
    }

    /**
     * Consumes the character under the cursor and returns its UTF-16 code unit.
     * @throws ShortPacket if the buffer is exhausted
     */
    nextcharcode(): number {
        if (this.atEnd()) throw new ShortPacket("Short term");
        return this.buffer.charCodeAt(this.index++);
    }

    /**
     * Advances the cursor past any characters accepted by `isSpace`.
     * @throws ShortPacket (via `peek`) if the buffer ends before a
     *         non-space character is found
     */
    skipws() {
        while (true) {
            if (!isSpace(this.peek())) break;
            this.index++;
        }
    }

    /**
     * Reads the remainder of a comment line. Characters are accumulated up
     * to the first `\n` or `\r`, which is consumed but not included; the
     * accumulated text is returned after being passed through `wrap`.
     */
    readCommentLine(): Value<T> {
        let acc = '';
        while (true) {
            const c = this.nextchar();
            if (c === '\n' || c === '\r') {
                return this.wrap(acc);
            }
            acc = acc + c;
        }
    }

    /** Passes `v` through `annotate` when annotations are included; otherwise returns it as-is. */
    wrap(v: Value<T>): Value<T> {
        if (this.includeAnnotations) {
            return annotate(v);
        } else {
            return v;
        }
    }

    /**
     * Reads the next value and, when annotations are included, puts `v` at
     * the front of that value's annotation list. When annotations are not
     * included, `v` is simply dropped.
     */
    annotateNextWith(v: Value<T>): Value<T> {
        const u = this.next();
        if (this.includeAnnotations) (u as Annotated<T>).annotations.unshift(v);
        return u;
    }

    /** Reads one complete value, wrapped according to `wrap`. */
    next(): Value<T> {
        return this.wrap(this._next());
    }

    /**
     * Reads one value without the final `wrap` step, dispatching on the
     * first non-space character.
     *
     * @throws DecodeError on a stray `:`, `>`, `]`, `}` or an unknown `#` form
     * @throws ShortPacket if the buffer ends part-way through the value
     */
    _next(): Value<T> {
        this.skipws();
        // Remembered so that errors point at the start of the offending token.
        const startPos = this.index;
        const c = this.nextchar();
        switch (c) {
            case '-':
                return this.readIntpart('-', this.nextchar());
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                return this.readIntpart('', c);
            case '"':
                return this.readString('"');
            case '|':
                return Symbol.for(this.readString('|'));
            case ';':
                return this.annotateNextWith(this.readCommentLine());
            case '@':
                return this.annotateNextWith(this.next());
            case ':':
                this.error('Unexpected key/value separator between items', startPos);
            case '#': {
                const hashChar = this.nextchar();
                switch (hashChar) {
                    case 'f': return false;
                    case 't': return true;
                    case '{': return this.seq(new Set<T>(), (v, s) => s.add(v), '}');
                    case '"': return this.readLiteralBinary();
                    case 'x':
                        if (this.nextchar() !== '"') {
                            this.error('Expected open-quote at start of hex ByteString', startPos);
                        }
                        return this.readHexBinary();
                    case '[': return this.readBase64Binary();
                    case '=': {
                        // `#=` is followed by a ByteString which is handed to `decode`.
                        const bs = unannotate(this.next());
                        if (!Bytes.isBytes(bs)) this.error('ByteString must follow #=', startPos);
                        return decode<T>(bs, {
                            decodePointer: this.options.decodePointer,
                            includeAnnotations: this.options.includeAnnotations,
                        });
                    }
                    case '!': {
                        const d = this.options.decodePointer;
                        if (d === undefined) this.error("No decodePointer function supplied");
                        return d(this.next());
                    }
                    default:
                        this.error(`Invalid # syntax: ${hashChar}`, startPos);
                }
            }
            case '<': {
                // The first value inside the angle brackets is the record's label.
                const label = this.next();
                const fields = this.readSequence('>');
                return Record(label, fields);
            }
            case '[': return this.readSequence(']');
            case '{': return this.readDictionary();
            case '>': this.error('Unexpected >', startPos);
            case ']': this.error('Unexpected ]', startPos);
            case '}': this.error('Unexpected }', startPos);
            default:
                return this.readRawSymbol(c);
        }
    }

    /**
     * Reads values until the closing character `ch` is found (and consumed),
     * feeding each value to `update` together with the accumulator.
     *
     * @param acc accumulator, returned once `ch` is reached
     * @param update called with each value read and the accumulator
     * @param ch closing delimiter
     */
    seq<S>(acc: S, update: (v: Value<T>, acc: S) => void, ch: string): S {
        while (true) {
            this.skipws();
            if (this.peek() === ch) {
                this.index++;
                return acc;
            }
            update(this.next(), acc);
        }
    }

    /** Reads values into an array until the closing character `ch`. */
    readSequence(ch: string): Array<Value<T>> {
        return this.seq([] as Array<Value<T>>, (v, acc) => acc.push(v), ch);
    }

    /**
     * Reads pairs of hex digits, with optional space between pairs, until a
     * closing `"`. The opening `#x"` has already been consumed by the caller.
     */
    readHexBinary(): Bytes {
        const acc: number[] = [];
        while (true) {
            this.skipws();
            if (this.peek() === '"') {
                this.index++;
                return Bytes.from(acc);
            }
            acc.push(this.readHex2());
        }
    }

    /**
     * Reads `key: value` entries until the closing `}`.
     *
     * @throws DecodeError on a duplicate key or a key not followed by `:`
     */
    readDictionary(): Dictionary<Value<T>, T> {
        return this.seq(new Dictionary<Value<T>, T>(),
                        (k, acc) => {
                            // `seq` has just read the key; the separator and value follow.
                            this.skipws();
                            switch (this.peek()) {
                                case ':':
                                    if (acc.has(k)) this.error(
                                        `Duplicate key: ${k.asPreservesText()}`);
                                    this.index++;
                                    acc.set(k, this.next());
                                    break;
                                default:
                                    this.error('Missing key/value separator');
                            }
                        },
                        '}');
    }

    /**
     * Collects every non-space character up to the closing `]` and hands the
     * resulting text to `decodeBase64`.
     */
    readBase64Binary(): Bytes {
        let acc = '';
        while (true) {
            this.skipws();
            const c = this.nextchar();
            if (c === ']') break;
            acc = acc + c;
        }
        return decodeBase64(acc);
    }

    /**
     * Reads the integer part of a number.
     *
     * @param acc text accumulated so far (either '' or '-')
     * @param ch first character of the integer part, already consumed
     */
    readIntpart(acc: string, ch: string): Numeric {
        // A leading zero is a complete integer part: no further digits are read.
        if (ch === '0') return this.readFracexp('int', acc + ch);
        return this.readDigit1('int', acc, (kind, acc) => this.readFracexp(kind, acc), ch);
    }

    /**
     * Reads one or more digits and then continues with `k`.
     *
     * @param ch first character, if the caller already consumed it;
     *           otherwise it is read from the buffer
     * @throws DecodeError ('Incomplete number') if the first character is not a digit
     */
    readDigit1(kind: IntOrFloat, acc: string, k: IntContinuation, ch?: string): Numeric {
        if (ch === undefined) ch = this.nextchar();
        if (ch >= '0' && ch <= '9') return this.readDigit0(kind, acc + ch, k);
        this.error('Incomplete number');
    }

    /** Reads zero or more digits, appending them to `acc`, then continues with `k`. */
    readDigit0(kind: IntOrFloat, acc: string, k: IntContinuation): Numeric {
        while (true) {
            const ch = this.peek();
            if (!(ch >= '0' && ch <= '9')) break;
            this.index++;
            acc = acc + ch;
        }
        return k(kind, acc);
    }

    /** Reads an optional `.digits` fraction, then an optional exponent. */
    readFracexp(kind: IntOrFloat, acc: string): Numeric {
        if (this.peek() === '.') {
            this.index++;
            return this.readDigit1('float', acc + '.', (kind, acc) => this.readExp(kind, acc));
        }
        return this.readExp(kind, acc);
    }

    /** Reads an optional exponent introduced by `e` or `E`, then finishes the number. */
    readExp(kind: IntOrFloat, acc: string): Numeric {
        const ch = this.peek();
        if (ch === 'e' || ch === 'E') {
            this.index++;
            return this.readSignAndExp(acc + ch);
        }
        return this.finishNumber(kind, acc);
    }

    /** Reads an optional exponent sign followed by at least one exponent digit. */
    readSignAndExp(acc: string): Numeric {
        const ch = this.peek();
        if (ch === '+' || ch === '-') {
            this.index++;
            return this.readDigit1('float', acc + ch, (kind, acc) => this.finishNumber(kind, acc));
        }
        return this.readDigit1('float', acc, (kind, acc) => this.finishNumber(kind, acc));
    }

    /**
     * Converts the accumulated numeric text with `parseFloat`. An 'int'
     * yields the plain number; a 'float' yields `Single` when followed by
     * `f` or `F` (which is consumed) and `Double` otherwise.
     */
    finishNumber(kind: IntOrFloat, acc: string): Numeric {
        const i = parseFloat(acc);
        if (kind === 'int') return i;
        const ch = this.peek();
        if (ch === 'f' || ch === 'F') {
            this.index++;
            return Single(i);
        } else {
            return Double(i);
        }
    }

    /**
     * Reads the rest of a bare symbol whose first character(s) are in `acc`.
     * Reading stops, without consuming, at a delimiter character, at a
     * character accepted by `isSpace`, or at the end of the buffer.
     */
    readRawSymbol(acc: string): Value<T> {
        while (true) {
            if (this.atEnd()) break;
            const ch = this.peek();
            if (('(){}[]<>";,@#:|'.indexOf(ch) !== -1) || isSpace(ch)) break;
            this.index++;
            acc = acc + ch;
        }
        return Symbol.for(acc);
    }

    /**
     * Shared reader for quoted, backslash-escaped literals.
     *
     * @param xform converts one literal character into an element
     * @param finish converts the collected elements into the result
     * @param terminator closing quote character
     * @param hexescape escape letter that introduces a hex escape
     * @param hex reads the hex escape's digits and returns the element
     * @throws DecodeError on an unrecognised escape code
     */
    readStringlike<E, R>(xform: (ch: string) => E,
                         finish: (acc: E[]) => R,
                         terminator: string,
                         hexescape: string,
                         hex: () => E): R
    {
        const acc: E[] = [];
        while (true) {
            const ch = this.nextchar();
            switch (ch) {
                case terminator:
                    return finish(acc);
                case '\\': {
                    const esc = this.nextchar();
                    switch (esc) {
                        case hexescape: acc.push(hex()); break;
                        case terminator:
                        case '\\':
                        case '/':
                            acc.push(xform(esc)); break;
                        case 'b': acc.push(xform('\x08')); break;
                        case 'f': acc.push(xform('\x0c')); break;
                        case 'n': acc.push(xform('\x0a')); break;
                        case 'r': acc.push(xform('\x0d')); break;
                        case 't': acc.push(xform('\x09')); break;
                        default:
                            this.error(`Invalid escape code \\${esc}`);
                    }
                    break;
                }
                default:
                    acc.push(xform(ch));
                    break;
            }
        }
    }

    /** Reads two hex digits and returns the value they encode. */
    readHex2(): number {
        const x1 = unhexDigit(this.nextcharcode());
        const x2 = unhexDigit(this.nextcharcode());
        return (x1 << 4) | x2;
    }

    /** Reads four hex digits and returns the value they encode. */
    readHex4(): number {
        const x1 = unhexDigit(this.nextcharcode());
        const x2 = unhexDigit(this.nextcharcode());
        const x3 = unhexDigit(this.nextcharcode());
        const x4 = unhexDigit(this.nextcharcode());
        return (x1 << 12) | (x2 << 8) | (x3 << 4) | x4;
    }

    /**
     * Reads a quoted string up to `terminator`, with `\uXXXX` hex escapes.
     * An escape in the surrogate range must be a high surrogate immediately
     * followed by a `\uXXXX` low surrogate.
     *
     * @throws DecodeError ('Invalid surrogate pair') otherwise
     */
    readString(terminator: string): string {
        return this.readStringlike(x => x, xs => xs.join(''), terminator, 'u', () => {
            const n1 = this.readHex4();
            if ((n1 >= 0xd800) && (n1 <= 0xdfff)) {
                if ((this.nextchar() === '\\') && (this.nextchar() === 'u')) {
                    const n2 = this.readHex4();
                    if ((n2 >= 0xdc00) && (n2 <= 0xdfff) && (n1 <= 0xdbff)) {
                        return String.fromCharCode(n1, n2);
                    }
                }
                this.error('Invalid surrogate pair');
            }
            return String.fromCharCode(n1);
        });
    }

    /**
     * Reads a `"`-terminated binary literal with `\xXX` hex escapes. Each
     * literal character must have a code below 256.
     *
     * @throws DecodeError if a character's code is 256 or above
     */
    readLiteralBinary(): Bytes {
        return this.readStringlike(
            x => {
                const v = x.charCodeAt(0);
                if (v >= 256) this.error(`Invalid code point ${v} in literal binary`);
                return v;
            },
            // Called through an arrow function rather than passed as a bare
            // reference, so the call does not depend on how `Bytes.from`
            // treats its receiver.
            (bs: number[]) => Bytes.from(bs),
            '"',
            'x',
            () => this.readHex2());
    }
}
/**
 * Maps each Base64 alphabet character to its 6-bit value. Both the standard
 * (`+`, `/`) and the URL-safe (`-`, `_`) spellings of values 62 and 63 are
 * present. The padding character `=` is deliberately absent.
 */
const BASE64: {[key: string]: number} = {};
[... 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'].forEach(
    (c, i) => BASE64[c] = i);
BASE64['+'] = BASE64['-'] = 62;
BASE64['/'] = BASE64['_'] = 63;

/**
 * Decodes Base64 text into bytes.
 *
 * Padding is optional: decoding stops at the first character that is not in
 * the alphabet (which includes `=`), and any bits left over that do not make
 * up a whole byte are dropped. Only bytes fully determined by alphabet
 * characters are produced, so stray padding such as `"Zm9v=="` or `"===="`
 * no longer yields a spurious trailing zero byte.
 *
 * @param s Base64 text, without embedded whitespace
 * @returns the decoded bytes
 */
export function decodeBase64(s: string): Bytes {
    // k alphabet characters carry floor(6k / 8) whole bytes, so this is an upper bound.
    const bs = new Uint8Array(Math.floor(s.length * 3 / 4));
    let j = 0;
    let acc = 0;   // most recent bits read; only the low `bits` bits are still pending
    let bits = 0;  // number of pending bits in `acc` (0, 2, 4 or 6 between characters)
    for (let i = 0; i < s.length; i++) {
        const sextet: number | undefined = BASE64[s[i]];
        if (sextet === undefined) break;
        // At most 6 pending + 6 new bits are ever needed, so keep 12.
        acc = ((acc << 6) | sextet) & 0xfff;
        bits += 6;
        if (bits >= 8) {
            bits -= 8;
            bs[j++] = (acc >> bits) & 0xff;
        }
    }
    return Bytes.from(bs.subarray(0, j));
}
/**
 * Reports whether `s` occurs within the reader's whitespace set: space, tab,
 * line feed, carriage return and comma.
 */
function isSpace(s: string): boolean {
    const whitespace = ' \t\n\r,';
    return whitespace.includes(s);
}

View File

@ -74,14 +74,14 @@ export namespace Record {
Array.prototype.asPreservesText = function (): string {
if ('label' in (this as any)) {
const r = this as Record<Value, Tuple<Value>, DefaultPointer>;
return r.label.asPreservesText() +
'(' + r.map(f => {
return '<' + r.label.asPreservesText() + (r.length > 0 ? ' ': '') +
r.map(f => {
try {
return f.asPreservesText();
} catch (e) {
return Record.fallbackToString(f);
}
}).join(', ') + ')';
}).join(' ') + '>';
} else {
return '[' + this.map(i => i.asPreservesText()).join(', ') + ']';
}

View File

@ -29,7 +29,7 @@ declare global {
Object.defineProperty(Object.prototype, 'asPreservesText', {
enumerable: false,
writable: true,
value: function(): string { return '#!' + stringify(this); }
value: function(): string { return '#!' + JSON.stringify(this); }
});
Boolean.prototype.asPreservesText = function (): string {

View File

@ -1,4 +1,4 @@
import { Bytes, fromJS } from '../src/index';
import { Bytes, decodeBase64, fromJS } from '../src/index';
import './test-utils';
describe('immutable byte arrays', () => {
@ -80,3 +80,39 @@ describe('immutable byte arrays', () => {
});
});
});
// Checks decodeBase64 against padded and unpadded inputs. Each table row is
// [test name, Base64 input, expected bytes].
describe('base64 decoder', () => {
    type Case = [string, string, number[]];

    // Registers one `it` per row, in table order.
    const registerAll = (cases: Case[]): void => {
        for (const [name, input, expected] of cases) {
            it(name, () => expect(decodeBase64(input)).is(Bytes.of(...expected)));
        }
    };

    describe('RFC4648 tests', () => {
        registerAll([
            ['10.0', "", []],
            ['10.1', "Zg==", [102]],
            ['10.2', "Zm8=", [102, 111]],
            ['10.3', "Zm9v", [102, 111, 111]],
            ['10.4', "Zm9vYg==", [102, 111, 111, 98]],
            ['10.5', "Zm9vYmE=", [102, 111, 111, 98, 97]],
            ['10.6', "Zm9vYmFy", [102, 111, 111, 98, 97, 114]],
            // The "b" variants are the same inputs with the padding left off.
            ['10.1b', "Zg", [102]],
            ['10.2b', "Zm8", [102, 111]],
            ['10.4b', "Zm9vYg", [102, 111, 111, 98]],
            ['10.5b', "Zm9vYmE", [102, 111, 111, 98, 97]],
        ]);
    });

    describe('RFC4648 examples', () => {
        registerAll([
            ['example0', 'FPucA9l+', [0x14, 0xfb, 0x9c, 0x03, 0xd9, 0x7e]],
            ['example1', 'FPucA9k=', [0x14, 0xfb, 0x9c, 0x03, 0xd9]],
            ['example1b', 'FPucA9k', [0x14, 0xfb, 0x9c, 0x03, 0xd9]],
            ['example2', 'FPucAw==', [0x14, 0xfb, 0x9c, 0x03]],
            ['example2b', 'FPucAw=', [0x14, 0xfb, 0x9c, 0x03]],
            ['example2c', 'FPucAw', [0x14, 0xfb, 0x9c, 0x03]],
        ]);
    });

    describe('Misc test cases', () => {
        registerAll([
            ['gQ==', 'gQ==', [0x81]],
        ]);
    });
});

View File

@ -12,29 +12,10 @@ import {
} from '../src/index';
const { Tag } = Constants;
import './test-utils';
import { decodePointer, encodePointer, Pointer } from './test-utils';
import * as fs from 'fs';
class Pointer {
v: Value<Pointer>;
constructor(v: Value<Pointer>) {
this.v = v;
}
equals(other: any, is: (a: any, b: any) => boolean) {
return Object.is(other.constructor, this.constructor) && is(this.v, other.v);
}
}
function decodePointer(v: Value<Pointer>): Pointer {
return new Pointer(strip(v));
}
function encodePointer(w: Pointer): Value<Pointer> {
return w.v;
}
const _discard = Symbol.for('discard');
const _capture = Symbol.for('capture');
const _observe = Symbol.for('observe');

View File

@ -0,0 +1,32 @@
import { Bytes, Decoder, encode, Reader } from '../src/index';
import './test-utils';
import { decodePointer, encodePointer, Pointer } from './test-utils';
import * as fs from 'fs';
// Reads the shared text sample file with Reader and compares the result with
// the binary sample file read through Decoder.
describe('reading common test suite', () => {
    const samples_bin = fs.readFileSync(__dirname + '/../../../tests/samples.bin');
    const samples_txt = fs.readFileSync(__dirname + '/../../../tests/samples.txt', 'utf-8');

    // Reads the first value of the text samples.
    const readText = (includeAnnotations: boolean) =>
        new Reader(samples_txt, { decodePointer, includeAnnotations }).next();

    // Decodes the first value of the binary samples.
    const decodeBinary = (includeAnnotations: boolean) =>
        new Decoder(samples_bin, { decodePointer, includeAnnotations }).next();

    it('should read equal to decoded binary without annotations', () => {
        const fromText = readText(false);
        const fromBinary = decodeBinary(false);
        expect(fromText).is(fromBinary);
    });

    it('should read equal to decoded binary with annotations', () => {
        const fromText = readText(true);
        const fromBinary = decodeBinary(true);
        expect(fromText).is(fromBinary);
    });

    it('should read and encode back to binary with annotations', () => {
        const fromText = readText(true);
        const encoded = encode(fromText, {
            encodePointer,
            includeAnnotations: true,
            canonical: true,
        });
        expect(Bytes.toIO(encoded)).toEqual(new Uint8Array(samples_bin));
    });
});

View File

@ -1,4 +1,4 @@
import { Value, is, preserves } from '../src/index';
import { Value, is, preserves, strip } from '../src/index';
import '../src/node_support';
declare global {
@ -34,3 +34,27 @@ expect.extend({
}
}
});
/**
 * Pointer type used by the tests: a thin box around a single value `v`.
 */
export class Pointer {
    constructor(public v: Value<Pointer>) {}

    /**
     * Two pointers are equal when they share a constructor and `is` judges
     * their boxed values equal. `is` is not consulted when the constructors differ.
     */
    equals(other: any, is: (a: any, b: any) => boolean) {
        if (!Object.is(other.constructor, this.constructor)) {
            return false;
        }
        return is(this.v, other.v);
    }

    /** Renders the pointer as `#!` followed by the text form of the boxed value. */
    asPreservesText(): string {
        const inner = this.v.asPreservesText();
        return '#!' + inner;
    }
}
/** Builds a `Pointer` around the result of passing `v` through `strip`. */
export function decodePointer(v: Value<Pointer>): Pointer {
    const stripped = strip(v);
    return new Pointer(stripped);
}
/** Returns the value boxed inside the pointer `w`. */
export function encodePointer(w: Pointer): Value<Pointer> {
    const { v } = w;
    return v;
}