From dc96f74075ece5025a93ff7336dff59716b570c6 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Fri, 5 Mar 2021 21:16:14 +0100 Subject: [PATCH] Text syntax reader in Javascript implementation --- implementations/javascript/package.json | 2 +- implementations/javascript/src/index.ts | 1 + implementations/javascript/src/reader.ts | 410 ++++++++++++++++++ implementations/javascript/src/record.ts | 6 +- implementations/javascript/src/text.ts | 2 +- implementations/javascript/test/bytes.test.ts | 38 +- implementations/javascript/test/codec.test.ts | 21 +- .../javascript/test/reader.test.ts | 32 ++ implementations/javascript/test/test-utils.ts | 26 +- 9 files changed, 511 insertions(+), 27 deletions(-) create mode 100644 implementations/javascript/src/reader.ts create mode 100644 implementations/javascript/test/reader.test.ts diff --git a/implementations/javascript/package.json b/implementations/javascript/package.json index 38a0fe2..f7588fc 100644 --- a/implementations/javascript/package.json +++ b/implementations/javascript/package.json @@ -1,6 +1,6 @@ { "name": "preserves", - "version": "0.6.4", + "version": "0.7.0", "description": "Experimental data serialization format", "homepage": "https://gitlab.com/preserves/preserves", "license": "Apache-2.0", diff --git a/implementations/javascript/src/index.ts b/implementations/javascript/src/index.ts index 3781d03..89b73b9 100644 --- a/implementations/javascript/src/index.ts +++ b/implementations/javascript/src/index.ts @@ -9,6 +9,7 @@ export * from './float'; export * from './fold'; export * from './fromjs'; export * from './is'; +export * from './reader'; export * from './record'; export * from './strip'; export * from './symbols'; diff --git a/implementations/javascript/src/reader.ts b/implementations/javascript/src/reader.ts new file mode 100644 index 0000000..7cd1071 --- /dev/null +++ b/implementations/javascript/src/reader.ts @@ -0,0 +1,410 @@ +// Text syntax reader. + +import type { Value } from './values'; +import { DecodeError, ShortPacket } from './codec'; +import { Dictionary, Set } from './dictionary'; +import { unannotate } from './strip'; +import { Bytes, unhexDigit } from './bytes'; +import { decode } from './decoder'; +import { Record } from './record'; +import { annotate, Annotated } from './annotated'; +import { Double, DoubleFloat, Single, SingleFloat } from './float'; + +export interface ReaderOptions { + includeAnnotations?: boolean; + decodePointer?: (v: Value) => T; +} + +type IntOrFloat = 'int' | 'float'; +type Numeric = number | SingleFloat | DoubleFloat; +type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric; + +export class Reader { + buffer: string; + index: number; + discarded = 0; + options: ReaderOptions; + + constructor(buffer: string = '', options: ReaderOptions = {}) { + this.buffer = buffer; + this.index = 0; + this.options = options; + } + + get includeAnnotations(): boolean { + return this.options.includeAnnotations ?? false; + } + + write(data: string) { + if (this.atEnd()) { + this.buffer = data; + } else { + this.buffer = this.buffer.substr(this.index) + data; + } + this.discarded += this.index; + this.index = 0; + } + + error(message: string, index = this.index): never { + throw new DecodeError( + `${message} (position ${this.discarded + index})`); + } + + atEnd(): boolean { + return (this.index >= this.buffer.length); + } + + peek(): string { + if (this.atEnd()) throw new ShortPacket("Short term"); + return this.buffer[this.index]; + } + + nextchar(): string { + if (this.atEnd()) throw new ShortPacket("Short term"); + return this.buffer[this.index++]; + } + + nextcharcode(): number { + if (this.atEnd()) throw new ShortPacket("Short term"); + return this.buffer.charCodeAt(this.index++); + } + + skipws() { + while (true) { + if (!isSpace(this.peek())) break; + this.index++; + } + } + + readCommentLine(): Value { + let acc = ''; + while (true) { + const c = this.nextchar(); + if (c === '\n' || c === '\r') { + return this.wrap(acc); + } + acc = acc + c; + } + } + + wrap(v: Value): Value { + if (this.includeAnnotations) { + return annotate(v); + } else { + return v; + } + } + + annotateNextWith(v: Value): Value { + const u = this.next(); + if (this.includeAnnotations) (u as Annotated).annotations.unshift(v); + return u; + } + + next(): Value { + return this.wrap(this._next()); + } + + _next(): Value { + this.skipws(); + const startPos = this.index; + const c = this.nextchar(); + switch (c) { + case '-': + return this.readIntpart('-', this.nextchar()); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return this.readIntpart('', c); + case '"': + return this.readString('"'); + case '|': + return Symbol.for(this.readString('|')); + case ';': + return this.annotateNextWith(this.readCommentLine()); + case '@': + return this.annotateNextWith(this.next()); + case ':': + this.error('Unexpected key/value separator between items', startPos); + case '#': { + const c = this.nextchar(); + switch (c) { + case 'f': return false; + case 't': return true; + case '{': return this.seq(new Set(), (v, s) => s.add(v), '}'); + case '"': return this.readLiteralBinary(); + case 'x': + if (this.nextchar() !== '"') { + this.error('Expected open-quote at start of hex ByteString', startPos); + } + return this.readHexBinary(); + case '[': return this.readBase64Binary(); + case '=': { + const bs = unannotate(this.next()); + if (!Bytes.isBytes(bs)) this.error('ByteString must follow #=', startPos); + return decode(bs, { + decodePointer: this.options.decodePointer, + includeAnnotations: this.options.includeAnnotations, + }); + } + case '!': { + const d = this.options.decodePointer; + if (d === void 0) this.error("No decodePointer function supplied"); + return d(this.next()); + } + default: + this.error(`Invalid # syntax: ${c}`, startPos); + } + } + case '<': { + const label = this.next(); + const fields = this.readSequence('>'); + return Record(label, fields); + } + case '[': return this.readSequence(']'); + case '{': return this.readDictionary(); + case '>': this.error('Unexpected >', startPos); + case ']': this.error('Unexpected ]', startPos); + case '}': this.error('Unexpected }', startPos); + default: + return this.readRawSymbol(c); + } + } + + seq(acc: S, update: (v: Value, acc: S) => void, ch: string): S { + while (true) { + this.skipws(); + if (this.peek() === ch) { + this.index++; + return acc; + } + update(this.next(), acc); + } + } + + readSequence(ch: string): Array> { + return this.seq([] as Array>, (v, acc) => acc.push(v), ch); + } + + readHexBinary(): Bytes { + const acc: number[] = []; + while (true) { + this.skipws(); + if (this.peek() === '"') { + this.index++; + return Bytes.from(acc); + } + acc.push(this.readHex2()); + } + } + + readDictionary(): Dictionary, T> { + return this.seq(new Dictionary, T>(), + (k, acc) => { + this.skipws(); + switch (this.peek()) { + case ':': + if (acc.has(k)) this.error( + `Duplicate key: ${k.asPreservesText()}`); + this.index++; + acc.set(k, this.next()); + break; + default: + this.error('Missing key/value separator'); + } + }, + '}'); + } + + readBase64Binary(): Bytes { + let acc = ''; + while (true) { + this.skipws(); + const c = this.nextchar(); + if (c === ']') break; + acc = acc + c; + } + return decodeBase64(acc); + } + + readIntpart(acc: string, ch: string): Numeric { + if (ch === '0') return this.readFracexp('int', acc + ch); + return this.readDigit1('int', acc, (kind, acc) => this.readFracexp(kind, acc), ch); + } + + readDigit1(kind: IntOrFloat, acc: string, k: IntContinuation, ch?: string): Numeric { + if (ch === void 0) ch = this.nextchar(); + if (ch >= '0' && ch <= '9') return this.readDigit0(kind, acc + ch, k); + this.error('Incomplete number'); + } + + readDigit0(kind: IntOrFloat, acc: string, k: IntContinuation): Numeric { + while (true) { + const ch = this.peek(); + if (!(ch >= '0' && ch <= '9')) break; + this.index++; + acc = acc + ch; + } + return k(kind, acc); + } + + readFracexp(kind: IntOrFloat, acc: string): Numeric { + if (this.peek() === '.') { + this.index++; + return this.readDigit1('float', acc + '.', (kind, acc) => this.readExp(kind, acc)); + } + return this.readExp(kind, acc); + } + + readExp(kind: IntOrFloat, acc: string): Numeric { + const ch = this.peek(); + if (ch === 'e' || ch === 'E') { + this.index++; + return this.readSignAndExp(acc + ch); + } + return this.finishNumber(kind, acc); + } + + readSignAndExp(acc: string): Numeric { + const ch = this.peek(); + if (ch === '+' || ch === '-') { + this.index++; + return this.readDigit1('float', acc + ch, (kind, acc) => this.finishNumber(kind, acc)); + } + return this.readDigit1('float', acc, (kind, acc) => this.finishNumber(kind, acc)); + } + + finishNumber(kind: IntOrFloat, acc: string): Numeric { + const i = parseFloat(acc); + if (kind === 'int') return i; + const ch = this.peek(); + if (ch === 'f' || ch === 'F') { + this.index++; + return Single(i); + } else { + return Double(i); + } + } + + readRawSymbol(acc: string): Value { + while (true) { + if (this.atEnd()) break; + const ch = this.peek(); + if (('(){}[]<>";,@#:|'.indexOf(ch) !== -1) || isSpace(ch)) break; + this.index++; + acc = acc + ch; + } + return Symbol.for(acc); + } + + readStringlike(xform: (ch: string) => E, + finish: (acc: E[]) => R, + terminator: string, + hexescape: string, + hex: () => E): R + { + let acc: E[] = []; + while (true) { + const ch = this.nextchar(); + switch (ch) { + case terminator: + return finish(acc); + case '\\': { + const ch = this.nextchar(); + switch (ch) { + case hexescape: acc.push(hex()); break; + + case terminator: + case '\\': + case '/': + acc.push(xform(ch)); break; + + case 'b': acc.push(xform('\x08')); break; + case 'f': acc.push(xform('\x0c')); break; + case 'n': acc.push(xform('\x0a')); break; + case 'r': acc.push(xform('\x0d')); break; + case 't': acc.push(xform('\x09')); break; + + default: + this.error(`Invalid escape code \\${ch}`); + } + break; + } + default: + acc.push(xform(ch)); + break; + } + } + } + + readHex2(): number { + const x1 = unhexDigit(this.nextcharcode()); + const x2 = unhexDigit(this.nextcharcode()); + return (x1 << 4) | x2; + } + + readHex4(): number { + const x1 = unhexDigit(this.nextcharcode()); + const x2 = unhexDigit(this.nextcharcode()); + const x3 = unhexDigit(this.nextcharcode()); + const x4 = unhexDigit(this.nextcharcode()); + return (x1 << 12) | (x2 << 8) | (x3 << 4) | x4; + } + + readString(terminator: string): string { + return this.readStringlike(x => x, xs => xs.join(''), terminator, 'u', () => { + const n1 = this.readHex4(); + if ((n1 >= 0xd800) && (n1 <= 0xdfff)) { + if ((this.nextchar() === '\\') && (this.nextchar() === 'u')) { + const n2 = this.readHex4(); + if ((n2 >= 0xdc00) && (n2 <= 0xdfff) && (n1 <= 0xdbff)) { + return String.fromCharCode(n1, n2); + } + } + this.error('Invalid surrogate pair'); + } + return String.fromCharCode(n1); + }); + } + + readLiteralBinary(): Bytes { + return this.readStringlike( + x => { + const v = x.charCodeAt(0); + if (v >= 256) this.error(`Invalid code point ${v} in literal binary`); + return v; + }, + Bytes.from, + '"', + 'x', + () => this.readHex2()); + } +} + +const BASE64: {[key: string]: number} = {}; +[... 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'].forEach( + (c, i) => BASE64[c] = i); +BASE64['+'] = BASE64['-'] = 62; +BASE64['/'] = BASE64['_'] = 63; + +export function decodeBase64(s: string): Bytes { + const bs = new Uint8Array(Math.floor(s.length * 3/4)); + let i = 0; + let j = 0; + while (i < s.length) { + const v1 = BASE64[s[i++]]; + const v2 = BASE64[s[i++]]; + const v3 = BASE64[s[i++]]; + const v4 = BASE64[s[i++]]; + const v = (v1 << 18) | (v2 << 12) | (v3 << 6) | v4; + bs[j++] = (v >> 16) & 255; + if (v3 === void 0) break; + bs[j++] = (v >> 8) & 255; + if (v4 === void 0) break; + bs[j++] = v & 255; + } + return Bytes.from(bs.subarray(0, j)); +} + +function isSpace(s: string): boolean { + return ' \t\n\r,'.indexOf(s) !== -1; +} diff --git a/implementations/javascript/src/record.ts b/implementations/javascript/src/record.ts index b51abaa..7d9a6d0 100644 --- a/implementations/javascript/src/record.ts +++ b/implementations/javascript/src/record.ts @@ -74,14 +74,14 @@ export namespace Record { Array.prototype.asPreservesText = function (): string { if ('label' in (this as any)) { const r = this as Record, DefaultPointer>; - return r.label.asPreservesText() + - '(' + r.map(f => { + return '<' + r.label.asPreservesText() + (r.length > 0 ? ' ': '') + + r.map(f => { try { return f.asPreservesText(); } catch (e) { return Record.fallbackToString(f); } - }).join(', ') + ')'; + }).join(' ') + '>'; } else { return '[' + this.map(i => i.asPreservesText()).join(', ') + ']'; } diff --git a/implementations/javascript/src/text.ts b/implementations/javascript/src/text.ts index 12379a6..d0c5eb3 100644 --- a/implementations/javascript/src/text.ts +++ b/implementations/javascript/src/text.ts @@ -29,7 +29,7 @@ declare global { Object.defineProperty(Object.prototype, 'asPreservesText', { enumerable: false, writable: true, - value: function(): string { return '#!' + stringify(this); } + value: function(): string { return '#!' + JSON.stringify(this); } }); Boolean.prototype.asPreservesText = function (): string { diff --git a/implementations/javascript/test/bytes.test.ts b/implementations/javascript/test/bytes.test.ts index bf19173..9948324 100644 --- a/implementations/javascript/test/bytes.test.ts +++ b/implementations/javascript/test/bytes.test.ts @@ -1,4 +1,4 @@ -import { Bytes, fromJS } from '../src/index'; +import { Bytes, decodeBase64, fromJS } from '../src/index'; import './test-utils'; describe('immutable byte arrays', () => { @@ -80,3 +80,39 @@ describe('immutable byte arrays', () => { }); }); }); + +describe('base64 decoder', () => { + describe('RFC4648 tests', () => { + it('10.0', () => expect(decodeBase64("")).is(Bytes.of())); + it('10.1', () => expect(decodeBase64("Zg==")).is(Bytes.of(102))); + it('10.2', () => expect(decodeBase64("Zm8=")).is(Bytes.of(102, 111))); + it('10.3', () => expect(decodeBase64("Zm9v")).is(Bytes.of(102, 111, 111))); + it('10.4', () => expect(decodeBase64("Zm9vYg==")).is(Bytes.of(102, 111, 111, 98))); + it('10.5', () => expect(decodeBase64("Zm9vYmE=")).is(Bytes.of(102, 111, 111, 98, 97))); + it('10.6', () => expect(decodeBase64("Zm9vYmFy")).is(Bytes.of(102, 111, 111, 98, 97, 114))); + + it('10.1b', () => expect(decodeBase64("Zg")).is(Bytes.of(102))); + it('10.2b', () => expect(decodeBase64("Zm8")).is(Bytes.of(102, 111))); + it('10.4b', () => expect(decodeBase64("Zm9vYg")).is(Bytes.of(102, 111, 111, 98))); + it('10.5b', () => expect(decodeBase64("Zm9vYmE")).is(Bytes.of(102, 111, 111, 98, 97))); + }); + + describe('RFC4648 examples', () => { + it('example0', () => + expect(decodeBase64('FPucA9l+')).is(Bytes.of(0x14, 0xfb, 0x9c, 0x03, 0xd9, 0x7e))); + it('example1', () => + expect(decodeBase64('FPucA9k=')).is(Bytes.of(0x14, 0xfb, 0x9c, 0x03, 0xd9))); + it('example1b', () => + expect(decodeBase64('FPucA9k')).is(Bytes.of(0x14, 0xfb, 0x9c, 0x03, 0xd9))); + it('example2', () => + expect(decodeBase64('FPucAw==')).is(Bytes.of(0x14, 0xfb, 0x9c, 0x03))); + it('example2b', () => + expect(decodeBase64('FPucAw=')).is(Bytes.of(0x14, 0xfb, 0x9c, 0x03))); + it('example2c', () => + expect(decodeBase64('FPucAw')).is(Bytes.of(0x14, 0xfb, 0x9c, 0x03))); + }); + + describe('Misc test cases', () => { + it('gQ==', () => expect(decodeBase64('gQ==')).is(Bytes.of(0x81))); + }); +}); diff --git a/implementations/javascript/test/codec.test.ts b/implementations/javascript/test/codec.test.ts index 59d4140..71c88b5 100644 --- a/implementations/javascript/test/codec.test.ts +++ b/implementations/javascript/test/codec.test.ts @@ -12,29 +12,10 @@ import { } from '../src/index'; const { Tag } = Constants; import './test-utils'; +import { decodePointer, encodePointer, Pointer } from './test-utils'; import * as fs from 'fs'; -class Pointer { - v: Value; - - constructor(v: Value) { - this.v = v; - } - - equals(other: any, is: (a: any, b: any) => boolean) { - return Object.is(other.constructor, this.constructor) && is(this.v, other.v); - } -} - -function decodePointer(v: Value): Pointer { - return new Pointer(strip(v)); -} - -function encodePointer(w: Pointer): Value { - return w.v; -} - const _discard = Symbol.for('discard'); const _capture = Symbol.for('capture'); const _observe = Symbol.for('observe'); diff --git a/implementations/javascript/test/reader.test.ts b/implementations/javascript/test/reader.test.ts new file mode 100644 index 0000000..386acc6 --- /dev/null +++ b/implementations/javascript/test/reader.test.ts @@ -0,0 +1,32 @@ +import { Bytes, Decoder, encode, Reader } from '../src/index'; +import './test-utils'; +import { decodePointer, encodePointer, Pointer } from './test-utils'; + +import * as fs from 'fs'; + +describe('reading common test suite', () => { + const samples_bin = fs.readFileSync(__dirname + '/../../../tests/samples.bin'); + const samples_txt = fs.readFileSync(__dirname + '/../../../tests/samples.txt', 'utf-8'); + + it('should read equal to decoded binary without annotations', () => { + const s1 = new Reader(samples_txt, { decodePointer, includeAnnotations: false }).next(); + const s2 = new Decoder(samples_bin, { decodePointer, includeAnnotations: false }).next(); + expect(s1).is(s2); + }); + + it('should read equal to decoded binary with annotations', () => { + const s1 = new Reader(samples_txt, { decodePointer, includeAnnotations: true }).next(); + const s2 = new Decoder(samples_bin, { decodePointer, includeAnnotations: true }).next(); + expect(s1).is(s2); + }); + + it('should read and encode back to binary with annotations', () => { + const s = new Reader(samples_txt, { decodePointer, includeAnnotations: true }).next(); + const bs = Bytes.toIO(encode(s, { + encodePointer, + includeAnnotations: true, + canonical: true, + })); + expect(bs).toEqual(new Uint8Array(samples_bin)); + }); +}); diff --git a/implementations/javascript/test/test-utils.ts b/implementations/javascript/test/test-utils.ts index 2562512..5b51377 100644 --- a/implementations/javascript/test/test-utils.ts +++ b/implementations/javascript/test/test-utils.ts @@ -1,4 +1,4 @@ -import { Value, is, preserves } from '../src/index'; +import { Value, is, preserves, strip } from '../src/index'; import '../src/node_support'; declare global { @@ -34,3 +34,27 @@ expect.extend({ } } }); + +export class Pointer { + v: Value; + + constructor(v: Value) { + this.v = v; + } + + equals(other: any, is: (a: any, b: any) => boolean) { + return Object.is(other.constructor, this.constructor) && is(this.v, other.v); + } + + asPreservesText(): string { + return '#!' + this.v.asPreservesText(); + } +} + +export function decodePointer(v: Value): Pointer { + return new Pointer(strip(v)); +} + +export function encodePointer(w: Pointer): Value { + return w.v; +}