From d8a041a647eaf58a0b74a355e2b9512696d6ab7b Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 12 Jan 2021 14:18:26 +0100 Subject: [PATCH] Reuse buffers during encoding, where safe to do so; in particular, during simple canonicalization --- implementations/javascript/src/codec.ts | 43 ++++++++++++++++++- implementations/javascript/src/values.ts | 19 +++----- implementations/javascript/test/codec.test.ts | 16 ++++++- 3 files changed, 62 insertions(+), 16 deletions(-) diff --git a/implementations/javascript/src/codec.ts b/implementations/javascript/src/codec.ts index 365c937..57b3278 100644 --- a/implementations/javascript/src/codec.ts +++ b/implementations/javascript/src/codec.ts @@ -205,6 +205,10 @@ export interface EncoderOptions { includeAnnotations?: boolean; } +function chunkStr(bs: Uint8Array): string { + return String.fromCharCode.apply(null, bs); +} + export class Encoder { chunks: Array; view: DataView; @@ -227,8 +231,26 @@ export class Encoder { } contents(): Bytes { - this.rotatebuffer(4096); - return Bytes.concat(this.chunks); + if (this.chunks.length === 0) { + const resultLength = this.index; + this.index = 0; + return new Bytes(this.view.buffer.slice(0, resultLength)); + } else { + this.rotatebuffer(4096); + return Bytes.concat(this.chunks); + } + } + + /* Like contents(), but hands back a string containing binary data "encoded" via latin-1 */ + contentsString(): string { + if (this.chunks.length === 0) { + const s = chunkStr(new Uint8Array(this.view.buffer, 0, this.index)); + this.index = 0; + return s; + } else { + this.rotatebuffer(4096); + return this.chunks.map(chunkStr).join(''); + } } rotatebuffer(size: number) { @@ -347,6 +369,23 @@ export function encode(v: any, options?: EncoderOptions): Bytes { return new Encoder(options).push(v).contents(); } +const _canonicalEncoder = new Encoder({ canonical: true }); +let _usingCanonicalEncoder = false; +export function canonicalEncode(v: any, options?: EncoderOptions): Bytes { + if (options === void 0 && !_usingCanonicalEncoder) { + _usingCanonicalEncoder = true; + const bs = _canonicalEncoder.push(v).contents(); + _usingCanonicalEncoder = false; + return bs; + } else { + return encode(v, { ... options, canonical: true }); + } +} + +export function canonicalString(v: any): string { + return _canonicalEncoder.push(v).contentsString(); +} + export function encodeWithAnnotations(v: any, options: EncoderOptions = {}): Bytes { return encode(v, { ... options, includeAnnotations: true }); } diff --git a/implementations/javascript/src/values.ts b/implementations/javascript/src/values.ts index efcd593..42430da 100644 --- a/implementations/javascript/src/values.ts +++ b/implementations/javascript/src/values.ts @@ -2,7 +2,7 @@ import { PreserveOn, AsPreserve } from './symbols'; import { Tag } from './constants'; -import { Encoder, encode } from './codec'; +import { Encoder, canonicalEncode, canonicalString } from './codec'; import { stringify } from './text'; import { _iterMap, FlexMap, FlexSet } from './flex'; @@ -144,7 +144,7 @@ export class Bytes { maybeByteIterable.byteOffset, maybeByteIterable.byteLength); } else if (maybeByteIterable instanceof ArrayBuffer) { - this._view = new Uint8Array(maybeByteIterable.slice(0)); + this._view = new Uint8Array(maybeByteIterable); } else if (typeof maybeByteIterable === 'string') { this._view = textEncoder.encode(maybeByteIterable); } else if (typeof maybeByteIterable === 'number') { @@ -557,12 +557,6 @@ export function hash(a: Value): number { export type DictionaryType = 'Dictionary' | 'Set'; export const DictionaryType = Symbol.for('DictionaryType'); -export function _canonicalString(item: Value): string { - const bs = encode(item, { canonical: true })._view; - const s = String.fromCharCode.apply(null, bs); - return s; -} - export class Dictionary extends FlexMap { get [DictionaryType](): DictionaryType { return 'Dictionary'; @@ -583,7 +577,7 @@ export class Dictionary extends FlexMap { } constructor(items?: Iterable) { - super(_canonicalString, _iterMap(items?.[Symbol.iterator](), ([k,v]) => [fromJS(k), v])); + super(canonicalString, _iterMap(items?.[Symbol.iterator](), ([k,v]) => [fromJS(k), v])); } mapEntries(f: (entry: [Value, T]) => [Value, R]): Dictionary { @@ -615,8 +609,7 @@ export class Dictionary extends FlexMap { [PreserveOn](encoder: Encoder) { if (encoder.canonical) { const pieces = Array.from(this).map(([k, v]) => - Bytes.concat([encode(k, { canonical: true }), - encode(v, { canonical: true })])); + Bytes.concat([canonicalEncode(k), canonicalEncode(v)])); pieces.sort(Bytes.compare); encoder.encoderawvalues(Tag.Dictionary, pieces); } else { @@ -640,7 +633,7 @@ export class Set extends FlexSet { } constructor(items?: Iterable) { - super(_canonicalString, _iterMap(items?.[Symbol.iterator](), fromJS)); + super(canonicalString, _iterMap(items?.[Symbol.iterator](), fromJS)); } map(f: (value: Value) => Value): Set { @@ -671,7 +664,7 @@ export class Set extends FlexSet { [PreserveOn](encoder: Encoder) { if (encoder.canonical) { - const pieces = Array.from(this).map(k => encode(k, { canonical: true })); + const pieces = Array.from(this).map(k => canonicalEncode(k)); pieces.sort(Bytes.compare); encoder.encoderawvalues(Tag.Set, pieces); } else { diff --git a/implementations/javascript/test/codec.test.ts b/implementations/javascript/test/codec.test.ts index a5f3dc7..fbbcc9c 100644 --- a/implementations/javascript/test/codec.test.ts +++ b/implementations/javascript/test/codec.test.ts @@ -1,12 +1,13 @@ import { Value, Dictionary, - decode, decodeWithAnnotations, encodeWithAnnotations, + decode, decodeWithAnnotations, encodeWithAnnotations, canonicalEncode, DecodeError, ShortPacket, Bytes, Record, annotate, strip, peel, preserves, + fromJS, } from '../src/index'; import './test-utils'; @@ -58,6 +59,19 @@ describe('parsing from subarray', () => { }); }); +describe('reusing buffer space', () => { + it('should be done safely, even with nested dictionaries', () => { + expect(canonicalEncode(fromJS(['aaa', {a: 1}, 'zzz'])).toHex()).is( + `b5 + b103616161 + b7 + b10161 91 + 84 + b1037a7a7a + 84`.replace(/\s+/g, '')); + }); +}); + describe('common test suite', () => { const samples_bin = fs.readFileSync(__dirname + '/../../../tests/samples.bin'); const samples = decodeWithAnnotations(samples_bin);