Reuse buffers during encoding, where safe to do so; in particular, during simple canonicalization

This commit is contained in:
Tony Garnock-Jones 2021-01-12 14:18:26 +01:00
parent 320215dca0
commit d8a041a647
3 changed files with 62 additions and 16 deletions

View File

@ -205,6 +205,10 @@ export interface EncoderOptions {
includeAnnotations?: boolean;
}
/**
 * Converts raw bytes to a string in which each character's code unit equals
 * the corresponding byte (a latin-1 style "binary string").
 *
 * The conversion proceeds in bounded slices: handing every byte to
 * `String.fromCharCode` as a separate argument in one call makes large inputs
 * exceed the engine's argument/stack limit and throw a RangeError.
 *
 * @param bs bytes to convert; may be empty
 * @returns a string of length `bs.length`
 */
function chunkStr(bs: Uint8Array): string {
    const SLICE = 8192;
    if (bs.length <= SLICE) {
        return String.fromCharCode(...bs);
    }
    const pieces: string[] = [];
    for (let offset = 0; offset < bs.length; offset += SLICE) {
        // subarray clamps the end index, so the final slice may be shorter.
        pieces.push(String.fromCharCode(...bs.subarray(offset, offset + SLICE)));
    }
    return pieces.join('');
}
export class Encoder {
chunks: Array<Uint8Array>;
view: DataView;
@ -227,8 +231,26 @@ export class Encoder {
}
/**
 * Returns the bytes encoded so far and resets the encoder so that it can be
 * reused for another value.
 *
 * Fast path: when no chunk has been set aside, the output lives entirely in
 * the active buffer, so only the used prefix is copied out and the buffer
 * itself is kept for the next encoding.
 *
 * Slow path: the active buffer is rotated out and all chunks are
 * concatenated. The chunk list is emptied afterwards; otherwise a reused
 * encoder would hand back the previous output again on its next call.
 * NOTE(review): assumes rotatebuffer() appends the active buffer to
 * `this.chunks` and installs a fresh one — confirm against its definition.
 */
contents(): Bytes {
    if (this.chunks.length === 0) {
        const resultLength = this.index;
        this.index = 0;
        // slice() copies, so the returned Bytes does not alias the reused buffer.
        return new Bytes(this.view.buffer.slice(0, resultLength));
    }
    this.rotatebuffer(4096);
    const drained = this.chunks;
    this.chunks = [];
    return Bytes.concat(drained);
}
/**
 * Like contents(), but hands back a string containing binary data "encoded"
 * via latin-1: one character per output byte, converted by chunkStr.
 *
 * As with contents(), the encoder is reset for reuse. In the multi-chunk
 * path the chunk list is emptied after it has been converted, matching the
 * single-buffer path, which resets `index`.
 * NOTE(review): assumes rotatebuffer() appends the active buffer to
 * `this.chunks` — confirm against its definition.
 */
contentsString(): string {
    if (this.chunks.length === 0) {
        // View (not copy) of the used prefix; it is consumed before the buffer is reused.
        const s = chunkStr(new Uint8Array(this.view.buffer, 0, this.index));
        this.index = 0;
        return s;
    }
    this.rotatebuffer(4096);
    const drained = this.chunks;
    this.chunks = [];
    return drained.map(chunkStr).join('');
}
rotatebuffer(size: number) {
@ -347,6 +369,23 @@ export function encode(v: any, options?: EncoderOptions): Bytes {
return new Encoder(options).push(v).contents();
}
// Shared encoder whose buffer is reused across canonical encodings, plus a
// flag marking it as busy. Canonical encoding of dictionaries and sets calls
// canonicalEncode on their elements while the outer value is still being
// written, so nested calls must not touch the shared encoder.
const _canonicalEncoder = new Encoder({ canonical: true });
let _usingCanonicalEncoder = false;

/**
 * Encodes `v` canonically and extracts the result with `finish`, using the
 * shared encoder when it is free and a throwaway encoder when it is busy.
 */
function _withCanonicalEncoder<R>(v: any, finish: (encoder: Encoder) => R): R {
    if (_usingCanonicalEncoder) {
        return finish(new Encoder({ canonical: true }).push(v));
    }
    _usingCanonicalEncoder = true;
    try {
        return finish(_canonicalEncoder.push(v));
    } catch (e) {
        // Discard partial output so the next user starts from an empty encoder.
        // NOTE(review): assumes `index` and `chunks` are the only
        // per-encoding state held by Encoder — confirm.
        _canonicalEncoder.index = 0;
        _canonicalEncoder.chunks = [];
        throw e;
    } finally {
        _usingCanonicalEncoder = false;
    }
}

/**
 * Encodes `v` in canonical form.
 *
 * @param v value to encode
 * @param options when supplied, a fresh encoder is built from them with
 *   `canonical` forced to true; when omitted, the shared encoder is reused
 *   if it is not already in use
 * @returns the canonical encoding of `v`
 */
export function canonicalEncode(v: any, options?: EncoderOptions): Bytes {
    if (options !== void 0) {
        return encode(v, { ... options, canonical: true });
    }
    return _withCanonicalEncoder(v, encoder => encoder.contents());
}

/**
 * Encodes `v` in canonical form and returns the bytes as a binary string
 * (see Encoder.contentsString). Honors the same busy flag as canonicalEncode,
 * so a nested canonicalEncode cannot drain the buffer mid-encoding.
 *
 * @param v value to encode
 * @returns the canonical encoding of `v`, one character per byte
 */
export function canonicalString(v: any): string {
    return _withCanonicalEncoder(v, encoder => encoder.contentsString());
}
/**
 * Encodes `v` with annotations included in the output.
 *
 * @param v value to encode
 * @param options extra encoder options; `includeAnnotations` is always
 *   overridden to true
 * @returns the encoded bytes
 */
export function encodeWithAnnotations(v: any, options: EncoderOptions = {}): Bytes {
    const annotated: EncoderOptions = Object.assign({}, options, { includeAnnotations: true });
    return encode(v, annotated);
}

View File

@ -2,7 +2,7 @@
import { PreserveOn, AsPreserve } from './symbols';
import { Tag } from './constants';
import { Encoder, encode } from './codec';
import { Encoder, canonicalEncode, canonicalString } from './codec';
import { stringify } from './text';
import { _iterMap, FlexMap, FlexSet } from './flex';
@ -144,7 +144,7 @@ export class Bytes {
maybeByteIterable.byteOffset,
maybeByteIterable.byteLength);
} else if (maybeByteIterable instanceof ArrayBuffer) {
this._view = new Uint8Array(maybeByteIterable.slice(0));
this._view = new Uint8Array(maybeByteIterable);
} else if (typeof maybeByteIterable === 'string') {
this._view = textEncoder.encode(maybeByteIterable);
} else if (typeof maybeByteIterable === 'number') {
@ -557,12 +557,6 @@ export function hash(a: Value): number {
export type DictionaryType = 'Dictionary' | 'Set';
export const DictionaryType = Symbol.for('DictionaryType');
/**
 * Produces a string key for `item` from its canonical encoding: one
 * character per encoded byte.
 *
 * The bytes are converted in bounded slices, because passing a whole large
 * buffer to `String.fromCharCode` as individual arguments can exceed the
 * engine's argument limit and throw a RangeError.
 *
 * @param item value to encode
 * @returns the canonical encoding of `item` as a binary string
 */
export function _canonicalString(item: Value): string {
    const bs = encode(item, { canonical: true })._view;
    const STEP = 8192;
    let s = '';
    for (let offset = 0; offset < bs.length; offset += STEP) {
        s += String.fromCharCode(...bs.subarray(offset, offset + STEP));
    }
    return s;
}
export class Dictionary<T> extends FlexMap<Value, T> {
get [DictionaryType](): DictionaryType {
return 'Dictionary';
@ -583,7 +577,7 @@ export class Dictionary<T> extends FlexMap<Value, T> {
}
constructor(items?: Iterable<readonly [any, T]>) {
super(_canonicalString, _iterMap(items?.[Symbol.iterator](), ([k,v]) => [fromJS(k), v]));
super(canonicalString, _iterMap(items?.[Symbol.iterator](), ([k,v]) => [fromJS(k), v]));
}
mapEntries<R>(f: (entry: [Value, T]) => [Value, R]): Dictionary<R> {
@ -615,8 +609,7 @@ export class Dictionary<T> extends FlexMap<Value, T> {
[PreserveOn](encoder: Encoder) {
if (encoder.canonical) {
const pieces = Array.from(this).map(([k, v]) =>
Bytes.concat([encode(k, { canonical: true }),
encode(v, { canonical: true })]));
Bytes.concat([canonicalEncode(k), canonicalEncode(v)]));
pieces.sort(Bytes.compare);
encoder.encoderawvalues(Tag.Dictionary, pieces);
} else {
@ -640,7 +633,7 @@ export class Set extends FlexSet<Value> {
}
constructor(items?: Iterable<any>) {
super(_canonicalString, _iterMap(items?.[Symbol.iterator](), fromJS));
super(canonicalString, _iterMap(items?.[Symbol.iterator](), fromJS));
}
map(f: (value: Value) => Value): Set {
@ -671,7 +664,7 @@ export class Set extends FlexSet<Value> {
[PreserveOn](encoder: Encoder) {
if (encoder.canonical) {
const pieces = Array.from(this).map(k => encode(k, { canonical: true }));
const pieces = Array.from(this).map(k => canonicalEncode(k));
pieces.sort(Bytes.compare);
encoder.encoderawvalues(Tag.Set, pieces);
} else {

View File

@ -1,12 +1,13 @@
import {
Value,
Dictionary,
decode, decodeWithAnnotations, encodeWithAnnotations,
decode, decodeWithAnnotations, encodeWithAnnotations, canonicalEncode,
DecodeError, ShortPacket,
Bytes, Record,
annotate,
strip, peel,
preserves,
fromJS,
} from '../src/index';
import './test-utils';
@ -58,6 +59,19 @@ describe('parsing from subarray', () => {
});
});
// Regression test for buffer reuse: canonically encoding a dictionary nested
// inside a sequence re-enters canonicalEncode for the dictionary's entries
// while the outer value is still being written.
describe('reusing buffer space', () => {
    it('should be done safely, even with nested dictionaries', () => {
        const nested = fromJS(['aaa', {a: 1}, 'zzz']);
        // Expected layout: the outer sequence wraps 'aaa', the dictionary
        // {a: 1}, and 'zzz'; whitespace is only for readability and is
        // stripped before comparison.
        const expectedHex = `b5
b103616161
b7
b10161 91
84
b1037a7a7a
84`.replace(/\s+/g, '');
        const actualHex = canonicalEncode(nested).toHex();
        expect(actualHex).is(expectedHex);
    });
});
describe('common test suite', () => {
const samples_bin = fs.readFileSync(__dirname + '/../../../tests/samples.bin');
const samples = decodeWithAnnotations(samples_bin);