diff --git a/implementations/javascript/packages/core/src/bytes.ts b/implementations/javascript/packages/core/src/bytes.ts index 40cae0b..c53c8e4 100644 --- a/implementations/javascript/packages/core/src/bytes.ts +++ b/implementations/javascript/packages/core/src/bytes.ts @@ -35,6 +35,10 @@ export class Bytes implements Preservable, PreserveWritable { } } + dataview(): DataView { + return new DataView(this._view.buffer, this._view.byteOffset, this._view.byteLength); + } + get length(): number { return this._view.length; } @@ -179,6 +183,10 @@ export function underlying(b: Bytes | Uint8Array): Uint8Array { return (b instanceof Uint8Array) ? b : b._view; } +export function dataview(b: Bytes | DataView): DataView { + return (b instanceof DataView) ? b : b.dataview(); +} + // Uint8Array / TypedArray methods export interface Bytes { diff --git a/implementations/javascript/packages/core/src/decoder.ts b/implementations/javascript/packages/core/src/decoder.ts index f5316c1..25b35e8 100644 --- a/implementations/javascript/packages/core/src/decoder.ts +++ b/implementations/javascript/packages/core/src/decoder.ts @@ -216,8 +216,8 @@ export class Decoder implements TypedDecoder { switch (tag) { case Tag.False: return this.state.wrap(false); case Tag.True: return this.state.wrap(true); - case Tag.Float: return this.state.wrap(new SingleFloat(this.state.nextbytes(4).getFloat32(0, false))); - case Tag.Double: return this.state.wrap(new DoubleFloat(this.state.nextbytes(8).getFloat64(0, false))); + case Tag.Float: return this.state.wrap(SingleFloat.fromBytes(this.state.nextbytes(4))); + case Tag.Double: return this.state.wrap(DoubleFloat.fromBytes(this.state.nextbytes(8))); case Tag.End: throw new DecodeError("Unexpected Compound end marker"); case Tag.Annotation: { const a = this.next(); @@ -294,7 +294,7 @@ export class Decoder implements TypedDecoder { nextFloat(): SingleFloat | undefined { this.skipAnnotations(); switch (this.state.nextbyte()) { - case Tag.Float: return new SingleFloat(this.state.nextbytes(4).getFloat32(0, false)); + case Tag.Float: return SingleFloat.fromBytes(this.state.nextbytes(4)); default: return void 0; } } @@ -302,7 +302,7 @@ export class Decoder implements TypedDecoder { nextDouble(): DoubleFloat | undefined { this.skipAnnotations(); switch (this.state.nextbyte()) { - case Tag.Double: return new DoubleFloat(this.state.nextbytes(8).getFloat64(0, false)); + case Tag.Double: return DoubleFloat.fromBytes(this.state.nextbytes(8)); default: return void 0; } } diff --git a/implementations/javascript/packages/core/src/float.ts b/implementations/javascript/packages/core/src/float.ts index a831a44..a95e943 100644 --- a/implementations/javascript/packages/core/src/float.ts +++ b/implementations/javascript/packages/core/src/float.ts @@ -4,6 +4,7 @@ import { Value } from "./values"; import type { GenericEmbedded } from "./embedded"; import type { Encoder, Preservable } from "./encoder"; import type { Writer, PreserveWritable } from "./writer"; +import { Bytes, dataview, underlying } from "./bytes"; export type FloatType = 'Single' | 'Double'; export const FloatType = Symbol.for('FloatType'); @@ -19,8 +20,15 @@ export abstract class Float { return stringify(this); } + abstract toBytes(): Bytes; + equals(other: any): boolean { - return Object.is(other.constructor, this.constructor) && (other.value === this.value); + if (!Object.is(other.constructor, this.constructor)) return false; + if (Number.isNaN(this.value) && Number.isNaN(other.value)) { + return other.toBytes().equals(this.toBytes()); + } else { + return Object.is(other.value, this.value); + } } hashCode(): number { @@ -44,24 +52,72 @@ export function floatValue(f: any): number { } } +export function floatlikeString(f: number): string { + if (Object.is(f, -0)) return '-0.0'; + const s = '' + f; + if (s.includes('.') || s.includes('e') || s.includes('E')) return s; + return s + '.0'; +} + export class SingleFloat extends Float implements Preservable, PreserveWritable { __as_preserve__(): Value { return this; } + static fromBytes(bs: Bytes | DataView): SingleFloat { + const view = dataview(bs); + const vf = view.getInt32(0, false); + if ((vf & 0x7f800000) === 0x7f800000) { + // NaN or inf. Preserve quiet/signalling bit by manually expanding to double-precision. + const sign = vf >> 31; + const payload = vf & 0x007fffff; + const dbs = new Bytes(8); + const dview = dataview(dbs); + dview.setInt16(0, (sign << 15) | 0x7ff0 | (payload >> 19), false); + dview.setInt32(2, (payload & 0x7ffff) << 13, false); + return new SingleFloat(dview.getFloat64(0, false)); + } else { + return new SingleFloat(dataview(bs).getFloat32(0, false)); + } + } + static __from_preserve__(v: Value): undefined | SingleFloat { return Float.isSingle(v) ? v : void 0; } + __w(v: DataView, offset: number) { + if (Number.isNaN(this.value)) { + const dbs = new Bytes(8); + const dview = dataview(dbs); + dview.setFloat64(0, this.value, false); + const sign = dview.getInt8(0) >> 7; + const payload = (dview.getInt32(1, false) >> 5) & 0x007fffff; + const vf = (sign << 31) | 0x7f800000 | payload; + v.setInt32(offset, vf, false); + } else { + v.setFloat32(offset, this.value, false); + } + } + __preserve_on__(encoder: Encoder) { encoder.state.emitbyte(Tag.Float); encoder.state.makeroom(4); - encoder.state.view.setFloat32(encoder.state.index, this.value, false); + this.__w(encoder.state.view, encoder.state.index); encoder.state.index += 4; } + toBytes(): Bytes { + const bs = new Bytes(4); + this.__w(bs.dataview(), 0); + return bs; + } + __preserve_text_on__(w: Writer) { - w.state.pieces.push('' + this.value + 'f'); + if (Number.isFinite(this.value)) { + w.state.pieces.push(floatlikeString(this.value) + 'f'); + } else { + w.state.pieces.push('#xf"', this.toBytes().toHex(), '"'); + } } get [FloatType](): 'Single' { @@ -78,6 +134,10 @@ export class DoubleFloat extends Float implements Preservable, PreserveWrit return this; } + static fromBytes(bs: Bytes | DataView): DoubleFloat { + return new DoubleFloat(dataview(bs).getFloat64(0, false)); + } + static __from_preserve__(v: Value): undefined | DoubleFloat { return Float.isDouble(v) ? v : void 0; } @@ -89,8 +149,18 @@ export class DoubleFloat extends Float implements Preservable, PreserveWrit encoder.state.index += 8; } + toBytes(): Bytes { + const bs = new Bytes(8); + bs.dataview().setFloat64(0, this.value, false); + return bs; + } + __preserve_text_on__(w: Writer) { - w.state.pieces.push('' + this.value); + if (Number.isFinite(this.value)) { + w.state.pieces.push(floatlikeString(this.value)); + } else { + w.state.pieces.push('#xd"', this.toBytes().toHex(), '"'); + } } get [FloatType](): 'Double' { diff --git a/implementations/javascript/packages/core/src/reader.ts b/implementations/javascript/packages/core/src/reader.ts index 220f1f7..4c914d7 100644 --- a/implementations/javascript/packages/core/src/reader.ts +++ b/implementations/javascript/packages/core/src/reader.ts @@ -3,12 +3,12 @@ import type { Value } from './values'; import { DecodeError, ShortPacket } from './codec'; import { Dictionary, Set } from './dictionary'; -import { strip, unannotate } from './strip'; -import { Bytes, unhexDigit } from './bytes'; -import { decode, Decoder, DecoderState, neverEmbeddedTypeDecode } from './decoder'; +import { strip } from './strip'; +import { Bytes, underlying, unhexDigit } from './bytes'; +import { Decoder, DecoderState, neverEmbeddedTypeDecode } from './decoder'; import { Record } from './record'; import { Annotated, newPosition, Position, updatePosition } from './annotated'; -import { Double, DoubleFloat, Single, SingleFloat } from './float'; +import { Double, DoubleFloat, FloatType, Single, SingleFloat } from './float'; import { stringify } from './text'; import { embed, GenericEmbedded, EmbeddedTypeDecode } from './embedded'; @@ -25,6 +25,13 @@ type IntOrFloat = 'int' | 'float'; type Numeric = number | SingleFloat | DoubleFloat; type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric; +export const NUMBER_RE: RegExp = /^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$/; +// Groups: +// 1 - integer part and sign +// 2 - decimal part, exponent and Float marker +// 3 - decimal part and exponent +// 7 - Float marker + export class ReaderState { buffer: string; pos: Position; @@ -124,6 +131,22 @@ export class ReaderState { } } + readHexFloat(precision: FloatType): SingleFloat | DoubleFloat { + const pos = this.copyPos(); + if (this.nextchar() !== '"') { + this.error("Missing open-double-quote in hex-encoded floating-point number", pos); + } + const bs = this.readHexBinary(); + switch (precision) { + case 'Single': + if (bs.length !== 4) this.error("Incorrect number of bytes in hex-encoded Float", pos); + return SingleFloat.fromBytes(bs); + case 'Double': + if (bs.length !== 8) this.error("Incorrect number of bytes in hex-encoded Double", pos); + return DoubleFloat.fromBytes(bs); + } + } + readBase64Binary(): Bytes { let acc = ''; while (true) { @@ -135,67 +158,7 @@ export class ReaderState { return decodeBase64(acc); } - readIntpart(acc: string, ch: string): Numeric { - if (ch === '0') return this.readFracexp('int', acc + ch); - return this.readDigit1('int', acc, (kind, acc) => this.readFracexp(kind, acc), ch); - } - - readDigit1(kind: IntOrFloat, acc: string, k: IntContinuation, ch?: string): Numeric { - if (ch === void 0) ch = this.nextchar(); - if (ch >= '0' && ch <= '9') return this.readDigit0(kind, acc + ch, k); - this.error('Incomplete number', this.pos); - } - - readDigit0(kind: IntOrFloat, acc: string, k: IntContinuation): Numeric { - while (true) { - if (this.atEnd()) break; - const ch = this.peek(); - if (!(ch >= '0' && ch <= '9')) break; - this.advance(); - acc = acc + ch; - } - return k(kind, acc); - } - - readFracexp(kind: IntOrFloat, acc: string): Numeric { - if (!this.atEnd() && this.peek() === '.') { - this.advance(); - return this.readDigit1('float', acc + '.', (kind, acc) => this.readExp(kind, acc)); - } - return this.readExp(kind, acc); - } - - readExp(kind: IntOrFloat, acc: string): Numeric { - const ch = this.atEnd() ? '' : this.peek(); - if (ch === 'e' || ch === 'E') { - this.advance(); - return this.readSignAndExp(acc + ch); - } - return this.finishNumber(kind, acc); - } - - readSignAndExp(acc: string): Numeric { - const ch = this.peek(); - if (ch === '+' || ch === '-') { - this.advance(); - return this.readDigit1('float', acc + ch, (kind, acc) => this.finishNumber(kind, acc)); - } - return this.readDigit1('float', acc, (kind, acc) => this.finishNumber(kind, acc)); - } - - finishNumber(kind: IntOrFloat, acc: string): Numeric { - const i = parseFloat(acc); - if (kind === 'int') return i; - const ch = this.atEnd() ? '' : this.peek(); - if (ch === 'f' || ch === 'F') { - this.advance(); - return Single(i); - } else { - return Double(i); - } - } - - readRawSymbol(acc: string): Value { + readRawSymbolOrNumber(acc: string): Value { while (true) { if (this.atEnd()) break; const ch = this.peek(); @@ -203,7 +166,20 @@ export class ReaderState { this.advance(); acc = acc + ch; } - return Symbol.for(acc); + const m = NUMBER_RE.exec(acc); + if (m) { + if (m[2] === void 0) { + let v = parseInt(m[1]); + if (Object.is(v, -0)) v = 0; + return v; + } else if (m[7] === '') { + return Double(parseFloat(m[1] + m[3])); + } else { + return Single(parseFloat(m[1] + m[3])); + } + } else { + return Symbol.for(acc); + } } readStringlike(xform: (ch: string) => E, @@ -355,11 +331,6 @@ export class Reader { const unwrapped = ((): Value => { const c = this.state.nextchar(); switch (c) { - case '-': - return this.state.readIntpart('-', this.state.nextchar()); - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return this.state.readIntpart('', c); case '"': return this.state.readString('"'); case '|': @@ -377,22 +348,13 @@ export class Reader { case 't': return true; case '{': return this.seq(new Set(), (v, s) => s.add(v), '}'); case '"': return this.state.readLiteralBinary(); - case 'x': - if (this.state.nextchar() !== '"') { - this.state.error('Expected open-quote at start of hex ByteString', - startPos); - } - return this.state.readHexBinary(); - case '[': return this.state.readBase64Binary(); - case '=': { - const bs = unannotate(this.next()); - if (!Bytes.isBytes(bs)) this.state.error('ByteString must follow #=', - startPos); - return decode(bs, { - embeddedDecode: this.embeddedType, - includeAnnotations: this.state.options.includeAnnotations, - }); + case 'x': switch (this.state.nextchar()) { + case '"': return this.state.readHexBinary(); + case 'f': return this.state.readHexFloat('Single'); + case 'd': return this.state.readHexFloat('Double'); + default: this.state.error('Invalid #x syntax', startPos); } + case '[': return this.state.readBase64Binary(); case '!': return embed(this.embeddedType.fromValue( new Reader(this.state, genericEmbeddedTypeDecode).next(), this.state.options)); @@ -411,7 +373,7 @@ export class Reader { case ']': this.state.error('Unexpected ]', startPos); case '}': this.state.error('Unexpected }', startPos); default: - return this.state.readRawSymbol(c); + return this.state.readRawSymbolOrNumber(c); } })(); return this.wrap(unwrapped, startPos); diff --git a/implementations/javascript/packages/core/src/text.ts b/implementations/javascript/packages/core/src/text.ts index 2fb8146..9cf2cdd 100644 --- a/implementations/javascript/packages/core/src/text.ts +++ b/implementations/javascript/packages/core/src/text.ts @@ -4,7 +4,7 @@ import type { Value } from './values'; import { Annotated } from './annotated'; import { Bytes } from './bytes'; import { KeyedDictionary, KeyedSet } from './dictionary'; -import { Writer, Writable, WriterOptions, EmbeddedWriter, WriterState } from './writer'; +import { Writer, WriterOptions, EmbeddedWriter, WriterState } from './writer'; import { fromJS } from './fromjs'; export const stringifyEmbeddedWrite: EmbeddedWriter = { diff --git a/implementations/javascript/packages/core/src/writer.ts b/implementations/javascript/packages/core/src/writer.ts index be55d34..8409e5c 100644 --- a/implementations/javascript/packages/core/src/writer.ts +++ b/implementations/javascript/packages/core/src/writer.ts @@ -3,6 +3,7 @@ import { Record, Tuple } from "./record"; import type { GenericEmbedded, Embedded, EmbeddedTypeEncode } from "./embedded"; import { Encoder, EncoderState } from "./encoder"; import type { Value } from "./values"; +import { NUMBER_RE } from './reader'; export type Writable = Value | PreserveWritable | Iterable> | ArrayBufferView; @@ -270,8 +271,7 @@ export class Writer { case 'symbol': { const s = v.description!; // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. - const m = /^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$/.exec(s); - if (m) { + if (/^[-a-zA-Z0-9~!$%^&*?_=+/.]+$/.exec(s) && !NUMBER_RE.exec(s)) { this.state.pieces.push(s); } else { this.state.pieces.push(this.state.escapeStringlike(s, '|')); diff --git a/implementations/python/tests/samples.bin b/implementations/python/tests/samples.bin index e05af50..0d2ad15 100644 Binary files a/implementations/python/tests/samples.bin and b/implementations/python/tests/samples.bin differ diff --git a/implementations/python/tests/samples.pr b/implementations/python/tests/samples.pr index 76061db..a29deb4 100644 --- a/implementations/python/tests/samples.pr +++ b/implementations/python/tests/samples.pr @@ -85,11 +85,15 @@ double6: @"Invalid chars" double7: @"Positive infinity" double8: @"Negative infinity" - double9: @"-NaN" - double10: @"-NaN" - double11: @"+NaN" - double12: @"+NaN" + double9: @"-qNaN" + double10: @"-qNaN" + double11: @"+qNaN" + double12: @"+qNaN" double13: @"Bad spacing" + double14: @"-sNaN" + double15: @"-sNaN" + double16: @"+sNaN" + double17: @"+sNaN" float0: float+0: float-0: @@ -100,11 +104,15 @@ float5: @"Invalid chars" float6: @"Positive infinity" float7: @"Negative infinity" - float8: @"+NaN" - float9: @"+NaN" - float10: @"-NaN" - float11: @"-NaN" + float8: @"+sNaN" + float9: @"+sNaN" + float10: @"-sNaN" + float11: @"-sNaN" float12: @"Bad spacing" + float13: @"+qNaN" + float14: @"+qNaN" + float15: @"-qNaN" + float16: @"-qNaN" int-257: int-256: int-255: diff --git a/implementations/racket/preserves/preserves/tests/samples.pr b/implementations/racket/preserves/preserves/tests/samples.pr index 76061db..a29deb4 100644 --- a/implementations/racket/preserves/preserves/tests/samples.pr +++ b/implementations/racket/preserves/preserves/tests/samples.pr @@ -85,11 +85,15 @@ double6: @"Invalid chars" double7: @"Positive infinity" double8: @"Negative infinity" - double9: @"-NaN" - double10: @"-NaN" - double11: @"+NaN" - double12: @"+NaN" + double9: @"-qNaN" + double10: @"-qNaN" + double11: @"+qNaN" + double12: @"+qNaN" double13: @"Bad spacing" + double14: @"-sNaN" + double15: @"-sNaN" + double16: @"+sNaN" + double17: @"+sNaN" float0: float+0: float-0: @@ -100,11 +104,15 @@ float5: @"Invalid chars" float6: @"Positive infinity" float7: @"Negative infinity" - float8: @"+NaN" - float9: @"+NaN" - float10: @"-NaN" - float11: @"-NaN" + float8: @"+sNaN" + float9: @"+sNaN" + float10: @"-sNaN" + float11: @"-sNaN" float12: @"Bad spacing" + float13: @"+qNaN" + float14: @"+qNaN" + float15: @"-qNaN" + float16: @"-qNaN" int-257: int-256: int-255: diff --git a/preserves-text.md b/preserves-text.md index 6af4dae..a6e9ca4 100644 --- a/preserves-text.md +++ b/preserves-text.md @@ -143,7 +143,7 @@ so if a `SymbolOrNumber` also matches the grammar for `Float`, `Double` or `SignedInteger`, then it must be interpreted as one of those, and otherwise it must be interpreted as a bare `Symbol`. - SymbolOrNumber = *baresymchar + SymbolOrNumber = 1*baresymchar baresymchar = ALPHA / DIGIT / sympunct / symuchar sympunct = "~" / "!" / "$" / "%" / "^" / "&" / "*" / "?" / "_" / "=" / "+" / "-" / "/" / "." @@ -168,7 +168,6 @@ either.[^reading-and-writing-floats-accurately] Double = flt SignedInteger = int - digit1-9 = %x31-39 nat = 1*DIGIT int = ["-"/"+"] nat frac = "." 1*DIGIT @@ -292,5 +291,22 @@ The text syntax for `Boolean`s, `Symbol`s, and `ByteString`s is directly inspired by [Racket](https://racket-lang.org/)'s lexical syntax. +## Appendix. Regular expressions for bare symbols and numbers + +When parsing, if a token matches both `SymbolOrNumber` and `Number`, it's a +number; use `Float`, `Double` and `SignedInteger` to disambiguate. If it +matches `SymbolOrNumber` but not `Number`, it's a "bare" `Symbol`. + + SymbolOrNumber: ^[-a-zA-Z0-9~!$%^&*?_=+/.]+$ + Number: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$ + Float: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))[fF])$ + Double: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+)))$ + SignedInteger: ^([-+]?\d+)$ + +When printing, if a symbol matches both `SymbolOrNumber` and `Number` or +neither `SymbolOrNumber` nor `Number`, it must be quoted (`|...|`). If it +matches `SymbolOrNumber` but not `Number`, it may be printed as a "bare" +`Symbol`. + ## Notes diff --git a/tests/samples.bin b/tests/samples.bin index e05af50..0d2ad15 100644 Binary files a/tests/samples.bin and b/tests/samples.bin differ diff --git a/tests/samples.pr b/tests/samples.pr index 76061db..a29deb4 100644 --- a/tests/samples.pr +++ b/tests/samples.pr @@ -85,11 +85,15 @@ double6: @"Invalid chars" double7: @"Positive infinity" double8: @"Negative infinity" - double9: @"-NaN" - double10: @"-NaN" - double11: @"+NaN" - double12: @"+NaN" + double9: @"-qNaN" + double10: @"-qNaN" + double11: @"+qNaN" + double12: @"+qNaN" double13: @"Bad spacing" + double14: @"-sNaN" + double15: @"-sNaN" + double16: @"+sNaN" + double17: @"+sNaN" float0: float+0: float-0: @@ -100,11 +104,15 @@ float5: @"Invalid chars" float6: @"Positive infinity" float7: @"Negative infinity" - float8: @"+NaN" - float9: @"+NaN" - float10: @"-NaN" - float11: @"-NaN" + float8: @"+sNaN" + float9: @"+sNaN" + float10: @"-sNaN" + float11: @"-sNaN" float12: @"Bad spacing" + float13: @"+qNaN" + float14: @"+qNaN" + float15: @"-qNaN" + float16: @"-qNaN" int-257: int-256: int-255: