Repair text syntax for numbers and symbols. Closes #19/#36/#37/#38.

Numbers and (bare) Symbols are now disambiguated after reading, which
permits leading `+`, leading `0`, and a wider range of acceptable
Symbols.

Updates spec text, test cases, and implementations. Some ancillary fixes
to Python's comparison routines are also included.
This commit is contained in:
Tony Garnock-Jones 2022-11-06 22:27:01 +01:00
parent 351feba8d2
commit 269ed2391a
31 changed files with 864 additions and 553 deletions

View File

@ -35,6 +35,10 @@ export class Bytes implements Preservable<any>, PreserveWritable<any> {
} }
} }
/**
 * Builds a fresh DataView covering exactly the same region of the
 * underlying buffer as this object's `_view` (same buffer, byte offset
 * and byte length). No bytes are copied.
 */
dataview(): DataView {
    const { buffer, byteOffset, byteLength } = this._view;
    return new DataView(buffer, byteOffset, byteLength);
}
get length(): number { get length(): number {
return this._view.length; return this._view.length;
} }
@ -179,6 +183,10 @@ export function underlying(b: Bytes | Uint8Array): Uint8Array {
return (b instanceof Uint8Array) ? b : b._view; return (b instanceof Uint8Array) ? b : b._view;
} }
/**
 * Coerces its argument to a DataView.
 *
 * @param b either an existing DataView or a Bytes instance
 * @returns `b` itself when it already is a DataView; otherwise the
 *          result of calling `b.dataview()`
 */
export function dataview(b: Bytes | DataView): DataView {
    if (b instanceof DataView) {
        return b;
    }
    return b.dataview();
}
// Uint8Array / TypedArray methods // Uint8Array / TypedArray methods
export interface Bytes { export interface Bytes {

View File

@ -216,8 +216,8 @@ export class Decoder<T = never> implements TypedDecoder<T> {
switch (tag) { switch (tag) {
case Tag.False: return this.state.wrap<T>(false); case Tag.False: return this.state.wrap<T>(false);
case Tag.True: return this.state.wrap<T>(true); case Tag.True: return this.state.wrap<T>(true);
case Tag.Float: return this.state.wrap<T>(new SingleFloat(this.state.nextbytes(4).getFloat32(0, false))); case Tag.Float: return this.state.wrap<T>(SingleFloat.fromBytes(this.state.nextbytes(4)));
case Tag.Double: return this.state.wrap<T>(new DoubleFloat(this.state.nextbytes(8).getFloat64(0, false))); case Tag.Double: return this.state.wrap<T>(DoubleFloat.fromBytes(this.state.nextbytes(8)));
case Tag.End: throw new DecodeError("Unexpected Compound end marker"); case Tag.End: throw new DecodeError("Unexpected Compound end marker");
case Tag.Annotation: { case Tag.Annotation: {
const a = this.next(); const a = this.next();
@ -294,7 +294,7 @@ export class Decoder<T = never> implements TypedDecoder<T> {
nextFloat(): SingleFloat | undefined { nextFloat(): SingleFloat | undefined {
this.skipAnnotations(); this.skipAnnotations();
switch (this.state.nextbyte()) { switch (this.state.nextbyte()) {
case Tag.Float: return new SingleFloat(this.state.nextbytes(4).getFloat32(0, false)); case Tag.Float: return SingleFloat.fromBytes(this.state.nextbytes(4));
default: return void 0; default: return void 0;
} }
} }
@ -302,7 +302,7 @@ export class Decoder<T = never> implements TypedDecoder<T> {
nextDouble(): DoubleFloat | undefined { nextDouble(): DoubleFloat | undefined {
this.skipAnnotations(); this.skipAnnotations();
switch (this.state.nextbyte()) { switch (this.state.nextbyte()) {
case Tag.Double: return new DoubleFloat(this.state.nextbytes(8).getFloat64(0, false)); case Tag.Double: return DoubleFloat.fromBytes(this.state.nextbytes(8));
default: return void 0; default: return void 0;
} }
} }

View File

@ -4,6 +4,7 @@ import { Value } from "./values";
import type { GenericEmbedded } from "./embedded"; import type { GenericEmbedded } from "./embedded";
import type { Encoder, Preservable } from "./encoder"; import type { Encoder, Preservable } from "./encoder";
import type { Writer, PreserveWritable } from "./writer"; import type { Writer, PreserveWritable } from "./writer";
import { Bytes, dataview, underlying } from "./bytes";
export type FloatType = 'Single' | 'Double'; export type FloatType = 'Single' | 'Double';
export const FloatType = Symbol.for('FloatType'); export const FloatType = Symbol.for('FloatType');
@ -19,8 +20,15 @@ export abstract class Float {
return stringify(this); return stringify(this);
} }
abstract toBytes(): Bytes;
equals(other: any): boolean { equals(other: any): boolean {
return Object.is(other.constructor, this.constructor) && (other.value === this.value); if (!Object.is(other.constructor, this.constructor)) return false;
if (Number.isNaN(this.value) && Number.isNaN(other.value)) {
return other.toBytes().equals(this.toBytes());
} else {
return Object.is(other.value, this.value);
}
} }
hashCode(): number { hashCode(): number {
@ -44,24 +52,72 @@ export function floatValue(f: any): number {
} }
} }
/**
 * Renders a number so that it always reads as a floating-point literal.
 *
 * Negative zero is special-cased to '-0.0' (plain string conversion would
 * drop the sign). Otherwise the default string conversion is used, and
 * '.0' is appended unless the text already contains a decimal point or an
 * exponent marker.
 *
 * @param f the number to render
 * @returns the textual form of `f`
 */
export function floatlikeString(f: number): string {
    if (Object.is(f, -0)) {
        return '-0.0';
    }
    const text = '' + f;
    // A '.', 'e' or 'E' anywhere in the text means no suffix is added.
    const alreadyFloatlike = /[.eE]/.test(text);
    return alreadyFloatlike ? text : text + '.0';
}
export class SingleFloat extends Float implements Preservable<any>, PreserveWritable<any> { export class SingleFloat extends Float implements Preservable<any>, PreserveWritable<any> {
__as_preserve__<T = GenericEmbedded>(): Value<T> { __as_preserve__<T = GenericEmbedded>(): Value<T> {
return this; return this;
} }
/**
 * Decodes a big-endian single-precision bit pattern, read from offset 0
 * of `bs`, into a SingleFloat.
 *
 * Patterns whose exponent bits are all ones (NaN or infinity) are not
 * read with getFloat32. Instead the sign and the 23 mantissa bits are
 * copied by hand into the corresponding double-precision pattern, so the
 * quiet/signalling bit of a NaN is carried over.
 *
 * @param bs a Bytes or DataView holding the encoded value
 */
static fromBytes(bs: Bytes | DataView): SingleFloat {
    const source = dataview(bs);
    const bits = source.getInt32(0, false);

    // Ordinary (finite) value: let the platform do the conversion.
    if ((bits & 0x7f800000) !== 0x7f800000) {
        return new SingleFloat(source.getFloat32(0, false));
    }

    // NaN or inf: widen manually to an 8-byte double pattern.
    const signFill = bits >> 31;            // 0 or -1 (arithmetic shift)
    const mantissa = bits & 0x007fffff;
    const widened = dataview(new Bytes(8));
    // First 16 bits: sign, all-ones exponent, top 4 mantissa bits.
    widened.setInt16(0, (signFill << 15) | 0x7ff0 | (mantissa >> 19), false);
    // Next 32 bits: the remaining 19 mantissa bits, left-aligned.
    widened.setInt32(2, (mantissa & 0x7ffff) << 13, false);
    return new SingleFloat(widened.getFloat64(0, false));
}
static __from_preserve__<T>(v: Value<T>): undefined | SingleFloat { static __from_preserve__<T>(v: Value<T>): undefined | SingleFloat {
return Float.isSingle(v) ? v : void 0; return Float.isSingle(v) ? v : void 0;
} }
/**
 * Writes the 4-byte big-endian single-precision encoding of `this.value`
 * into `v`, starting at `offset`.
 *
 * Non-NaN values are written with setFloat32. A NaN is encoded by hand:
 * the sign and the top 23 mantissa bits of its double-precision bit
 * pattern are packed into a single-precision pattern with an all-ones
 * exponent.
 * NOTE(review): presumably done to keep the NaN payload and
 * quiet/signalling bit intact — confirm.
 */
__w(v: DataView, offset: number) {
    if (!Number.isNaN(this.value)) {
        v.setFloat32(offset, this.value, false);
        return;
    }

    const scratch = dataview(new Bytes(8));
    scratch.setFloat64(0, this.value, false);
    const signFill = scratch.getInt8(0) >> 7;   // 0 or -1
    // Bytes 1..4 hold the low 4 exponent bits followed by 28 mantissa bits;
    // dropping the low 5 bits and masking leaves the top 23 mantissa bits.
    const mantissa = (scratch.getInt32(1, false) >> 5) & 0x007fffff;
    const encoded = (signFill << 31) | 0x7f800000 | mantissa;
    v.setInt32(offset, encoded, false);
}
__preserve_on__(encoder: Encoder<any>) { __preserve_on__(encoder: Encoder<any>) {
encoder.state.emitbyte(Tag.Float); encoder.state.emitbyte(Tag.Float);
encoder.state.makeroom(4); encoder.state.makeroom(4);
encoder.state.view.setFloat32(encoder.state.index, this.value, false); this.__w(encoder.state.view, encoder.state.index);
encoder.state.index += 4; encoder.state.index += 4;
} }
/**
 * Returns a new 4-byte Bytes holding the encoding that `__w` produces
 * for this value.
 */
toBytes(): Bytes {
    const encoded = new Bytes(4);
    const target = encoded.dataview();
    this.__w(target, 0);
    return encoded;
}
__preserve_text_on__(w: Writer<any>) { __preserve_text_on__(w: Writer<any>) {
w.state.pieces.push('' + this.value + 'f'); if (Number.isFinite(this.value)) {
w.state.pieces.push(floatlikeString(this.value) + 'f');
} else {
w.state.pieces.push('#xf"', this.toBytes().toHex(), '"');
}
} }
get [FloatType](): 'Single' { get [FloatType](): 'Single' {
@ -78,6 +134,10 @@ export class DoubleFloat extends Float implements Preservable<any>, PreserveWrit
return this; return this;
} }
/**
 * Decodes a big-endian double-precision value, read from offset 0 of
 * `bs`, into a DoubleFloat.
 *
 * @param bs a Bytes or DataView holding the encoded value
 */
static fromBytes(bs: Bytes | DataView): DoubleFloat {
    const source = dataview(bs);
    const decoded = source.getFloat64(0, false);
    return new DoubleFloat(decoded);
}
static __from_preserve__<T>(v: Value<T>): undefined | DoubleFloat { static __from_preserve__<T>(v: Value<T>): undefined | DoubleFloat {
return Float.isDouble(v) ? v : void 0; return Float.isDouble(v) ? v : void 0;
} }
@ -89,8 +149,18 @@ export class DoubleFloat extends Float implements Preservable<any>, PreserveWrit
encoder.state.index += 8; encoder.state.index += 8;
} }
/**
 * Returns a new 8-byte Bytes holding the big-endian double-precision
 * encoding of `this.value`.
 */
toBytes(): Bytes {
    const encoded = new Bytes(8);
    const target = encoded.dataview();
    target.setFloat64(0, this.value, false);
    return encoded;
}
__preserve_text_on__(w: Writer<any>) { __preserve_text_on__(w: Writer<any>) {
w.state.pieces.push('' + this.value); if (Number.isFinite(this.value)) {
w.state.pieces.push(floatlikeString(this.value));
} else {
w.state.pieces.push('#xd"', this.toBytes().toHex(), '"');
}
} }
get [FloatType](): 'Double' { get [FloatType](): 'Double' {

View File

@ -3,12 +3,12 @@
import type { Value } from './values'; import type { Value } from './values';
import { DecodeError, ShortPacket } from './codec'; import { DecodeError, ShortPacket } from './codec';
import { Dictionary, Set } from './dictionary'; import { Dictionary, Set } from './dictionary';
import { strip, unannotate } from './strip'; import { strip } from './strip';
import { Bytes, unhexDigit } from './bytes'; import { Bytes, underlying, unhexDigit } from './bytes';
import { decode, Decoder, DecoderState, neverEmbeddedTypeDecode } from './decoder'; import { Decoder, DecoderState, neverEmbeddedTypeDecode } from './decoder';
import { Record } from './record'; import { Record } from './record';
import { Annotated, newPosition, Position, updatePosition } from './annotated'; import { Annotated, newPosition, Position, updatePosition } from './annotated';
import { Double, DoubleFloat, Single, SingleFloat } from './float'; import { Double, DoubleFloat, FloatType, Single, SingleFloat } from './float';
import { stringify } from './text'; import { stringify } from './text';
import { embed, GenericEmbedded, EmbeddedTypeDecode } from './embedded'; import { embed, GenericEmbedded, EmbeddedTypeDecode } from './embedded';
@ -25,6 +25,13 @@ type IntOrFloat = 'int' | 'float';
type Numeric = number | SingleFloat | DoubleFloat; type Numeric = number | SingleFloat | DoubleFloat;
type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric; type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric;
// Matches a complete token that should be read as a number rather than a
// Symbol: an optionally signed run of digits, optionally followed by a
// decimal part and/or exponent, optionally followed by a Float marker.
// The Float marker is only accepted after a decimal part or exponent, so
// a token such as `1f` does not match.
export const NUMBER_RE: RegExp = /^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$/;
// Groups:
// 1 - integer part and sign
// 2 - decimal part, exponent and Float marker (undefined for integers)
// 3 - decimal part and exponent
// 4 - decimal part with its optional exponent
// 5 - exponent following a decimal part
// 6 - exponent without a decimal part
// 7 - Float marker (empty string when absent but group 2 matched)
export class ReaderState { export class ReaderState {
buffer: string; buffer: string;
pos: Position; pos: Position;
@ -124,6 +131,22 @@ export class ReaderState {
} }
} }
/**
 * Reads a hex-encoded floating-point literal of the given precision.
 *
 * The next character must be an opening double quote; the hex body is
 * then read with `readHexBinary()` and must decode to exactly 4 bytes
 * for 'Single' or 8 bytes for 'Double'. Problems are reported through
 * `this.error`, using the position captured before the quote.
 * NOTE(review): `this.error` is assumed not to return — confirm.
 *
 * @param precision which float width to decode
 * @returns the decoded SingleFloat or DoubleFloat
 */
readHexFloat(precision: FloatType): SingleFloat | DoubleFloat {
    const start = this.copyPos();
    const opener = this.nextchar();
    if (opener !== '"') {
        this.error("Missing open-double-quote in hex-encoded floating-point number", start);
    }
    const raw = this.readHexBinary();
    const byteCount = raw.length;
    switch (precision) {
        case 'Single': {
            if (byteCount !== 4) {
                this.error("Incorrect number of bytes in hex-encoded Float", start);
            }
            return SingleFloat.fromBytes(raw);
        }
        case 'Double': {
            if (byteCount !== 8) {
                this.error("Incorrect number of bytes in hex-encoded Double", start);
            }
            return DoubleFloat.fromBytes(raw);
        }
    }
}
readBase64Binary(): Bytes { readBase64Binary(): Bytes {
let acc = ''; let acc = '';
while (true) { while (true) {
@ -135,67 +158,7 @@ export class ReaderState {
return decodeBase64(acc); return decodeBase64(acc);
} }
readIntpart(acc: string, ch: string): Numeric { readRawSymbolOrNumber<T>(acc: string): Value<T> {
if (ch === '0') return this.readFracexp('int', acc + ch);
return this.readDigit1('int', acc, (kind, acc) => this.readFracexp(kind, acc), ch);
}
readDigit1(kind: IntOrFloat, acc: string, k: IntContinuation, ch?: string): Numeric {
if (ch === void 0) ch = this.nextchar();
if (ch >= '0' && ch <= '9') return this.readDigit0(kind, acc + ch, k);
this.error('Incomplete number', this.pos);
}
readDigit0(kind: IntOrFloat, acc: string, k: IntContinuation): Numeric {
while (true) {
if (this.atEnd()) break;
const ch = this.peek();
if (!(ch >= '0' && ch <= '9')) break;
this.advance();
acc = acc + ch;
}
return k(kind, acc);
}
readFracexp(kind: IntOrFloat, acc: string): Numeric {
if (!this.atEnd() && this.peek() === '.') {
this.advance();
return this.readDigit1('float', acc + '.', (kind, acc) => this.readExp(kind, acc));
}
return this.readExp(kind, acc);
}
readExp(kind: IntOrFloat, acc: string): Numeric {
const ch = this.atEnd() ? '' : this.peek();
if (ch === 'e' || ch === 'E') {
this.advance();
return this.readSignAndExp(acc + ch);
}
return this.finishNumber(kind, acc);
}
readSignAndExp(acc: string): Numeric {
const ch = this.peek();
if (ch === '+' || ch === '-') {
this.advance();
return this.readDigit1('float', acc + ch, (kind, acc) => this.finishNumber(kind, acc));
}
return this.readDigit1('float', acc, (kind, acc) => this.finishNumber(kind, acc));
}
finishNumber(kind: IntOrFloat, acc: string): Numeric {
const i = parseFloat(acc);
if (kind === 'int') return i;
const ch = this.atEnd() ? '' : this.peek();
if (ch === 'f' || ch === 'F') {
this.advance();
return Single(i);
} else {
return Double(i);
}
}
readRawSymbol<T>(acc: string): Value<T> {
while (true) { while (true) {
if (this.atEnd()) break; if (this.atEnd()) break;
const ch = this.peek(); const ch = this.peek();
@ -203,7 +166,20 @@ export class ReaderState {
this.advance(); this.advance();
acc = acc + ch; acc = acc + ch;
} }
return Symbol.for(acc); const m = NUMBER_RE.exec(acc);
if (m) {
if (m[2] === void 0) {
let v = parseInt(m[1]);
if (Object.is(v, -0)) v = 0;
return v;
} else if (m[7] === '') {
return Double(parseFloat(m[1] + m[3]));
} else {
return Single(parseFloat(m[1] + m[3]));
}
} else {
return Symbol.for(acc);
}
} }
readStringlike<E, R>(xform: (ch: string) => E, readStringlike<E, R>(xform: (ch: string) => E,
@ -355,11 +331,6 @@ export class Reader<T> {
const unwrapped = ((): Value<T> => { const unwrapped = ((): Value<T> => {
const c = this.state.nextchar(); const c = this.state.nextchar();
switch (c) { switch (c) {
case '-':
return this.state.readIntpart('-', this.state.nextchar());
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
return this.state.readIntpart('', c);
case '"': case '"':
return this.state.readString('"'); return this.state.readString('"');
case '|': case '|':
@ -377,22 +348,13 @@ export class Reader<T> {
case 't': return true; case 't': return true;
case '{': return this.seq(new Set<T>(), (v, s) => s.add(v), '}'); case '{': return this.seq(new Set<T>(), (v, s) => s.add(v), '}');
case '"': return this.state.readLiteralBinary(); case '"': return this.state.readLiteralBinary();
case 'x': case 'x': switch (this.state.nextchar()) {
if (this.state.nextchar() !== '"') { case '"': return this.state.readHexBinary();
this.state.error('Expected open-quote at start of hex ByteString', case 'f': return this.state.readHexFloat('Single');
startPos); case 'd': return this.state.readHexFloat('Double');
} default: this.state.error('Invalid #x syntax', startPos);
return this.state.readHexBinary();
case '[': return this.state.readBase64Binary();
case '=': {
const bs = unannotate(this.next());
if (!Bytes.isBytes(bs)) this.state.error('ByteString must follow #=',
startPos);
return decode<T>(bs, {
embeddedDecode: this.embeddedType,
includeAnnotations: this.state.options.includeAnnotations,
});
} }
case '[': return this.state.readBase64Binary();
case '!': return embed(this.embeddedType.fromValue( case '!': return embed(this.embeddedType.fromValue(
new Reader<GenericEmbedded>(this.state, genericEmbeddedTypeDecode).next(), new Reader<GenericEmbedded>(this.state, genericEmbeddedTypeDecode).next(),
this.state.options)); this.state.options));
@ -411,7 +373,7 @@ export class Reader<T> {
case ']': this.state.error('Unexpected ]', startPos); case ']': this.state.error('Unexpected ]', startPos);
case '}': this.state.error('Unexpected }', startPos); case '}': this.state.error('Unexpected }', startPos);
default: default:
return this.state.readRawSymbol(c); return this.state.readRawSymbolOrNumber(c);
} }
})(); })();
return this.wrap(unwrapped, startPos); return this.wrap(unwrapped, startPos);

View File

@ -4,7 +4,7 @@ import type { Value } from './values';
import { Annotated } from './annotated'; import { Annotated } from './annotated';
import { Bytes } from './bytes'; import { Bytes } from './bytes';
import { KeyedDictionary, KeyedSet } from './dictionary'; import { KeyedDictionary, KeyedSet } from './dictionary';
import { Writer, Writable, WriterOptions, EmbeddedWriter, WriterState } from './writer'; import { Writer, WriterOptions, EmbeddedWriter, WriterState } from './writer';
import { fromJS } from './fromjs'; import { fromJS } from './fromjs';
export const stringifyEmbeddedWrite: EmbeddedWriter<any> = { export const stringifyEmbeddedWrite: EmbeddedWriter<any> = {

View File

@ -3,6 +3,7 @@ import { Record, Tuple } from "./record";
import type { GenericEmbedded, Embedded, EmbeddedTypeEncode } from "./embedded"; import type { GenericEmbedded, Embedded, EmbeddedTypeEncode } from "./embedded";
import { Encoder, EncoderState } from "./encoder"; import { Encoder, EncoderState } from "./encoder";
import type { Value } from "./values"; import type { Value } from "./values";
import { NUMBER_RE } from './reader';
export type Writable<T> = export type Writable<T> =
Value<T> | PreserveWritable<T> | Iterable<Value<T>> | ArrayBufferView; Value<T> | PreserveWritable<T> | Iterable<Value<T>> | ArrayBufferView;
@ -270,8 +271,7 @@ export class Writer<T> {
case 'symbol': { case 'symbol': {
const s = v.description!; const s = v.description!;
// FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
const m = /^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$/.exec(s); if (/^[-a-zA-Z0-9~!$%^&*?_=+/.]+$/.exec(s) && !NUMBER_RE.exec(s)) {
if (m) {
this.state.pieces.push(s); this.state.pieces.push(s);
} else { } else {
this.state.pieces.push(this.state.escapeStringlike(s, '|')); this.state.pieces.push(this.state.escapeStringlike(s, '|'));

View File

@ -72,7 +72,7 @@ class Decoder(BinaryCodec):
tag = self.nextbyte() tag = self.nextbyte()
if tag == 0x80: return self.wrap(False) if tag == 0x80: return self.wrap(False)
if tag == 0x81: return self.wrap(True) if tag == 0x81: return self.wrap(True)
if tag == 0x82: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0])) if tag == 0x82: return self.wrap(Float.from_bytes(self.nextbytes(4)))
if tag == 0x83: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0]) if tag == 0x83: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
if tag == 0x84: raise DecodeError('Unexpected end-of-stream marker') if tag == 0x84: raise DecodeError('Unexpected end-of-stream marker')
if tag == 0x85: if tag == 0x85:

View File

@ -2,7 +2,7 @@ import numbers
from enum import Enum from enum import Enum
from functools import cmp_to_key from functools import cmp_to_key
from .values import preserve, Float, Embedded, Record, Symbol from .values import preserve, Float, Embedded, Record, Symbol, cmp_floats, _unwrap
from .compat import basestring_ from .compat import basestring_
class TypeNumber(Enum): class TypeNumber(Enum):
@ -19,7 +19,7 @@ class TypeNumber(Enum):
SET = 9 SET = 9
DICTIONARY = 10 DICTIONARY = 10
EMBEDDED = 10 EMBEDDED = 11
def type_number(v): def type_number(v):
if hasattr(v, '__preserve__'): if hasattr(v, '__preserve__'):
@ -84,12 +84,17 @@ def _item_key(item):
return item[0] return item[0]
def _eq(a, b): def _eq(a, b):
a = _unwrap(a)
b = _unwrap(b)
ta = type_number(a) ta = type_number(a)
tb = type_number(b) tb = type_number(b)
if ta != tb: return False if ta != tb: return False
if ta == TypeNumber.DOUBLE:
return cmp_floats(a, b) == 0
if ta == TypeNumber.EMBEDDED: if ta == TypeNumber.EMBEDDED:
return ta.embeddedValue == tb.embeddedValue return _eq(a.embeddedValue, b.embeddedValue)
if ta == TypeNumber.RECORD: if ta == TypeNumber.RECORD:
return _eq(a.key, b.key) and _eq_sequences(a.fields, b.fields) return _eq(a.key, b.key) and _eq_sequences(a.fields, b.fields)
@ -118,13 +123,18 @@ def _cmp_sequences(aa, bb):
return len(aa) - len(bb) return len(aa) - len(bb)
def _cmp(a, b): def _cmp(a, b):
a = _unwrap(a)
b = _unwrap(b)
ta = type_number(a) ta = type_number(a)
tb = type_number(b) tb = type_number(b)
if ta.value < tb.value: return -1 if ta.value < tb.value: return -1
if tb.value < ta.value: return 1 if tb.value < ta.value: return 1
if ta == TypeNumber.DOUBLE:
return cmp_floats(a, b)
if ta == TypeNumber.EMBEDDED: if ta == TypeNumber.EMBEDDED:
return _simplecmp(ta.embeddedValue, tb.embeddedValue) return _cmp(a.embeddedValue, b.embeddedValue)
if ta == TypeNumber.RECORD: if ta == TypeNumber.RECORD:
v = _cmp(a.key, b.key) v = _cmp(a.key, b.key)

View File

@ -1,6 +1,7 @@
import numbers import numbers
import struct import struct
import base64 import base64
import math
from .values import * from .values import *
from .error import * from .error import *
@ -9,6 +10,8 @@ from .binary import Decoder
class TextCodec(object): pass class TextCodec(object): pass
NUMBER_RE = re.compile(r'^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$')
class Parser(TextCodec): class Parser(TextCodec):
def __init__(self, input_buffer=u'', include_annotations=False, parse_embedded=lambda x: x): def __init__(self, input_buffer=u'', include_annotations=False, parse_embedded=lambda x: x):
super(Parser, self).__init__() super(Parser, self).__init__()
@ -66,50 +69,6 @@ class Parser(TextCodec):
return self.wrap(u''.join(s)) return self.wrap(u''.join(s))
s.append(c) s.append(c)
def read_intpart(self, acc, c):
if c == '0':
acc.append(c)
else:
self.read_digit1(acc, c)
return self.read_fracexp(acc)
def read_fracexp(self, acc):
is_float = False
if self.peek() == '.':
is_float = True
acc.append(self.nextchar())
self.read_digit1(acc, self.nextchar())
if self.peek() in 'eE':
acc.append(self.nextchar())
return self.read_sign_and_exp(acc)
else:
return self.finish_number(acc, is_float)
def read_sign_and_exp(self, acc):
if self.peek() in '+-':
acc.append(self.nextchar())
self.read_digit1(acc, self.nextchar())
return self.finish_number(acc, True)
def finish_number(self, acc, is_float):
if is_float:
if self.peek() in 'fF':
self.skip()
return Float(float(u''.join(acc)))
else:
return float(u''.join(acc))
else:
return int(u''.join(acc))
def read_digit1(self, acc, c):
if not c.isdigit():
raise DecodeError('Incomplete number')
acc.append(c)
while not self._atend():
if not self.peek().isdigit():
break
acc.append(self.nextchar())
def read_stringlike(self, terminator, hexescape, hexescaper): def read_stringlike(self, terminator, hexescape, hexescaper):
acc = [] acc = []
while True: while True:
@ -186,6 +145,16 @@ class Parser(TextCodec):
if c == '=': continue if c == '=': continue
acc.append(c) acc.append(c)
def read_hex_float(self, bytecount):
    """Read a hex-encoded floating-point literal of `bytecount` bytes.

    The next character must be an opening double quote. The hex body is
    read with `read_hex_binary()` and must be exactly `bytecount` bytes
    long. A 4-byte body is decoded with `Float.from_bytes`; an 8-byte
    body is unpacked as a big-endian double.

    Raises DecodeError on a missing quote, a length mismatch, or a
    `bytecount` other than 4 or 8.
    """
    opener = self.nextchar()
    if opener != '"':
        raise DecodeError('Missing open-double-quote in hex-encoded floating-point number')
    raw = self.read_hex_binary()
    if len(raw) != bytecount:
        raise DecodeError('Incorrect number of bytes in hex-encoded floating-point number')
    if bytecount == 4:
        return Float.from_bytes(raw)
    elif bytecount == 8:
        (value,) = struct.unpack('>d', raw)
        return value
    else:
        raise DecodeError('Unsupported byte count in hex-encoded floating-point number')
def upto(self, delimiter): def upto(self, delimiter):
vs = [] vs = []
while True: while True:
@ -208,14 +177,24 @@ class Parser(TextCodec):
raise DecodeError('Missing expected key/value separator') raise DecodeError('Missing expected key/value separator')
acc.append(self.next()) acc.append(self.next())
def read_raw_symbol(self, acc): def read_raw_symbol_or_number(self, acc):
while not self._atend(): while not self._atend():
c = self.peek() c = self.peek()
if c.isspace() or c in '(){}[]<>";,@#:|': if c.isspace() or c in '(){}[]<>";,@#:|':
break break
self.skip() self.skip()
acc.append(c) acc.append(c)
return Symbol(u''.join(acc)) acc = u''.join(acc)
m = NUMBER_RE.match(acc)
if m:
if m[2] is None:
return int(m[1])
elif m[7] == '':
return float(m[1] + m[3])
else:
return Float(float(m[1] + m[3]))
else:
return Symbol(acc)
def wrap(self, v): def wrap(self, v):
return Annotated(v) if self.include_annotations else v return Annotated(v) if self.include_annotations else v
@ -223,12 +202,6 @@ class Parser(TextCodec):
def next(self): def next(self):
self.skip_whitespace() self.skip_whitespace()
c = self.peek() c = self.peek()
if c == '-':
self.skip()
return self.wrap(self.read_intpart(['-'], self.nextchar()))
if c.isdigit():
self.skip()
return self.wrap(self.read_intpart([], c))
if c == '"': if c == '"':
self.skip() self.skip()
return self.wrap(self.read_string('"')) return self.wrap(self.read_string('"'))
@ -251,9 +224,11 @@ class Parser(TextCodec):
if c == '{': return self.wrap(frozenset(self.upto('}'))) if c == '{': return self.wrap(frozenset(self.upto('}')))
if c == '"': return self.wrap(self.read_literal_binary()) if c == '"': return self.wrap(self.read_literal_binary())
if c == 'x': if c == 'x':
if self.nextchar() != '"': c = self.nextchar()
raise DecodeError('Expected open-quote at start of hex ByteString') if c == '"': return self.wrap(self.read_hex_binary())
return self.wrap(self.read_hex_binary()) if c == 'f': return self.wrap(self.read_hex_float(4))
if c == 'd': return self.wrap(self.read_hex_float(8))
raise DecodeError('Invalid #x syntax')
if c == '[': return self.wrap(self.read_base64_binary()) if c == '[': return self.wrap(self.read_base64_binary())
if c == '=': if c == '=':
old_ann = self.include_annotations old_ann = self.include_annotations
@ -286,7 +261,7 @@ class Parser(TextCodec):
if c in '>]}': if c in '>]}':
raise DecodeError('Unexpected ' + c) raise DecodeError('Unexpected ' + c)
self.skip() self.skip()
return self.wrap(self.read_raw_symbol([c])) return self.wrap(self.read_raw_symbol_or_number([c]))
def try_next(self): def try_next(self):
start = self.index start = self.index
@ -385,7 +360,10 @@ class Formatter(TextCodec):
elif v is True: elif v is True:
self.chunks.append('#t') self.chunks.append('#t')
elif isinstance(v, float): elif isinstance(v, float):
self.chunks.append(repr(v)) if math.isnan(v) or math.isinf(v):
self.chunks.append('#xd"' + struct.pack('>d', v).hex() + '"')
else:
self.chunks.append(repr(v))
elif isinstance(v, numbers.Number): elif isinstance(v, numbers.Number):
self.chunks.append('%d' % (v,)) self.chunks.append('%d' % (v,))
elif isinstance(v, bytes): elif isinstance(v, bytes):

View File

@ -1,6 +1,7 @@
import re import re
import sys import sys
import struct import struct
import math
from .error import DecodeError from .error import DecodeError
@ -9,6 +10,16 @@ def preserve(v):
v = v.__preserve__() v = v.__preserve__()
return v return v
def float_to_int(v):
    """Return the big-endian IEEE 754 double-precision bit pattern of `v`
    as an unsigned 64-bit integer."""
    return struct.unpack('>Q', struct.pack('>d', v))[0]

def cmp_floats(a, b):
    """Three-way comparison of two floats by bit pattern (a total order).

    Returns a negative number if `a` sorts before `b`, zero if their bit
    patterns are identical, and a positive number otherwise. Unlike `<`
    and `==`, this distinguishes -0.0 from 0.0 and gives every NaN bit
    pattern a definite position.

    Each unsigned bit pattern is mapped to a sort key: negative values
    (sign bit set) have all their bits inverted, so larger magnitudes sort
    earlier; non-negative values have the sign bit set, so they sort after
    every negative value. Inverting only the low 63 bits of negatives
    would leave their top bit set and rank them above all positives.
    """
    sign_bit = 0x8000000000000000
    all_bits = 0xffffffffffffffff
    a = float_to_int(a)
    b = float_to_int(b)
    a = (a ^ all_bits) if (a & sign_bit) else (a | sign_bit)
    b = (b ^ all_bits) if (b & sign_bit) else (b | sign_bit)
    return a - b
class Float(object): class Float(object):
def __init__(self, value): def __init__(self, value):
self.value = value self.value = value
@ -16,7 +27,12 @@ class Float(object):
def __eq__(self, other): def __eq__(self, other):
other = _unwrap(other) other = _unwrap(other)
if other.__class__ is self.__class__: if other.__class__ is self.__class__:
return self.value == other.value return cmp_floats(self.value, other.value) == 0
def __lt__(self, other):
    """Order two values of this class by `cmp_floats` on their `value`s.

    `other` is first passed through `_unwrap`. When the unwrapped operand
    is not exactly of this class, None is returned.
    """
    rhs = _unwrap(other)
    if rhs.__class__ is not self.__class__:
        return None
    return cmp_floats(self.value, rhs.value) < 0
def __ne__(self, other): def __ne__(self, other):
return not self.__eq__(other) return not self.__eq__(other)
@ -27,15 +43,41 @@ class Float(object):
def __repr__(self): def __repr__(self):
return 'Float(' + repr(self.value) + ')' return 'Float(' + repr(self.value) + ')'
def _to_bytes(self):
if math.isnan(self.value) or math.isinf(self.value):
dbs = struct.pack('>d', self.value)
vd = struct.unpack('>Q', dbs)[0]
sign = vd >> 63
payload = (vd >> 29) & 0x007fffff
vf = (sign << 31) | 0x7f800000 | payload
return struct.pack('>I', vf)
else:
return struct.pack('>f', self.value)
def __preserve_write_binary__(self, encoder): def __preserve_write_binary__(self, encoder):
encoder.buffer.append(0x82) encoder.buffer.append(0x82)
encoder.buffer.extend(struct.pack('>f', self.value)) encoder.buffer.extend(self._to_bytes())
def __preserve_write_text__(self, formatter): def __preserve_write_text__(self, formatter):
formatter.chunks.append(repr(self.value) + 'f') if math.isnan(self.value) or math.isinf(self.value):
formatter.chunks.append('#xf"' + self._to_bytes().hex() + '"')
else:
formatter.chunks.append(repr(self.value) + 'f')
@staticmethod
def from_bytes(bs):
vf = struct.unpack('>I', bs)[0]
if (vf & 0x7f800000) == 0x7f800000:
# NaN or inf. Preserve quiet/signalling bit by manually expanding to double-precision.
sign = vf >> 31
payload = vf & 0x007fffff
dbs = struct.pack('>Q', (sign << 63) | 0x7ff0000000000000 | (payload << 29))
return Float(struct.unpack('>d', dbs)[0])
else:
return Float(struct.unpack('>f', bs)[0])
# FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. # FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
RAW_SYMBOL_RE = re.compile(r'^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$') RAW_SYMBOL_RE = re.compile(r'^[-a-zA-Z0-9~!$%^&*?_=+/.]+$')
class Symbol(object): class Symbol(object):
def __init__(self, name): def __init__(self, name):

View File

@ -74,9 +74,45 @@
dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }"> dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
dict4: @"Unexpected close brace" <ParseError "}"> dict4: @"Unexpected close brace" <ParseError "}">
dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84"> dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84">
double0: <Test #x"830000000000000000" 0.0>
double+0: <Test #x"830000000000000000" +0.0>
double-0: <Test #x"838000000000000000" -0.0>
double1: <Test #x"833ff0000000000000" 1.0> double1: <Test #x"833ff0000000000000" 1.0>
double2: <Test #x"83fe3cb7b759bf0426" -1.202e300> double2: <Test #x"83fe3cb7b759bf0426" -1.202e300>
double3: <Test #x"83123456789abcdef0" #xd"12 34 56 78 9a bc de f0">
double4: @"Fewer than 16 digits" <ParseError "#xd\"12345678\"">
double5: @"More than 16 digits" <ParseError "#xd\"123456789abcdef012\"">
double6: @"Invalid chars" <ParseError "#xd\"12zz56789abcdef0\"">
double7: @"Positive infinity" <Test #x"837ff0000000000000" #xd"7ff0000000000000">
double8: @"Negative infinity" <Test #x"83fff0000000000000" #xd"fff0000000000000">
double9: @"-sNaN" <Test #x"83fff0000000000001" #xd"fff0000000000001">
double10: @"-sNaN" <Test #x"83fff0000000000111" #xd"fff0000000000111">
double11: @"+sNaN" <Test #x"837ff0000000000001" #xd"7ff0000000000001">
double12: @"+sNaN" <Test #x"837ff0000000000111" #xd"7ff0000000000111">
double13: @"Bad spacing" <ParseError "#xd\"12345 6789abcdef0\"">
double14: @"-qNaN" <Test #x"83fff8000000000001" #xd"fff8000000000001">
double15: @"-qNaN" <Test #x"83fff8000000000111" #xd"fff8000000000111">
double16: @"+qNaN" <Test #x"837ff8000000000001" #xd"7ff8000000000001">
double17: @"+qNaN" <Test #x"837ff8000000000111" #xd"7ff8000000000111">
float0: <Test #x"8200000000" 0.0f>
float+0: <Test #x"8200000000" +0.0f>
float-0: <Test #x"8280000000" -0.0f>
float1: <Test #x"823f800000" 1.0f> float1: <Test #x"823f800000" 1.0f>
float2: <Test #x"8212345678" #xf"12 34 56 78">
float3: @"Fewer than 8 digits" <ParseError "#xf\"123456\"">
float4: @"More than 8 digits" <ParseError "#xf\"123456789a\"">
float5: @"Invalid chars" <ParseError "#xf\"12zz5678\"">
float6: @"Positive infinity" <Test #x"827f800000" #xf"7f800000">
float7: @"Negative infinity" <Test #x"82ff800000" #xf"ff800000">
float8: @"+sNaN" <Test #x"827f800001" #xf"7f800001">
float9: @"+sNaN" <Test #x"827f800111" #xf"7f800111">
float10: @"-sNaN" <Test #x"82ff800001" #xf"ff800001">
float11: @"-sNaN" <Test #x"82ff800111" #xf"ff800111">
float12: @"Bad spacing" <ParseError "#xf\"12345 678\"">
float13: @"+qNaN" <Test #x"827fc00001" #xf"7fc00001">
float14: @"+qNaN" <Test #x"827fc00111" #xf"7fc00111">
float15: @"-qNaN" <Test #x"82ffc00001" #xf"ffc00001">
float16: @"-qNaN" <Test #x"82ffc00111" #xf"ffc00111">
int-257: <Test #x"a1feff" -257> int-257: <Test #x"a1feff" -257>
int-256: <Test #x"a1ff00" -256> int-256: <Test #x"a1ff00" -256>
int-255: <Test #x"a1ff01" -255> int-255: <Test #x"a1ff01" -255>
@ -89,10 +125,13 @@
int-2: <Test #x"9e" -2> int-2: <Test #x"9e" -2>
int-1: <Test #x"9f" -1> int-1: <Test #x"9f" -1>
int0: <Test #x"90" 0> int0: <Test #x"90" 0>
int+0: <Test #x"90" +0>
int-0: <Test #x"90" -0>
int1: <Test #x"91" 1> int1: <Test #x"91" 1>
int12: <Test #x"9c" 12> int12: <Test #x"9c" 12>
int13: <Test #x"a00d" 13> int13: <Test #x"a00d" 13>
int127: <Test #x"a07f" 127> int127: <Test #x"a07f" 127>
int+127: <Test #x"a07f" +127>
int128: <Test #x"a10080" 128> int128: <Test #x"a10080" 128>
int255: <Test #x"a100ff" 255> int255: <Test #x"a100ff" 255>
int256: <Test #x"a10100" 256> int256: <Test #x"a10100" 256>
@ -112,6 +151,8 @@
list8: @"Missing close bracket" <ParseShort "["> list8: @"Missing close bracket" <ParseShort "[">
list9: @"Unexpected close bracket" <ParseError "]"> list9: @"Unexpected close bracket" <ParseError "]">
list10: @"Missing end byte" <DecodeShort #x"b58080"> list10: @"Missing end byte" <DecodeShort #x"b58080">
list11: <Test #x"b59184" [01]>
list12: <Test #x"b59c84" [12]>
noinput0: @"No input at all" <DecodeEOF #x""> noinput0: @"No input at all" <DecodeEOF #x"">
embed0: <Test #x"8690" #!0> embed0: <Test #x"8690" #!0>
embed1: <Test #x"868690" #!#!0> embed1: <Test #x"868690" #!#!0>
@ -138,17 +179,22 @@
string5: <Test #x"b104f09d849e" "\uD834\uDD1E"> string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
symbol0: <Test #x"b300" ||> symbol0: <Test #x"b300" ||>
symbol2: <Test #x"b30568656c6c6f" hello> symbol2: <Test #x"b30568656c6c6f" hello>
symbol3: <Test #x"b305312d322d33" 1-2-3>
symbol4: <Test #x"b305612d622d63" a-b-c>
symbol5: <Test #x"b305612b622b63" a+b+c>
symbol6: <Test #x"b3012b" +>
symbol7: <Test #x"b3032b2b2b" +++>
symbol8: <Test #x"b3012d" ->
symbol9: <Test #x"b3032d2d2d" --->
symbol10: <Test #x"b3022d61" -a>
symbol11: <Test #x"b3042d2d2d61" ---a>
symbol12: <Test #x"b3042d2d2d31" ---1>
symbol13: <Test #x"b3042b312e78" +1.x>
tag0: @"Unexpected end tag" <DecodeError #x"84"> tag0: @"Unexpected end tag" <DecodeError #x"84">
tag1: @"Invalid tag" <DecodeError #x"10"> tag1: @"Invalid tag" <DecodeError #x"10">
tag2: @"Invalid tag" <DecodeError #x"61b10110"> tag2: @"Invalid tag" <DecodeError #x"61b10110">
whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " "> whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " ">
whitespace1: @"No input at all" <ParseEOF ""> whitespace1: @"No input at all" <ParseEOF "">
value1: <Test #"\xB2\x06corymb" #=#"\xB2\x06corymb">
value2: <Test #"\x81" #=#"\x81">
value3: <Test #"\x81" #=#[gQ]>
value4: <Test #"\x81" #=#[gQ==]>
value5: <Test #"\x81" #= #[gQ==]>
value6: <Test #x"b591929384" #=#x"b591929384">
longlist14: <Test #x"b5808080808080808080808080808084" longlist14: <Test #x"b5808080808080808080808080808084"
[#f #f #f #f #f [#f #f #f #f #f

View File

@ -1,9 +1,9 @@
import unittest from utils import PreservesTestCase
from preserves import * from preserves import *
from preserves.compare import * from preserves.compare import *
class BasicCompareTests(unittest.TestCase): class BasicCompareTests(PreservesTestCase):
def test_eq_identity(self): def test_eq_identity(self):
self.assertTrue(eq(1, 1)) self.assertTrue(eq(1, 1))
self.assertFalse(eq(1, 1.0)) self.assertFalse(eq(1, 1.0))

View File

@ -1,30 +1,30 @@
import unittest from utils import PreservesTestCase
from preserves import * from preserves import *
from preserves.path import parse from preserves.path import parse
class BasicPathTests(unittest.TestCase): class BasicPathTests(PreservesTestCase):
def test_identity(self): def test_identity(self):
self.assertEqual(parse('').exec(1), (1,)) self.assertPreservesEqual(parse('').exec(1), (1,))
self.assertEqual(parse('').exec([]), ([],)) self.assertPreservesEqual(parse('').exec([]), ([],))
self.assertEqual(parse('').exec(Record(Symbol('hi'), [])), (Record(Symbol('hi'), []),)) self.assertPreservesEqual(parse('').exec(Record(Symbol('hi'), [])), (Record(Symbol('hi'), []),))
def test_children(self): def test_children(self):
self.assertEqual(parse('/').exec([1, 2, 3]), (1, 2, 3)) self.assertPreservesEqual(parse('/').exec([1, 2, 3]), (1, 2, 3))
self.assertEqual(parse('/').exec([1, [2], 3]), (1, [2], 3)) self.assertPreservesEqual(parse('/').exec([1, [2], 3]), (1, [2], 3))
self.assertEqual(parse('/').exec(Record(Symbol('hi'), [1, [2], 3])), (1, [2], 3)) self.assertPreservesEqual(parse('/').exec(Record(Symbol('hi'), [1, [2], 3])), (1, [2], 3))
def test_label(self): def test_label(self):
self.assertEqual(parse('.^').exec([1, 2, 3]), ()) self.assertPreservesEqual(parse('.^').exec([1, 2, 3]), ())
self.assertEqual(parse('.^').exec([1, [2], 3]), ()) self.assertPreservesEqual(parse('.^').exec([1, [2], 3]), ())
self.assertEqual(parse('.^').exec(Record(Symbol('hi'), [1, [2], 3])), (Symbol('hi'),)) self.assertPreservesEqual(parse('.^').exec(Record(Symbol('hi'), [1, [2], 3])), (Symbol('hi'),))
def test_count(self): def test_count(self):
self.assertEqual(parse('<count / ^ hi>').exec([ Record(Symbol('hi'), [1]), self.assertPreservesEqual(parse('<count / ^ hi>').exec([ Record(Symbol('hi'), [1]),
Record(Symbol('no'), [2]), Record(Symbol('no'), [2]),
Record(Symbol('hi'), [3]) ]), Record(Symbol('hi'), [3]) ]),
(2,)) (2,))
self.assertEqual(parse('/ <count ^ hi>').exec([ Record(Symbol('hi'), [1]), self.assertPreservesEqual(parse('/ <count ^ hi>').exec([ Record(Symbol('hi'), [1]),
Record(Symbol('no'), [2]), Record(Symbol('no'), [2]),
Record(Symbol('hi'), [3]) ]), Record(Symbol('hi'), [3]) ]),
(1, 0, 1)) (1, 0, 1))

View File

@ -1,11 +1,12 @@
import numbers import numbers
import os import os
import sys import sys
import unittest
# Make `preserves` available for imports # Make `preserves` available for imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from utils import PreservesTestCase
from preserves import * from preserves import *
from preserves.compat import basestring_, ord_ from preserves.compat import basestring_, ord_
from preserves.values import _unwrap from preserves.values import _unwrap
@ -49,33 +50,33 @@ def _e(v):
def _R(k, *args): def _R(k, *args):
return Record(Symbol(k), args) return Record(Symbol(k), args)
class BinaryCodecTests(unittest.TestCase): class BinaryCodecTests(PreservesTestCase):
def _roundtrip(self, forward, expected, back=None, nondeterministic=False): def _roundtrip(self, forward, expected, back=None, nondeterministic=False):
if back is None: back = forward if back is None: back = forward
self.assertEqual(_d(_e(forward)), back) self.assertPreservesEqual(_d(_e(forward)), back)
self.assertEqual(_d(_e(back)), back) self.assertPreservesEqual(_d(_e(back)), back)
self.assertEqual(_d(expected), back) self.assertPreservesEqual(_d(expected), back)
if not nondeterministic: if not nondeterministic:
actual = _e(forward) actual = _e(forward)
self.assertEqual(actual, expected, '%s != %s' % (_hex(actual), _hex(expected))) self.assertPreservesEqual(actual, expected, '%s != %s' % (_hex(actual), _hex(expected)))
def test_decode_varint(self): def test_decode_varint(self):
with self.assertRaises(DecodeError): with self.assertRaises(DecodeError):
Decoder(_buf()).varint() Decoder(_buf()).varint()
self.assertEqual(Decoder(_buf(0)).varint(), 0) self.assertPreservesEqual(Decoder(_buf(0)).varint(), 0)
self.assertEqual(Decoder(_buf(10)).varint(), 10) self.assertPreservesEqual(Decoder(_buf(10)).varint(), 10)
self.assertEqual(Decoder(_buf(100)).varint(), 100) self.assertPreservesEqual(Decoder(_buf(100)).varint(), 100)
self.assertEqual(Decoder(_buf(200, 1)).varint(), 200) self.assertPreservesEqual(Decoder(_buf(200, 1)).varint(), 200)
self.assertEqual(Decoder(_buf(0b10101100, 0b00000010)).varint(), 300) self.assertPreservesEqual(Decoder(_buf(0b10101100, 0b00000010)).varint(), 300)
self.assertEqual(Decoder(_buf(128, 148, 235, 220, 3)).varint(), 1000000000) self.assertPreservesEqual(Decoder(_buf(128, 148, 235, 220, 3)).varint(), 1000000000)
def test_encode_varint(self): def test_encode_varint(self):
self.assertEqual(_varint(0), _buf(0)) self.assertPreservesEqual(_varint(0), _buf(0))
self.assertEqual(_varint(10), _buf(10)) self.assertPreservesEqual(_varint(10), _buf(10))
self.assertEqual(_varint(100), _buf(100)) self.assertPreservesEqual(_varint(100), _buf(100))
self.assertEqual(_varint(200), _buf(200, 1)) self.assertPreservesEqual(_varint(200), _buf(200, 1))
self.assertEqual(_varint(300), _buf(0b10101100, 0b00000010)) self.assertPreservesEqual(_varint(300), _buf(0b10101100, 0b00000010))
self.assertEqual(_varint(1000000000), _buf(128, 148, 235, 220, 3)) self.assertPreservesEqual(_varint(1000000000), _buf(128, 148, 235, 220, 3))
def test_simple_seq(self): def test_simple_seq(self):
self._roundtrip([1,2,3,4], _buf(0xb5, 0x91, 0x92, 0x93, 0x94, 0x84), back=(1,2,3,4)) self._roundtrip([1,2,3,4], _buf(0xb5, 0x91, 0x92, 0x93, 0x94, 0x84), back=(1,2,3,4))
@ -157,7 +158,7 @@ class BinaryCodecTests(unittest.TestCase):
# python 3 # python 3
bs = _e(d.items()) bs = _e(d.items())
self.assertRegex(_hex(bs), r) self.assertRegex(_hex(bs), r)
self.assertEqual(sorted(_d(bs)), [(u'a', 1), (u'b', 2), (u'c', 3)]) self.assertPreservesEqual(sorted(_d(bs)), [(u'a', 1), (u'b', 2), (u'c', 3)])
def test_long_sequence(self): def test_long_sequence(self):
self._roundtrip((False,) * 14, _buf(0xb5, b'\x80' * 14, 0x84)) self._roundtrip((False,) * 14, _buf(0xb5, b'\x80' * 14, 0x84))
@ -172,9 +173,9 @@ class BinaryCodecTests(unittest.TestCase):
a1 = Embedded(A(1)) a1 = Embedded(A(1))
a2 = Embedded(A(1)) a2 = Embedded(A(1))
self.assertNotEqual(encode(a1, encode_embedded=id), encode(a2, encode_embedded=id)) self.assertNotEqual(encode(a1, encode_embedded=id), encode(a2, encode_embedded=id))
self.assertEqual(encode(a1, encode_embedded=id), encode(a1, encode_embedded=id)) self.assertPreservesEqual(encode(a1, encode_embedded=id), encode(a1, encode_embedded=id))
self.assertEqual(ord_(encode(a1, encode_embedded=id)[0]), 0x86) self.assertPreservesEqual(ord_(encode(a1, encode_embedded=id)[0]), 0x86)
self.assertEqual(ord_(encode(a2, encode_embedded=id)[0]), 0x86) self.assertPreservesEqual(ord_(encode(a2, encode_embedded=id)[0]), 0x86)
def test_decode_embedded_absent(self): def test_decode_embedded_absent(self):
with self.assertRaises(DecodeError): with self.assertRaises(DecodeError):
@ -185,15 +186,15 @@ class BinaryCodecTests(unittest.TestCase):
def enc(p): def enc(p):
objects.append(p) objects.append(p)
return len(objects) - 1 return len(objects) - 1
self.assertEqual(encode([Embedded(object()), Embedded(object())], encode_embedded = enc), self.assertPreservesEqual(encode([Embedded(object()), Embedded(object())], encode_embedded = enc),
b'\xb5\x86\x90\x86\x91\x84') b'\xb5\x86\x90\x86\x91\x84')
def test_decode_embedded(self): def test_decode_embedded(self):
objects = [123, 234] objects = [123, 234]
def dec(v): def dec(v):
return objects[v] return objects[v]
self.assertEqual(decode(b'\xb5\x86\x90\x86\x91\x84', decode_embedded = dec), self.assertPreservesEqual(decode(b'\xb5\x86\x90\x86\x91\x84', decode_embedded = dec),
(Embedded(123), Embedded(234))) (Embedded(123), Embedded(234)))
def load_binary_samples(): def load_binary_samples():
with open(os.path.join(os.path.dirname(__file__), 'samples.bin'), 'rb') as f: with open(os.path.join(os.path.dirname(__file__), 'samples.bin'), 'rb') as f:
@ -203,16 +204,16 @@ def load_text_samples():
with open(os.path.join(os.path.dirname(__file__), 'samples.pr'), 'rt') as f: with open(os.path.join(os.path.dirname(__file__), 'samples.pr'), 'rt') as f:
return Parser(f.read(), include_annotations=True, parse_embedded=lambda x: x).next() return Parser(f.read(), include_annotations=True, parse_embedded=lambda x: x).next()
class TextCodecTests(unittest.TestCase): class TextCodecTests(PreservesTestCase):
def test_samples_bin_eq_txt(self): def test_samples_bin_eq_txt(self):
b = load_binary_samples() b = load_binary_samples()
t = load_text_samples() t = load_text_samples()
self.assertEqual(b, t) self.assertPreservesEqual(b, t)
def test_txt_roundtrip(self): def test_txt_roundtrip(self):
b = load_binary_samples() b = load_binary_samples()
s = stringify(b, format_embedded=lambda x: x) s = stringify(b, format_embedded=lambda x: x)
self.assertEqual(parse(s, include_annotations=True, parse_embedded=lambda x: x), b) self.assertPreservesEqual(parse(s, include_annotations=True, parse_embedded=lambda x: x), b)
def add_method(d, tName, fn): def add_method(d, tName, fn):
if hasattr(fn, 'func_name'): if hasattr(fn, 'func_name'):
@ -254,14 +255,14 @@ def install_test(d, variant, tName, binaryForm, annotatedTextForm):
entry = get_expected_values(tName, textForm) entry = get_expected_values(tName, textForm)
forward = entry['forward'] forward = entry['forward']
back = entry['back'] back = entry['back']
def test_match_expected(self): self.assertEqual(textForm, back) def test_match_expected(self): self.assertPreservesEqual(textForm, back)
def test_roundtrip(self): self.assertEqual(self.DS(self.E(textForm)), back) def test_roundtrip(self): self.assertPreservesEqual(self.DS(self.E(textForm)), back)
def test_forward(self): self.assertEqual(self.DS(self.E(forward)), back) def test_forward(self): self.assertPreservesEqual(self.DS(self.E(forward)), back)
def test_back(self): self.assertEqual(self.DS(binaryForm), back) def test_back(self): self.assertPreservesEqual(self.DS(binaryForm), back)
def test_back_ann(self): self.assertEqual(self.D(self.E(annotatedTextForm)), annotatedTextForm) def test_back_ann(self): self.assertPreservesEqual(self.D(self.E(annotatedTextForm)), annotatedTextForm)
def test_encode(self): self.assertEqual(self.E(forward), binaryForm) def test_encode(self): self.assertPreservesEqual(self.E(forward), binaryForm)
def test_encode_canonical(self): self.assertEqual(self.EC(annotatedTextForm), binaryForm) def test_encode_canonical(self): self.assertPreservesEqual(self.EC(annotatedTextForm), binaryForm)
def test_encode_ann(self): self.assertEqual(self.E(annotatedTextForm), binaryForm) def test_encode_ann(self): self.assertPreservesEqual(self.E(annotatedTextForm), binaryForm)
add_method(d, tName, test_match_expected) add_method(d, tName, test_match_expected)
add_method(d, tName, test_roundtrip) add_method(d, tName, test_roundtrip)
add_method(d, tName, test_forward) add_method(d, tName, test_forward)
@ -284,7 +285,7 @@ def install_exn_test(d, tName, bs, check_proc):
self.fail('did not fail as expected') self.fail('did not fail as expected')
add_method(d, tName, test_exn) add_method(d, tName, test_exn)
class CommonTestSuite(unittest.TestCase): class CommonTestSuite(PreservesTestCase):
TestCases = Record.makeConstructor('TestCases', 'cases') TestCases = Record.makeConstructor('TestCases', 'cases')
samples = load_binary_samples() samples = load_binary_samples()
@ -325,7 +326,7 @@ class CommonTestSuite(unittest.TestCase):
def EC(self, v): def EC(self, v):
return encode(v, encode_embedded=lambda x: x, canonicalize=True) return encode(v, encode_embedded=lambda x: x, canonicalize=True)
class RecordTests(unittest.TestCase): class RecordTests(PreservesTestCase):
def test_getters(self): def test_getters(self):
T = Record.makeConstructor('t', 'x y z') T = Record.makeConstructor('t', 'x y z')
T2 = Record.makeConstructor('t', 'x y z') T2 = Record.makeConstructor('t', 'x y z')
@ -334,8 +335,8 @@ class RecordTests(unittest.TestCase):
self.assertTrue(T.isClassOf(t)) self.assertTrue(T.isClassOf(t))
self.assertTrue(T2.isClassOf(t)) self.assertTrue(T2.isClassOf(t))
self.assertFalse(U.isClassOf(t)) self.assertFalse(U.isClassOf(t))
self.assertEqual(T._x(t), 1) self.assertPreservesEqual(T._x(t), 1)
self.assertEqual(T2._y(t), 2) self.assertPreservesEqual(T2._y(t), 2)
self.assertEqual(T._z(t), 3) self.assertPreservesEqual(T._z(t), 3)
with self.assertRaises(TypeError): with self.assertRaises(TypeError):
U._x(t) U._x(t)

View File

@ -1,4 +1,4 @@
import unittest from utils import PreservesTestCase
from preserves import * from preserves import *
from preserves.schema import meta, Compiler from preserves.schema import meta, Compiler
@ -8,7 +8,7 @@ def literal_schema(modname, s):
c.load_schema((Symbol(modname),), preserve(s)) c.load_schema((Symbol(modname),), preserve(s))
return c.root return c.root
class BasicSchemaTests(unittest.TestCase): class BasicSchemaTests(PreservesTestCase):
def test_dictionary_literal(self): def test_dictionary_literal(self):
m = literal_schema( m = literal_schema(
's', 's',
@ -22,7 +22,7 @@ class BasicSchemaTests(unittest.TestCase):
}> }>
''')) '''))
self.assertEqual(m.s.C.decode({'core': Symbol('true')}), m.s.C()) self.assertEqual(m.s.C.decode({'core': Symbol('true')}), m.s.C())
self.assertEqual(preserve(m.s.C()), {'core': Symbol('true')}) self.assertPreservesEqual(preserve(m.s.C()), {'core': Symbol('true')})
def test_alternation_of_dictionary_literal(self): def test_alternation_of_dictionary_literal(self):
m = literal_schema( m = literal_schema(
@ -40,6 +40,6 @@ class BasicSchemaTests(unittest.TestCase):
}> }>
''')) '''))
self.assertEqual(m.s.C.decode({'core': Symbol('true')}), m.s.C.core()) self.assertEqual(m.s.C.decode({'core': Symbol('true')}), m.s.C.core())
self.assertEqual(preserve(m.s.C.core()), {'core': Symbol('true')}) self.assertPreservesEqual(preserve(m.s.C.core()), {'core': Symbol('true')})
self.assertEqual(m.s.C.decode({'notcore': Symbol('true')}), m.s.C.notcore()) self.assertEqual(m.s.C.decode({'notcore': Symbol('true')}), m.s.C.notcore())
self.assertEqual(preserve(m.s.C.notcore()), {'notcore': Symbol('true')}) self.assertPreservesEqual(preserve(m.s.C.notcore()), {'notcore': Symbol('true')})

View File

@ -0,0 +1,9 @@
import unittest
from preserves import cmp
class PreservesTestCase(unittest.TestCase):
    """Base test case adding an assertion that compares with Preserves ordering."""

    def assertPreservesEqual(self, a, b, msg=None):
        """Assert that `cmp(a, b)` reports the two values as equal (returns 0).

        `msg` overrides the failure message; when omitted, a default message
        naming both values is used.
        """
        failure_text = (
            msg
            if msg is not None
            else 'Expected %s to be Preserves-equal to %s' % (a, b)
        )
        self.assertTrue(cmp(a, b) == 0, failure_text)

View File

@ -0,0 +1,101 @@
#lang racket/base
;; Conversion between binary32 and binary64 big-endian external format (byte-vectors) and
;; internal double-precision floating-point numbers, with special attention paid to
;; preservation of the quiet/signaling bit of NaNs, which otherwise is frequently disturbed by
;; hardware-level conversion between single and double precision.
(provide bytes->float
float->bytes
bytes->double
double->bytes)
(require "float.rkt")
(require (only-in racket/math nan? infinite?))
(module binary racket/base
  ;; Bit-level predicates over big-endian IEEE 754 byte strings. Byte 0 is the most
  ;; significant byte; within a byte, bit index 0 is the least significant bit and bit
  ;; index 7 the most significant (as for `bitwise-bit-set?` / `bitwise-bit-field`).
  (provide (all-defined-out))

  ;; True when all 8 exponent bits of a binary32 are ones, i.e. the value is a NaN or an
  ;; infinity. The exponent occupies the low 7 bits of byte 0 and the top bit of byte 1.
  (define (binary32-nan-or-inf? bs)
    (and (= (bitwise-bit-field (bytes-ref bs 0) 0 7) #x7f)
         (bitwise-bit-set? (bytes-ref bs 1) 7)))

  ;; True when all 11 exponent bits of a binary64 are ones, i.e. the value is a NaN or an
  ;; infinity. The exponent occupies the low 7 bits of byte 0 and the top 4 bits of byte 1.
  (define (binary64-nan-or-inf? bs)
    (and (= (bitwise-bit-field (bytes-ref bs 0) 0 7) #x7f)
         (= (bitwise-bit-field (bytes-ref bs 1) 4 8) #x0f)))

  ;; True when the sign bit is set. The sign is the most significant bit of the first
  ;; byte, which is bit index 7 (index 0 would be the byte's least significant bit).
  (define (sign-bit-set? bs)
    (bitwise-bit-set? (bytes-ref bs 0) 7)))
(require (submod "." binary))
;; Decode the 4-byte big-endian binary32 encoding `bs` into a `float` wrapper.
;; NaN and infinity encodings are widened to binary64 by copying their bits directly
;; (instead of asking floating-point-bytes->real to read 4 bytes), so that the NaN
;; payload, including its top quiet/signaling bit, is carried over unchanged.
(define (bytes->float bs)
(if (binary32-nan-or-inf? bs)
;; vf: the 32 input bits as an unsigned big-endian integer.
(let* ((vf (integer-bytes->integer bs #f #t))
;; signexp: sign bit followed by the 8 exponent bits (bits 23..31).
(signexp (bitwise-bit-field vf 23 32))
;; payload: the 23 fraction bits (bits 0..22).
(payload (bitwise-bit-field vf 0 23))
;; vd: the 64-bit image. Sign+exponent go to bits 55..63; the constant sets bits
;; 52..54 so that all 11 binary64 exponent bits are ones; the 23-bit fraction is
;; shifted up to occupy the top of the 52-bit fraction field.
(vd (bitwise-ior (arithmetic-shift signexp 55)
#x0070000000000000
(arithmetic-shift payload 29)))
(dbs (integer->integer-bytes vd 8 #f #t)))
(float (floating-point-bytes->real dbs #t 0 8)))
;; Ordinary finite values: let the runtime read the 4 bytes directly.
(float (floating-point-bytes->real bs #t 0 4))))
;; Encode `v` as a 4-byte big-endian binary32 byte string.
;; `v` is unwrapped with `float-value` (presumably the accessor of the `float` struct
;; from float.rkt -- confirm there). NaNs and infinities are narrowed from their
;; binary64 bits by hand, so that the top bits of the NaN payload are copied verbatim
;; rather than going through the runtime's 8-to-4 byte conversion.
(define (float->bytes v)
(let ((v (float-value v)))
(if (or (nan? v) (infinite? v))
;; dbs/vd: the value's binary64 image, as bytes and as an unsigned integer.
(let* ((dbs (real->floating-point-bytes v 8 #t))
(vd (integer-bytes->integer dbs #f #t))
;; signexp: sign bit plus the top 8 of the 11 exponent bits (bits 55..63).
(signexp (bitwise-bit-field vd 55 64))
;; payload: the top 23 bits of the 52-bit fraction (bits 29..51); lower bits are dropped.
(payload (bitwise-bit-field vd 29 52))
;; vf: the 32-bit image, sign+exponent in bits 23..31 and fraction in bits 0..22.
(vf (bitwise-ior (arithmetic-shift signexp 23)
payload))
(bs (integer->integer-bytes vf 4 #f #t)))
bs)
;; Ordinary finite values: let the runtime produce the 4-byte encoding.
(real->floating-point-bytes v 4 #t))))
;; Decode the first 8 bytes of `bs` as a big-endian binary64 into a Racket real.
(define (bytes->double bs)
(floating-point-bytes->real bs #t 0 8))
;; Encode the real `v` as an 8-byte big-endian binary64 byte string.
(define (double->bytes v)
(real->floating-point-bytes v 8 #t))
;; Round-trip tests: each hex string is decoded to a number and re-encoded, and the
;; result must be the same hex string, bit for bit.
(module+ test
(require rackunit)
(require file/sha1)
;; Check that a 16-hex-digit binary64 encoding survives bytes->double / double->bytes.
(define (check-roundtrip-double hex)
(check-equal? (bytes->hex-string (double->bytes (bytes->double (hex-string->bytes hex))))
hex))
;; Check that an 8-hex-digit binary32 encoding survives bytes->float / float->bytes.
(define (check-roundtrip-float hex)
(check-equal? (bytes->hex-string (float->bytes (bytes->float (hex-string->bytes hex))))
hex))
;; binary64: an ordinary finite value first.
(check-roundtrip-double "0123456789abcdef")
;; binary64, all-ones exponent, top fraction bit clear: NaNs with payloads, then infinity
;; (zero fraction), for each sign.
(check-roundtrip-double "7ff0000000000321")
(check-roundtrip-double "7ff0000000000001")
(check-roundtrip-double "7ff0000000000000")
(check-roundtrip-double "fff0000000000321")
(check-roundtrip-double "fff0000000000001")
(check-roundtrip-double "fff0000000000000")
;; binary64, all-ones exponent, top fraction bit set: NaNs, for each sign.
(check-roundtrip-double "7ff8000000000321")
(check-roundtrip-double "7ff8000000000001")
(check-roundtrip-double "7ff8000000000000")
(check-roundtrip-double "fff8000000000321")
(check-roundtrip-double "fff8000000000001")
(check-roundtrip-double "fff8000000000000")
;; binary32: the same pattern of cases as above.
(check-roundtrip-float "01234567")
(check-roundtrip-float "7f800321")
(check-roundtrip-float "7f800001")
(check-roundtrip-float "7f800000")
(check-roundtrip-float "ff800321")
(check-roundtrip-float "ff800001")
(check-roundtrip-float "ff800000")
(check-roundtrip-float "7fc00321")
(check-roundtrip-float "7fc00001")
(check-roundtrip-float "7fc00000")
(check-roundtrip-float "ffc00321")
(check-roundtrip-float "ffc00001")
(check-roundtrip-float "ffc00000")
)

View File

@ -8,8 +8,8 @@
;;--------------------------------------------------------------------------- ;;---------------------------------------------------------------------------
;; Representing values ;; Representing values
(require "float.rkt" "float-bytes.rkt")
(struct record (label fields) #:transparent) (struct record (label fields) #:transparent)
(struct float (value) #:transparent) ;; a marker for single-precision I/O
(struct annotated (annotations item) #:transparent) (struct annotated (annotations item) #:transparent)
(struct embedded (value) #:transparent) (struct embedded (value) #:transparent)
@ -23,8 +23,8 @@
(match (next-byte) (match (next-byte)
[#x80 #f] [#x80 #f]
[#x81 #t] [#x81 #t]
[#x82 (float (floating-point-bytes->real (next-bytes 4) #t 0 4))] [#x82 (bytes->float (next-bytes 4))]
[#x83 (floating-point-bytes->real (next-bytes 8) #t 0 8)] [#x83 (bytes->double (next-bytes 8))]
[#x84 '#:end] [#x84 '#:end]
[#x85 (let ((a (next))) [#x85 (let ((a (next)))
(match (next) (match (next)
@ -80,8 +80,8 @@
(match v (match v
[#f (write-byte #x80 out-port)] [#f (write-byte #x80 out-port)]
[#t (write-byte #x81 out-port)] [#t (write-byte #x81 out-port)]
[(float v) (write-byte #x82 out-port) (output-bytes (real->floating-point-bytes v 4 #t))] [(float _) (write-byte #x82 out-port) (output-bytes (float->bytes v))]
[(? flonum?) (write-byte #x83 out-port) (output-bytes (real->floating-point-bytes v 8 #t))] [(? flonum?) (write-byte #x83 out-port) (output-bytes (double->bytes v))]
[(annotated as v) [(annotated as v)
(for [(a (in-list as))] (write-byte #x85 out-port) (output a)) (for [(a (in-list as))] (write-byte #x85 out-port) (output a))

View File

@ -7,6 +7,7 @@
(require "record.rkt") (require "record.rkt")
(require "embedded.rkt") (require "embedded.rkt")
(require "float.rkt") (require "float.rkt")
(require "float-bytes.rkt")
(require "annotation.rkt") (require "annotation.rkt")
(require "varint.rkt") (require "varint.rkt")
(require racket/set) (require racket/set)
@ -70,8 +71,8 @@
(match lead-byte (match lead-byte
[#x80 #f] [#x80 #f]
[#x81 #t] [#x81 #t]
[#x82 (float (floating-point-bytes->real (next-bytes 4) #t 0 4))] [#x82 (bytes->float (next-bytes 4))]
[#x83 (floating-point-bytes->real (next-bytes 8) #t 0 8)] [#x83 (bytes->double (next-bytes 8))]
[#x84 '#:end] [#x84 '#:end]
[#x85 (let ((a (next))) [#x85 (let ((a (next)))
(if read-annotations? (if read-annotations?

View File

@ -10,6 +10,7 @@
(require "read-binary.rkt") (require "read-binary.rkt")
(require "record.rkt") (require "record.rkt")
(require "float.rkt") (require "float.rkt")
(require "float-bytes.rkt")
(require syntax/readerr) (require syntax/readerr)
(require (only-in file/sha1 hex-string->bytes)) (require (only-in file/sha1 hex-string->bytes))
(require (only-in net/base64 base64-decode)) (require (only-in net/base64 base64-decode))
@ -67,8 +68,6 @@
(define (next*) (define (next*)
(skip-whitespace) (skip-whitespace)
(match (next-char) (match (next-char)
[#\- (read-intpart (list #\-) (next-char))]
[(and c (or #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) (read-intpart '() c)]
[#\" (read-string #\")] [#\" (read-string #\")]
[(== PIPE) (string->symbol (read-string PIPE))] [(== PIPE) (string->symbol (read-string PIPE))]
@ -82,21 +81,12 @@
[#\t #t] [#\t #t]
[#\{ (sequence-fold (set) set-add* values #\})] [#\{ (sequence-fold (set) set-add* values #\})]
[#\" (read-literal-binary)] [#\" (read-literal-binary)]
[#\x (if (eqv? (next-char) #\") [#\x (match (next-char)
(read-hex-binary '()) [#\" (read-hex-binary '())]
(parse-error "Expected open-quote at start of hex ByteString"))] [#\f (read-hex-float 'float)]
[#\d (read-hex-float 'double)]
[c (parse-error "Invalid #x syntax: ~v" c)])]
[#\[ (read-base64-binary '())] [#\[ (read-base64-binary '())]
[#\= (define bs (read-preserve/text in-port #:read-syntax? #t #:source source))
(when (not (bytes? (annotated-item bs)))
(parse-error "ByteString must follow #="))
(when (not (null? (annotated-annotations bs)))
(parse-error "Annotations not permitted after #="))
(bytes->preserve
(annotated-item bs)
(lambda (message . args)
(apply parse-error (string-append "Inline binary value: " message) args))
#:read-syntax? read-syntax?
#:on-short (lambda () (parse-error "Incomplete inline binary value")))]
[#\! (embedded (decode-embedded (next)))] [#\! (embedded (decode-embedded (next)))]
[c (parse-error "Invalid # syntax: ~v" c)])] [c (parse-error "Invalid # syntax: ~v" c)])]
@ -110,7 +100,7 @@
[#\] (parse-error "Unexpected ]")] [#\] (parse-error "Unexpected ]")]
[#\} (parse-error "Unexpected }")] [#\} (parse-error "Unexpected }")]
[c (read-raw-symbol (list c))])) [c (read-raw-symbol-or-number (list c))]))
(define (set-add* s e) (define (set-add* s e)
(when (set-member? s e) (parse-error "Duplicate set element: ~v" e)) (when (set-member? s e) (parse-error "Duplicate set element: ~v" e))
@ -159,49 +149,6 @@
(annotated '() loc v)))) (annotated '() loc v))))
(lambda (pos0 v) v))) (lambda (pos0 v) v)))
;;---------------------------------------------------------------------------
;; Numbers
(define (read-intpart acc-rev ch)
(match ch
[#\0 (read-fracexp (cons ch acc-rev))]
[_ (read-digit+ acc-rev read-fracexp ch)]))
(define (read-digit* acc-rev k)
(match (peek-char in-port)
[(? char? (? char-numeric?)) (read-digit* (cons (read-char in-port) acc-rev) k)]
[_ (k acc-rev)]))
(define (read-digit+ acc-rev k [ch (read-char in-port)])
(match ch
[(? char? (? char-numeric?)) (read-digit* (cons ch acc-rev) k)]
[_ (parse-error "Incomplete number")]))
(define (read-fracexp acc-rev)
(match (peek-char in-port)
[#\. (read-digit+ (cons (read-char in-port) acc-rev) read-exp)]
[_ (read-exp acc-rev)]))
(define (read-exp acc-rev)
(match (peek-char in-port)
[(or #\e #\E) (read-sign-and-exp (cons (read-char in-port) acc-rev))]
[_ (finish-number acc-rev)]))
(define (read-sign-and-exp acc-rev)
(match (peek-char in-port)
[(or #\+ #\-) (read-digit+ (cons (read-char in-port) acc-rev) finish-number)]
[_ (read-digit+ acc-rev finish-number)]))
(define (finish-number acc-rev)
(define s (list->string (reverse acc-rev)))
(define n (string->number s 10))
(when (not n) (parse-error "Invalid number: ~v" s))
(if (flonum? n)
(match (peek-char in-port)
[(or #\f #\F) (read-char in-port) (float n)]
[_ n])
n))
;;--------------------------------------------------------------------------- ;;---------------------------------------------------------------------------
;; String-like things ;; String-like things
@ -279,6 +226,19 @@
[else [else
(parse-error "Invalid hex character")])) (parse-error "Invalid hex character")]))
;;---------------------------------------------------------------------------
;; Hex-encoded floating point numbers
;; Read the remainder of a hex-encoded floating-point literal, after its `#xf` or `#xd`
;; prefix has been consumed. `precision` is 'float (4 bytes expected) or 'double (8 bytes
;; expected). Signals a parse error if the next character is not a double quote or if the
;; hex body does not decode to exactly the expected number of bytes.
(define (read-hex-float precision)
(unless (eqv? (next-char) #\")
(parse-error "Missing open-double-quote in hex-encoded floating-point number"))
;; Reuse the hex ByteString reader for the quoted body.
(define bs (read-hex-binary '()))
(unless (= (bytes-length bs) (match precision ['float 4] ['double 8]))
(parse-error "Incorrect number of bytes in hex-encoded floating-point number"))
(match precision
['float (bytes->float bs)]
['double (bytes->double bs)]))
;;--------------------------------------------------------------------------- ;;---------------------------------------------------------------------------
;; Base64-encoded ByteStrings ;; Base64-encoded ByteStrings
@ -334,16 +294,56 @@
#\})) #\}))
;;--------------------------------------------------------------------------- ;;---------------------------------------------------------------------------
;; "Raw" symbols ;; "Raw" symbols and numbers
(define (read-raw-symbol acc) (define (read-raw-symbol-or-number acc)
(match (peek-char in-port) (match (peek-char in-port)
[(or (? eof-object?) [(or (? eof-object?)
(? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\> (? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\>
#\" #\; #\, #\@ #\# #\: (== PIPE) #\" #\; #\, #\@ #\# #\: (== PIPE)
(? char-whitespace?)))) (? char-whitespace?))))
(string->symbol (list->string (reverse acc)))] (let ((input (reverse acc)))
[_ (read-raw-symbol (cons (read-char in-port) acc))])) (or (analyze-number input)
(string->symbol (list->string input))))]
[_ (read-raw-symbol-or-number (cons (read-char in-port) acc))]))
;; Try to interpret `input`, a list of characters forming one complete bare token, as a
;; number. Returns the number (or a `float` wrapper), or #f if the token is not a number.
;; An optional leading `+` or `-` is accepted, followed by at least one digit.
(define (analyze-number input)
(match input
[(cons (and sign (or #\+ #\-)) input) (read-digit+ (list sign) read-fracexp input)]
[_ (read-digit+ (list) read-fracexp input)]))
;; Consume zero or more `char-numeric?` characters from the front of `input`, pushing
;; them onto `acc-rev` (the characters accepted so far, in reverse order), then call the
;; continuation `k` with the accumulator and the remaining input.
(define (read-digit* acc-rev k input)
(match input
[(cons (? char? (? char-numeric? d)) input) (read-digit* (cons d acc-rev) k input)]
[_ (k acc-rev input)]))
;; Like read-digit*, but requires at least one digit: returns #f (not a number) when
;; `input` does not start with a `char-numeric?` character.
(define (read-digit+ acc-rev k input)
(match input
[(cons (? char? (? char-numeric? d)) input) (read-digit* (cons d acc-rev) k input)]
[_ #f]))
;; After the integer part: accept an optional fraction (`.` followed by one or more
;; digits), then continue with the optional exponent.
(define (read-fracexp acc-rev input)
(match input
[(cons #\. input) (read-digit+ (cons #\. acc-rev) read-exp input)]
[_ (read-exp acc-rev input)]))
;; Accept an optional exponent introduced by `e` or `E`; with no exponent marker, go
;; straight to finishing the number.
(define (read-exp acc-rev input)
(match input
[(cons (and e (or #\e #\E)) input) (read-sign-and-exp (cons e acc-rev) input)]
[_ (finish-number acc-rev input)]))
;; After the exponent marker: accept an optional `+` or `-`, then require one or more
;; exponent digits before finishing the number.
(define (read-sign-and-exp acc-rev input)
(match input
[(cons (and sign (or #\+ #\-)) input) (read-digit+ (cons sign acc-rev) finish-number input)]
[_ (read-digit+ acc-rev finish-number input)]))
;; Convert the accepted characters (`acc-rev`, reversed) to a number with string->number
;; in radix 10, given the unconsumed remainder `input` of the token.
;; Result:
;;  - #f if string->number rejects the text;
;;  - a `float` wrapper if the number is a flonum and the remainder is exactly `f` or `F`;
;;  - the number itself if nothing remains;
;;  - #f otherwise (trailing characters mean the token is not a number).
(define (finish-number acc-rev input)
(define s (list->string (reverse acc-rev)))
(define n (string->number s 10))
(cond [(not n) #f]
[(and (flonum? n) (member input '((#\f) (#\F)))) (float n)]
[(equal? input '()) n]
[else #f]))
;;--------------------------------------------------------------------------- ;;---------------------------------------------------------------------------
;; Main entry point to parser ;; Main entry point to parser

View File

@ -74,9 +74,45 @@
dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }"> dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
dict4: @"Unexpected close brace" <ParseError "}"> dict4: @"Unexpected close brace" <ParseError "}">
dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84"> dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84">
double0: <Test #x"830000000000000000" 0.0>
double+0: <Test #x"830000000000000000" +0.0>
double-0: <Test #x"838000000000000000" -0.0>
double1: <Test #x"833ff0000000000000" 1.0> double1: <Test #x"833ff0000000000000" 1.0>
double2: <Test #x"83fe3cb7b759bf0426" -1.202e300> double2: <Test #x"83fe3cb7b759bf0426" -1.202e300>
double3: <Test #x"83123456789abcdef0" #xd"12 34 56 78 9a bc de f0">
double4: @"Fewer than 16 digits" <ParseError "#xd\"12345678\"">
double5: @"More than 16 digits" <ParseError "#xd\"123456789abcdef012\"">
double6: @"Invalid chars" <ParseError "#xd\"12zz56789abcdef0\"">
double7: @"Positive infinity" <Test #x"837ff0000000000000" #xd"7ff0000000000000">
double8: @"Negative infinity" <Test #x"83fff0000000000000" #xd"fff0000000000000">
double9: @"-sNaN" <Test #x"83fff0000000000001" #xd"fff0000000000001">
double10: @"-sNaN" <Test #x"83fff0000000000111" #xd"fff0000000000111">
double11: @"+sNaN" <Test #x"837ff0000000000001" #xd"7ff0000000000001">
double12: @"+sNaN" <Test #x"837ff0000000000111" #xd"7ff0000000000111">
double13: @"Bad spacing" <ParseError "#xd\"12345 6789abcdef0\"">
double14: @"-qNaN" <Test #x"83fff8000000000001" #xd"fff8000000000001">
double15: @"-qNaN" <Test #x"83fff8000000000111" #xd"fff8000000000111">
double16: @"+qNaN" <Test #x"837ff8000000000001" #xd"7ff8000000000001">
double17: @"+qNaN" <Test #x"837ff8000000000111" #xd"7ff8000000000111">
float0: <Test #x"8200000000" 0.0f>
float+0: <Test #x"8200000000" +0.0f>
float-0: <Test #x"8280000000" -0.0f>
float1: <Test #x"823f800000" 1.0f> float1: <Test #x"823f800000" 1.0f>
float2: <Test #x"8212345678" #xf"12 34 56 78">
float3: @"Fewer than 8 digits" <ParseError "#xf\"123456\"">
float4: @"More than 8 digits" <ParseError "#xf\"123456789a\"">
float5: @"Invalid chars" <ParseError "#xf\"12zz5678\"">
float6: @"Positive infinity" <Test #x"827f800000" #xf"7f800000">
float7: @"Negative infinity" <Test #x"82ff800000" #xf"ff800000">
float8: @"+sNaN" <Test #x"827f800001" #xf"7f800001">
float9: @"+sNaN" <Test #x"827f800111" #xf"7f800111">
float10: @"-sNaN" <Test #x"82ff800001" #xf"ff800001">
float11: @"-sNaN" <Test #x"82ff800111" #xf"ff800111">
float12: @"Bad spacing" <ParseError "#xf\"12345 678\"">
float13: @"+qNaN" <Test #x"827fc00001" #xf"7fc00001">
float14: @"+qNaN" <Test #x"827fc00111" #xf"7fc00111">
float15: @"-qNaN" <Test #x"82ffc00001" #xf"ffc00001">
float16: @"-qNaN" <Test #x"82ffc00111" #xf"ffc00111">
int-257: <Test #x"a1feff" -257> int-257: <Test #x"a1feff" -257>
int-256: <Test #x"a1ff00" -256> int-256: <Test #x"a1ff00" -256>
int-255: <Test #x"a1ff01" -255> int-255: <Test #x"a1ff01" -255>
@ -89,10 +125,13 @@
int-2: <Test #x"9e" -2> int-2: <Test #x"9e" -2>
int-1: <Test #x"9f" -1> int-1: <Test #x"9f" -1>
int0: <Test #x"90" 0> int0: <Test #x"90" 0>
int+0: <Test #x"90" +0>
int-0: <Test #x"90" -0>
int1: <Test #x"91" 1> int1: <Test #x"91" 1>
int12: <Test #x"9c" 12> int12: <Test #x"9c" 12>
int13: <Test #x"a00d" 13> int13: <Test #x"a00d" 13>
int127: <Test #x"a07f" 127> int127: <Test #x"a07f" 127>
int+127: <Test #x"a07f" +127>
int128: <Test #x"a10080" 128> int128: <Test #x"a10080" 128>
int255: <Test #x"a100ff" 255> int255: <Test #x"a100ff" 255>
int256: <Test #x"a10100" 256> int256: <Test #x"a10100" 256>
@ -112,6 +151,8 @@
list8: @"Missing close bracket" <ParseShort "["> list8: @"Missing close bracket" <ParseShort "[">
list9: @"Unexpected close bracket" <ParseError "]"> list9: @"Unexpected close bracket" <ParseError "]">
list10: @"Missing end byte" <DecodeShort #x"b58080"> list10: @"Missing end byte" <DecodeShort #x"b58080">
list11: <Test #x"b59184" [01]>
list12: <Test #x"b59c84" [12]>
noinput0: @"No input at all" <DecodeEOF #x""> noinput0: @"No input at all" <DecodeEOF #x"">
embed0: <Test #x"8690" #!0> embed0: <Test #x"8690" #!0>
embed1: <Test #x"868690" #!#!0> embed1: <Test #x"868690" #!#!0>
@ -138,17 +179,22 @@
string5: <Test #x"b104f09d849e" "\uD834\uDD1E"> string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
symbol0: <Test #x"b300" ||> symbol0: <Test #x"b300" ||>
symbol2: <Test #x"b30568656c6c6f" hello> symbol2: <Test #x"b30568656c6c6f" hello>
symbol3: <Test #x"b305312d322d33" 1-2-3>
symbol4: <Test #x"b305612d622d63" a-b-c>
symbol5: <Test #x"b305612b622b63" a+b+c>
symbol6: <Test #x"b3012b" +>
symbol7: <Test #x"b3032b2b2b" +++>
symbol8: <Test #x"b3012d" ->
symbol9: <Test #x"b3032d2d2d" --->
symbol10: <Test #x"b3022d61" -a>
symbol11: <Test #x"b3042d2d2d61" ---a>
symbol12: <Test #x"b3042d2d2d31" ---1>
symbol13: <Test #x"b3042b312e78" +1.x>
tag0: @"Unexpected end tag" <DecodeError #x"84"> tag0: @"Unexpected end tag" <DecodeError #x"84">
tag1: @"Invalid tag" <DecodeError #x"10"> tag1: @"Invalid tag" <DecodeError #x"10">
tag2: @"Invalid tag" <DecodeError #x"61b10110"> tag2: @"Invalid tag" <DecodeError #x"61b10110">
whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " "> whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " ">
whitespace1: @"No input at all" <ParseEOF ""> whitespace1: @"No input at all" <ParseEOF "">
value1: <Test #"\xB2\x06corymb" #=#"\xB2\x06corymb">
value2: <Test #"\x81" #=#"\x81">
value3: <Test #"\x81" #=#[gQ]>
value4: <Test #"\x81" #=#[gQ==]>
value5: <Test #"\x81" #= #[gQ==]>
value6: <Test #x"b591929384" #=#x"b591929384">
longlist14: <Test #x"b5808080808080808080808080808084" longlist14: <Test #x"b5808080808080808080808080808084"
[#f #f #f #f #f [#f #f #f #f #f

View File

@ -8,6 +8,7 @@
(require "record.rkt") (require "record.rkt")
(require "embedded.rkt") (require "embedded.rkt")
(require "float.rkt") (require "float.rkt")
(require "float-bytes.rkt")
(require "annotation.rkt") (require "annotation.rkt")
(require "varint.rkt") (require "varint.rkt")
(require "object-id.rkt") (require "object-id.rkt")
@ -86,12 +87,12 @@
[#f (output-byte #x80)] [#f (output-byte #x80)]
[#t (output-byte #x81)] [#t (output-byte #x81)]
[(float v) [(float _)
(output-byte #x82) (output-byte #x82)
(output-bytes (real->floating-point-bytes v 4 #t))] (output-bytes (float->bytes v))]
[(? flonum?) [(? flonum?)
(output-byte #x83) (output-byte #x83)
(output-bytes (real->floating-point-bytes v 8 #t))] (output-bytes (double->bytes v))]
[(annotated as _ v) [(annotated as _ v)
(when write-annotations? (when write-annotations?

View File

@ -12,11 +12,14 @@
(require "embedded.rkt") (require "embedded.rkt")
(require "annotation.rkt") (require "annotation.rkt")
(require "float.rkt") (require "float.rkt")
(require "float-bytes.rkt")
(require "record.rkt") (require "record.rkt")
(require "object-id.rkt") (require "object-id.rkt")
(require racket/dict) (require racket/dict)
(require racket/set) (require racket/set)
(require (only-in racket/port with-output-to-string)) (require (only-in racket/port with-output-to-string))
(require (only-in racket/math nan? infinite?))
(require (only-in file/sha1 bytes->hex-string))
(define PIPE #\|) (define PIPE #\|)
@ -132,6 +135,15 @@
(write-binary-stringlike v) (write-binary-stringlike v)
(write-binary-base64 outer-distance v))))) (write-binary-base64 outer-distance v)))))
;; write-float : emit the text syntax for a floating-point value.
;;   v         - the numeric value (for 'float, the number inside the wrapper)
;;   precision - 'float or 'double
;; NaNs and infinities are written as raw hex, `#xf"..."` for 'float and
;; `#xd"..."` for 'double; every other value is written in decimal, with a
;; trailing `f` for 'float.
;; NOTE(review): `!` is defined outside this excerpt; presumably it is the
;; writer's format-and-emit helper -- confirm.
(define (write-float v precision)
(if (or (nan? v) (infinite? v))
(! "#x~a\"~a\""
(match precision ['float "f"] ['double "d"])
;; Hex digits of the big-endian encoding; for 'float the value is
;; re-wrapped with `float` before being passed to `float->bytes`.
(bytes->hex-string (match precision
['float (float->bytes (float v))]
['double (double->bytes v)])))
(! "~v~a" v (match precision ['float "f"] ['double ""]))))
(define (write-value distance v) (define (write-value distance v)
(match v (match v
[(annotated annotations _ item) [(annotated annotations _ item)
@ -143,8 +155,8 @@
(write-value distance item)] (write-value distance item)]
[#f (! "#f")] [#f (! "#f")]
[#t (! "#t")] [#t (! "#t")]
[(float v) (! "~vf" v)] [(float v) (write-float v 'float)]
[(? flonum?) (! "~v" v)] [(? flonum?) (write-float v 'double)]
[(? integer? x) (! "~v" v)] [(? integer? x) (! "~v" v)]
[(? string?) [(? string?)
(! "\"") (! "\"")

View File

@ -15,6 +15,7 @@ gitlab = { repository = "preserves/preserves" }
base64 = "0.13" base64 = "0.13"
dtoa = "0.4" dtoa = "0.4"
num = "0.4" num = "0.4"
lazy_static = "1.4.0"
regex = "1.5" regex = "1.5"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_bytes = "0.11" serde_bytes = "0.11"

View File

@ -26,8 +26,11 @@ use crate::value::reader::BinarySource;
use crate::value::reader::ReaderResult; use crate::value::reader::ReaderResult;
use crate::value::repr::Annotations; use crate::value::repr::Annotations;
use lazy_static::lazy_static;
use num::bigint::BigInt; use num::bigint::BigInt;
use std::convert::TryInto;
use std::io; use std::io;
use std::iter::FromIterator; use std::iter::FromIterator;
use std::marker::PhantomData; use std::marker::PhantomData;
@ -137,86 +140,21 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
} }
} }
fn read_intpart<N: NestedValue>(&mut self, mut bs: Vec<u8>, c: u8) -> io::Result<N> { fn read_hex_float<N: NestedValue>(&mut self, bytecount: usize) -> io::Result<N> {
match c { if self.next_byte()? != b'"' {
b'0' => { return Err(io_syntax_error("Missing open-double-quote in hex-encoded floating-point number"));
bs.push(c);
self.read_fracexp(bs)
}
_ => {
self.read_digit1(&mut bs, c)?;
self.read_fracexp(bs)
}
} }
} let bs = self.read_hex_binary()?;
if bs.len() != bytecount {
fn read_fracexp<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> { return Err(io_syntax_error("Incorrect number of bytes in hex-encoded floating-point number"));
let mut is_float = false;
match self.peek() {
Ok(b'.') => {
is_float = true;
bs.push(self.next_byte()?);
let c = self.next_byte()?;
self.read_digit1(&mut bs, c)?;
}
_ => ()
} }
match self.peek() { match bytecount {
Ok(b'e') | Ok(b'E') => { 4 => Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()),
bs.push(self.next_byte()?); 8 => Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()),
self.read_sign_and_exp(bs) _ => Err(io_syntax_error("Unsupported byte count in hex-encoded floating-point number")),
}
_ => self.finish_number(bs, is_float)
} }
} }
fn read_sign_and_exp<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
match self.peek()? {
b'+' | b'-' => bs.push(self.next_byte()?),
_ => (),
}
let c = self.next_byte()?;
self.read_digit1(&mut bs, c)?;
self.finish_number(bs, true)
}
fn finish_number<N: NestedValue>(&mut self, bs: Vec<u8>, is_float: bool) -> io::Result<N> {
let s = decode_utf8(bs)?;
if is_float {
match self.peek() {
Ok(b'f') | Ok(b'F') => {
self.skip()?;
Ok(N::new(s.parse::<f32>().map_err(
|_| io_syntax_error(&format!(
"Invalid single-precision number: {:?}", s)))?))
}
_ =>
Ok(N::new(s.parse::<f64>().map_err(
|_| io_syntax_error(&format!(
"Invalid double-precision number: {:?}", s)))?))
}
} else {
Ok(N::new(s.parse::<BigInt>().map_err(
|_| io_syntax_error(&format!(
"Invalid signed-integer number: {:?}", s)))?))
}
}
fn read_digit1(&mut self, bs: &mut Vec<u8>, c: u8) -> io::Result<()>
{
if !(c as char).is_digit(10) {
return Err(io_syntax_error("Incomplete number"));
}
bs.push(c);
while let Ok(c) = self.peek() {
if !(c as char).is_digit(10) {
break;
}
bs.push(self.next_byte()?);
}
Ok(())
}
fn read_stringlike<X, H, R>( fn read_stringlike<X, H, R>(
&mut self, &mut self,
mut seed: R, mut seed: R,
@ -299,14 +237,13 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
|bs, r| Ok(bs.push(r.hexnum(2)? as u8)))?[..])) |bs, r| Ok(bs.push(r.hexnum(2)? as u8)))?[..]))
} }
fn read_hex_binary<N: NestedValue>(&mut self) -> io::Result<N> { fn read_hex_binary(&mut self) -> io::Result<Vec<u8>> {
let mut s = String::new(); let mut s = String::new();
loop { loop {
self.skip_whitespace(); self.skip_whitespace();
let c1 = self.next_byte()? as char; let c1 = self.next_byte()? as char;
if c1 == '"' { if c1 == '"' {
let bs = hex::HexParser::Strict.decode(&s).unwrap(); return Ok(hex::HexParser::Strict.decode(&s).unwrap());
return Ok(N::new(&bs[..]));
} }
let c2 = self.next_byte()? as char; let c2 = self.next_byte()? as char;
if !(c1.is_digit(16) && c2.is_digit(16)) { if !(c1.is_digit(16) && c2.is_digit(16)) {
@ -364,7 +301,11 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
} }
} }
fn read_raw_symbol<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> { fn read_raw_symbol_or_number<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
lazy_static! {
static ref NUMBER_RE: regex::Regex = regex::Regex::new(
r"^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$").unwrap();
}
loop { loop {
let c = match self.peek() { let c = match self.peek() {
Err(e) if is_eof_io_error(&e) => b' ', Err(e) if is_eof_io_error(&e) => b' ',
@ -374,8 +315,33 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
}; };
match c { match c {
b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' |
b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => {
return Ok(N::symbol(&decode_utf8(bs)?)), let s = decode_utf8(bs)?;
return match NUMBER_RE.captures(&s) {
None => Ok(N::symbol(&s)),
Some(m) => match m.get(2) {
None => Ok(N::new(s.parse::<BigInt>().map_err(
|_| io_syntax_error(&format!(
"Invalid signed-integer number: {:?}", s)))?)),
Some(_) => {
if let Some(maybe_f) = m.get(7) {
let s = m[1].to_owned() + &m[3];
if maybe_f.range().is_empty() {
Ok(N::new(s.parse::<f64>().map_err(
|_| io_syntax_error(&format!(
"Invalid double-precision number: {:?}", s)))?))
} else {
Ok(N::new(s.parse::<f32>().map_err(
|_| io_syntax_error(&format!(
"Invalid single-precision number: {:?}", s)))?))
}
} else {
panic!("Internal error: cannot analyze number {:?}", s)
}
}
}
}
}
c => { c => {
self.skip()?; self.skip()?;
bs.push(c) bs.push(c)
@ -396,15 +362,6 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
Err(e) => return Err(e.into()), Err(e) => return Err(e.into()),
}; };
Ok(Some(match c { Ok(Some(match c {
b'-' => {
self.skip()?;
let c1 = self.next_byte()?;
self.read_intpart(vec![b'-'], c1)?
}
b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => {
self.skip()?;
self.read_intpart(Vec::new(), c)?
}
b'"' => { b'"' => {
self.skip()?; self.skip()?;
N::new(self.read_string(b'"')?) N::new(self.read_string(b'"')?)
@ -435,26 +392,13 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
b't' => N::new(true), b't' => N::new(true),
b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations)?.into_iter())), b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations)?.into_iter())),
b'"' => self.read_literal_binary()?, b'"' => self.read_literal_binary()?,
b'x' => if self.next_byte()? == b'"' { b'x' => match self.next_byte()? {
self.read_hex_binary()? b'"' => N::new(&self.read_hex_binary()?[..]),
} else { b'f' => self.read_hex_float(4)?,
return Err(io_syntax_error("Expected open-quote at start of hex ByteString")); b'd' => self.read_hex_float(8)?,
_ => return Err(io_syntax_error("Invalid #x syntax")),
}, },
b'[' => self.read_base64_binary()?, b'[' => self.read_base64_binary()?,
b'=' => {
let bs_val: N = self.demand_next(true)?;
if bs_val.annotations().slice().len() > 0 {
return Err(io_syntax_error("Annotations not permitted after #="));
}
match bs_val.value().as_bytestring() {
None =>
return Err(io_syntax_error("ByteString must follow #=")),
Some(bs) =>
crate::value::BytesBinarySource::new(bs)
.packed(ViaCodec::new(&mut self.dec))
.demand_next(read_annotations)?
}
}
b'!' => { b'!' => {
let v = self.next_iovalue(read_annotations)?; let v = self.next_iovalue(read_annotations)?;
Value::Embedded(self.dec.parse_embedded(&v)?).wrap() Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
@ -483,7 +427,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
b'}' => return Err(io_syntax_error("Unexpected }")), b'}' => return Err(io_syntax_error("Unexpected }")),
other => { other => {
self.skip()?; self.skip()?;
self.read_raw_symbol(vec![other])? self.read_raw_symbol_or_number(vec![other])?
} }
})) }))
} }

View File

@ -1,3 +1,4 @@
use crate::hex::HexFormatter;
use crate::value::DomainEncode; use crate::value::DomainEncode;
use crate::value::IOValue; use crate::value::IOValue;
use crate::value::IOValueDomainCodec; use crate::value::IOValueDomainCodec;
@ -6,6 +7,8 @@ use crate::value::Writer;
use crate::value::suspendable::Suspendable; use crate::value::suspendable::Suspendable;
use crate::value::writer::CompoundWriter; use crate::value::writer::CompoundWriter;
use lazy_static::lazy_static;
use num::bigint::BigInt; use num::bigint::BigInt;
use std::io; use std::io;
@ -231,13 +234,23 @@ impl<W: io::Write> Writer for TextWriter<W> {
} }
fn write_f32(&mut self, v: f32) -> io::Result<()> { fn write_f32(&mut self, v: f32) -> io::Result<()> {
dtoa::write(&mut *self.w, v)?; if v.is_nan() || v.is_infinite() {
write!(self.w, "f") write!(self.w, "#xf\"{}\"",
HexFormatter::Packed.encode(&u32::to_be_bytes(f32::to_bits(v))))
} else {
dtoa::write(&mut *self.w, v)?;
write!(self.w, "f")
}
} }
fn write_f64(&mut self, v: f64) -> io::Result<()> { fn write_f64(&mut self, v: f64) -> io::Result<()> {
dtoa::write(&mut *self.w, v)?; if v.is_nan() || v.is_infinite() {
Ok(()) write!(self.w, "#xd\"{}\"",
HexFormatter::Packed.encode(&u64::to_be_bytes(f64::to_bits(v))))
} else {
dtoa::write(&mut *self.w, v)?;
Ok(())
}
} }
simple_writer_method!(write_i8, i8); simple_writer_method!(write_i8, i8);
@ -269,9 +282,12 @@ impl<W: io::Write> Writer for TextWriter<W> {
} }
fn write_symbol(&mut self, v: &str) -> io::Result<()> { fn write_symbol(&mut self, v: &str) -> io::Result<()> {
// FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. lazy_static! {
let re = regex::Regex::new("^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$").unwrap(); // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
if re.is_match(v) { static ref RE: regex::Regex =
regex::Regex::new("^[-a-zA-Z0-9~!$%^&*?_=+/.]+$").unwrap();
}
if RE.is_match(v) {
write!(self.w, "{}", v) write!(self.w, "{}", v)
} else { } else {
write!(self.w, "|")?; write!(self.w, "|")?;

View File

@ -40,10 +40,10 @@ Standalone documents may have trailing whitespace.
Any `Value` may be preceded by whitespace. Any `Value` may be preceded by whitespace.
Value = ws (Record / Collection / Atom / Embedded / Machine) Value = ws (Record / Collection / Atom / Embedded)
Collection = Sequence / Dictionary / Set Collection = Sequence / Dictionary / Set
Atom = Boolean / Float / Double / SignedInteger / Atom = Boolean / String / ByteString /
String / ByteString / Symbol QuotedSymbol / SymbolOrNumber
Each `Record` is an angle-bracket enclosed grouping of its Each `Record` is an angle-bracket enclosed grouping of its
label-`Value` followed by its field-`Value`s. label-`Value` followed by its field-`Value`s.
@ -73,55 +73,6 @@ false, respectively.
Boolean = %s"#t" / %s"#f" Boolean = %s"#t" / %s"#f"
Numeric data follow the
[JSON grammar](https://tools.ietf.org/html/rfc8259#section-6), with
the addition of a trailing “f” distinguishing `Float` from `Double`
values. `Float`s and `Double`s always have either a fractional part or
an exponent part, where `SignedInteger`s never have
either.[^reading-and-writing-floats-accurately]
[^arbitrary-precision-signedinteger]
Float = flt %i"f"
Double = flt
SignedInteger = int
digit1-9 = %x31-39
nat = %x30 / ( digit1-9 *DIGIT )
int = ["-"] nat
frac = "." 1*DIGIT
exp = %i"e" ["-"/"+"] 1*DIGIT
flt = int (frac exp / frac / exp)
[^reading-and-writing-floats-accurately]: **Implementation note.**
Your language's standard library likely has a good routine for
converting between decimal notation and IEEE 754 floating-point.
However, if not, or if you are interested in the challenges of
accurately reading and writing floating point numbers, see the
excellent matched pair of 1990 papers by Clinger and Steele &
White, and a recent follow-up by Jaffer:
Clinger, William D. How to Read Floating Point Numbers
Accurately. In Proc. PLDI. White Plains, New York, 1990.
<https://doi.org/10.1145/93542.93557>.
Steele, Guy L., Jr., and Jon L. White. How to Print
Floating-Point Numbers Accurately. In Proc. PLDI. White Plains,
New York, 1990. <https://doi.org/10.1145/93542.93559>.
Jaffer, Aubrey. Easy Accurate Reading and Writing of
Floating-Point Numbers. ArXiv:1310.8121 [Cs], 27 October 2013.
<http://arxiv.org/abs/1310.8121>.
[^arbitrary-precision-signedinteger]: **Implementation note.** Be
aware when implementing reading and writing of `SignedInteger`s
that the data model *requires* arbitrary-precision integers. Your
implementation may (but, ideally, should not) truncate precision
when reading or writing a `SignedInteger`; however, if it does so,
it should (a) signal its client that truncation has occurred, and
(b) make it clear to the client that comparing such truncated
values for equality or ordering will not yield results that match
the expected semantics of the data model.
`String`s are, `String`s are,
[as in JSON](https://tools.ietf.org/html/rfc8259#section-7), possibly [as in JSON](https://tools.ietf.org/html/rfc8259#section-7), possibly
escaped text surrounded by double quotes. The escaping rules are the escaped text surrounded by double quotes. The escaping rules are the
@ -177,62 +128,109 @@ Base64 characters are allowed.
ByteString =/ "#[" *(ws / base64char) ws "]" ByteString =/ "#[" *(ws / base64char) ws "]"
base64char = %x41-5A / %x61-7A / %x30-39 / "+" / "/" / "-" / "_" / "=" base64char = %x41-5A / %x61-7A / %x30-39 / "+" / "/" / "-" / "_" / "="
A `Symbol` may be written in a “bare” form[^cf-sexp-token] so long as A `Symbol` may be written in either of two forms.
it conforms to certain restrictions on the characters appearing in the
symbol. Alternatively, it may be written in a quoted form. The quoted
form is much the same as the syntax for `String`s, including embedded
escape syntax, except using a bar or pipe character (`|`) instead of a
double quote mark.
Symbol = symstart *symcont / "|" *symchar "|" The first is a quoted form, much the same as the syntax for `String`s,
symstart = ALPHA / sympunct / symustart including embedded escape syntax, except using a bar or pipe character
symcont = ALPHA / sympunct / symustart / symucont / DIGIT / "-" (`|`) instead of a double quote mark.
sympunct = "~" / "!" / "$" / "%" / "^" / "&" / "*" /
"?" / "_" / "=" / "+" / "/" / "." QuotedSymbol = "|" *symchar "|"
symchar = unescaped / %x22 / escape (escaped / %x7C / %s"u" 4HEXDIG) symchar = unescaped / %x22 / escape (escaped / %x7C / %s"u" 4HEXDIG)
symustart = <any code point greater than 127 whose Unicode
category is Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Alternatively, a `Symbol` may be written in a “bare” form[^cf-sexp-token].
Pc, Po, Sc, Sm, Sk, So, or Co> The grammar for numeric data is a subset of the grammar for bare `Symbol`s,
symucont = <any code point greater than 127 whose Unicode so if a `SymbolOrNumber` also matches the grammar for `Float`, `Double` or
category is Nd, Nl, No, or Pd> `SignedInteger`, then it must be interpreted as one of those, and otherwise
it must be interpreted as a bare `Symbol`.
SymbolOrNumber = 1*baresymchar
baresymchar = ALPHA / DIGIT / sympunct / symuchar
sympunct = "~" / "!" / "$" / "%" / "^" / "&" / "*" /
"?" / "_" / "=" / "+" / "-" / "/" / "."
symuchar = <any code point greater than 127 whose Unicode
category is Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd,
Nl, No, Pc, Pd, Po, Sc, Sm, Sk, So, or Co>
[^cf-sexp-token]: Compare with the [SPKI S-expression][sexp.txt] [^cf-sexp-token]: Compare with the [SPKI S-expression][sexp.txt]
definition of “token representation”, and with the definition of “token representation”, and with the
[R6RS definition of identifiers](http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-7.html#node_sec_4.2.4). [R6RS definition of identifiers](http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-7.html#node_sec_4.2.4).
An `Embedded` is written as a `Value` chosen to represent the denoted Numeric data follow the [JSON
object, prefixed with `#!`. grammar](https://tools.ietf.org/html/rfc8259#section-6) except that leading
zeros are permitted and an optional leading `+` sign is allowed. The
addition of a trailing “f” distinguishes a `Float` from a `Double` value.
`Float`s and `Double`s always have either a fractional part or an exponent
part, where `SignedInteger`s never have
either.[^reading-and-writing-floats-accurately]
[^arbitrary-precision-signedinteger]
Float = flt %i"f"
Double = flt
SignedInteger = int
nat = 1*DIGIT
int = ["-"/"+"] nat
frac = "." 1*DIGIT
exp = %i"e" ["-"/"+"] 1*DIGIT
flt = int (frac exp / frac / exp)
[^reading-and-writing-floats-accurately]: **Implementation note.**
Your language's standard library likely has a good routine for
converting between decimal notation and IEEE 754 floating-point.
However, if not, or if you are interested in the challenges of
accurately reading and writing floating point numbers, see the
excellent matched pair of 1990 papers by Clinger and Steele &
White, and a recent follow-up by Jaffer:
Clinger, William D. How to Read Floating Point Numbers
Accurately. In Proc. PLDI. White Plains, New York, 1990.
<https://doi.org/10.1145/93542.93557>.
Steele, Guy L., Jr., and Jon L. White. How to Print
Floating-Point Numbers Accurately. In Proc. PLDI. White Plains,
New York, 1990. <https://doi.org/10.1145/93542.93559>.
Jaffer, Aubrey. Easy Accurate Reading and Writing of
Floating-Point Numbers. ArXiv:1310.8121 [Cs], 27 October 2013.
<http://arxiv.org/abs/1310.8121>.
[^arbitrary-precision-signedinteger]: **Implementation note.** Be
aware when implementing reading and writing of `SignedInteger`s
that the data model *requires* arbitrary-precision integers. Your
implementation may (but, ideally, should not) truncate precision
when reading or writing a `SignedInteger`; however, if it does so,
it should (a) signal its client that truncation has occurred, and
(b) make it clear to the client that comparing such truncated
values for equality or ordering will not yield results that match
the expected semantics of the data model.
Some valid IEEE 754 `Float`s and `Double`s are not covered by the grammar
above, namely, the several million NaNs and the two infinities. These are
represented as raw hexadecimal strings similar to hexadecimal
`ByteString`s. Implementations are free to use hexadecimal floating-point
syntax wherever convenient, even for values representable using the
grammar above.[^rationale-no-general-machine-syntax]
Value =/ HexFloat / HexDouble
HexFloat = "#xf" %x22 4(ws 2HEXDIG) ws %x22
HexDouble = "#xd" %x22 8(ws 2HEXDIG) ws %x22
[^rationale-no-general-machine-syntax]: **Rationale.** Previous versions
of this specification included an escape to the [machine-oriented
binary syntax](preserves-binary.html) by prefixing a `ByteString`
containing the binary representation of a `Value` with `#=`. The only
true need for this feature was to represent otherwise-unrepresentable
floating-point values. Instead, this specification allows such
floating-point values to be written directly. Removing the `#=` syntax
simplifies implementations (there is no longer any need to support the
machine-oriented syntax) and avoids complications around treatment of
annotations potentially contained within machine-encoded values.
Finally, an `Embedded` is written as a `Value` chosen to represent the
denoted object, prefixed with `#!`.
Embedded = "#!" Value Embedded = "#!" Value
Finally, any `Value` may be represented by escaping from the textual
syntax to the [machine-oriented binary syntax](preserves-binary.html)
by prefixing a `ByteString` containing the binary representation of the
`Value` with `#=`.[^rationale-switch-to-binary]
[^no-literal-binary-in-text] [^machine-value-annotations]
Machine = "#=" ws ByteString
[^rationale-switch-to-binary]: **Rationale.** The textual syntax
cannot express every `Value`: specifically, it cannot express the
several million floating-point NaNs, or the two floating-point
Infinities. Since the machine-oriented binary format for `Value`s
expresses each `Value` with precision, embedding binary `Value`s
solves the problem.
[^no-literal-binary-in-text]: Every text is ultimately physically
stored as bytes; therefore, it might seem possible to escape to the
raw form of binary encoding from within a piece of textual syntax.
However, while bytes must be involved in any *representation* of
text, the text *itself* is logically a sequence of *code points* and
is not *intrinsically* a binary structure at all. It would be
incoherent to expect to be able to access the representation of the
text from within the text itself.
[^machine-value-annotations]: Any text-syntax annotations preceding
the `#` are prepended to any binary-syntax annotations yielded by
decoding the `ByteString`.
## Annotations ## Annotations
When written down, a `Value` may have an associated sequence of When written down, a `Value` may have an associated sequence of
@ -293,5 +291,22 @@ The text syntax for `Boolean`s, `Symbol`s, and `ByteString`s is
directly inspired by [Racket](https://racket-lang.org/)'s lexical directly inspired by [Racket](https://racket-lang.org/)'s lexical
syntax. syntax.
## Appendix. Regular expressions for bare symbols and numbers
When parsing, if a token matches both `SymbolOrNumber` and `Number`, it's a
number; use `Float`, `Double` and `SignedInteger` to disambiguate. If it
matches `SymbolOrNumber` but not `Number`, it's a "bare" `Symbol`.
SymbolOrNumber: ^[-a-zA-Z0-9~!$%^&*?_=+/.]+$
Number: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$
Float: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))[fF])$
Double: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+)))$
SignedInteger: ^([-+]?\d+)$
When printing, if a symbol matches both `SymbolOrNumber` and `Number` or
neither `SymbolOrNumber` nor `Number`, it must be quoted (`|...|`). If it
matches `SymbolOrNumber` but not `Number`, it may be printed as a "bare"
`Symbol`.
<!-- Heading to visually offset the footnotes from the main document: --> <!-- Heading to visually offset the footnotes from the main document: -->
## Notes ## Notes

View File

@ -220,21 +220,23 @@ The total ordering specified [above](#total-order) means that the following stat
<!-- TODO: Give some examples of large and small Preserves, perhaps --> <!-- TODO: Give some examples of large and small Preserves, perhaps -->
<!-- translated from various JSON blobs floating around the internet. --> <!-- translated from various JSON blobs floating around the internet. -->
| Value | Encoded byte sequence | | Value | Encoded byte sequence |
|-----------------------------|---------------------------------------------------------------------------------| |-----------------------------------------------------|---------------------------------------------------------------------------------|
| `<capture <discard>>` | B4 B3 07 'c' 'a' 'p' 't' 'u' 'r' 'e' B4 B3 07 'd' 'i' 's' 'c' 'a' 'r' 'd' 84 84 | | `<capture <discard>>` | B4 B3 07 'c' 'a' 'p' 't' 'u' 'r' 'e' B4 B3 07 'd' 'i' 's' 'c' 'a' 'r' 'd' 84 84 |
| `[1 2 3 4]` | B5 91 92 93 94 84 | | `[1 2 3 4]` | B5 91 92 93 94 84 |
| `[-2 -1 0 1]` | B5 9E 9F 90 91 84 | | `[-2 -1 0 1]` | B5 9E 9F 90 91 84 |
| `"hello"` (format B) | B1 05 'h' 'e' 'l' 'l' 'o' | | `"hello"` (format B) | B1 05 'h' 'e' 'l' 'l' 'o' |
| `["a" b #"c" [] #{} #t #f]` | B5 B1 01 'a' B3 01 'b' B2 01 'c' B5 84 B6 84 81 80 84 | | `["a" b #"c" [] #{} #t #f]` | B5 B1 01 'a' B3 01 'b' B2 01 'c' B5 84 B6 84 81 80 84 |
| `-257` | A1 FE FF | | `-257` | A1 FE FF |
| `-1` | 9F | | `-1` | 9F |
| `0` | 90 | | `0` | 90 |
| `1` | 91 | | `1` | 91 |
| `255` | A1 00 FF | | `255` | A1 00 FF |
| `1.0f` | 82 3F 80 00 00 | | `1.0f` | 82 3F 80 00 00 |
| `1.0` | 83 3F F0 00 00 00 00 00 00 | | `1.0` | 83 3F F0 00 00 00 00 00 00 |
| `-1.202e300` | 83 FE 3C B7 B7 59 BF 04 26 | | `-1.202e300` | 83 FE 3C B7 B7 59 BF 04 26 |
| `#xf"7f800000"`, positive `Float` infinity | 82 7F 80 00 00 |
| `#xd"fff0000000000000"`, negative `Double` infinity | 83 FF F0 00 00 00 00 00 00 |
The next example uses a non-`Symbol` label for a record.[^extensibility2] The `Record` The next example uses a non-`Symbol` label for a record.[^extensibility2] The `Record`

Binary file not shown.

View File

@ -74,9 +74,45 @@
dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }"> dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
dict4: @"Unexpected close brace" <ParseError "}"> dict4: @"Unexpected close brace" <ParseError "}">
dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84"> dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84">
double0: <Test #x"830000000000000000" 0.0>
double+0: <Test #x"830000000000000000" +0.0>
double-0: <Test #x"838000000000000000" -0.0>
double1: <Test #x"833ff0000000000000" 1.0> double1: <Test #x"833ff0000000000000" 1.0>
double2: <Test #x"83fe3cb7b759bf0426" -1.202e300> double2: <Test #x"83fe3cb7b759bf0426" -1.202e300>
double3: <Test #x"83123456789abcdef0" #xd"12 34 56 78 9a bc de f0">
double4: @"Fewer than 16 digits" <ParseError "#xd\"12345678\"">
double5: @"More than 16 digits" <ParseError "#xd\"123456789abcdef012\"">
double6: @"Invalid chars" <ParseError "#xd\"12zz56789abcdef0\"">
double7: @"Positive infinity" <Test #x"837ff0000000000000" #xd"7ff0000000000000">
double8: @"Negative infinity" <Test #x"83fff0000000000000" #xd"fff0000000000000">
double9: @"-sNaN" <Test #x"83fff0000000000001" #xd"fff0000000000001">
double10: @"-sNaN" <Test #x"83fff0000000000111" #xd"fff0000000000111">
double11: @"+sNaN" <Test #x"837ff0000000000001" #xd"7ff0000000000001">
double12: @"+sNaN" <Test #x"837ff0000000000111" #xd"7ff0000000000111">
double13: @"Bad spacing" <ParseError "#xd\"12345 6789abcdef0\"">
double14: @"-qNaN" <Test #x"83fff8000000000001" #xd"fff8000000000001">
double15: @"-qNaN" <Test #x"83fff8000000000111" #xd"fff8000000000111">
double16: @"+qNaN" <Test #x"837ff8000000000001" #xd"7ff8000000000001">
double17: @"+qNaN" <Test #x"837ff8000000000111" #xd"7ff8000000000111">
float0: <Test #x"8200000000" 0.0f>
float+0: <Test #x"8200000000" +0.0f>
float-0: <Test #x"8280000000" -0.0f>
float1: <Test #x"823f800000" 1.0f> float1: <Test #x"823f800000" 1.0f>
float2: <Test #x"8212345678" #xf"12 34 56 78">
float3: @"Fewer than 8 digits" <ParseError "#xf\"123456\"">
float4: @"More than 8 digits" <ParseError "#xf\"123456789a\"">
float5: @"Invalid chars" <ParseError "#xf\"12zz5678\"">
float6: @"Positive infinity" <Test #x"827f800000" #xf"7f800000">
float7: @"Negative infinity" <Test #x"82ff800000" #xf"ff800000">
float8: @"+sNaN" <Test #x"827f800001" #xf"7f800001">
float9: @"+sNaN" <Test #x"827f800111" #xf"7f800111">
float10: @"-sNaN" <Test #x"82ff800001" #xf"ff800001">
float11: @"-sNaN" <Test #x"82ff800111" #xf"ff800111">
float12: @"Bad spacing" <ParseError "#xf\"12345 678\"">
float13: @"+qNaN" <Test #x"827fc00001" #xf"7fc00001">
float14: @"+qNaN" <Test #x"827fc00111" #xf"7fc00111">
float15: @"-qNaN" <Test #x"82ffc00001" #xf"ffc00001">
float16: @"-qNaN" <Test #x"82ffc00111" #xf"ffc00111">
int-257: <Test #x"a1feff" -257> int-257: <Test #x"a1feff" -257>
int-256: <Test #x"a1ff00" -256> int-256: <Test #x"a1ff00" -256>
int-255: <Test #x"a1ff01" -255> int-255: <Test #x"a1ff01" -255>
@ -89,10 +125,13 @@
int-2: <Test #x"9e" -2> int-2: <Test #x"9e" -2>
int-1: <Test #x"9f" -1> int-1: <Test #x"9f" -1>
int0: <Test #x"90" 0> int0: <Test #x"90" 0>
int+0: <Test #x"90" +0>
int-0: <Test #x"90" -0>
int1: <Test #x"91" 1> int1: <Test #x"91" 1>
int12: <Test #x"9c" 12> int12: <Test #x"9c" 12>
int13: <Test #x"a00d" 13> int13: <Test #x"a00d" 13>
int127: <Test #x"a07f" 127> int127: <Test #x"a07f" 127>
int+127: <Test #x"a07f" +127>
int128: <Test #x"a10080" 128> int128: <Test #x"a10080" 128>
int255: <Test #x"a100ff" 255> int255: <Test #x"a100ff" 255>
int256: <Test #x"a10100" 256> int256: <Test #x"a10100" 256>
@ -112,6 +151,8 @@
list8: @"Missing close bracket" <ParseShort "["> list8: @"Missing close bracket" <ParseShort "[">
list9: @"Unexpected close bracket" <ParseError "]"> list9: @"Unexpected close bracket" <ParseError "]">
list10: @"Missing end byte" <DecodeShort #x"b58080"> list10: @"Missing end byte" <DecodeShort #x"b58080">
list11: <Test #x"b59184" [01]>
list12: <Test #x"b59c84" [12]>
noinput0: @"No input at all" <DecodeEOF #x""> noinput0: @"No input at all" <DecodeEOF #x"">
embed0: <Test #x"8690" #!0> embed0: <Test #x"8690" #!0>
embed1: <Test #x"868690" #!#!0> embed1: <Test #x"868690" #!#!0>
@ -138,17 +179,22 @@
string5: <Test #x"b104f09d849e" "\uD834\uDD1E"> string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
symbol0: <Test #x"b300" ||> symbol0: <Test #x"b300" ||>
symbol2: <Test #x"b30568656c6c6f" hello> symbol2: <Test #x"b30568656c6c6f" hello>
symbol3: <Test #x"b305312d322d33" 1-2-3>
symbol4: <Test #x"b305612d622d63" a-b-c>
symbol5: <Test #x"b305612b622b63" a+b+c>
symbol6: <Test #x"b3012b" +>
symbol7: <Test #x"b3032b2b2b" +++>
symbol8: <Test #x"b3012d" ->
symbol9: <Test #x"b3032d2d2d" --->
symbol10: <Test #x"b3022d61" -a>
symbol11: <Test #x"b3042d2d2d61" ---a>
symbol12: <Test #x"b3042d2d2d31" ---1>
symbol13: <Test #x"b3042b312e78" +1.x>
tag0: @"Unexpected end tag" <DecodeError #x"84"> tag0: @"Unexpected end tag" <DecodeError #x"84">
tag1: @"Invalid tag" <DecodeError #x"10"> tag1: @"Invalid tag" <DecodeError #x"10">
tag2: @"Invalid tag" <DecodeError #x"61b10110"> tag2: @"Invalid tag" <DecodeError #x"61b10110">
whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " "> whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " ">
whitespace1: @"No input at all" <ParseEOF ""> whitespace1: @"No input at all" <ParseEOF "">
value1: <Test #"\xB2\x06corymb" #=#"\xB2\x06corymb">
value2: <Test #"\x81" #=#"\x81">
value3: <Test #"\x81" #=#[gQ]>
value4: <Test #"\x81" #=#[gQ==]>
value5: <Test #"\x81" #= #[gQ==]>
value6: <Test #x"b591929384" #=#x"b591929384">
longlist14: <Test #x"b5808080808080808080808080808084" longlist14: <Test #x"b5808080808080808080808080808084"
[#f #f #f #f #f [#f #f #f #f #f