Repair text syntax for numbers and symbols. Closes #19/#36/#37/#38.

Numbers and (bare) Symbols are now disambiguated after reading, which
permits leading `+`, leading `0`, and a wider range of acceptable
Symbols.

Updates spec text, test cases, and implementations. Some ancillary fixes
to Python's comparison routines are also included.
This commit is contained in:
Tony Garnock-Jones 2022-11-06 22:27:01 +01:00
parent 351feba8d2
commit 269ed2391a
31 changed files with 864 additions and 553 deletions

View File

@ -35,6 +35,10 @@ export class Bytes implements Preservable<any>, PreserveWritable<any> {
}
}
/**
 * Produces a new `DataView` over the same buffer region as this instance's
 * internal view (same buffer, byte offset and byte length).
 */
dataview(): DataView {
    const { buffer, byteOffset, byteLength } = this._view;
    return new DataView(buffer, byteOffset, byteLength);
}
get length(): number {
return this._view.length;
}
@ -179,6 +183,10 @@ export function underlying(b: Bytes | Uint8Array): Uint8Array {
return (b instanceof Uint8Array) ? b : b._view;
}
/**
 * Coerces its argument to a `DataView`.
 *
 * @param b either a `DataView`, which is handed back unchanged, or a `Bytes`,
 *          whose `dataview()` method supplies the result
 * @returns a `DataView` for `b`
 */
export function dataview(b: Bytes | DataView): DataView {
    if (b instanceof DataView) {
        return b;
    }
    return b.dataview();
}
// Uint8Array / TypedArray methods
export interface Bytes {

View File

@ -216,8 +216,8 @@ export class Decoder<T = never> implements TypedDecoder<T> {
switch (tag) {
case Tag.False: return this.state.wrap<T>(false);
case Tag.True: return this.state.wrap<T>(true);
case Tag.Float: return this.state.wrap<T>(new SingleFloat(this.state.nextbytes(4).getFloat32(0, false)));
case Tag.Double: return this.state.wrap<T>(new DoubleFloat(this.state.nextbytes(8).getFloat64(0, false)));
case Tag.Float: return this.state.wrap<T>(SingleFloat.fromBytes(this.state.nextbytes(4)));
case Tag.Double: return this.state.wrap<T>(DoubleFloat.fromBytes(this.state.nextbytes(8)));
case Tag.End: throw new DecodeError("Unexpected Compound end marker");
case Tag.Annotation: {
const a = this.next();
@ -294,7 +294,7 @@ export class Decoder<T = never> implements TypedDecoder<T> {
nextFloat(): SingleFloat | undefined {
this.skipAnnotations();
switch (this.state.nextbyte()) {
case Tag.Float: return new SingleFloat(this.state.nextbytes(4).getFloat32(0, false));
case Tag.Float: return SingleFloat.fromBytes(this.state.nextbytes(4));
default: return void 0;
}
}
@ -302,7 +302,7 @@ export class Decoder<T = never> implements TypedDecoder<T> {
nextDouble(): DoubleFloat | undefined {
this.skipAnnotations();
switch (this.state.nextbyte()) {
case Tag.Double: return new DoubleFloat(this.state.nextbytes(8).getFloat64(0, false));
case Tag.Double: return DoubleFloat.fromBytes(this.state.nextbytes(8));
default: return void 0;
}
}

View File

@ -4,6 +4,7 @@ import { Value } from "./values";
import type { GenericEmbedded } from "./embedded";
import type { Encoder, Preservable } from "./encoder";
import type { Writer, PreserveWritable } from "./writer";
import { Bytes, dataview, underlying } from "./bytes";
export type FloatType = 'Single' | 'Double';
export const FloatType = Symbol.for('FloatType');
@ -19,8 +20,15 @@ export abstract class Float {
return stringify(this);
}
abstract toBytes(): Bytes;
equals(other: any): boolean {
return Object.is(other.constructor, this.constructor) && (other.value === this.value);
if (!Object.is(other.constructor, this.constructor)) return false;
if (Number.isNaN(this.value) && Number.isNaN(other.value)) {
return other.toBytes().equals(this.toBytes());
} else {
return Object.is(other.value, this.value);
}
}
hashCode(): number {
@ -44,24 +52,72 @@ export function floatValue(f: any): number {
}
}
/**
 * Converts a number to text that always looks like a floating-point literal.
 *
 * Negative zero is rendered as `-0.0`. Otherwise the default string
 * conversion is used; if that text contains neither a decimal point nor an
 * exponent marker, `.0` is appended.
 *
 * @param f the number to render
 * @returns the textual form of `f`
 */
export function floatlikeString(f: number): string {
    // Default string conversion of -0 drops the sign, so handle it up front.
    if (Object.is(f, -0)) {
        return '-0.0';
    }
    const text = String(f);
    const alreadyFloatlike = /[.eE]/.test(text);
    return alreadyFloatlike ? text : `${text}.0`;
}
export class SingleFloat extends Float implements Preservable<any>, PreserveWritable<any> {
__as_preserve__<T = GenericEmbedded>(): Value<T> {
return this;
}
/**
 * Builds a SingleFloat from a big-endian 32-bit single-precision encoding.
 *
 * Four bytes are read at offset 0 of `bs`. When all eight exponent bits are
 * set (infinity or NaN), the sign and the 23-bit fraction field are copied by
 * hand into the corresponding positions of a double-precision encoding, which
 * is then read back with `getFloat64`. Every other bit pattern is decoded
 * with `getFloat32`.
 *
 * @param bs source of the four encoded bytes, either a `Bytes` or a `DataView`
 * @returns a SingleFloat wrapping the decoded number
 */
static fromBytes(bs: Bytes | DataView): SingleFloat {
const view = dataview(bs);
// Raw big-endian 32-bit pattern, read as a signed integer.
const vf = view.getInt32(0, false);
if ((vf & 0x7f800000) === 0x7f800000) {
// NaN or inf. Preserve quiet/signalling bit by manually expanding to double-precision.
// Arithmetic shift: 0 when the sign bit is clear, -1 when it is set.
const sign = vf >> 31;
// The 23 fraction bits of the single-precision pattern.
const payload = vf & 0x007fffff;
const dbs = new Bytes(8);
const dview = dataview(dbs);
// First 16 bits of the double: sign bit, all-ones 11-bit exponent (0x7ff0),
// then the top 4 of the 23 fraction bits. setInt16 keeps only the low 16 bits.
dview.setInt16(0, (sign << 15) | 0x7ff0 | (payload >> 19), false);
// The remaining 19 fraction bits occupy the top of the next 32 bits.
// Bytes 6-7 are not written here; presumably `new Bytes(8)` is zero-filled (confirm).
dview.setInt32(2, (payload & 0x7ffff) << 13, false);
return new SingleFloat(dview.getFloat64(0, false));
} else {
return new SingleFloat(dataview(bs).getFloat32(0, false));
}
}
static __from_preserve__<T>(v: Value<T>): undefined | SingleFloat {
return Float.isSingle(v) ? v : void 0;
}
/**
 * Writes this value as a big-endian 32-bit single-precision encoding.
 *
 * NaN values are narrowed by hand: the value is first written as a double
 * into a scratch buffer, then its sign and the top 23 fraction bits are
 * packed into a 32-bit pattern with an all-ones exponent. All other values
 * are written with `setFloat32`.
 *
 * @param v destination view
 * @param offset byte offset within `v` at which the four bytes are written
 */
__w(v: DataView, offset: number) {
if (Number.isNaN(this.value)) {
const dbs = new Bytes(8);
const dview = dataview(dbs);
dview.setFloat64(0, this.value, false);
// Arithmetic shift of the first byte: 0 for a clear sign bit, -1 for a set one.
const sign = dview.getInt8(0) >> 7;
// Bytes 1-4 hold the low 4 exponent bits followed by the top 28 fraction bits;
// dropping the low 5 bits and masking to 23 bits leaves the top 23 fraction bits.
const payload = (dview.getInt32(1, false) >> 5) & 0x007fffff;
// Sign bit, all-ones 8-bit exponent, 23-bit fraction.
const vf = (sign << 31) | 0x7f800000 | payload;
v.setInt32(offset, vf, false);
} else {
v.setFloat32(offset, this.value, false);
}
}
__preserve_on__(encoder: Encoder<any>) {
encoder.state.emitbyte(Tag.Float);
encoder.state.makeroom(4);
encoder.state.view.setFloat32(encoder.state.index, this.value, false);
this.__w(encoder.state.view, encoder.state.index);
encoder.state.index += 4;
}
/**
 * Encodes this value into a freshly allocated four-byte `Bytes`, using the
 * same writer (`__w`) as the binary encoder.
 *
 * @returns the four encoded bytes
 */
toBytes(): Bytes {
    const encoded = new Bytes(4);
    const target = encoded.dataview();
    this.__w(target, 0);
    return encoded;
}
__preserve_text_on__(w: Writer<any>) {
w.state.pieces.push('' + this.value + 'f');
if (Number.isFinite(this.value)) {
w.state.pieces.push(floatlikeString(this.value) + 'f');
} else {
w.state.pieces.push('#xf"', this.toBytes().toHex(), '"');
}
}
get [FloatType](): 'Single' {
@ -78,6 +134,10 @@ export class DoubleFloat extends Float implements Preservable<any>, PreserveWrit
return this;
}
/**
 * Builds a DoubleFloat from a big-endian 64-bit double-precision encoding
 * read at offset 0 of `bs`.
 *
 * @param bs source of the eight encoded bytes, either a `Bytes` or a `DataView`
 * @returns a DoubleFloat wrapping the decoded number
 */
static fromBytes(bs: Bytes | DataView): DoubleFloat {
    const view = dataview(bs);
    const decoded = view.getFloat64(0, false);
    return new DoubleFloat(decoded);
}
static __from_preserve__<T>(v: Value<T>): undefined | DoubleFloat {
return Float.isDouble(v) ? v : void 0;
}
@ -89,8 +149,18 @@ export class DoubleFloat extends Float implements Preservable<any>, PreserveWrit
encoder.state.index += 8;
}
/**
 * Encodes this value as eight big-endian double-precision bytes in a freshly
 * allocated `Bytes`.
 *
 * @returns the eight encoded bytes
 */
toBytes(): Bytes {
    const encoded = new Bytes(8);
    const target = encoded.dataview();
    target.setFloat64(0, this.value, false);
    return encoded;
}
__preserve_text_on__(w: Writer<any>) {
w.state.pieces.push('' + this.value);
if (Number.isFinite(this.value)) {
w.state.pieces.push(floatlikeString(this.value));
} else {
w.state.pieces.push('#xd"', this.toBytes().toHex(), '"');
}
}
get [FloatType](): 'Double' {

View File

@ -3,12 +3,12 @@
import type { Value } from './values';
import { DecodeError, ShortPacket } from './codec';
import { Dictionary, Set } from './dictionary';
import { strip, unannotate } from './strip';
import { Bytes, unhexDigit } from './bytes';
import { decode, Decoder, DecoderState, neverEmbeddedTypeDecode } from './decoder';
import { strip } from './strip';
import { Bytes, underlying, unhexDigit } from './bytes';
import { Decoder, DecoderState, neverEmbeddedTypeDecode } from './decoder';
import { Record } from './record';
import { Annotated, newPosition, Position, updatePosition } from './annotated';
import { Double, DoubleFloat, Single, SingleFloat } from './float';
import { Double, DoubleFloat, FloatType, Single, SingleFloat } from './float';
import { stringify } from './text';
import { embed, GenericEmbedded, EmbeddedTypeDecode } from './embedded';
@ -25,6 +25,13 @@ type IntOrFloat = 'int' | 'float';
type Numeric = number | SingleFloat | DoubleFloat;
type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric;
// Matches a complete token that should be read as a number rather than a symbol:
// an optionally signed run of digits, optionally followed by a fraction and/or
// exponent and then an optional `f`/`F` marker.
export const NUMBER_RE: RegExp = /^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$/;
// Groups:
// 1 - integer part and sign
// 2 - decimal part, exponent and Float marker (undefined when the token is a plain integer)
// 3 - decimal part and exponent
// 4 - fraction (`.` and digits) with its optional exponent, when a fraction is present
// 5 - exponent following a fraction
// 6 - exponent with no preceding fraction
// 7 - Float marker (empty string when group 2 matched without a marker)
export class ReaderState {
buffer: string;
pos: Position;
@ -124,6 +131,22 @@ export class ReaderState {
}
}
/**
 * Reads the quoted hex payload of a hex-encoded floating-point literal and
 * converts it to a float of the requested precision.
 *
 * The next character must be `"`; the hex digits are then read with
 * `readHexBinary()`. A 'Single' must decode to exactly 4 bytes and a 'Double'
 * to exactly 8. Problems are reported through `this.error` together with the
 * position captured before the opening quote was consumed.
 *
 * @param precision which float type to produce, 'Single' or 'Double'
 * @returns the decoded SingleFloat or DoubleFloat
 */
readHexFloat(precision: FloatType): SingleFloat | DoubleFloat {
// Remember where the literal starts so errors point at it.
const pos = this.copyPos();
if (this.nextchar() !== '"') {
this.error("Missing open-double-quote in hex-encoded floating-point number", pos);
}
const bs = this.readHexBinary();
// NOTE(review): no default case; relies on `precision` being one of the two literals.
switch (precision) {
case 'Single':
if (bs.length !== 4) this.error("Incorrect number of bytes in hex-encoded Float", pos);
return SingleFloat.fromBytes(bs);
case 'Double':
if (bs.length !== 8) this.error("Incorrect number of bytes in hex-encoded Double", pos);
return DoubleFloat.fromBytes(bs);
}
}
readBase64Binary(): Bytes {
let acc = '';
while (true) {
@ -135,67 +158,7 @@ export class ReaderState {
return decodeBase64(acc);
}
readIntpart(acc: string, ch: string): Numeric {
if (ch === '0') return this.readFracexp('int', acc + ch);
return this.readDigit1('int', acc, (kind, acc) => this.readFracexp(kind, acc), ch);
}
readDigit1(kind: IntOrFloat, acc: string, k: IntContinuation, ch?: string): Numeric {
if (ch === void 0) ch = this.nextchar();
if (ch >= '0' && ch <= '9') return this.readDigit0(kind, acc + ch, k);
this.error('Incomplete number', this.pos);
}
readDigit0(kind: IntOrFloat, acc: string, k: IntContinuation): Numeric {
while (true) {
if (this.atEnd()) break;
const ch = this.peek();
if (!(ch >= '0' && ch <= '9')) break;
this.advance();
acc = acc + ch;
}
return k(kind, acc);
}
readFracexp(kind: IntOrFloat, acc: string): Numeric {
if (!this.atEnd() && this.peek() === '.') {
this.advance();
return this.readDigit1('float', acc + '.', (kind, acc) => this.readExp(kind, acc));
}
return this.readExp(kind, acc);
}
readExp(kind: IntOrFloat, acc: string): Numeric {
const ch = this.atEnd() ? '' : this.peek();
if (ch === 'e' || ch === 'E') {
this.advance();
return this.readSignAndExp(acc + ch);
}
return this.finishNumber(kind, acc);
}
readSignAndExp(acc: string): Numeric {
const ch = this.peek();
if (ch === '+' || ch === '-') {
this.advance();
return this.readDigit1('float', acc + ch, (kind, acc) => this.finishNumber(kind, acc));
}
return this.readDigit1('float', acc, (kind, acc) => this.finishNumber(kind, acc));
}
finishNumber(kind: IntOrFloat, acc: string): Numeric {
const i = parseFloat(acc);
if (kind === 'int') return i;
const ch = this.atEnd() ? '' : this.peek();
if (ch === 'f' || ch === 'F') {
this.advance();
return Single(i);
} else {
return Double(i);
}
}
readRawSymbol<T>(acc: string): Value<T> {
readRawSymbolOrNumber<T>(acc: string): Value<T> {
while (true) {
if (this.atEnd()) break;
const ch = this.peek();
@ -203,7 +166,20 @@ export class ReaderState {
this.advance();
acc = acc + ch;
}
return Symbol.for(acc);
const m = NUMBER_RE.exec(acc);
if (m) {
if (m[2] === void 0) {
let v = parseInt(m[1]);
if (Object.is(v, -0)) v = 0;
return v;
} else if (m[7] === '') {
return Double(parseFloat(m[1] + m[3]));
} else {
return Single(parseFloat(m[1] + m[3]));
}
} else {
return Symbol.for(acc);
}
}
readStringlike<E, R>(xform: (ch: string) => E,
@ -355,11 +331,6 @@ export class Reader<T> {
const unwrapped = ((): Value<T> => {
const c = this.state.nextchar();
switch (c) {
case '-':
return this.state.readIntpart('-', this.state.nextchar());
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
return this.state.readIntpart('', c);
case '"':
return this.state.readString('"');
case '|':
@ -377,22 +348,13 @@ export class Reader<T> {
case 't': return true;
case '{': return this.seq(new Set<T>(), (v, s) => s.add(v), '}');
case '"': return this.state.readLiteralBinary();
case 'x':
if (this.state.nextchar() !== '"') {
this.state.error('Expected open-quote at start of hex ByteString',
startPos);
}
return this.state.readHexBinary();
case '[': return this.state.readBase64Binary();
case '=': {
const bs = unannotate(this.next());
if (!Bytes.isBytes(bs)) this.state.error('ByteString must follow #=',
startPos);
return decode<T>(bs, {
embeddedDecode: this.embeddedType,
includeAnnotations: this.state.options.includeAnnotations,
});
case 'x': switch (this.state.nextchar()) {
case '"': return this.state.readHexBinary();
case 'f': return this.state.readHexFloat('Single');
case 'd': return this.state.readHexFloat('Double');
default: this.state.error('Invalid #x syntax', startPos);
}
case '[': return this.state.readBase64Binary();
case '!': return embed(this.embeddedType.fromValue(
new Reader<GenericEmbedded>(this.state, genericEmbeddedTypeDecode).next(),
this.state.options));
@ -411,7 +373,7 @@ export class Reader<T> {
case ']': this.state.error('Unexpected ]', startPos);
case '}': this.state.error('Unexpected }', startPos);
default:
return this.state.readRawSymbol(c);
return this.state.readRawSymbolOrNumber(c);
}
})();
return this.wrap(unwrapped, startPos);

View File

@ -4,7 +4,7 @@ import type { Value } from './values';
import { Annotated } from './annotated';
import { Bytes } from './bytes';
import { KeyedDictionary, KeyedSet } from './dictionary';
import { Writer, Writable, WriterOptions, EmbeddedWriter, WriterState } from './writer';
import { Writer, WriterOptions, EmbeddedWriter, WriterState } from './writer';
import { fromJS } from './fromjs';
export const stringifyEmbeddedWrite: EmbeddedWriter<any> = {

View File

@ -3,6 +3,7 @@ import { Record, Tuple } from "./record";
import type { GenericEmbedded, Embedded, EmbeddedTypeEncode } from "./embedded";
import { Encoder, EncoderState } from "./encoder";
import type { Value } from "./values";
import { NUMBER_RE } from './reader';
export type Writable<T> =
Value<T> | PreserveWritable<T> | Iterable<Value<T>> | ArrayBufferView;
@ -270,8 +271,7 @@ export class Writer<T> {
case 'symbol': {
const s = v.description!;
// FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
const m = /^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$/.exec(s);
if (m) {
if (/^[-a-zA-Z0-9~!$%^&*?_=+/.]+$/.exec(s) && !NUMBER_RE.exec(s)) {
this.state.pieces.push(s);
} else {
this.state.pieces.push(this.state.escapeStringlike(s, '|'));

View File

@ -72,7 +72,7 @@ class Decoder(BinaryCodec):
tag = self.nextbyte()
if tag == 0x80: return self.wrap(False)
if tag == 0x81: return self.wrap(True)
if tag == 0x82: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0]))
if tag == 0x82: return self.wrap(Float.from_bytes(self.nextbytes(4)))
if tag == 0x83: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
if tag == 0x84: raise DecodeError('Unexpected end-of-stream marker')
if tag == 0x85:

View File

@ -2,7 +2,7 @@ import numbers
from enum import Enum
from functools import cmp_to_key
from .values import preserve, Float, Embedded, Record, Symbol
from .values import preserve, Float, Embedded, Record, Symbol, cmp_floats, _unwrap
from .compat import basestring_
class TypeNumber(Enum):
@ -19,7 +19,7 @@ class TypeNumber(Enum):
SET = 9
DICTIONARY = 10
EMBEDDED = 10
EMBEDDED = 11
def type_number(v):
if hasattr(v, '__preserve__'):
@ -84,12 +84,17 @@ def _item_key(item):
return item[0]
def _eq(a, b):
a = _unwrap(a)
b = _unwrap(b)
ta = type_number(a)
tb = type_number(b)
if ta != tb: return False
if ta == TypeNumber.DOUBLE:
return cmp_floats(a, b) == 0
if ta == TypeNumber.EMBEDDED:
return ta.embeddedValue == tb.embeddedValue
return _eq(a.embeddedValue, b.embeddedValue)
if ta == TypeNumber.RECORD:
return _eq(a.key, b.key) and _eq_sequences(a.fields, b.fields)
@ -118,13 +123,18 @@ def _cmp_sequences(aa, bb):
return len(aa) - len(bb)
def _cmp(a, b):
a = _unwrap(a)
b = _unwrap(b)
ta = type_number(a)
tb = type_number(b)
if ta.value < tb.value: return -1
if tb.value < ta.value: return 1
if ta == TypeNumber.DOUBLE:
return cmp_floats(a, b)
if ta == TypeNumber.EMBEDDED:
return _simplecmp(ta.embeddedValue, tb.embeddedValue)
return _cmp(a.embeddedValue, b.embeddedValue)
if ta == TypeNumber.RECORD:
v = _cmp(a.key, b.key)

View File

@ -1,6 +1,7 @@
import numbers
import struct
import base64
import math
from .values import *
from .error import *
@ -9,6 +10,8 @@ from .binary import Decoder
class TextCodec(object): pass
NUMBER_RE = re.compile(r'^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$')
class Parser(TextCodec):
def __init__(self, input_buffer=u'', include_annotations=False, parse_embedded=lambda x: x):
super(Parser, self).__init__()
@ -66,50 +69,6 @@ class Parser(TextCodec):
return self.wrap(u''.join(s))
s.append(c)
def read_intpart(self, acc, c):
if c == '0':
acc.append(c)
else:
self.read_digit1(acc, c)
return self.read_fracexp(acc)
def read_fracexp(self, acc):
is_float = False
if self.peek() == '.':
is_float = True
acc.append(self.nextchar())
self.read_digit1(acc, self.nextchar())
if self.peek() in 'eE':
acc.append(self.nextchar())
return self.read_sign_and_exp(acc)
else:
return self.finish_number(acc, is_float)
def read_sign_and_exp(self, acc):
if self.peek() in '+-':
acc.append(self.nextchar())
self.read_digit1(acc, self.nextchar())
return self.finish_number(acc, True)
def finish_number(self, acc, is_float):
if is_float:
if self.peek() in 'fF':
self.skip()
return Float(float(u''.join(acc)))
else:
return float(u''.join(acc))
else:
return int(u''.join(acc))
def read_digit1(self, acc, c):
if not c.isdigit():
raise DecodeError('Incomplete number')
acc.append(c)
while not self._atend():
if not self.peek().isdigit():
break
acc.append(self.nextchar())
def read_stringlike(self, terminator, hexescape, hexescaper):
acc = []
while True:
@ -186,6 +145,16 @@ class Parser(TextCodec):
if c == '=': continue
acc.append(c)
# Reads the quoted hex payload of a hex-encoded floating-point literal.
# `bytecount` is the number of bytes the payload must decode to: 4 yields a
# `Float` built with `Float.from_bytes`, 8 yields a plain Python float unpacked
# as a big-endian double. Raises DecodeError if the opening double-quote is
# missing, if the decoded length differs from `bytecount`, or if `bytecount`
# is neither 4 nor 8.
def read_hex_float(self, bytecount):
if self.nextchar() != '"':
raise DecodeError('Missing open-double-quote in hex-encoded floating-point number')
bs = self.read_hex_binary()
if len(bs) != bytecount:
raise DecodeError('Incorrect number of bytes in hex-encoded floating-point number')
if bytecount == 4: return Float.from_bytes(bs)
if bytecount == 8: return struct.unpack('>d', bs)[0]
# Only reached when the length check passed for an unsupported bytecount.
raise DecodeError('Unsupported byte count in hex-encoded floating-point number')
def upto(self, delimiter):
vs = []
while True:
@ -208,14 +177,24 @@ class Parser(TextCodec):
raise DecodeError('Missing expected key/value separator')
acc.append(self.next())
def read_raw_symbol(self, acc):
def read_raw_symbol_or_number(self, acc):
while not self._atend():
c = self.peek()
if c.isspace() or c in '(){}[]<>";,@#:|':
break
self.skip()
acc.append(c)
return Symbol(u''.join(acc))
acc = u''.join(acc)
m = NUMBER_RE.match(acc)
if m:
if m[2] is None:
return int(m[1])
elif m[7] == '':
return float(m[1] + m[3])
else:
return Float(float(m[1] + m[3]))
else:
return Symbol(acc)
def wrap(self, v):
return Annotated(v) if self.include_annotations else v
@ -223,12 +202,6 @@ class Parser(TextCodec):
def next(self):
self.skip_whitespace()
c = self.peek()
if c == '-':
self.skip()
return self.wrap(self.read_intpart(['-'], self.nextchar()))
if c.isdigit():
self.skip()
return self.wrap(self.read_intpart([], c))
if c == '"':
self.skip()
return self.wrap(self.read_string('"'))
@ -251,9 +224,11 @@ class Parser(TextCodec):
if c == '{': return self.wrap(frozenset(self.upto('}')))
if c == '"': return self.wrap(self.read_literal_binary())
if c == 'x':
if self.nextchar() != '"':
raise DecodeError('Expected open-quote at start of hex ByteString')
return self.wrap(self.read_hex_binary())
c = self.nextchar()
if c == '"': return self.wrap(self.read_hex_binary())
if c == 'f': return self.wrap(self.read_hex_float(4))
if c == 'd': return self.wrap(self.read_hex_float(8))
raise DecodeError('Invalid #x syntax')
if c == '[': return self.wrap(self.read_base64_binary())
if c == '=':
old_ann = self.include_annotations
@ -286,7 +261,7 @@ class Parser(TextCodec):
if c in '>]}':
raise DecodeError('Unexpected ' + c)
self.skip()
return self.wrap(self.read_raw_symbol([c]))
return self.wrap(self.read_raw_symbol_or_number([c]))
def try_next(self):
start = self.index
@ -385,7 +360,10 @@ class Formatter(TextCodec):
elif v is True:
self.chunks.append('#t')
elif isinstance(v, float):
self.chunks.append(repr(v))
if math.isnan(v) or math.isinf(v):
self.chunks.append('#xd"' + struct.pack('>d', v).hex() + '"')
else:
self.chunks.append(repr(v))
elif isinstance(v, numbers.Number):
self.chunks.append('%d' % (v,))
elif isinstance(v, bytes):

View File

@ -1,6 +1,7 @@
import re
import sys
import struct
import math
from .error import DecodeError
@ -9,6 +10,16 @@ def preserve(v):
v = v.__preserve__()
return v
def float_to_int(v):
    """Return the IEEE 754 binary64 bit pattern of `v` as an unsigned 64-bit integer."""
    return struct.unpack('>Q', struct.pack('>d', v))[0]

def _float_order_key(v):
    """Map `v` to an integer whose natural ordering follows the sign-magnitude
    ordering of its binary64 bit pattern (negative values below positive ones)."""
    bits = float_to_int(v)
    magnitude = bits & 0x7fffffffffffffff
    if bits & 0x8000000000000000:
        # Sign bit set: a larger magnitude must sort earlier, and -0.0 (magnitude 0)
        # must land just below +0.0 (key 0), hence the extra -1.
        return -magnitude - 1
    return magnitude

def cmp_floats(a, b):
    """Three-way comparison of two floats by bit pattern.

    Returns a negative integer if `a` orders before `b`, zero if both have the
    identical binary64 bit pattern, and a positive integer otherwise. The
    ordering is: negative NaNs, -inf, negative finite values, -0.0, +0.0,
    positive finite values, +inf, positive NaNs.

    The bit patterns must not be compared as plain unsigned integers: every
    pattern with the sign bit set is numerically larger than every pattern
    without it, which would order all negative floats above all positive ones.
    """
    return _float_order_key(a) - _float_order_key(b)
class Float(object):
def __init__(self, value):
self.value = value
@ -16,7 +27,12 @@ class Float(object):
def __eq__(self, other):
other = _unwrap(other)
if other.__class__ is self.__class__:
return self.value == other.value
return cmp_floats(self.value, other.value) == 0
def __lt__(self, other):
other = _unwrap(other)
if other.__class__ is self.__class__:
return cmp_floats(self.value, other.value) < 0
def __ne__(self, other):
return not self.__eq__(other)
@ -27,15 +43,41 @@ class Float(object):
def __repr__(self):
return 'Float(' + repr(self.value) + ')'
def _to_bytes(self):
if math.isnan(self.value) or math.isinf(self.value):
dbs = struct.pack('>d', self.value)
vd = struct.unpack('>Q', dbs)[0]
sign = vd >> 63
payload = (vd >> 29) & 0x007fffff
vf = (sign << 31) | 0x7f800000 | payload
return struct.pack('>I', vf)
else:
return struct.pack('>f', self.value)
def __preserve_write_binary__(self, encoder):
encoder.buffer.append(0x82)
encoder.buffer.extend(struct.pack('>f', self.value))
encoder.buffer.extend(self._to_bytes())
def __preserve_write_text__(self, formatter):
formatter.chunks.append(repr(self.value) + 'f')
if math.isnan(self.value) or math.isinf(self.value):
formatter.chunks.append('#xf"' + self._to_bytes().hex() + '"')
else:
formatter.chunks.append(repr(self.value) + 'f')
# Builds a Float from four big-endian single-precision bytes. Patterns with an
# all-ones exponent (infinity or NaN) are widened to a double-precision bit
# pattern by hand, carrying the sign bit and the 23 fraction bits across;
# everything else is unpacked with struct's '>f' format.
@staticmethod
def from_bytes(bs):
vf = struct.unpack('>I', bs)[0]
if (vf & 0x7f800000) == 0x7f800000:
# NaN or inf. Preserve quiet/signalling bit by manually expanding to double-precision.
sign = vf >> 31
payload = vf & 0x007fffff
# Sign to bit 63, all-ones 11-bit exponent, fraction bits moved to the top
# of the 52-bit double fraction field (shift of 52 - 23 = 29).
dbs = struct.pack('>Q', (sign << 63) | 0x7ff0000000000000 | (payload << 29))
return Float(struct.unpack('>d', dbs)[0])
else:
return Float(struct.unpack('>f', bs)[0])
# FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
RAW_SYMBOL_RE = re.compile(r'^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$')
RAW_SYMBOL_RE = re.compile(r'^[-a-zA-Z0-9~!$%^&*?_=+/.]+$')
class Symbol(object):
def __init__(self, name):

View File

@ -74,9 +74,45 @@
dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
dict4: @"Unexpected close brace" <ParseError "}">
dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84">
double0: <Test #x"830000000000000000" 0.0>
double+0: <Test #x"830000000000000000" +0.0>
double-0: <Test #x"838000000000000000" -0.0>
double1: <Test #x"833ff0000000000000" 1.0>
double2: <Test #x"83fe3cb7b759bf0426" -1.202e300>
double3: <Test #x"83123456789abcdef0" #xd"12 34 56 78 9a bc de f0">
double4: @"Fewer than 16 digits" <ParseError "#xd\"12345678\"">
double5: @"More than 16 digits" <ParseError "#xd\"123456789abcdef012\"">
double6: @"Invalid chars" <ParseError "#xd\"12zz56789abcdef0\"">
double7: @"Positive infinity" <Test #x"837ff0000000000000" #xd"7ff0000000000000">
double8: @"Negative infinity" <Test #x"83fff0000000000000" #xd"fff0000000000000">
double9: @"-qNaN" <Test #x"83fff0000000000001" #xd"fff0000000000001">
double10: @"-qNaN" <Test #x"83fff0000000000111" #xd"fff0000000000111">
double11: @"+qNaN" <Test #x"837ff0000000000001" #xd"7ff0000000000001">
double12: @"+qNaN" <Test #x"837ff0000000000111" #xd"7ff0000000000111">
double13: @"Bad spacing" <ParseError "#xd\"12345 6789abcdef0\"">
double14: @"-sNaN" <Test #x"83fff8000000000001" #xd"fff8000000000001">
double15: @"-sNaN" <Test #x"83fff8000000000111" #xd"fff8000000000111">
double16: @"+sNaN" <Test #x"837ff8000000000001" #xd"7ff8000000000001">
double17: @"+sNaN" <Test #x"837ff8000000000111" #xd"7ff8000000000111">
float0: <Test #x"8200000000" 0.0f>
float+0: <Test #x"8200000000" +0.0f>
float-0: <Test #x"8280000000" -0.0f>
float1: <Test #x"823f800000" 1.0f>
float2: <Test #x"8212345678" #xf"12 34 56 78">
float3: @"Fewer than 8 digits" <ParseError "#xf\"123456\"">
float4: @"More than 8 digits" <ParseError "#xf\"123456789a\"">
float5: @"Invalid chars" <ParseError "#xf\"12zz5678\"">
float6: @"Positive infinity" <Test #x"827f800000" #xf"7f800000">
float7: @"Negative infinity" <Test #x"82ff800000" #xf"ff800000">
float8: @"+sNaN" <Test #x"827f800001" #xf"7f800001">
float9: @"+sNaN" <Test #x"827f800111" #xf"7f800111">
float10: @"-sNaN" <Test #x"82ff800001" #xf"ff800001">
float11: @"-sNaN" <Test #x"82ff800111" #xf"ff800111">
float12: @"Bad spacing" <ParseError "#xf\"12345 678\"">
float13: @"+qNaN" <Test #x"827fc00001" #xf"7fc00001">
float14: @"+qNaN" <Test #x"827fc00111" #xf"7fc00111">
float15: @"-qNaN" <Test #x"82ffc00001" #xf"ffc00001">
float16: @"-qNaN" <Test #x"82ffc00111" #xf"ffc00111">
int-257: <Test #x"a1feff" -257>
int-256: <Test #x"a1ff00" -256>
int-255: <Test #x"a1ff01" -255>
@ -89,10 +125,13 @@
int-2: <Test #x"9e" -2>
int-1: <Test #x"9f" -1>
int0: <Test #x"90" 0>
int+0: <Test #x"90" +0>
int-0: <Test #x"90" -0>
int1: <Test #x"91" 1>
int12: <Test #x"9c" 12>
int13: <Test #x"a00d" 13>
int127: <Test #x"a07f" 127>
int+127: <Test #x"a07f" +127>
int128: <Test #x"a10080" 128>
int255: <Test #x"a100ff" 255>
int256: <Test #x"a10100" 256>
@ -112,6 +151,8 @@
list8: @"Missing close bracket" <ParseShort "[">
list9: @"Unexpected close bracket" <ParseError "]">
list10: @"Missing end byte" <DecodeShort #x"b58080">
list11: <Test #x"b59184" [01]>
list12: <Test #x"b59c84" [12]>
noinput0: @"No input at all" <DecodeEOF #x"">
embed0: <Test #x"8690" #!0>
embed1: <Test #x"868690" #!#!0>
@ -138,17 +179,22 @@
string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
symbol0: <Test #x"b300" ||>
symbol2: <Test #x"b30568656c6c6f" hello>
symbol3: <Test #x"b305312d322d33" 1-2-3>
symbol4: <Test #x"b305612d622d63" a-b-c>
symbol5: <Test #x"b305612b622b63" a+b+c>
symbol6: <Test #x"b3012b" +>
symbol7: <Test #x"b3032b2b2b" +++>
symbol8: <Test #x"b3012d" ->
symbol9: <Test #x"b3032d2d2d" --->
symbol10: <Test #x"b3022d61" -a>
symbol11: <Test #x"b3042d2d2d61" ---a>
symbol12: <Test #x"b3042d2d2d31" ---1>
symbol13: <Test #x"b3042b312e78" +1.x>
tag0: @"Unexpected end tag" <DecodeError #x"84">
tag1: @"Invalid tag" <DecodeError #x"10">
tag2: @"Invalid tag" <DecodeError #x"61b10110">
whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " ">
whitespace1: @"No input at all" <ParseEOF "">
value1: <Test #"\xB2\x06corymb" #=#"\xB2\x06corymb">
value2: <Test #"\x81" #=#"\x81">
value3: <Test #"\x81" #=#[gQ]>
value4: <Test #"\x81" #=#[gQ==]>
value5: <Test #"\x81" #= #[gQ==]>
value6: <Test #x"b591929384" #=#x"b591929384">
longlist14: <Test #x"b5808080808080808080808080808084"
[#f #f #f #f #f

View File

@ -1,9 +1,9 @@
import unittest
from utils import PreservesTestCase
from preserves import *
from preserves.compare import *
class BasicCompareTests(unittest.TestCase):
class BasicCompareTests(PreservesTestCase):
def test_eq_identity(self):
self.assertTrue(eq(1, 1))
self.assertFalse(eq(1, 1.0))

View File

@ -1,30 +1,30 @@
import unittest
from utils import PreservesTestCase
from preserves import *
from preserves.path import parse
class BasicPathTests(unittest.TestCase):
class BasicPathTests(PreservesTestCase):
def test_identity(self):
self.assertEqual(parse('').exec(1), (1,))
self.assertEqual(parse('').exec([]), ([],))
self.assertEqual(parse('').exec(Record(Symbol('hi'), [])), (Record(Symbol('hi'), []),))
self.assertPreservesEqual(parse('').exec(1), (1,))
self.assertPreservesEqual(parse('').exec([]), ([],))
self.assertPreservesEqual(parse('').exec(Record(Symbol('hi'), [])), (Record(Symbol('hi'), []),))
def test_children(self):
self.assertEqual(parse('/').exec([1, 2, 3]), (1, 2, 3))
self.assertEqual(parse('/').exec([1, [2], 3]), (1, [2], 3))
self.assertEqual(parse('/').exec(Record(Symbol('hi'), [1, [2], 3])), (1, [2], 3))
self.assertPreservesEqual(parse('/').exec([1, 2, 3]), (1, 2, 3))
self.assertPreservesEqual(parse('/').exec([1, [2], 3]), (1, [2], 3))
self.assertPreservesEqual(parse('/').exec(Record(Symbol('hi'), [1, [2], 3])), (1, [2], 3))
def test_label(self):
self.assertEqual(parse('.^').exec([1, 2, 3]), ())
self.assertEqual(parse('.^').exec([1, [2], 3]), ())
self.assertEqual(parse('.^').exec(Record(Symbol('hi'), [1, [2], 3])), (Symbol('hi'),))
self.assertPreservesEqual(parse('.^').exec([1, 2, 3]), ())
self.assertPreservesEqual(parse('.^').exec([1, [2], 3]), ())
self.assertPreservesEqual(parse('.^').exec(Record(Symbol('hi'), [1, [2], 3])), (Symbol('hi'),))
def test_count(self):
self.assertEqual(parse('<count / ^ hi>').exec([ Record(Symbol('hi'), [1]),
Record(Symbol('no'), [2]),
Record(Symbol('hi'), [3]) ]),
self.assertPreservesEqual(parse('<count / ^ hi>').exec([ Record(Symbol('hi'), [1]),
Record(Symbol('no'), [2]),
Record(Symbol('hi'), [3]) ]),
(2,))
self.assertEqual(parse('/ <count ^ hi>').exec([ Record(Symbol('hi'), [1]),
Record(Symbol('no'), [2]),
Record(Symbol('hi'), [3]) ]),
self.assertPreservesEqual(parse('/ <count ^ hi>').exec([ Record(Symbol('hi'), [1]),
Record(Symbol('no'), [2]),
Record(Symbol('hi'), [3]) ]),
(1, 0, 1))

View File

@ -1,11 +1,12 @@
import numbers
import os
import sys
import unittest
# Make `preserves` available for imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from utils import PreservesTestCase
from preserves import *
from preserves.compat import basestring_, ord_
from preserves.values import _unwrap
@ -49,33 +50,33 @@ def _e(v):
def _R(k, *args):
return Record(Symbol(k), args)
class BinaryCodecTests(unittest.TestCase):
class BinaryCodecTests(PreservesTestCase):
def _roundtrip(self, forward, expected, back=None, nondeterministic=False):
if back is None: back = forward
self.assertEqual(_d(_e(forward)), back)
self.assertEqual(_d(_e(back)), back)
self.assertEqual(_d(expected), back)
self.assertPreservesEqual(_d(_e(forward)), back)
self.assertPreservesEqual(_d(_e(back)), back)
self.assertPreservesEqual(_d(expected), back)
if not nondeterministic:
actual = _e(forward)
self.assertEqual(actual, expected, '%s != %s' % (_hex(actual), _hex(expected)))
self.assertPreservesEqual(actual, expected, '%s != %s' % (_hex(actual), _hex(expected)))
def test_decode_varint(self):
with self.assertRaises(DecodeError):
Decoder(_buf()).varint()
self.assertEqual(Decoder(_buf(0)).varint(), 0)
self.assertEqual(Decoder(_buf(10)).varint(), 10)
self.assertEqual(Decoder(_buf(100)).varint(), 100)
self.assertEqual(Decoder(_buf(200, 1)).varint(), 200)
self.assertEqual(Decoder(_buf(0b10101100, 0b00000010)).varint(), 300)
self.assertEqual(Decoder(_buf(128, 148, 235, 220, 3)).varint(), 1000000000)
self.assertPreservesEqual(Decoder(_buf(0)).varint(), 0)
self.assertPreservesEqual(Decoder(_buf(10)).varint(), 10)
self.assertPreservesEqual(Decoder(_buf(100)).varint(), 100)
self.assertPreservesEqual(Decoder(_buf(200, 1)).varint(), 200)
self.assertPreservesEqual(Decoder(_buf(0b10101100, 0b00000010)).varint(), 300)
self.assertPreservesEqual(Decoder(_buf(128, 148, 235, 220, 3)).varint(), 1000000000)
def test_encode_varint(self):
self.assertEqual(_varint(0), _buf(0))
self.assertEqual(_varint(10), _buf(10))
self.assertEqual(_varint(100), _buf(100))
self.assertEqual(_varint(200), _buf(200, 1))
self.assertEqual(_varint(300), _buf(0b10101100, 0b00000010))
self.assertEqual(_varint(1000000000), _buf(128, 148, 235, 220, 3))
self.assertPreservesEqual(_varint(0), _buf(0))
self.assertPreservesEqual(_varint(10), _buf(10))
self.assertPreservesEqual(_varint(100), _buf(100))
self.assertPreservesEqual(_varint(200), _buf(200, 1))
self.assertPreservesEqual(_varint(300), _buf(0b10101100, 0b00000010))
self.assertPreservesEqual(_varint(1000000000), _buf(128, 148, 235, 220, 3))
def test_simple_seq(self):
self._roundtrip([1,2,3,4], _buf(0xb5, 0x91, 0x92, 0x93, 0x94, 0x84), back=(1,2,3,4))
@ -157,7 +158,7 @@ class BinaryCodecTests(unittest.TestCase):
# python 3
bs = _e(d.items())
self.assertRegex(_hex(bs), r)
self.assertEqual(sorted(_d(bs)), [(u'a', 1), (u'b', 2), (u'c', 3)])
self.assertPreservesEqual(sorted(_d(bs)), [(u'a', 1), (u'b', 2), (u'c', 3)])
def test_long_sequence(self):
self._roundtrip((False,) * 14, _buf(0xb5, b'\x80' * 14, 0x84))
@ -172,9 +173,9 @@ class BinaryCodecTests(unittest.TestCase):
a1 = Embedded(A(1))
a2 = Embedded(A(1))
self.assertNotEqual(encode(a1, encode_embedded=id), encode(a2, encode_embedded=id))
self.assertEqual(encode(a1, encode_embedded=id), encode(a1, encode_embedded=id))
self.assertEqual(ord_(encode(a1, encode_embedded=id)[0]), 0x86)
self.assertEqual(ord_(encode(a2, encode_embedded=id)[0]), 0x86)
self.assertPreservesEqual(encode(a1, encode_embedded=id), encode(a1, encode_embedded=id))
self.assertPreservesEqual(ord_(encode(a1, encode_embedded=id)[0]), 0x86)
self.assertPreservesEqual(ord_(encode(a2, encode_embedded=id)[0]), 0x86)
def test_decode_embedded_absent(self):
with self.assertRaises(DecodeError):
@ -185,15 +186,15 @@ class BinaryCodecTests(unittest.TestCase):
def enc(p):
objects.append(p)
return len(objects) - 1
self.assertEqual(encode([Embedded(object()), Embedded(object())], encode_embedded = enc),
b'\xb5\x86\x90\x86\x91\x84')
self.assertPreservesEqual(encode([Embedded(object()), Embedded(object())], encode_embedded = enc),
b'\xb5\x86\x90\x86\x91\x84')
def test_decode_embedded(self):
objects = [123, 234]
def dec(v):
return objects[v]
self.assertEqual(decode(b'\xb5\x86\x90\x86\x91\x84', decode_embedded = dec),
(Embedded(123), Embedded(234)))
self.assertPreservesEqual(decode(b'\xb5\x86\x90\x86\x91\x84', decode_embedded = dec),
(Embedded(123), Embedded(234)))
def load_binary_samples():
with open(os.path.join(os.path.dirname(__file__), 'samples.bin'), 'rb') as f:
@ -203,16 +204,16 @@ def load_text_samples():
with open(os.path.join(os.path.dirname(__file__), 'samples.pr'), 'rt') as f:
return Parser(f.read(), include_annotations=True, parse_embedded=lambda x: x).next()
class TextCodecTests(unittest.TestCase):
class TextCodecTests(PreservesTestCase):
def test_samples_bin_eq_txt(self):
b = load_binary_samples()
t = load_text_samples()
self.assertEqual(b, t)
self.assertPreservesEqual(b, t)
def test_txt_roundtrip(self):
b = load_binary_samples()
s = stringify(b, format_embedded=lambda x: x)
self.assertEqual(parse(s, include_annotations=True, parse_embedded=lambda x: x), b)
self.assertPreservesEqual(parse(s, include_annotations=True, parse_embedded=lambda x: x), b)
def add_method(d, tName, fn):
if hasattr(fn, 'func_name'):
@ -254,14 +255,14 @@ def install_test(d, variant, tName, binaryForm, annotatedTextForm):
entry = get_expected_values(tName, textForm)
forward = entry['forward']
back = entry['back']
def test_match_expected(self): self.assertEqual(textForm, back)
def test_roundtrip(self): self.assertEqual(self.DS(self.E(textForm)), back)
def test_forward(self): self.assertEqual(self.DS(self.E(forward)), back)
def test_back(self): self.assertEqual(self.DS(binaryForm), back)
def test_back_ann(self): self.assertEqual(self.D(self.E(annotatedTextForm)), annotatedTextForm)
def test_encode(self): self.assertEqual(self.E(forward), binaryForm)
def test_encode_canonical(self): self.assertEqual(self.EC(annotatedTextForm), binaryForm)
def test_encode_ann(self): self.assertEqual(self.E(annotatedTextForm), binaryForm)
def test_match_expected(self): self.assertPreservesEqual(textForm, back)
def test_roundtrip(self): self.assertPreservesEqual(self.DS(self.E(textForm)), back)
def test_forward(self): self.assertPreservesEqual(self.DS(self.E(forward)), back)
def test_back(self): self.assertPreservesEqual(self.DS(binaryForm), back)
def test_back_ann(self): self.assertPreservesEqual(self.D(self.E(annotatedTextForm)), annotatedTextForm)
def test_encode(self): self.assertPreservesEqual(self.E(forward), binaryForm)
def test_encode_canonical(self): self.assertPreservesEqual(self.EC(annotatedTextForm), binaryForm)
def test_encode_ann(self): self.assertPreservesEqual(self.E(annotatedTextForm), binaryForm)
add_method(d, tName, test_match_expected)
add_method(d, tName, test_roundtrip)
add_method(d, tName, test_forward)
@ -284,7 +285,7 @@ def install_exn_test(d, tName, bs, check_proc):
self.fail('did not fail as expected')
add_method(d, tName, test_exn)
class CommonTestSuite(unittest.TestCase):
class CommonTestSuite(PreservesTestCase):
TestCases = Record.makeConstructor('TestCases', 'cases')
samples = load_binary_samples()
@ -325,7 +326,7 @@ class CommonTestSuite(unittest.TestCase):
def EC(self, v):
return encode(v, encode_embedded=lambda x: x, canonicalize=True)
class RecordTests(unittest.TestCase):
class RecordTests(PreservesTestCase):
def test_getters(self):
T = Record.makeConstructor('t', 'x y z')
T2 = Record.makeConstructor('t', 'x y z')
@ -334,8 +335,8 @@ class RecordTests(unittest.TestCase):
self.assertTrue(T.isClassOf(t))
self.assertTrue(T2.isClassOf(t))
self.assertFalse(U.isClassOf(t))
self.assertEqual(T._x(t), 1)
self.assertEqual(T2._y(t), 2)
self.assertEqual(T._z(t), 3)
self.assertPreservesEqual(T._x(t), 1)
self.assertPreservesEqual(T2._y(t), 2)
self.assertPreservesEqual(T._z(t), 3)
with self.assertRaises(TypeError):
U._x(t)

View File

@ -1,4 +1,4 @@
import unittest
from utils import PreservesTestCase
from preserves import *
from preserves.schema import meta, Compiler
@ -8,7 +8,7 @@ def literal_schema(modname, s):
c.load_schema((Symbol(modname),), preserve(s))
return c.root
class BasicSchemaTests(unittest.TestCase):
class BasicSchemaTests(PreservesTestCase):
def test_dictionary_literal(self):
m = literal_schema(
's',
@ -22,7 +22,7 @@ class BasicSchemaTests(unittest.TestCase):
}>
'''))
self.assertEqual(m.s.C.decode({'core': Symbol('true')}), m.s.C())
self.assertEqual(preserve(m.s.C()), {'core': Symbol('true')})
self.assertPreservesEqual(preserve(m.s.C()), {'core': Symbol('true')})
def test_alternation_of_dictionary_literal(self):
m = literal_schema(
@ -40,6 +40,6 @@ class BasicSchemaTests(unittest.TestCase):
}>
'''))
self.assertEqual(m.s.C.decode({'core': Symbol('true')}), m.s.C.core())
self.assertEqual(preserve(m.s.C.core()), {'core': Symbol('true')})
self.assertPreservesEqual(preserve(m.s.C.core()), {'core': Symbol('true')})
self.assertEqual(m.s.C.decode({'notcore': Symbol('true')}), m.s.C.notcore())
self.assertEqual(preserve(m.s.C.notcore()), {'notcore': Symbol('true')})
self.assertPreservesEqual(preserve(m.s.C.notcore()), {'notcore': Symbol('true')})

View File

@ -0,0 +1,9 @@
import unittest
from preserves import cmp
class PreservesTestCase(unittest.TestCase):
    """A unittest.TestCase with an extra assertion built on the `cmp` function
    imported from the `preserves` package."""

    def assertPreservesEqual(self, a, b, msg=None):
        """Fail the test unless `cmp(a, b)` returns 0.

        a, b -- the two values to compare.
        msg  -- optional failure message; when omitted, a default message
                naming both values is used.
        """
        failure_text = msg
        if failure_text is None:
            failure_text = 'Expected %s to be Preserves-equal to %s' % (a, b)
        outcome = cmp(a, b)
        self.assertTrue(outcome == 0, failure_text)

View File

@ -0,0 +1,101 @@
#lang racket/base
;; Conversion between binary32 and binary64 big-endian external format (byte-vectors) and
;; internal double-precision floating-point numbers, with special attention paid to
;; preservation of the quiet/signaling bit of NaNs, which otherwise is frequently disturbed by
;; hardware-level conversion between single and double precision.
(provide bytes->float
float->bytes
bytes->double
double->bytes)
(require "float.rkt")
(require (only-in racket/math nan? infinite?))
(module binary racket/base
  ;; Bit-level predicates over big-endian IEEE 754 encodings held in byte strings.
  ;; In both binary32 and binary64, byte 0 carries the sign in its most significant bit
  ;; (bit 7) followed by the seven high-order exponent bits (bits 6..0).
  (provide (all-defined-out))

  ;; True when `bs` (a big-endian binary32 encoding) has all eight exponent bits set,
  ;; which is the case exactly for infinities and NaNs. The eighth (lowest) exponent bit
  ;; is the top bit of byte 1.
  (define (binary32-nan-or-inf? bs)
    (and (= (bitwise-bit-field (bytes-ref bs 0) 0 7) #x7f)
         (bitwise-bit-set? (bytes-ref bs 1) 7)))

  ;; True when `bs` (a big-endian binary64 encoding) has all eleven exponent bits set,
  ;; which is the case exactly for infinities and NaNs. The four lowest exponent bits
  ;; are the high nibble of byte 1.
  (define (binary64-nan-or-inf? bs)
    (and (= (bitwise-bit-field (bytes-ref bs 0) 0 7) #x7f)
         (= (bitwise-bit-field (bytes-ref bs 1) 4 8) #x0f)))

  ;; True when the sign bit of the big-endian encoding `bs` is set. `bitwise-bit-set?`
  ;; numbers bits from the least significant end, so the sign is bit 7 of byte 0
  ;; (bit 0 would be the lowest of the exponent bits stored in that byte).
  (define (sign-bit-set? bs)
    (bitwise-bit-set? (bytes-ref bs 0) 7)))
(require (submod "." binary))
;; Decodes the 4-byte big-endian binary32 encoding `bs` into a `float` wrapper around a
;; flonum. Infinities and NaNs are widened to binary64 by direct bit manipulation rather
;; than by `floating-point-bytes->real`, so that the NaN fraction bits (including the
;; quiet/signaling bit) are carried over unchanged.
(define (bytes->float bs)
  (cond
    [(binary32-nan-or-inf? bs)
     (define single-bits (integer-bytes->integer bs #f #t))
     ;; Sign bit plus the eight (all-ones) exponent bits.
     (define sign+exponent (bitwise-bit-field single-bits 23 32))
     ;; The 23 fraction bits.
     (define fraction (bitwise-bit-field single-bits 0 23))
     ;; binary64 layout: sign at bit 63, exponent in bits 52..62, fraction in bits 0..51.
     ;; The shifted sign+exponent fills bits 55..63; the constant sets the remaining
     ;; exponent bits 52..54; the fraction is left-aligned in the 52-bit fraction field.
     (define double-bits
       (bitwise-ior (arithmetic-shift sign+exponent 55)
                    #x0070000000000000
                    (arithmetic-shift fraction 29)))
     (define widened (integer->integer-bytes double-bits 8 #f #t))
     (float (floating-point-bytes->real widened #t 0 8))]
    [else
     (float (floating-point-bytes->real bs #t 0 4))]))
;; Encodes the flonum held in the `float` wrapper `v` as a 4-byte big-endian binary32
;; byte string. Infinities and NaNs are narrowed from their binary64 bit pattern by
;; direct bit manipulation, keeping the top 23 fraction bits (and therefore the
;; quiet/signaling bit) exactly as they are.
(define (float->bytes v)
  (define d (float-value v))
  (cond
    [(or (nan? d) (infinite? d))
     (define double-bits
       (integer-bytes->integer (real->floating-point-bytes d 8 #t) #f #t))
     ;; Sign bit plus the eight high-order exponent bits (bits 55..63).
     (define sign+exponent (bitwise-bit-field double-bits 55 64))
     ;; The 23 most significant fraction bits (bits 29..51).
     (define fraction (bitwise-bit-field double-bits 29 52))
     (define single-bits
       (bitwise-ior (arithmetic-shift sign+exponent 23) fraction))
     (integer->integer-bytes single-bits 4 #f #t)]
    [else
     (real->floating-point-bytes d 4 #t)]))
;; Decodes the first 8 bytes of `bs`, read as a big-endian IEEE 754 binary64 encoding,
;; into a flonum.
(define (bytes->double bs)
  (let ((big-endian? #t)
        (start 0)
        (end 8))
    (floating-point-bytes->real bs big-endian? start end)))
;; Encodes the real number `v` as an 8-byte big-endian IEEE 754 binary64 byte string.
(define (double->bytes v)
  (let ((size-in-bytes 8)
        (big-endian? #t))
    (real->floating-point-bytes v size-in-bytes big-endian?)))
(module+ test
  (require rackunit)
  (require file/sha1)

  ;; Each checker decodes a hex-encoded bit pattern and re-encodes it, expecting to get
  ;; back exactly the same hex digits.
  (define (check-roundtrip-double hex)
    (define decoded (bytes->double (hex-string->bytes hex)))
    (check-equal? (bytes->hex-string (double->bytes decoded)) hex))

  (define (check-roundtrip-float hex)
    (define decoded (bytes->float (hex-string->bytes hex)))
    (check-equal? (bytes->hex-string (float->bytes decoded)) hex))

  ;; One ordinary value, then both signs of: exponent-all-ones patterns with the top
  ;; fraction bit clear (two NaNs and an infinity), followed by patterns with the top
  ;; fraction bit set.
  (for-each check-roundtrip-double
            '("0123456789abcdef"
              "7ff0000000000321"
              "7ff0000000000001"
              "7ff0000000000000"
              "fff0000000000321"
              "fff0000000000001"
              "fff0000000000000"
              "7ff8000000000321"
              "7ff8000000000001"
              "7ff8000000000000"
              "fff8000000000321"
              "fff8000000000001"
              "fff8000000000000"))

  ;; The same selection of patterns in binary32 form.
  (for-each check-roundtrip-float
            '("01234567"
              "7f800321"
              "7f800001"
              "7f800000"
              "ff800321"
              "ff800001"
              "ff800000"
              "7fc00321"
              "7fc00001"
              "7fc00000"
              "ffc00321"
              "ffc00001"
              "ffc00000"))
  )

View File

@ -8,8 +8,8 @@
;;---------------------------------------------------------------------------
;; Representing values
(require "float.rkt" "float-bytes.rkt")
(struct record (label fields) #:transparent)
(struct float (value) #:transparent) ;; a marker for single-precision I/O
(struct annotated (annotations item) #:transparent)
(struct embedded (value) #:transparent)
@ -23,8 +23,8 @@
(match (next-byte)
[#x80 #f]
[#x81 #t]
[#x82 (float (floating-point-bytes->real (next-bytes 4) #t 0 4))]
[#x83 (floating-point-bytes->real (next-bytes 8) #t 0 8)]
[#x82 (bytes->float (next-bytes 4))]
[#x83 (bytes->double (next-bytes 8))]
[#x84 '#:end]
[#x85 (let ((a (next)))
(match (next)
@ -80,8 +80,8 @@
(match v
[#f (write-byte #x80 out-port)]
[#t (write-byte #x81 out-port)]
[(float v) (write-byte #x82 out-port) (output-bytes (real->floating-point-bytes v 4 #t))]
[(? flonum?) (write-byte #x83 out-port) (output-bytes (real->floating-point-bytes v 8 #t))]
[(float _) (write-byte #x82 out-port) (output-bytes (float->bytes v))]
[(? flonum?) (write-byte #x83 out-port) (output-bytes (double->bytes v))]
[(annotated as v)
(for [(a (in-list as))] (write-byte #x85 out-port) (output a))

View File

@ -7,6 +7,7 @@
(require "record.rkt")
(require "embedded.rkt")
(require "float.rkt")
(require "float-bytes.rkt")
(require "annotation.rkt")
(require "varint.rkt")
(require racket/set)
@ -70,8 +71,8 @@
(match lead-byte
[#x80 #f]
[#x81 #t]
[#x82 (float (floating-point-bytes->real (next-bytes 4) #t 0 4))]
[#x83 (floating-point-bytes->real (next-bytes 8) #t 0 8)]
[#x82 (bytes->float (next-bytes 4))]
[#x83 (bytes->double (next-bytes 8))]
[#x84 '#:end]
[#x85 (let ((a (next)))
(if read-annotations?

View File

@ -10,6 +10,7 @@
(require "read-binary.rkt")
(require "record.rkt")
(require "float.rkt")
(require "float-bytes.rkt")
(require syntax/readerr)
(require (only-in file/sha1 hex-string->bytes))
(require (only-in net/base64 base64-decode))
@ -67,8 +68,6 @@
(define (next*)
(skip-whitespace)
(match (next-char)
[#\- (read-intpart (list #\-) (next-char))]
[(and c (or #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) (read-intpart '() c)]
[#\" (read-string #\")]
[(== PIPE) (string->symbol (read-string PIPE))]
@ -82,21 +81,12 @@
[#\t #t]
[#\{ (sequence-fold (set) set-add* values #\})]
[#\" (read-literal-binary)]
[#\x (if (eqv? (next-char) #\")
(read-hex-binary '())
(parse-error "Expected open-quote at start of hex ByteString"))]
[#\x (match (next-char)
[#\" (read-hex-binary '())]
[#\f (read-hex-float 'float)]
[#\d (read-hex-float 'double)]
[c (parse-error "Invalid #x syntax: ~v" c)])]
[#\[ (read-base64-binary '())]
[#\= (define bs (read-preserve/text in-port #:read-syntax? #t #:source source))
(when (not (bytes? (annotated-item bs)))
(parse-error "ByteString must follow #="))
(when (not (null? (annotated-annotations bs)))
(parse-error "Annotations not permitted after #="))
(bytes->preserve
(annotated-item bs)
(lambda (message . args)
(apply parse-error (string-append "Inline binary value: " message) args))
#:read-syntax? read-syntax?
#:on-short (lambda () (parse-error "Incomplete inline binary value")))]
[#\! (embedded (decode-embedded (next)))]
[c (parse-error "Invalid # syntax: ~v" c)])]
@ -110,7 +100,7 @@
[#\] (parse-error "Unexpected ]")]
[#\} (parse-error "Unexpected }")]
[c (read-raw-symbol (list c))]))
[c (read-raw-symbol-or-number (list c))]))
(define (set-add* s e)
(when (set-member? s e) (parse-error "Duplicate set element: ~v" e))
@ -159,49 +149,6 @@
(annotated '() loc v))))
(lambda (pos0 v) v)))
;;---------------------------------------------------------------------------
;; Numbers
(define (read-intpart acc-rev ch)
(match ch
[#\0 (read-fracexp (cons ch acc-rev))]
[_ (read-digit+ acc-rev read-fracexp ch)]))
(define (read-digit* acc-rev k)
(match (peek-char in-port)
[(? char? (? char-numeric?)) (read-digit* (cons (read-char in-port) acc-rev) k)]
[_ (k acc-rev)]))
(define (read-digit+ acc-rev k [ch (read-char in-port)])
(match ch
[(? char? (? char-numeric?)) (read-digit* (cons ch acc-rev) k)]
[_ (parse-error "Incomplete number")]))
(define (read-fracexp acc-rev)
(match (peek-char in-port)
[#\. (read-digit+ (cons (read-char in-port) acc-rev) read-exp)]
[_ (read-exp acc-rev)]))
(define (read-exp acc-rev)
(match (peek-char in-port)
[(or #\e #\E) (read-sign-and-exp (cons (read-char in-port) acc-rev))]
[_ (finish-number acc-rev)]))
(define (read-sign-and-exp acc-rev)
(match (peek-char in-port)
[(or #\+ #\-) (read-digit+ (cons (read-char in-port) acc-rev) finish-number)]
[_ (read-digit+ acc-rev finish-number)]))
(define (finish-number acc-rev)
(define s (list->string (reverse acc-rev)))
(define n (string->number s 10))
(when (not n) (parse-error "Invalid number: ~v" s))
(if (flonum? n)
(match (peek-char in-port)
[(or #\f #\F) (read-char in-port) (float n)]
[_ n])
n))
;;---------------------------------------------------------------------------
;; String-like things
@ -279,6 +226,19 @@
[else
(parse-error "Invalid hex character")]))
;;---------------------------------------------------------------------------
;; Hex-encoded floating point numbers
;; Reads the remainder of a hex-encoded floating-point literal. `precision` is either
;; 'float (4 bytes expected) or 'double (8 bytes expected). The next character must be
;; the opening double-quote; the hex digits are then read with `read-hex-binary` and
;; decoded with `bytes->float` or `bytes->double` respectively.
(define (read-hex-float precision)
  (when (not (eqv? (next-char) #\"))
    (parse-error "Missing open-double-quote in hex-encoded floating-point number"))
  (define payload (read-hex-binary '()))
  ;; Pick the expected length and the decoder together, after the bytes have been read.
  (define-values (expected-length decode)
    (match precision
      ['float (values 4 bytes->float)]
      ['double (values 8 bytes->double)]))
  (when (not (= (bytes-length payload) expected-length))
    (parse-error "Incorrect number of bytes in hex-encoded floating-point number"))
  (decode payload))
;;---------------------------------------------------------------------------
;; Base64-encoded ByteStrings
@ -334,16 +294,56 @@
#\}))
;;---------------------------------------------------------------------------
;; "Raw" symbols
;; "Raw" symbols and numbers
(define (read-raw-symbol acc)
(define (read-raw-symbol-or-number acc)
(match (peek-char in-port)
[(or (? eof-object?)
(? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\>
#\" #\; #\, #\@ #\# #\: (== PIPE)
(? char-whitespace?))))
(string->symbol (list->string (reverse acc)))]
[_ (read-raw-symbol (cons (read-char in-port) acc))]))
(let ((input (reverse acc)))
(or (analyze-number input)
(string->symbol (list->string input))))]
[_ (read-raw-symbol-or-number (cons (read-char in-port) acc))]))
;; Tries to interpret `input`, a list of characters, as a number. An optional leading
;; `+` or `-` is moved onto the accumulator, after which at least one digit is required.
;; The result is whatever the digit/fraction/exponent chain produces: a number on
;; success, #f when `input` is not a number.
(define (analyze-number input)
  (define signed?
    (and (pair? input)
         (memv (car input) '(#\+ #\-))
         #t))
  (if signed?
      (read-digit+ (list (car input)) read-fracexp (cdr input))
      (read-digit+ (list) read-fracexp input)))
;; Consumes zero or more leading numeric characters from `input`, pushing each onto the
;; reversed accumulator `acc-rev`, then calls the continuation `k` with the accumulator
;; and the unconsumed remainder of `input`.
(define (read-digit* acc-rev k input)
  (if (and (pair? input)
           (char? (car input))
           (char-numeric? (car input)))
      (read-digit* (cons (car input) acc-rev) k (cdr input))
      (k acc-rev input)))
;; Like `read-digit*`, but requires at least one numeric character at the head of
;; `input`; yields #f when there is none.
(define (read-digit+ acc-rev k input)
  (and (pair? input)
       (char? (car input))
       (char-numeric? (car input))
       (read-digit* (cons (car input) acc-rev) k (cdr input))))
;; Handles an optional fractional part: a `.` must be followed by at least one digit,
;; after which an optional exponent is read. Without a `.`, proceeds directly to the
;; exponent.
(define (read-fracexp acc-rev input)
  (if (and (pair? input) (eqv? (car input) #\.))
      (read-digit+ (cons #\. acc-rev) read-exp (cdr input))
      (read-exp acc-rev input)))
;; Handles an optional exponent marker: on `e` or `E` the marker is accumulated and the
;; (optionally signed) exponent digits are read; otherwise the number is finished.
(define (read-exp acc-rev input)
  (if (and (pair? input) (memv (car input) '(#\e #\E)))
      (read-sign-and-exp (cons (car input) acc-rev) (cdr input))
      (finish-number acc-rev input)))
;; Reads the exponent proper: an optional `+` or `-` followed by at least one digit,
;; then finishes the number.
(define (read-sign-and-exp acc-rev input)
  (if (and (pair? input) (memv (car input) '(#\+ #\-)))
      (read-digit+ (cons (car input) acc-rev) finish-number (cdr input))
      (read-digit+ acc-rev finish-number input)))
;; Converts the accumulated characters to a number with `string->number` (radix 10) and
;; checks what is left in `input`:
;;  - a flonum followed by exactly one `f` or `F` is wrapped with `float`;
;;  - any number with nothing left over is returned as-is;
;;  - everything else (unparseable text, or trailing characters) yields #f.
(define (finish-number acc-rev input)
  (define text (list->string (reverse acc-rev)))
  (define parsed (string->number text 10))
  (and parsed
       (if (and (flonum? parsed) (member input '((#\f) (#\F))))
           (float parsed)
           (and (null? input) parsed))))
;;---------------------------------------------------------------------------
;; Main entry point to parser

View File

@ -74,9 +74,45 @@
dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
dict4: @"Unexpected close brace" <ParseError "}">
dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84">
double0: <Test #x"830000000000000000" 0.0>
double+0: <Test #x"830000000000000000" +0.0>
double-0: <Test #x"838000000000000000" -0.0>
double1: <Test #x"833ff0000000000000" 1.0>
double2: <Test #x"83fe3cb7b759bf0426" -1.202e300>
double3: <Test #x"83123456789abcdef0" #xd"12 34 56 78 9a bc de f0">
double4: @"Fewer than 16 digits" <ParseError "#xd\"12345678\"">
double5: @"More than 16 digits" <ParseError "#xd\"123456789abcdef012\"">
double6: @"Invalid chars" <ParseError "#xd\"12zz56789abcdef0\"">
double7: @"Positive infinity" <Test #x"837ff0000000000000" #xd"7ff0000000000000">
double8: @"Negative infinity" <Test #x"83fff0000000000000" #xd"fff0000000000000">
double9: @"-qNaN" <Test #x"83fff0000000000001" #xd"fff0000000000001">
double10: @"-qNaN" <Test #x"83fff0000000000111" #xd"fff0000000000111">
double11: @"+qNaN" <Test #x"837ff0000000000001" #xd"7ff0000000000001">
double12: @"+qNaN" <Test #x"837ff0000000000111" #xd"7ff0000000000111">
double13: @"Bad spacing" <ParseError "#xd\"12345 6789abcdef0\"">
double14: @"-sNaN" <Test #x"83fff8000000000001" #xd"fff8000000000001">
double15: @"-sNaN" <Test #x"83fff8000000000111" #xd"fff8000000000111">
double16: @"+sNaN" <Test #x"837ff8000000000001" #xd"7ff8000000000001">
double17: @"+sNaN" <Test #x"837ff8000000000111" #xd"7ff8000000000111">
float0: <Test #x"8200000000" 0.0f>
float+0: <Test #x"8200000000" +0.0f>
float-0: <Test #x"8280000000" -0.0f>
float1: <Test #x"823f800000" 1.0f>
float2: <Test #x"8212345678" #xf"12 34 56 78">
float3: @"Fewer than 8 digits" <ParseError "#xf\"123456\"">
float4: @"More than 8 digits" <ParseError "#xf\"123456789a\"">
float5: @"Invalid chars" <ParseError "#xf\"12zz5678\"">
float6: @"Positive infinity" <Test #x"827f800000" #xf"7f800000">
float7: @"Negative infinity" <Test #x"82ff800000" #xf"ff800000">
float8: @"+sNaN" <Test #x"827f800001" #xf"7f800001">
float9: @"+sNaN" <Test #x"827f800111" #xf"7f800111">
float10: @"-sNaN" <Test #x"82ff800001" #xf"ff800001">
float11: @"-sNaN" <Test #x"82ff800111" #xf"ff800111">
float12: @"Bad spacing" <ParseError "#xf\"12345 678\"">
float13: @"+qNaN" <Test #x"827fc00001" #xf"7fc00001">
float14: @"+qNaN" <Test #x"827fc00111" #xf"7fc00111">
float15: @"-qNaN" <Test #x"82ffc00001" #xf"ffc00001">
float16: @"-qNaN" <Test #x"82ffc00111" #xf"ffc00111">
int-257: <Test #x"a1feff" -257>
int-256: <Test #x"a1ff00" -256>
int-255: <Test #x"a1ff01" -255>
@ -89,10 +125,13 @@
int-2: <Test #x"9e" -2>
int-1: <Test #x"9f" -1>
int0: <Test #x"90" 0>
int+0: <Test #x"90" +0>
int-0: <Test #x"90" -0>
int1: <Test #x"91" 1>
int12: <Test #x"9c" 12>
int13: <Test #x"a00d" 13>
int127: <Test #x"a07f" 127>
int+127: <Test #x"a07f" +127>
int128: <Test #x"a10080" 128>
int255: <Test #x"a100ff" 255>
int256: <Test #x"a10100" 256>
@ -112,6 +151,8 @@
list8: @"Missing close bracket" <ParseShort "[">
list9: @"Unexpected close bracket" <ParseError "]">
list10: @"Missing end byte" <DecodeShort #x"b58080">
list11: <Test #x"b59184" [01]>
list12: <Test #x"b59c84" [12]>
noinput0: @"No input at all" <DecodeEOF #x"">
embed0: <Test #x"8690" #!0>
embed1: <Test #x"868690" #!#!0>
@ -138,17 +179,22 @@
string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
symbol0: <Test #x"b300" ||>
symbol2: <Test #x"b30568656c6c6f" hello>
symbol3: <Test #x"b305312d322d33" 1-2-3>
symbol4: <Test #x"b305612d622d63" a-b-c>
symbol5: <Test #x"b305612b622b63" a+b+c>
symbol6: <Test #x"b3012b" +>
symbol7: <Test #x"b3032b2b2b" +++>
symbol8: <Test #x"b3012d" ->
symbol9: <Test #x"b3032d2d2d" --->
symbol10: <Test #x"b3022d61" -a>
symbol11: <Test #x"b3042d2d2d61" ---a>
symbol12: <Test #x"b3042d2d2d31" ---1>
symbol13: <Test #x"b3042b312e78" +1.x>
tag0: @"Unexpected end tag" <DecodeError #x"84">
tag1: @"Invalid tag" <DecodeError #x"10">
tag2: @"Invalid tag" <DecodeError #x"61b10110">
whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " ">
whitespace1: @"No input at all" <ParseEOF "">
value1: <Test #"\xB2\x06corymb" #=#"\xB2\x06corymb">
value2: <Test #"\x81" #=#"\x81">
value3: <Test #"\x81" #=#[gQ]>
value4: <Test #"\x81" #=#[gQ==]>
value5: <Test #"\x81" #= #[gQ==]>
value6: <Test #x"b591929384" #=#x"b591929384">
longlist14: <Test #x"b5808080808080808080808080808084"
[#f #f #f #f #f

View File

@ -8,6 +8,7 @@
(require "record.rkt")
(require "embedded.rkt")
(require "float.rkt")
(require "float-bytes.rkt")
(require "annotation.rkt")
(require "varint.rkt")
(require "object-id.rkt")
@ -86,12 +87,12 @@
[#f (output-byte #x80)]
[#t (output-byte #x81)]
[(float v)
[(float _)
(output-byte #x82)
(output-bytes (real->floating-point-bytes v 4 #t))]
(output-bytes (float->bytes v))]
[(? flonum?)
(output-byte #x83)
(output-bytes (real->floating-point-bytes v 8 #t))]
(output-bytes (double->bytes v))]
[(annotated as _ v)
(when write-annotations?

View File

@ -12,11 +12,14 @@
(require "embedded.rkt")
(require "annotation.rkt")
(require "float.rkt")
(require "float-bytes.rkt")
(require "record.rkt")
(require "object-id.rkt")
(require racket/dict)
(require racket/set)
(require (only-in racket/port with-output-to-string))
(require (only-in racket/math nan? infinite?))
(require (only-in file/sha1 bytes->hex-string))
(define PIPE #\|)
@ -132,6 +135,15 @@
(write-binary-stringlike v)
(write-binary-base64 outer-distance v)))))
;; Emits the flonum `v` via `!`. `precision` is 'float or 'double.
;; NaNs and infinities are written in hex form, `#xf"..."` or `#xd"..."`, using the bytes
;; produced by `float->bytes` / `double->bytes`; every other value is written with `~v`,
;; followed by an `f` suffix for 'float and nothing for 'double.
(define (write-float v precision)
  (cond
    [(or (nan? v) (infinite? v))
     (define tag (match precision ['float "f"] ['double "d"]))
     (define encoded
       (match precision
         ['float (float->bytes (float v))]
         ['double (double->bytes v)]))
     (! "#x~a\"~a\"" tag (bytes->hex-string encoded))]
    [else
     (define suffix (match precision ['float "f"] ['double ""]))
     (! "~v~a" v suffix)]))
(define (write-value distance v)
(match v
[(annotated annotations _ item)
@ -143,8 +155,8 @@
(write-value distance item)]
[#f (! "#f")]
[#t (! "#t")]
[(float v) (! "~vf" v)]
[(? flonum?) (! "~v" v)]
[(float v) (write-float v 'float)]
[(? flonum?) (write-float v 'double)]
[(? integer? x) (! "~v" v)]
[(? string?)
(! "\"")

View File

@ -15,6 +15,7 @@ gitlab = { repository = "preserves/preserves" }
base64 = "0.13"
dtoa = "0.4"
num = "0.4"
lazy_static = "1.4.0"
regex = "1.5"
serde = { version = "1.0", features = ["derive"] }
serde_bytes = "0.11"

View File

@ -26,8 +26,11 @@ use crate::value::reader::BinarySource;
use crate::value::reader::ReaderResult;
use crate::value::repr::Annotations;
use lazy_static::lazy_static;
use num::bigint::BigInt;
use std::convert::TryInto;
use std::io;
use std::iter::FromIterator;
use std::marker::PhantomData;
@ -137,86 +140,21 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
}
}
fn read_intpart<N: NestedValue>(&mut self, mut bs: Vec<u8>, c: u8) -> io::Result<N> {
match c {
b'0' => {
bs.push(c);
self.read_fracexp(bs)
}
_ => {
self.read_digit1(&mut bs, c)?;
self.read_fracexp(bs)
}
fn read_hex_float<N: NestedValue>(&mut self, bytecount: usize) -> io::Result<N> {
if self.next_byte()? != b'"' {
return Err(io_syntax_error("Missing open-double-quote in hex-encoded floating-point number"));
}
}
fn read_fracexp<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
let mut is_float = false;
match self.peek() {
Ok(b'.') => {
is_float = true;
bs.push(self.next_byte()?);
let c = self.next_byte()?;
self.read_digit1(&mut bs, c)?;
}
_ => ()
let bs = self.read_hex_binary()?;
if bs.len() != bytecount {
return Err(io_syntax_error("Incorrect number of bytes in hex-encoded floating-point number"));
}
match self.peek() {
Ok(b'e') | Ok(b'E') => {
bs.push(self.next_byte()?);
self.read_sign_and_exp(bs)
}
_ => self.finish_number(bs, is_float)
match bytecount {
4 => Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()),
8 => Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()),
_ => Err(io_syntax_error("Unsupported byte count in hex-encoded floating-point number")),
}
}
fn read_sign_and_exp<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
match self.peek()? {
b'+' | b'-' => bs.push(self.next_byte()?),
_ => (),
}
let c = self.next_byte()?;
self.read_digit1(&mut bs, c)?;
self.finish_number(bs, true)
}
fn finish_number<N: NestedValue>(&mut self, bs: Vec<u8>, is_float: bool) -> io::Result<N> {
let s = decode_utf8(bs)?;
if is_float {
match self.peek() {
Ok(b'f') | Ok(b'F') => {
self.skip()?;
Ok(N::new(s.parse::<f32>().map_err(
|_| io_syntax_error(&format!(
"Invalid single-precision number: {:?}", s)))?))
}
_ =>
Ok(N::new(s.parse::<f64>().map_err(
|_| io_syntax_error(&format!(
"Invalid double-precision number: {:?}", s)))?))
}
} else {
Ok(N::new(s.parse::<BigInt>().map_err(
|_| io_syntax_error(&format!(
"Invalid signed-integer number: {:?}", s)))?))
}
}
fn read_digit1(&mut self, bs: &mut Vec<u8>, c: u8) -> io::Result<()>
{
if !(c as char).is_digit(10) {
return Err(io_syntax_error("Incomplete number"));
}
bs.push(c);
while let Ok(c) = self.peek() {
if !(c as char).is_digit(10) {
break;
}
bs.push(self.next_byte()?);
}
Ok(())
}
fn read_stringlike<X, H, R>(
&mut self,
mut seed: R,
@ -299,14 +237,13 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
|bs, r| Ok(bs.push(r.hexnum(2)? as u8)))?[..]))
}
fn read_hex_binary<N: NestedValue>(&mut self) -> io::Result<N> {
fn read_hex_binary(&mut self) -> io::Result<Vec<u8>> {
let mut s = String::new();
loop {
self.skip_whitespace();
let c1 = self.next_byte()? as char;
if c1 == '"' {
let bs = hex::HexParser::Strict.decode(&s).unwrap();
return Ok(N::new(&bs[..]));
return Ok(hex::HexParser::Strict.decode(&s).unwrap());
}
let c2 = self.next_byte()? as char;
if !(c1.is_digit(16) && c2.is_digit(16)) {
@ -364,7 +301,11 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
}
}
fn read_raw_symbol<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
fn read_raw_symbol_or_number<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
lazy_static! {
static ref NUMBER_RE: regex::Regex = regex::Regex::new(
r"^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$").unwrap();
}
loop {
let c = match self.peek() {
Err(e) if is_eof_io_error(&e) => b' ',
@ -374,8 +315,33 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
};
match c {
b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' |
b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' =>
return Ok(N::symbol(&decode_utf8(bs)?)),
b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => {
let s = decode_utf8(bs)?;
return match NUMBER_RE.captures(&s) {
None => Ok(N::symbol(&s)),
Some(m) => match m.get(2) {
None => Ok(N::new(s.parse::<BigInt>().map_err(
|_| io_syntax_error(&format!(
"Invalid signed-integer number: {:?}", s)))?)),
Some(_) => {
if let Some(maybe_f) = m.get(7) {
let s = m[1].to_owned() + &m[3];
if maybe_f.range().is_empty() {
Ok(N::new(s.parse::<f64>().map_err(
|_| io_syntax_error(&format!(
"Invalid double-precision number: {:?}", s)))?))
} else {
Ok(N::new(s.parse::<f32>().map_err(
|_| io_syntax_error(&format!(
"Invalid single-precision number: {:?}", s)))?))
}
} else {
panic!("Internal error: cannot analyze number {:?}", s)
}
}
}
}
}
c => {
self.skip()?;
bs.push(c)
@ -396,15 +362,6 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
Err(e) => return Err(e.into()),
};
Ok(Some(match c {
b'-' => {
self.skip()?;
let c1 = self.next_byte()?;
self.read_intpart(vec![b'-'], c1)?
}
b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => {
self.skip()?;
self.read_intpart(Vec::new(), c)?
}
b'"' => {
self.skip()?;
N::new(self.read_string(b'"')?)
@ -435,26 +392,13 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
b't' => N::new(true),
b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations)?.into_iter())),
b'"' => self.read_literal_binary()?,
b'x' => if self.next_byte()? == b'"' {
self.read_hex_binary()?
} else {
return Err(io_syntax_error("Expected open-quote at start of hex ByteString"));
b'x' => match self.next_byte()? {
b'"' => N::new(&self.read_hex_binary()?[..]),
b'f' => self.read_hex_float(4)?,
b'd' => self.read_hex_float(8)?,
_ => return Err(io_syntax_error("Invalid #x syntax")),
},
b'[' => self.read_base64_binary()?,
b'=' => {
let bs_val: N = self.demand_next(true)?;
if bs_val.annotations().slice().len() > 0 {
return Err(io_syntax_error("Annotations not permitted after #="));
}
match bs_val.value().as_bytestring() {
None =>
return Err(io_syntax_error("ByteString must follow #=")),
Some(bs) =>
crate::value::BytesBinarySource::new(bs)
.packed(ViaCodec::new(&mut self.dec))
.demand_next(read_annotations)?
}
}
b'!' => {
let v = self.next_iovalue(read_annotations)?;
Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
@ -483,7 +427,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
b'}' => return Err(io_syntax_error("Unexpected }")),
other => {
self.skip()?;
self.read_raw_symbol(vec![other])?
self.read_raw_symbol_or_number(vec![other])?
}
}))
}

View File

@ -1,3 +1,4 @@
use crate::hex::HexFormatter;
use crate::value::DomainEncode;
use crate::value::IOValue;
use crate::value::IOValueDomainCodec;
@ -6,6 +7,8 @@ use crate::value::Writer;
use crate::value::suspendable::Suspendable;
use crate::value::writer::CompoundWriter;
use lazy_static::lazy_static;
use num::bigint::BigInt;
use std::io;
@ -231,13 +234,23 @@ impl<W: io::Write> Writer for TextWriter<W> {
}
fn write_f32(&mut self, v: f32) -> io::Result<()> {
dtoa::write(&mut *self.w, v)?;
write!(self.w, "f")
if v.is_nan() || v.is_infinite() {
write!(self.w, "#xf\"{}\"",
HexFormatter::Packed.encode(&u32::to_be_bytes(f32::to_bits(v))))
} else {
dtoa::write(&mut *self.w, v)?;
write!(self.w, "f")
}
}
fn write_f64(&mut self, v: f64) -> io::Result<()> {
dtoa::write(&mut *self.w, v)?;
Ok(())
if v.is_nan() || v.is_infinite() {
write!(self.w, "#xd\"{}\"",
HexFormatter::Packed.encode(&u64::to_be_bytes(f64::to_bits(v))))
} else {
dtoa::write(&mut *self.w, v)?;
Ok(())
}
}
simple_writer_method!(write_i8, i8);
@ -269,9 +282,12 @@ impl<W: io::Write> Writer for TextWriter<W> {
}
fn write_symbol(&mut self, v: &str) -> io::Result<()> {
// FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
let re = regex::Regex::new("^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$").unwrap();
if re.is_match(v) {
lazy_static! {
// FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
static ref RE: regex::Regex =
regex::Regex::new("^[-a-zA-Z0-9~!$%^&*?_=+/.]+$").unwrap();
}
if RE.is_match(v) {
write!(self.w, "{}", v)
} else {
write!(self.w, "|")?;

View File

@ -40,10 +40,10 @@ Standalone documents may have trailing whitespace.
Any `Value` may be preceded by whitespace.
Value = ws (Record / Collection / Atom / Embedded / Machine)
Value = ws (Record / Collection / Atom / Embedded)
Collection = Sequence / Dictionary / Set
Atom = Boolean / Float / Double / SignedInteger /
String / ByteString / Symbol
Atom = Boolean / String / ByteString /
QuotedSymbol / SymbolOrNumber
Each `Record` is an angle-bracket enclosed grouping of its
label-`Value` followed by its field-`Value`s.
@ -73,55 +73,6 @@ false, respectively.
Boolean = %s"#t" / %s"#f"
Numeric data follow the
[JSON grammar](https://tools.ietf.org/html/rfc8259#section-6), with
the addition of a trailing “f” distinguishing `Float` from `Double`
values. `Float`s and `Double`s always have either a fractional part or
an exponent part, where `SignedInteger`s never have
either.[^reading-and-writing-floats-accurately]
[^arbitrary-precision-signedinteger]
Float = flt %i"f"
Double = flt
SignedInteger = int
digit1-9 = %x31-39
nat = %x30 / ( digit1-9 *DIGIT )
int = ["-"] nat
frac = "." 1*DIGIT
exp = %i"e" ["-"/"+"] 1*DIGIT
flt = int (frac exp / frac / exp)
[^reading-and-writing-floats-accurately]: **Implementation note.**
Your language's standard library likely has a good routine for
converting between decimal notation and IEEE 754 floating-point.
However, if not, or if you are interested in the challenges of
accurately reading and writing floating point numbers, see the
excellent matched pair of 1990 papers by Clinger and Steele &
White, and a recent follow-up by Jaffer:
Clinger, William D. How to Read Floating Point Numbers
Accurately. In Proc. PLDI. White Plains, New York, 1990.
<https://doi.org/10.1145/93542.93557>.
Steele, Guy L., Jr., and Jon L. White. How to Print
Floating-Point Numbers Accurately. In Proc. PLDI. White Plains,
New York, 1990. <https://doi.org/10.1145/93542.93559>.
Jaffer, Aubrey. Easy Accurate Reading and Writing of
Floating-Point Numbers. ArXiv:1310.8121 [Cs], 27 October 2013.
<http://arxiv.org/abs/1310.8121>.
[^arbitrary-precision-signedinteger]: **Implementation note.** Be
aware when implementing reading and writing of `SignedInteger`s
that the data model *requires* arbitrary-precision integers. Your
implementation may (but, ideally, should not) truncate precision
when reading or writing a `SignedInteger`; however, if it does so,
it should (a) signal its client that truncation has occurred, and
(b) make it clear to the client that comparing such truncated
values for equality or ordering will not yield results that match
the expected semantics of the data model.
`String`s are,
[as in JSON](https://tools.ietf.org/html/rfc8259#section-7), possibly
escaped text surrounded by double quotes. The escaping rules are the
@ -177,62 +128,109 @@ Base64 characters are allowed.
ByteString =/ "#[" *(ws / base64char) ws "]"
base64char = %x41-5A / %x61-7A / %x30-39 / "+" / "/" / "-" / "_" / "="
A `Symbol` may be written in a “bare” form[^cf-sexp-token] so long as
it conforms to certain restrictions on the characters appearing in the
symbol. Alternatively, it may be written in a quoted form. The quoted
form is much the same as the syntax for `String`s, including embedded
escape syntax, except using a bar or pipe character (`|`) instead of a
double quote mark.
A `Symbol` may be written in either of two forms.
Symbol = symstart *symcont / "|" *symchar "|"
symstart = ALPHA / sympunct / symustart
symcont = ALPHA / sympunct / symustart / symucont / DIGIT / "-"
sympunct = "~" / "!" / "$" / "%" / "^" / "&" / "*" /
"?" / "_" / "=" / "+" / "/" / "."
The first is a quoted form, much the same as the syntax for `String`s,
including embedded escape syntax, except using a bar or pipe character
(`|`) instead of a double quote mark.
QuotedSymbol = "|" *symchar "|"
symchar = unescaped / %x22 / escape (escaped / %x7C / %s"u" 4HEXDIG)
symustart = <any code point greater than 127 whose Unicode
category is Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me,
Pc, Po, Sc, Sm, Sk, So, or Co>
symucont = <any code point greater than 127 whose Unicode
category is Nd, Nl, No, or Pd>
Alternatively, a `Symbol` may be written in a “bare” form[^cf-sexp-token].
The grammar for numeric data is a subset of the grammar for bare `Symbol`s,
so if a `SymbolOrNumber` also matches the grammar for `Float`, `Double` or
`SignedInteger`, then it must be interpreted as one of those, and otherwise
it must be interpreted as a bare `Symbol`.
SymbolOrNumber = 1*baresymchar
baresymchar = ALPHA / DIGIT / sympunct / symuchar
sympunct = "~" / "!" / "$" / "%" / "^" / "&" / "*" /
"?" / "_" / "=" / "+" / "-" / "/" / "."
symuchar = <any code point greater than 127 whose Unicode
category is Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd,
Nl, No, Pc, Pd, Po, Sc, Sm, Sk, So, or Co>
[^cf-sexp-token]: Compare with the [SPKI S-expression][sexp.txt]
definition of “token representation”, and with the
[R6RS definition of identifiers](http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-7.html#node_sec_4.2.4).
An `Embedded` is written as a `Value` chosen to represent the denoted
object, prefixed with `#!`.
Numeric data follow the [JSON
grammar](https://tools.ietf.org/html/rfc8259#section-6) except that leading
zeros are permitted and an optional leading `+` sign is allowed. The
addition of a trailing “f” distinguishes a `Float` from a `Double` value.
`Float`s and `Double`s always have either a fractional part or an exponent
part, whereas `SignedInteger`s never have
either.[^reading-and-writing-floats-accurately]
[^arbitrary-precision-signedinteger]
Float = flt %i"f"
Double = flt
SignedInteger = int
nat = 1*DIGIT
int = ["-"/"+"] nat
frac = "." 1*DIGIT
exp = %i"e" ["-"/"+"] 1*DIGIT
flt = int (frac exp / frac / exp)
[^reading-and-writing-floats-accurately]: **Implementation note.**
Your language's standard library likely has a good routine for
converting between decimal notation and IEEE 754 floating-point.
However, if not, or if you are interested in the challenges of
accurately reading and writing floating point numbers, see the
excellent matched pair of 1990 papers by Clinger and Steele &
White, and a recent follow-up by Jaffer:
Clinger, William D. How to Read Floating Point Numbers
Accurately. In Proc. PLDI. White Plains, New York, 1990.
<https://doi.org/10.1145/93542.93557>.
Steele, Guy L., Jr., and Jon L. White. How to Print
Floating-Point Numbers Accurately. In Proc. PLDI. White Plains,
New York, 1990. <https://doi.org/10.1145/93542.93559>.
Jaffer, Aubrey. Easy Accurate Reading and Writing of
Floating-Point Numbers. ArXiv:1310.8121 [Cs], 27 October 2013.
<http://arxiv.org/abs/1310.8121>.
[^arbitrary-precision-signedinteger]: **Implementation note.** Be
aware when implementing reading and writing of `SignedInteger`s
that the data model *requires* arbitrary-precision integers. Your
implementation may (but, ideally, should not) truncate precision
when reading or writing a `SignedInteger`; however, if it does so,
it should (a) signal its client that truncation has occurred, and
(b) make it clear to the client that comparing such truncated
values for equality or ordering will not yield results that match
the expected semantics of the data model.
Some valid IEEE 754 `Float`s and `Double`s are not covered by the grammar
above, namely, the several million NaNs and the two infinities. These are
represented as raw hexadecimal strings similar to hexadecimal
`ByteString`s. Implementations are free to use hexadecimal floating-point
syntax wherever convenient, even for values representable using the
grammar above.[^rationale-no-general-machine-syntax]
Value =/ HexFloat / HexDouble
HexFloat = "#xf" %x22 4(ws 2HEXDIG) ws %x22
HexDouble = "#xd" %x22 8(ws 2HEXDIG) ws %x22
[^rationale-no-general-machine-syntax]: **Rationale.** Previous versions
of this specification included an escape to the [machine-oriented
binary syntax](preserves-binary.html) by prefixing a `ByteString`
containing the binary representation of a `Value` with `#=`. The only
true need for this feature was to represent otherwise-unrepresentable
floating-point values. Instead, this specification allows such
floating-point values to be written directly. Removing the `#=` syntax
simplifies implementations (there is no longer any need to support the
machine-oriented syntax) and avoids complications around treatment of
annotations potentially contained within machine-encoded values.
Finally, an `Embedded` is written as a `Value` chosen to represent the
denoted object, prefixed with `#!`.
Embedded = "#!" Value
Finally, any `Value` may be represented by escaping from the textual
syntax to the [machine-oriented binary syntax](preserves-binary.html)
by prefixing a `ByteString` containing the binary representation of the
`Value` with `#=`.[^rationale-switch-to-binary]
[^no-literal-binary-in-text] [^machine-value-annotations]
Machine = "#=" ws ByteString
[^rationale-switch-to-binary]: **Rationale.** The textual syntax
cannot express every `Value`: specifically, it cannot express the
several million floating-point NaNs, or the two floating-point
Infinities. Since the machine-oriented binary format for `Value`s
expresses each `Value` with precision, embedding binary `Value`s
solves the problem.
[^no-literal-binary-in-text]: Every text is ultimately physically
stored as bytes; therefore, it might seem possible to escape to the
raw form of binary encoding from within a piece of textual syntax.
However, while bytes must be involved in any *representation* of
text, the text *itself* is logically a sequence of *code points* and
is not *intrinsically* a binary structure at all. It would be
incoherent to expect to be able to access the representation of the
text from within the text itself.
[^machine-value-annotations]: Any text-syntax annotations preceding
the `#` are prepended to any binary-syntax annotations yielded by
decoding the `ByteString`.
## Annotations
When written down, a `Value` may have an associated sequence of
@ -293,5 +291,22 @@ The text syntax for `Boolean`s, `Symbol`s, and `ByteString`s is
directly inspired by [Racket](https://racket-lang.org/)'s lexical
syntax.
## Appendix. Regular expressions for bare symbols and numbers
When parsing, if a token matches both `SymbolOrNumber` and `Number`, it's a
number; use `Float`, `Double` and `SignedInteger` to disambiguate. If it
matches `SymbolOrNumber` but not `Number`, it's a "bare" `Symbol`.
SymbolOrNumber: ^[-a-zA-Z0-9~!$%^&*?_=+/.]+$
Number: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$
Float: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))[fF])$
Double: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+)))$
SignedInteger: ^([-+]?\d+)$
When printing, if a symbol matches both `SymbolOrNumber` and `Number` or
neither `SymbolOrNumber` nor `Number`, it must be quoted (`|...|`). If it
matches `SymbolOrNumber` but not `Number`, it may be printed as a "bare"
`Symbol`.
<!-- Heading to visually offset the footnotes from the main document: -->
## Notes

View File

@ -220,21 +220,23 @@ The total ordering specified [above](#total-order) means that the following stat
<!-- TODO: Give some examples of large and small Preserves, perhaps -->
<!-- translated from various JSON blobs floating around the internet. -->
| Value | Encoded byte sequence |
|-----------------------------|---------------------------------------------------------------------------------|
| `<capture <discard>>` | B4 B3 07 'c' 'a' 'p' 't' 'u' 'r' 'e' B4 B3 07 'd' 'i' 's' 'c' 'a' 'r' 'd' 84 84 |
| `[1 2 3 4]` | B5 91 92 93 94 84 |
| `[-2 -1 0 1]` | B5 9E 9F 90 91 84 |
| `"hello"` (format B) | B1 05 'h' 'e' 'l' 'l' 'o' |
| `["a" b #"c" [] #{} #t #f]` | B5 B1 01 'a' B3 01 'b' B2 01 'c' B5 84 B6 84 81 80 84 |
| `-257` | A1 FE FF |
| `-1` | 9F |
| `0` | 90 |
| `1` | 91 |
| `255` | A1 00 FF |
| `1.0f` | 82 3F 80 00 00 |
| `1.0` | 83 3F F0 00 00 00 00 00 00 |
| `-1.202e300` | 83 FE 3C B7 B7 59 BF 04 26 |
| Value | Encoded byte sequence |
|-----------------------------------------------------|---------------------------------------------------------------------------------|
| `<capture <discard>>` | B4 B3 07 'c' 'a' 'p' 't' 'u' 'r' 'e' B4 B3 07 'd' 'i' 's' 'c' 'a' 'r' 'd' 84 84 |
| `[1 2 3 4]` | B5 91 92 93 94 84 |
| `[-2 -1 0 1]` | B5 9E 9F 90 91 84 |
| `"hello"` (format B) | B1 05 'h' 'e' 'l' 'l' 'o' |
| `["a" b #"c" [] #{} #t #f]` | B5 B1 01 'a' B3 01 'b' B2 01 'c' B5 84 B6 84 81 80 84 |
| `-257` | A1 FE FF |
| `-1` | 9F |
| `0` | 90 |
| `1` | 91 |
| `255` | A1 00 FF |
| `1.0f` | 82 3F 80 00 00 |
| `1.0` | 83 3F F0 00 00 00 00 00 00 |
| `-1.202e300` | 83 FE 3C B7 B7 59 BF 04 26 |
| `#xf"7f800000"`, positive `Float` infinity | 82 7F 80 00 00 |
| `#xd"fff0000000000000"`, negative `Double` infinity | 83 FF F0 00 00 00 00 00 00 |
The next example uses a non-`Symbol` label for a record.[^extensibility2] The `Record`

Binary file not shown.

View File

@ -74,9 +74,45 @@
dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
dict4: @"Unexpected close brace" <ParseError "}">
dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84">
double0: <Test #x"830000000000000000" 0.0>
double+0: <Test #x"830000000000000000" +0.0>
double-0: <Test #x"838000000000000000" -0.0>
double1: <Test #x"833ff0000000000000" 1.0>
double2: <Test #x"83fe3cb7b759bf0426" -1.202e300>
double3: <Test #x"83123456789abcdef0" #xd"12 34 56 78 9a bc de f0">
double4: @"Fewer than 16 digits" <ParseError "#xd\"12345678\"">
double5: @"More than 16 digits" <ParseError "#xd\"123456789abcdef012\"">
double6: @"Invalid chars" <ParseError "#xd\"12zz56789abcdef0\"">
double7: @"Positive infinity" <Test #x"837ff0000000000000" #xd"7ff0000000000000">
double8: @"Negative infinity" <Test #x"83fff0000000000000" #xd"fff0000000000000">
double9: @"-sNaN" <Test #x"83fff0000000000001" #xd"fff0000000000001">
double10: @"-sNaN" <Test #x"83fff0000000000111" #xd"fff0000000000111">
double11: @"+sNaN" <Test #x"837ff0000000000001" #xd"7ff0000000000001">
double12: @"+sNaN" <Test #x"837ff0000000000111" #xd"7ff0000000000111">
double13: @"Bad spacing" <ParseError "#xd\"12345 6789abcdef0\"">
double14: @"-qNaN" <Test #x"83fff8000000000001" #xd"fff8000000000001">
double15: @"-qNaN" <Test #x"83fff8000000000111" #xd"fff8000000000111">
double16: @"+qNaN" <Test #x"837ff8000000000001" #xd"7ff8000000000001">
double17: @"+qNaN" <Test #x"837ff8000000000111" #xd"7ff8000000000111">
float0: <Test #x"8200000000" 0.0f>
float+0: <Test #x"8200000000" +0.0f>
float-0: <Test #x"8280000000" -0.0f>
float1: <Test #x"823f800000" 1.0f>
float2: <Test #x"8212345678" #xf"12 34 56 78">
float3: @"Fewer than 8 digits" <ParseError "#xf\"123456\"">
float4: @"More than 8 digits" <ParseError "#xf\"123456789a\"">
float5: @"Invalid chars" <ParseError "#xf\"12zz5678\"">
float6: @"Positive infinity" <Test #x"827f800000" #xf"7f800000">
float7: @"Negative infinity" <Test #x"82ff800000" #xf"ff800000">
float8: @"+sNaN" <Test #x"827f800001" #xf"7f800001">
float9: @"+sNaN" <Test #x"827f800111" #xf"7f800111">
float10: @"-sNaN" <Test #x"82ff800001" #xf"ff800001">
float11: @"-sNaN" <Test #x"82ff800111" #xf"ff800111">
float12: @"Bad spacing" <ParseError "#xf\"12345 678\"">
float13: @"+qNaN" <Test #x"827fc00001" #xf"7fc00001">
float14: @"+qNaN" <Test #x"827fc00111" #xf"7fc00111">
float15: @"-qNaN" <Test #x"82ffc00001" #xf"ffc00001">
float16: @"-qNaN" <Test #x"82ffc00111" #xf"ffc00111">
int-257: <Test #x"a1feff" -257>
int-256: <Test #x"a1ff00" -256>
int-255: <Test #x"a1ff01" -255>
@ -89,10 +125,13 @@
int-2: <Test #x"9e" -2>
int-1: <Test #x"9f" -1>
int0: <Test #x"90" 0>
int+0: <Test #x"90" +0>
int-0: <Test #x"90" -0>
int1: <Test #x"91" 1>
int12: <Test #x"9c" 12>
int13: <Test #x"a00d" 13>
int127: <Test #x"a07f" 127>
int+127: <Test #x"a07f" +127>
int128: <Test #x"a10080" 128>
int255: <Test #x"a100ff" 255>
int256: <Test #x"a10100" 256>
@ -112,6 +151,8 @@
list8: @"Missing close bracket" <ParseShort "[">
list9: @"Unexpected close bracket" <ParseError "]">
list10: @"Missing end byte" <DecodeShort #x"b58080">
list11: <Test #x"b59184" [01]>
list12: <Test #x"b59c84" [12]>
noinput0: @"No input at all" <DecodeEOF #x"">
embed0: <Test #x"8690" #!0>
embed1: <Test #x"868690" #!#!0>
@ -138,17 +179,22 @@
string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
symbol0: <Test #x"b300" ||>
symbol2: <Test #x"b30568656c6c6f" hello>
symbol3: <Test #x"b305312d322d33" 1-2-3>
symbol4: <Test #x"b305612d622d63" a-b-c>
symbol5: <Test #x"b305612b622b63" a+b+c>
symbol6: <Test #x"b3012b" +>
symbol7: <Test #x"b3032b2b2b" +++>
symbol8: <Test #x"b3012d" ->
symbol9: <Test #x"b3032d2d2d" --->
symbol10: <Test #x"b3022d61" -a>
symbol11: <Test #x"b3042d2d2d61" ---a>
symbol12: <Test #x"b3042d2d2d31" ---1>
symbol13: <Test #x"b3042b312e78" +1.x>
tag0: @"Unexpected end tag" <DecodeError #x"84">
tag1: @"Invalid tag" <DecodeError #x"10">
tag2: @"Invalid tag" <DecodeError #x"61b10110">
whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " ">
whitespace1: @"No input at all" <ParseEOF "">
value1: <Test #"\xB2\x06corymb" #=#"\xB2\x06corymb">
value2: <Test #"\x81" #=#"\x81">
value3: <Test #"\x81" #=#[gQ]>
value4: <Test #"\x81" #=#[gQ==]>
value5: <Test #"\x81" #= #[gQ==]>
value6: <Test #x"b591929384" #=#x"b591929384">
longlist14: <Test #x"b5808080808080808080808080808084"
[#f #f #f #f #f