Repair text syntax for numbers and symbols. Closes #19/#36/#37/#38.

Numbers and (bare) Symbols are now disambiguated after reading, which permits leading `+`, leading `0`, and a wider range of acceptable Symbols. Updates spec text, test cases, and implementations. Some ancillary fixes to Python's comparison routines are also included.
2022-11-06 22:27:01 +01:00 · 2022-11-06 22:27:01 +01:00 · 269ed2391a
parent 351feba8d2
commit 269ed2391a
31 changed files with 864 additions and 553 deletions
--- a/implementations/javascript/packages/core/src/bytes.ts
+++ b/implementations/javascript/packages/core/src/bytes.ts
@ -35,6 +35,10 @@ export class Bytes implements Preservable<any>, PreserveWritable<any> {
        }
    }

+    /**
+     * Builds a `DataView` over exactly the bytes this `Bytes` wraps, using the
+     * backing buffer, byte offset and byte length of the internal view.
+     */
+    dataview(): DataView {
+        const { buffer, byteOffset, byteLength } = this._view;
+        return new DataView(buffer, byteOffset, byteLength);
+    }
+
    get length(): number {
        return this._view.length;
    }
@ -179,6 +183,10 @@ export function underlying(b: Bytes | Uint8Array): Uint8Array {
    return (b instanceof Uint8Array) ? b : b._view;
 }

+/**
+ * Coerces its argument to a `DataView`: an existing `DataView` is returned
+ * unchanged, while a `Bytes` is converted through its `dataview()` method.
+ */
+export function dataview(b: Bytes | DataView): DataView {
+    if (b instanceof DataView) {
+        return b;
+    }
+    return b.dataview();
+}
+
 // Uint8Array / TypedArray methods

 export interface Bytes {
--- a/implementations/javascript/packages/core/src/decoder.ts
+++ b/implementations/javascript/packages/core/src/decoder.ts
@ -216,8 +216,8 @@ export class Decoder<T = never> implements TypedDecoder<T> {
        switch (tag) {
            case Tag.False: return this.state.wrap<T>(false);
            case Tag.True: return this.state.wrap<T>(true);
-            case Tag.Float: return this.state.wrap<T>(new SingleFloat(this.state.nextbytes(4).getFloat32(0, false)));
-            case Tag.Double: return this.state.wrap<T>(new DoubleFloat(this.state.nextbytes(8).getFloat64(0, false)));
+            case Tag.Float: return this.state.wrap<T>(SingleFloat.fromBytes(this.state.nextbytes(4)));
+            case Tag.Double: return this.state.wrap<T>(DoubleFloat.fromBytes(this.state.nextbytes(8)));
            case Tag.End: throw new DecodeError("Unexpected Compound end marker");
            case Tag.Annotation: {
                const a = this.next();
@ -294,7 +294,7 @@ export class Decoder<T = never> implements TypedDecoder<T> {
    nextFloat(): SingleFloat | undefined {
+        // Skips annotations, then consumes one tag byte. A Float tag yields the
+        // SingleFloat decoded from the next 4 bytes; any other tag yields undefined.
        this.skipAnnotations();
        switch (this.state.nextbyte()) {
-            case Tag.Float: return new SingleFloat(this.state.nextbytes(4).getFloat32(0, false));
+            case Tag.Float: return SingleFloat.fromBytes(this.state.nextbytes(4));
            default: return void 0;
        }
    }
@ -302,7 +302,7 @@ export class Decoder<T = never> implements TypedDecoder<T> {
    nextDouble(): DoubleFloat | undefined {
+        // Skips annotations, then consumes one tag byte. A Double tag yields the
+        // DoubleFloat decoded from the next 8 bytes; any other tag yields undefined.
        this.skipAnnotations();
        switch (this.state.nextbyte()) {
-            case Tag.Double: return new DoubleFloat(this.state.nextbytes(8).getFloat64(0, false));
+            case Tag.Double: return DoubleFloat.fromBytes(this.state.nextbytes(8));
            default: return void 0;
        }
    }
--- a/implementations/javascript/packages/core/src/float.ts
+++ b/implementations/javascript/packages/core/src/float.ts
@ -4,6 +4,7 @@ import { Value } from "./values";
 import type { GenericEmbedded } from "./embedded";
 import type { Encoder, Preservable } from "./encoder";
 import type { Writer, PreserveWritable } from "./writer";
+import { Bytes, dataview, underlying } from "./bytes";

 export type FloatType = 'Single' | 'Double';
 export const FloatType = Symbol.for('FloatType');
@ -19,8 +20,15 @@ export abstract class Float {
        return stringify(this);
    }

+    abstract toBytes(): Bytes;
+
    equals(other: any): boolean {
-        return Object.is(other.constructor, this.constructor) && (other.value === this.value);
+        // Values built by different constructors (e.g. single vs. double) are never equal.
+        if (!Object.is(other.constructor, this.constructor)) return false;
+        if (Number.isNaN(this.value) && Number.isNaN(other.value)) {
+            // Both NaN: compare the serialized bytes, so sign and payload bits matter.
+            return other.toBytes().equals(this.toBytes());
+        } else {
+            // Object.is, unlike ===, distinguishes +0 from -0.
+            return Object.is(other.value, this.value);
+        }
    }

    hashCode(): number {
@ -44,24 +52,72 @@ export function floatValue(f: any): number {
    }
 }

+/**
+ * Renders a number so that it always reads as a floating-point literal.
+ *
+ * Negative zero becomes `-0.0`. Otherwise the default string conversion is used,
+ * with `.0` appended when that text contains neither a decimal point nor an exponent.
+ */
+export function floatlikeString(f: number): string {
+    if (Object.is(f, -0)) {
+        return '-0.0';
+    }
+    const text = String(f);
+    const alreadyFloatlike = /[.eE]/.test(text);
+    return alreadyFloatlike ? text : text + '.0';
+}
+
 export class SingleFloat extends Float implements Preservable<any>, PreserveWritable<any> {
    __as_preserve__<T = GenericEmbedded>(): Value<T> {
        return this;
    }

+    /**
+     * Decodes a big-endian single-precision value from the first four bytes of `bs`.
+     *
+     * When every exponent bit is set (infinity or NaN), the sign and 23-bit payload
+     * are copied by hand into a double-precision bit pattern instead of being read
+     * with `getFloat32`. All other values are read with `getFloat32` directly.
+     */
+    static fromBytes(bs: Bytes | DataView): SingleFloat {
+        const view = dataview(bs);
+        const vf = view.getInt32(0, false);
+        if ((vf & 0x7f800000) === 0x7f800000) {
+            // NaN or inf. Preserve quiet/signalling bit by manually expanding to double-precision.
+            // Arithmetic shift of a signed 32-bit value: -1 when the sign bit is set, else 0.
+            const sign = vf >> 31;
+            const payload = vf & 0x007fffff;
+            const dbs = new Bytes(8);
+            const dview = dataview(dbs);
+            // First 16 bits of the double: sign, all-ones 11-bit exponent, top 4 payload bits.
+            dview.setInt16(0, (sign << 15) | 0x7ff0 | (payload >> 19), false);
+            // Remaining 19 payload bits go at the top of the next 32 bits.
+            dview.setInt32(2, (payload & 0x7ffff) << 13, false);
+            return new SingleFloat(dview.getFloat64(0, false));
+        } else {
+            return new SingleFloat(dataview(bs).getFloat32(0, false));
+        }
+    }
+
    static __from_preserve__<T>(v: Value<T>): undefined | SingleFloat {
        return Float.isSingle(v) ? v : void 0;
    }

+    /**
+     * Writes this value into `v` at `offset` as four big-endian single-precision bytes.
+     *
+     * NaN values are assembled bit by bit from the double-precision representation
+     * (sign plus the top 23 mantissa bits) rather than written with `setFloat32`.
+     */
+    __w(v: DataView, offset: number) {
+        if (Number.isNaN(this.value)) {
+            const dbs = new Bytes(8);
+            const dview = dataview(dbs);
+            dview.setFloat64(0, this.value, false);
+            // Arithmetic shift of the signed first byte: -1 when the sign bit is set, else 0.
+            const sign = dview.getInt8(0) >> 7;
+            // Bytes 1..4 hold the low 4 exponent bits followed by the top 28 mantissa bits;
+            // dropping the low 5 bits and masking keeps the top 23 mantissa bits.
+            const payload = (dview.getInt32(1, false) >> 5) & 0x007fffff;
+            const vf = (sign << 31) | 0x7f800000 | payload;
+            v.setInt32(offset, vf, false);
+        } else {
+            v.setFloat32(offset, this.value, false);
+        }
+    }
+
    __preserve_on__(encoder: Encoder<any>) {
+        // Binary form: the Float tag byte, then four bytes written by __w at the
+        // encoder's current index, which is then advanced past them.
        encoder.state.emitbyte(Tag.Float);
        encoder.state.makeroom(4);
-        encoder.state.view.setFloat32(encoder.state.index, this.value, false);
+        this.__w(encoder.state.view, encoder.state.index);
        encoder.state.index += 4;
    }

+    /** Serializes this value into a fresh 4-byte `Bytes`, as written by `__w`. */
+    toBytes(): Bytes {
+        const out = new Bytes(4);
+        this.__w(dataview(out), 0);
+        return out;
+    }
+
    __preserve_text_on__(w: Writer<any>) {
-        w.state.pieces.push('' + this.value + 'f');
+        if (Number.isFinite(this.value)) {
+            // Finite values: float-like decimal text followed by the 'f' marker.
+            w.state.pieces.push(floatlikeString(this.value) + 'f');
+        } else {
+            // Infinities and NaNs: the #xf"..." form carrying the hex of the 4-byte encoding.
+            w.state.pieces.push('#xf"', this.toBytes().toHex(), '"');
+        }
    }

    get [FloatType](): 'Single' {
@ -78,6 +134,10 @@ export class DoubleFloat extends Float implements Preservable<any>, PreserveWrit
        return this;
    }

+    /** Decodes a big-endian double-precision value from the first eight bytes of `bs`. */
+    static fromBytes(bs: Bytes | DataView): DoubleFloat {
+        const view = dataview(bs);
+        const value = view.getFloat64(0, false);
+        return new DoubleFloat(value);
+    }
+
    static __from_preserve__<T>(v: Value<T>): undefined | DoubleFloat {
        return Float.isDouble(v) ? v : void 0;
    }
@ -89,8 +149,18 @@ export class DoubleFloat extends Float implements Preservable<any>, PreserveWrit
        encoder.state.index += 8;
    }

+    /** Serializes this value into a fresh 8-byte `Bytes` in big-endian order. */
+    toBytes(): Bytes {
+        const out = new Bytes(8);
+        dataview(out).setFloat64(0, this.value, false);
+        return out;
+    }
+
    __preserve_text_on__(w: Writer<any>) {
-        w.state.pieces.push('' + this.value);
+        if (Number.isFinite(this.value)) {
+            // Finite values: float-like decimal text with no suffix.
+            w.state.pieces.push(floatlikeString(this.value));
+        } else {
+            // Infinities and NaNs: the #xd"..." form carrying the hex of the 8-byte encoding.
+            w.state.pieces.push('#xd"', this.toBytes().toHex(), '"');
+        }
    }

    get [FloatType](): 'Double' {
--- a/implementations/javascript/packages/core/src/reader.ts
+++ b/implementations/javascript/packages/core/src/reader.ts
@ -3,12 +3,12 @@
 import type { Value } from './values';
 import { DecodeError, ShortPacket } from './codec';
 import { Dictionary, Set } from './dictionary';
-import { strip, unannotate } from './strip';
-import { Bytes, unhexDigit } from './bytes';
-import { decode, Decoder, DecoderState, neverEmbeddedTypeDecode } from './decoder';
+import { strip } from './strip';
+import { Bytes, underlying, unhexDigit } from './bytes';
+import { Decoder, DecoderState, neverEmbeddedTypeDecode } from './decoder';
 import { Record } from './record';
 import { Annotated, newPosition, Position, updatePosition } from './annotated';
-import { Double, DoubleFloat, Single, SingleFloat } from './float';
+import { Double, DoubleFloat, FloatType, Single, SingleFloat } from './float';
 import { stringify } from './text';
 import { embed, GenericEmbedded, EmbeddedTypeDecode } from './embedded';

@ -25,6 +25,13 @@ type IntOrFloat = 'int' | 'float';
 type Numeric = number | SingleFloat | DoubleFloat;
 type IntContinuation = (kind: IntOrFloat, acc: string) => Numeric;

+export const NUMBER_RE: RegExp = /^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$/;
+// Matches a whole token that is a number: an optionally signed run of digits,
+// optionally followed by a fraction and/or exponent and an optional Float marker.
+// Capture groups:
+//  1 - sign and integer part
+//  2 - everything after the integer part (fraction/exponent plus Float marker);
+//      undefined when the token is a plain integer
+//  3 - fraction and/or exponent, without the Float marker
+//  4 - fraction with its optional exponent (first alternative of group 3)
+//  5 - exponent following a fraction
+//  6 - exponent with no fraction (second alternative of group 3)
+//  7 - Float marker: 'f' or 'F', or the empty string when absent
+
 export class ReaderState {
    buffer: string;
    pos: Position;
@ -124,6 +131,22 @@ export class ReaderState {
        }
    }

+    /**
+     * Reads the rest of a hex-encoded floating-point literal: an opening double
+     * quote, then the hex digits consumed by `readHexBinary`. The decoded bytes
+     * must number exactly 4 for 'Single' or 8 for 'Double'; errors are reported
+     * against the position at which reading started.
+     */
+    readHexFloat(precision: FloatType): SingleFloat | DoubleFloat {
+        const start = this.copyPos();
+        const opener = this.nextchar();
+        if (opener !== '"') {
+            this.error("Missing open-double-quote in hex-encoded floating-point number", start);
+        }
+        const bytes = this.readHexBinary();
+        switch (precision) {
+            case 'Single': {
+                if (bytes.length !== 4) {
+                    this.error("Incorrect number of bytes in hex-encoded Float", start);
+                }
+                return SingleFloat.fromBytes(bytes);
+            }
+            case 'Double': {
+                if (bytes.length !== 8) {
+                    this.error("Incorrect number of bytes in hex-encoded Double", start);
+                }
+                return DoubleFloat.fromBytes(bytes);
+            }
+        }
+    }
+
    readBase64Binary(): Bytes {
        let acc = '';
        while (true) {
@ -135,67 +158,7 @@ export class ReaderState {
        return decodeBase64(acc);
    }

-    readIntpart(acc: string, ch: string): Numeric {
-        if (ch === '0') return this.readFracexp('int', acc + ch);
-        return this.readDigit1('int', acc, (kind, acc) => this.readFracexp(kind, acc), ch);
-    }
-
-    readDigit1(kind: IntOrFloat, acc: string, k: IntContinuation, ch?: string): Numeric {
-        if (ch === void 0) ch = this.nextchar();
-        if (ch >= '0' && ch <= '9') return this.readDigit0(kind, acc + ch, k);
-        this.error('Incomplete number', this.pos);
-    }
-
-    readDigit0(kind: IntOrFloat, acc: string, k: IntContinuation): Numeric {
-        while (true) {
-            if (this.atEnd()) break;
-            const ch = this.peek();
-            if (!(ch >= '0' && ch <= '9')) break;
-            this.advance();
-            acc = acc + ch;
-        }
-        return k(kind, acc);
-    }
-
-    readFracexp(kind: IntOrFloat, acc: string): Numeric {
-        if (!this.atEnd() && this.peek() === '.') {
-            this.advance();
-            return this.readDigit1('float', acc + '.', (kind, acc) => this.readExp(kind, acc));
-        }
-        return this.readExp(kind, acc);
-    }
-
-    readExp(kind: IntOrFloat, acc: string): Numeric {
-        const ch = this.atEnd() ? '' : this.peek();
-        if (ch === 'e' || ch === 'E') {
-            this.advance();
-            return this.readSignAndExp(acc + ch);
-        }
-        return this.finishNumber(kind, acc);
-    }
-
-    readSignAndExp(acc: string): Numeric {
-        const ch = this.peek();
-        if (ch === '+' || ch === '-') {
-            this.advance();
-            return this.readDigit1('float', acc + ch, (kind, acc) => this.finishNumber(kind, acc));
-        }
-        return this.readDigit1('float', acc, (kind, acc) => this.finishNumber(kind, acc));
-    }
-
-    finishNumber(kind: IntOrFloat, acc: string): Numeric {
-        const i = parseFloat(acc);
-        if (kind === 'int') return i;
-        const ch = this.atEnd() ? '' : this.peek();
-        if (ch === 'f' || ch === 'F') {
-            this.advance();
-            return Single(i);
-        } else {
-            return Double(i);
-        }
-    }
-
-    readRawSymbol<T>(acc: string): Value<T> {
+    readRawSymbolOrNumber<T>(acc: string): Value<T> {
        while (true) {
            if (this.atEnd()) break;
            const ch = this.peek();
@ -203,7 +166,20 @@ export class ReaderState {
            this.advance();
            acc = acc + ch;
        }
-        return Symbol.for(acc);
+        const m = NUMBER_RE.exec(acc);
+        if (m) {
+            if (m[2] === void 0) {
+                let v = parseInt(m[1]);
+                if (Object.is(v, -0)) v = 0;
+                return v;
+            } else if (m[7] === '') {
+                return Double(parseFloat(m[1] + m[3]));
+            } else {
+                return Single(parseFloat(m[1] + m[3]));
+            }
+        } else {
+            return Symbol.for(acc);
+        }
    }

    readStringlike<E, R>(xform: (ch: string) => E,
@ -355,11 +331,6 @@ export class Reader<T> {
        const unwrapped = ((): Value<T> => {
            const c = this.state.nextchar();
            switch (c) {
-                case '-':
-                    return this.state.readIntpart('-', this.state.nextchar());
-                case '0': case '1': case '2': case '3': case '4':
-                case '5': case '6': case '7': case '8': case '9':
-                    return this.state.readIntpart('', c);
                case '"':
                    return this.state.readString('"');
                case '|':
@ -377,22 +348,13 @@ export class Reader<T> {
                        case 't': return true;
                        case '{': return this.seq(new Set<T>(), (v, s) => s.add(v), '}');
                        case '"': return this.state.readLiteralBinary();
-                        case 'x':
-                            if (this.state.nextchar() !== '"') {
-                                this.state.error('Expected open-quote at start of hex ByteString',
-                                                 startPos);
-                            }
-                            return this.state.readHexBinary();
-                        case '[': return this.state.readBase64Binary();
-                        case '=': {
-                            const bs = unannotate(this.next());
-                            if (!Bytes.isBytes(bs)) this.state.error('ByteString must follow #=',
-                                                                     startPos);
-                            return decode<T>(bs, {
-                                embeddedDecode: this.embeddedType,
-                                includeAnnotations: this.state.options.includeAnnotations,
-                            });
+                        case 'x': switch (this.state.nextchar()) {
+                            case '"': return this.state.readHexBinary();
+                            case 'f': return this.state.readHexFloat('Single');
+                            case 'd': return this.state.readHexFloat('Double');
+                            default: this.state.error('Invalid #x syntax', startPos);
                        }
+                        case '[': return this.state.readBase64Binary();
                        case '!': return embed(this.embeddedType.fromValue(
                            new Reader<GenericEmbedded>(this.state, genericEmbeddedTypeDecode).next(),
                            this.state.options));
@ -411,7 +373,7 @@ export class Reader<T> {
                case ']': this.state.error('Unexpected ]', startPos);
                case '}': this.state.error('Unexpected }', startPos);
                default:
-                    return this.state.readRawSymbol(c);
+                    return this.state.readRawSymbolOrNumber(c);
            }
        })();
        return this.wrap(unwrapped, startPos);
--- a/implementations/javascript/packages/core/src/text.ts
+++ b/implementations/javascript/packages/core/src/text.ts
@ -4,7 +4,7 @@ import type { Value } from './values';
 import { Annotated } from './annotated';
 import { Bytes } from './bytes';
 import { KeyedDictionary, KeyedSet } from './dictionary';
-import { Writer, Writable, WriterOptions, EmbeddedWriter, WriterState } from './writer';
+import { Writer, WriterOptions, EmbeddedWriter, WriterState } from './writer';
 import { fromJS } from './fromjs';

 export const stringifyEmbeddedWrite: EmbeddedWriter<any> = {
--- a/implementations/javascript/packages/core/src/writer.ts
+++ b/implementations/javascript/packages/core/src/writer.ts
@ -3,6 +3,7 @@ import { Record, Tuple } from "./record";
 import type { GenericEmbedded, Embedded, EmbeddedTypeEncode } from "./embedded";
 import { Encoder, EncoderState } from "./encoder";
 import type { Value } from "./values";
+import { NUMBER_RE } from './reader';

 export type Writable<T> =
    Value<T> | PreserveWritable<T> | Iterable<Value<T>> | ArrayBufferView;
@ -270,8 +271,7 @@ export class Writer<T> {
            case 'symbol': {
                const s = v.description!;
                // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
-                const m = /^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$/.exec(s);
-                if (m) {
+                if (/^[-a-zA-Z0-9~!$%^&*?_=+/.]+$/.exec(s) && !NUMBER_RE.exec(s)) {
                    this.state.pieces.push(s);
                } else {
                    this.state.pieces.push(this.state.escapeStringlike(s, '|'));
--- a/implementations/python/preserves/binary.py
+++ b/implementations/python/preserves/binary.py
@ -72,7 +72,7 @@ class Decoder(BinaryCodec):
        tag = self.nextbyte()
        if tag == 0x80: return self.wrap(False)
        if tag == 0x81: return self.wrap(True)
-        if tag == 0x82: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0]))
+        if tag == 0x82: return self.wrap(Float.from_bytes(self.nextbytes(4)))
        if tag == 0x83: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
        if tag == 0x84: raise DecodeError('Unexpected end-of-stream marker')
        if tag == 0x85:
--- a/implementations/python/preserves/compare.py
+++ b/implementations/python/preserves/compare.py
@ -2,7 +2,7 @@ import numbers
 from enum import Enum
 from functools import cmp_to_key

-from .values import preserve, Float, Embedded, Record, Symbol
+from .values import preserve, Float, Embedded, Record, Symbol, cmp_floats, _unwrap
 from .compat import basestring_

 class TypeNumber(Enum):
@ -19,7 +19,7 @@ class TypeNumber(Enum):
    SET = 9
    DICTIONARY = 10

-    EMBEDDED = 10
+    EMBEDDED = 11

 def type_number(v):
    if hasattr(v, '__preserve__'):
@ -84,12 +84,17 @@ def _item_key(item):
    return item[0]

 def _eq(a, b):
+    a = _unwrap(a)
+    b = _unwrap(b)
    ta = type_number(a)
    tb = type_number(b)
    if ta != tb: return False

+    if ta == TypeNumber.DOUBLE:
+        return cmp_floats(a, b) == 0
+
    if ta == TypeNumber.EMBEDDED:
-        return ta.embeddedValue == tb.embeddedValue
+        return _eq(a.embeddedValue, b.embeddedValue)

    if ta == TypeNumber.RECORD:
        return _eq(a.key, b.key) and _eq_sequences(a.fields, b.fields)
@ -118,13 +123,18 @@ def _cmp_sequences(aa, bb):
    return len(aa) - len(bb)

 def _cmp(a, b):
+    a = _unwrap(a)
+    b = _unwrap(b)
    ta = type_number(a)
    tb = type_number(b)
    if ta.value < tb.value: return -1
    if tb.value < ta.value: return 1

+    if ta == TypeNumber.DOUBLE:
+        return cmp_floats(a, b)
+
    if ta == TypeNumber.EMBEDDED:
-        return _simplecmp(ta.embeddedValue, tb.embeddedValue)
+        return _cmp(a.embeddedValue, b.embeddedValue)

    if ta == TypeNumber.RECORD:
        v = _cmp(a.key, b.key)
--- a/implementations/python/preserves/text.py
+++ b/implementations/python/preserves/text.py
@ -1,6 +1,7 @@
 import numbers
 import struct
 import base64
+import math

 from .values import *
 from .error import *
@ -9,6 +10,8 @@ from .binary import Decoder

 class TextCodec(object): pass

+NUMBER_RE = re.compile(r'^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$')
+
 class Parser(TextCodec):
    def __init__(self, input_buffer=u'', include_annotations=False, parse_embedded=lambda x: x):
        super(Parser, self).__init__()
@ -66,50 +69,6 @@ class Parser(TextCodec):
                return self.wrap(u''.join(s))
            s.append(c)

-    def read_intpart(self, acc, c):
-        if c == '0':
-            acc.append(c)
-        else:
-            self.read_digit1(acc, c)
-        return self.read_fracexp(acc)
-
-    def read_fracexp(self, acc):
-        is_float = False
-        if self.peek() == '.':
-            is_float = True
-            acc.append(self.nextchar())
-            self.read_digit1(acc, self.nextchar())
-        if self.peek() in 'eE':
-            acc.append(self.nextchar())
-            return self.read_sign_and_exp(acc)
-        else:
-            return self.finish_number(acc, is_float)
-
-    def read_sign_and_exp(self, acc):
-        if self.peek() in '+-':
-            acc.append(self.nextchar())
-        self.read_digit1(acc, self.nextchar())
-        return self.finish_number(acc, True)
-
-    def finish_number(self, acc, is_float):
-        if is_float:
-            if self.peek() in 'fF':
-                self.skip()
-                return Float(float(u''.join(acc)))
-            else:
-                return float(u''.join(acc))
-        else:
-            return int(u''.join(acc))
-
-    def read_digit1(self, acc, c):
-        if not c.isdigit():
-            raise DecodeError('Incomplete number')
-        acc.append(c)
-        while not self._atend():
-            if not self.peek().isdigit():
-                break
-            acc.append(self.nextchar())
-
    def read_stringlike(self, terminator, hexescape, hexescaper):
        acc = []
        while True:
@ -186,6 +145,16 @@ class Parser(TextCodec):
            if c == '=': continue
            acc.append(c)

+    def read_hex_float(self, bytecount):
+        """Read the rest of a hex-encoded floating-point literal.
+
+        Expects an opening double quote, then hex digits (via read_hex_binary)
+        decoding to exactly `bytecount` bytes. Four bytes produce a Float; eight
+        bytes produce a plain Python float. Raises DecodeError otherwise.
+        """
+        opener = self.nextchar()
+        if opener != '"':
+            raise DecodeError('Missing open-double-quote in hex-encoded floating-point number')
+        raw = self.read_hex_binary()
+        if len(raw) != bytecount:
+            raise DecodeError('Incorrect number of bytes in hex-encoded floating-point number')
+        if bytecount == 4:
+            return Float.from_bytes(raw)
+        elif bytecount == 8:
+            (value,) = struct.unpack('>d', raw)
+            return value
+        else:
+            raise DecodeError('Unsupported byte count in hex-encoded floating-point number')
+
    def upto(self, delimiter):
        vs = []
        while True:
@ -208,14 +177,24 @@ class Parser(TextCodec):
                raise DecodeError('Missing expected key/value separator')
            acc.append(self.next())

-    def read_raw_symbol(self, acc):
+    def read_raw_symbol_or_number(self, acc):
+        """Read a bare token and classify it as a number or a Symbol.
+
+        Characters are accumulated onto `acc` until whitespace or a delimiter.
+        A token matching NUMBER_RE becomes an int (no fraction or exponent), a
+        float (no Float marker) or a Float (marker present); anything else
+        becomes a Symbol.
+        """
        while not self._atend():
            c = self.peek()
            if c.isspace() or c in '(){}[]<>";,@#:|':
                break
            self.skip()
            acc.append(c)
-        return Symbol(u''.join(acc))
+        token = u''.join(acc)
+        m = NUMBER_RE.match(token)
+        if m is None:
+            return Symbol(token)
+        intpart, tail, fracexp, marker = m.group(1, 2, 3, 7)
+        if tail is None:
+            return int(intpart)
+        number = float(intpart + fracexp)
+        return number if marker == '' else Float(number)

    def wrap(self, v):
        return Annotated(v) if self.include_annotations else v
@ -223,12 +202,6 @@ class Parser(TextCodec):
    def next(self):
        self.skip_whitespace()
        c = self.peek()
-        if c == '-':
-            self.skip()
-            return self.wrap(self.read_intpart(['-'], self.nextchar()))
-        if c.isdigit():
-            self.skip()
-            return self.wrap(self.read_intpart([], c))
        if c == '"':
            self.skip()
            return self.wrap(self.read_string('"'))
@ -251,9 +224,11 @@ class Parser(TextCodec):
            if c == '{': return self.wrap(frozenset(self.upto('}')))
            if c == '"': return self.wrap(self.read_literal_binary())
            if c == 'x':
-                if self.nextchar() != '"':
-                    raise DecodeError('Expected open-quote at start of hex ByteString')
-                return self.wrap(self.read_hex_binary())
+                c = self.nextchar()
+                if c == '"': return self.wrap(self.read_hex_binary())
+                if c == 'f': return self.wrap(self.read_hex_float(4))
+                if c == 'd': return self.wrap(self.read_hex_float(8))
+                raise DecodeError('Invalid #x syntax')
            if c == '[': return self.wrap(self.read_base64_binary())
            if c == '=':
                old_ann = self.include_annotations
@ -286,7 +261,7 @@ class Parser(TextCodec):
        if c in '>]}':
            raise DecodeError('Unexpected ' + c)
        self.skip()
-        return self.wrap(self.read_raw_symbol([c]))
+        return self.wrap(self.read_raw_symbol_or_number([c]))

    def try_next(self):
        start = self.index
@ -385,7 +360,10 @@ class Formatter(TextCodec):
        elif v is True:
            self.chunks.append('#t')
        elif isinstance(v, float):
-            self.chunks.append(repr(v))
+            if math.isnan(v) or math.isinf(v):
+                self.chunks.append('#xd"' + struct.pack('>d', v).hex() + '"')
+            else:
+                self.chunks.append(repr(v))
        elif isinstance(v, numbers.Number):
            self.chunks.append('%d' % (v,))
        elif isinstance(v, bytes):
--- a/implementations/python/preserves/values.py
+++ b/implementations/python/preserves/values.py
@ -1,6 +1,7 @@
 import re
 import sys
 import struct
+import math

 from .error import DecodeError

@ -9,6 +10,16 @@ def preserve(v):
        v = v.__preserve__()
    return v

+def float_to_int(v):
+    """Return the IEEE 754 double-precision bit pattern of `v` as an unsigned 64-bit integer."""
+    return struct.unpack('>Q', struct.pack('>d', v))[0]
+
+def cmp_floats(a, b):
+    """Three-way comparison of two floats by bit pattern.
+
+    Returns a negative number, zero, or a positive number as `a` sorts before,
+    equal to, or after `b`. Zero is returned only for identical bit patterns, so
+    0.0 and -0.0 differ and a NaN equals only a NaN with the same bits. Every
+    value with the sign bit set sorts before every value without it.
+    """
+    a = float_to_int(a)
+    b = float_to_int(b)
+    # Sign bit set: flip the magnitude bits so larger magnitudes sort lower, then
+    # subtract 2**64 so the key is negative. Without the subtraction the unsigned
+    # key keeps its top bit and every negative float compares above every positive one.
+    if a & 0x8000000000000000: a = (a ^ 0x7fffffffffffffff) - 0x10000000000000000
+    if b & 0x8000000000000000: b = (b ^ 0x7fffffffffffffff) - 0x10000000000000000
+    return a - b
+
 class Float(object):
    def __init__(self, value):
        self.value = value
@ -16,7 +27,12 @@ class Float(object):
    def __eq__(self, other):
        other = _unwrap(other)
        if other.__class__ is self.__class__:
-            return self.value == other.value
+            # Equal only when cmp_floats reports no difference between the two values.
+            return cmp_floats(self.value, other.value) == 0
+
+    def __lt__(self, other):
+        """Order two Floats using cmp_floats on their values.
+
+        Falls through (returning None) when `other`, after unwrapping, is not
+        of the same class as this object.
+        """
+        other = _unwrap(other)
+        if other.__class__ is self.__class__:
+            return cmp_floats(self.value, other.value) < 0

    def __ne__(self, other):
        return not self.__eq__(other)
@ -27,15 +43,41 @@ class Float(object):
    def __repr__(self):
        return 'Float(' + repr(self.value) + ')'

+    def _to_bytes(self):
+        """Return the 4-byte big-endian single-precision encoding of this value.
+
+        NaN and infinity are assembled by hand from the double-precision bit
+        pattern; every other value goes through struct.pack('>f', ...).
+        """
+        if math.isnan(self.value) or math.isinf(self.value):
+            dbs = struct.pack('>d', self.value)
+            vd = struct.unpack('>Q', dbs)[0]
+            sign = vd >> 63
+            # Keep the top 23 of the double's 52 mantissa bits.
+            payload = (vd >> 29) & 0x007fffff
+            # Sign bit, all-ones 8-bit exponent, then the 23-bit payload.
+            vf = (sign << 31) | 0x7f800000 | payload
+            return struct.pack('>I', vf)
+        else:
+            return struct.pack('>f', self.value)
+
    def __preserve_write_binary__(self, encoder):
+        # Emit the tag byte 0x82 followed by the 4-byte encoding from _to_bytes().
        encoder.buffer.append(0x82)
-        encoder.buffer.extend(struct.pack('>f', self.value))
+        encoder.buffer.extend(self._to_bytes())

    def __preserve_write_text__(self, formatter):
-        formatter.chunks.append(repr(self.value) + 'f')
+        if math.isnan(self.value) or math.isinf(self.value):
+            # NaN and infinities: the #xf"..." form carrying the hex of the 4-byte encoding.
+            formatter.chunks.append('#xf"' + self._to_bytes().hex() + '"')
+        else:
+            # Everything else: repr of the value followed by the 'f' marker.
+            formatter.chunks.append(repr(self.value) + 'f')
+
+    @staticmethod
+    def from_bytes(bs):
+        """Build a Float from 4 big-endian single-precision bytes.
+
+        When all exponent bits are set (NaN or infinity), the sign and 23-bit
+        payload are widened by hand into a double-precision bit pattern, which
+        keeps the quiet/signalling bit intact. Other values are unpacked with
+        struct's '>f' format.
+        """
+        (bits,) = struct.unpack('>I', bs)
+        exponent_mask = 0x7f800000
+        if (bits & exponent_mask) != exponent_mask:
+            return Float(struct.unpack('>f', bs)[0])
+        sign_bit = bits >> 31
+        mantissa = bits & 0x007fffff
+        widened = (sign_bit << 63) | 0x7ff0000000000000 | (mantissa << 29)
+        return Float(struct.unpack('>d', struct.pack('>Q', widened))[0])

 # FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
-RAW_SYMBOL_RE = re.compile(r'^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$')
+RAW_SYMBOL_RE = re.compile(r'^[-a-zA-Z0-9~!$%^&*?_=+/.]+$')

 class Symbol(object):
    def __init__(self, name):
--- a/implementations/python/tests/samples.bin
+++ b/implementations/python/tests/samples.bin
--- a/implementations/python/tests/samples.pr
+++ b/implementations/python/tests/samples.pr
@ -74,9 +74,45 @@
  dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
  dict4: @"Unexpected close brace" <ParseError "}">
  dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84">
+  double0: <Test #x"830000000000000000" 0.0>
+  double+0: <Test #x"830000000000000000" +0.0>
+  double-0: <Test #x"838000000000000000" -0.0>
  double1: <Test #x"833ff0000000000000" 1.0>
  double2: <Test #x"83fe3cb7b759bf0426" -1.202e300>
+  double3: <Test #x"83123456789abcdef0" #xd"12 34 56 78  9a bc de f0">
+  double4: @"Fewer than 16 digits" <ParseError "#xd\"12345678\"">
+  double5: @"More than 16 digits" <ParseError "#xd\"123456789abcdef012\"">
+  double6: @"Invalid chars" <ParseError "#xd\"12zz56789abcdef0\"">
+  double7: @"Positive infinity" <Test #x"837ff0000000000000" #xd"7ff0000000000000">
+  double8: @"Negative infinity" <Test #x"83fff0000000000000" #xd"fff0000000000000">
+  double9: @"-qNaN" <Test #x"83fff0000000000001" #xd"fff0000000000001">
+  double10: @"-qNaN" <Test #x"83fff0000000000111" #xd"fff0000000000111">
+  double11: @"+qNaN" <Test #x"837ff0000000000001" #xd"7ff0000000000001">
+  double12: @"+qNaN" <Test #x"837ff0000000000111" #xd"7ff0000000000111">
+  double13: @"Bad spacing" <ParseError "#xd\"12345 6789abcdef0\"">
+  double14: @"-sNaN" <Test #x"83fff8000000000001" #xd"fff8000000000001">
+  double15: @"-sNaN" <Test #x"83fff8000000000111" #xd"fff8000000000111">
+  double16: @"+sNaN" <Test #x"837ff8000000000001" #xd"7ff8000000000001">
+  double17: @"+sNaN" <Test #x"837ff8000000000111" #xd"7ff8000000000111">
+  float0: <Test #x"8200000000" 0.0f>
+  float+0: <Test #x"8200000000" +0.0f>
+  float-0: <Test #x"8280000000" -0.0f>
  float1: <Test #x"823f800000" 1.0f>
+  float2: <Test #x"8212345678" #xf"12 34  56 78">
+  float3: @"Fewer than 8 digits" <ParseError "#xf\"123456\"">
+  float4: @"More than 8 digits" <ParseError "#xf\"123456789a\"">
+  float5: @"Invalid chars" <ParseError "#xf\"12zz5678\"">
+  float6: @"Positive infinity" <Test #x"827f800000" #xf"7f800000">
+  float7: @"Negative infinity" <Test #x"82ff800000" #xf"ff800000">
+  float8: @"+sNaN" <Test #x"827f800001" #xf"7f800001">
+  float9: @"+sNaN" <Test #x"827f800111" #xf"7f800111">
+  float10: @"-sNaN" <Test #x"82ff800001" #xf"ff800001">
+  float11: @"-sNaN" <Test #x"82ff800111" #xf"ff800111">
+  float12: @"Bad spacing" <ParseError "#xf\"12345 678\"">
+  float13: @"+qNaN" <Test #x"827fc00001" #xf"7fc00001">
+  float14: @"+qNaN" <Test #x"827fc00111" #xf"7fc00111">
+  float15: @"-qNaN" <Test #x"82ffc00001" #xf"ffc00001">
+  float16: @"-qNaN" <Test #x"82ffc00111" #xf"ffc00111">
  int-257: <Test #x"a1feff" -257>
  int-256: <Test #x"a1ff00" -256>
  int-255: <Test #x"a1ff01" -255>
@ -89,10 +125,13 @@
  int-2: <Test #x"9e" -2>
  int-1: <Test #x"9f" -1>
  int0: <Test #x"90" 0>
+  int+0: <Test #x"90" +0>
+  int-0: <Test #x"90" -0>
  int1: <Test #x"91" 1>
  int12: <Test #x"9c" 12>
  int13: <Test #x"a00d" 13>
  int127: <Test #x"a07f" 127>
+  int+127: <Test #x"a07f" +127>
  int128: <Test #x"a10080" 128>
  int255: <Test #x"a100ff" 255>
  int256: <Test #x"a10100" 256>
@ -112,6 +151,8 @@
  list8: @"Missing close bracket" <ParseShort "[">
  list9: @"Unexpected close bracket" <ParseError "]">
  list10: @"Missing end byte" <DecodeShort #x"b58080">
+  list11: <Test #x"b59184" [01]>
+  list12: <Test #x"b59c84" [12]>
  noinput0: @"No input at all" <DecodeEOF #x"">
  embed0: <Test #x"8690" #!0>
  embed1: <Test #x"868690" #!#!0>
@ -138,17 +179,22 @@
  string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
  symbol0: <Test #x"b300" ||>
  symbol2: <Test #x"b30568656c6c6f" hello>
+  symbol3: <Test #x"b305312d322d33" 1-2-3>
+  symbol4: <Test #x"b305612d622d63" a-b-c>
+  symbol5: <Test #x"b305612b622b63" a+b+c>
+  symbol6: <Test #x"b3012b" +>
+  symbol7: <Test #x"b3032b2b2b" +++>
+  symbol8: <Test #x"b3012d" ->
+  symbol9: <Test #x"b3032d2d2d" --->
+  symbol10: <Test #x"b3022d61" -a>
+  symbol11: <Test #x"b3042d2d2d61" ---a>
+  symbol12: <Test #x"b3042d2d2d31" ---1>
+  symbol13: <Test #x"b3042b312e78" +1.x>
  tag0: @"Unexpected end tag" <DecodeError #x"84">
  tag1: @"Invalid tag" <DecodeError #x"10">
  tag2: @"Invalid tag" <DecodeError #x"61b10110">
  whitespace0: @"Leading spaces have to eventually yield something" <ParseShort "   ">
  whitespace1: @"No input at all" <ParseEOF "">
-  value1: <Test #"\xB2\x06corymb" #=#"\xB2\x06corymb">
-  value2: <Test #"\x81" #=#"\x81">
-  value3: <Test #"\x81" #=#[gQ]>
-  value4: <Test #"\x81" #=#[gQ==]>
-  value5: <Test #"\x81" #=   #[gQ==]>
-  value6: <Test #x"b591929384" #=#x"b591929384">

  longlist14: <Test #x"b5808080808080808080808080808084"
               [#f #f #f #f #f
--- a/implementations/python/tests/test_compare.py
+++ b/implementations/python/tests/test_compare.py
@ -1,9 +1,9 @@
-import unittest
+from utils import PreservesTestCase

 from preserves import *
 from preserves.compare import *

-class BasicCompareTests(unittest.TestCase):
+class BasicCompareTests(PreservesTestCase):
    def test_eq_identity(self):
        self.assertTrue(eq(1, 1))
        self.assertFalse(eq(1, 1.0))
--- a/implementations/python/tests/test_path.py
+++ b/implementations/python/tests/test_path.py
@ -1,30 +1,30 @@
-import unittest
+from utils import PreservesTestCase

 from preserves import *
 from preserves.path import parse

-class BasicPathTests(unittest.TestCase):
+class BasicPathTests(PreservesTestCase):
    def test_identity(self):
-        self.assertEqual(parse('').exec(1), (1,))
-        self.assertEqual(parse('').exec([]), ([],))
-        self.assertEqual(parse('').exec(Record(Symbol('hi'), [])), (Record(Symbol('hi'), []),))
+        self.assertPreservesEqual(parse('').exec(1), (1,))
+        self.assertPreservesEqual(parse('').exec([]), ([],))
+        self.assertPreservesEqual(parse('').exec(Record(Symbol('hi'), [])), (Record(Symbol('hi'), []),))

    def test_children(self):
-        self.assertEqual(parse('/').exec([1, 2, 3]), (1, 2, 3))
-        self.assertEqual(parse('/').exec([1, [2], 3]), (1, [2], 3))
-        self.assertEqual(parse('/').exec(Record(Symbol('hi'), [1, [2], 3])), (1, [2], 3))
+        self.assertPreservesEqual(parse('/').exec([1, 2, 3]), (1, 2, 3))
+        self.assertPreservesEqual(parse('/').exec([1, [2], 3]), (1, [2], 3))
+        self.assertPreservesEqual(parse('/').exec(Record(Symbol('hi'), [1, [2], 3])), (1, [2], 3))

    def test_label(self):
-        self.assertEqual(parse('.^').exec([1, 2, 3]), ())
-        self.assertEqual(parse('.^').exec([1, [2], 3]), ())
-        self.assertEqual(parse('.^').exec(Record(Symbol('hi'), [1, [2], 3])), (Symbol('hi'),))
+        self.assertPreservesEqual(parse('.^').exec([1, 2, 3]), ())
+        self.assertPreservesEqual(parse('.^').exec([1, [2], 3]), ())
+        self.assertPreservesEqual(parse('.^').exec(Record(Symbol('hi'), [1, [2], 3])), (Symbol('hi'),))

    def test_count(self):
-        self.assertEqual(parse('<count / ^ hi>').exec([ Record(Symbol('hi'), [1]),
-                                                        Record(Symbol('no'), [2]),
-                                                        Record(Symbol('hi'), [3]) ]),
+        self.assertPreservesEqual(parse('<count / ^ hi>').exec([ Record(Symbol('hi'), [1]),
+                                                                 Record(Symbol('no'), [2]),
+                                                                 Record(Symbol('hi'), [3]) ]),
                         (2,))
-        self.assertEqual(parse('/ <count ^ hi>').exec([ Record(Symbol('hi'), [1]),
-                                                        Record(Symbol('no'), [2]),
-                                                        Record(Symbol('hi'), [3]) ]),
+        self.assertPreservesEqual(parse('/ <count ^ hi>').exec([ Record(Symbol('hi'), [1]),
+                                                                 Record(Symbol('no'), [2]),
+                                                                 Record(Symbol('hi'), [3]) ]),
                         (1, 0, 1))
--- a/implementations/python/tests/test_preserves.py
+++ b/implementations/python/tests/test_preserves.py
@ -1,11 +1,12 @@
 import numbers
 import os
 import sys
-import unittest

 # Make `preserves` available for imports
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

+from utils import PreservesTestCase
+
 from preserves import *
 from preserves.compat import basestring_, ord_
 from preserves.values import _unwrap
@ -49,33 +50,33 @@ def _e(v):
 def _R(k, *args):
    return Record(Symbol(k), args)

-class BinaryCodecTests(unittest.TestCase):
+class BinaryCodecTests(PreservesTestCase):
    def _roundtrip(self, forward, expected, back=None, nondeterministic=False):
        if back is None: back = forward
-        self.assertEqual(_d(_e(forward)), back)
-        self.assertEqual(_d(_e(back)), back)
-        self.assertEqual(_d(expected), back)
+        self.assertPreservesEqual(_d(_e(forward)), back)
+        self.assertPreservesEqual(_d(_e(back)), back)
+        self.assertPreservesEqual(_d(expected), back)
        if not nondeterministic:
            actual = _e(forward)
-            self.assertEqual(actual, expected, '%s != %s' % (_hex(actual), _hex(expected)))
+            self.assertPreservesEqual(actual, expected, '%s != %s' % (_hex(actual), _hex(expected)))

    def test_decode_varint(self):
        with self.assertRaises(DecodeError):
            Decoder(_buf()).varint()
-        self.assertEqual(Decoder(_buf(0)).varint(), 0)
-        self.assertEqual(Decoder(_buf(10)).varint(), 10)
-        self.assertEqual(Decoder(_buf(100)).varint(), 100)
-        self.assertEqual(Decoder(_buf(200, 1)).varint(), 200)
-        self.assertEqual(Decoder(_buf(0b10101100, 0b00000010)).varint(), 300)
-        self.assertEqual(Decoder(_buf(128, 148, 235, 220, 3)).varint(), 1000000000)
+        self.assertPreservesEqual(Decoder(_buf(0)).varint(), 0)
+        self.assertPreservesEqual(Decoder(_buf(10)).varint(), 10)
+        self.assertPreservesEqual(Decoder(_buf(100)).varint(), 100)
+        self.assertPreservesEqual(Decoder(_buf(200, 1)).varint(), 200)
+        self.assertPreservesEqual(Decoder(_buf(0b10101100, 0b00000010)).varint(), 300)
+        self.assertPreservesEqual(Decoder(_buf(128, 148, 235, 220, 3)).varint(), 1000000000)

    def test_encode_varint(self):
-        self.assertEqual(_varint(0), _buf(0))
-        self.assertEqual(_varint(10), _buf(10))
-        self.assertEqual(_varint(100), _buf(100))
-        self.assertEqual(_varint(200), _buf(200, 1))
-        self.assertEqual(_varint(300), _buf(0b10101100, 0b00000010))
-        self.assertEqual(_varint(1000000000), _buf(128, 148, 235, 220, 3))
+        self.assertPreservesEqual(_varint(0), _buf(0))
+        self.assertPreservesEqual(_varint(10), _buf(10))
+        self.assertPreservesEqual(_varint(100), _buf(100))
+        self.assertPreservesEqual(_varint(200), _buf(200, 1))
+        self.assertPreservesEqual(_varint(300), _buf(0b10101100, 0b00000010))
+        self.assertPreservesEqual(_varint(1000000000), _buf(128, 148, 235, 220, 3))

    def test_simple_seq(self):
        self._roundtrip([1,2,3,4], _buf(0xb5, 0x91, 0x92, 0x93, 0x94, 0x84), back=(1,2,3,4))
@ -157,7 +158,7 @@ class BinaryCodecTests(unittest.TestCase):
            # python 3
            bs = _e(d.items())
            self.assertRegex(_hex(bs), r)
-        self.assertEqual(sorted(_d(bs)), [(u'a', 1), (u'b', 2), (u'c', 3)])
+        self.assertPreservesEqual(sorted(_d(bs)), [(u'a', 1), (u'b', 2), (u'c', 3)])

    def test_long_sequence(self):
        self._roundtrip((False,) * 14, _buf(0xb5, b'\x80' * 14, 0x84))
@ -172,9 +173,9 @@ class BinaryCodecTests(unittest.TestCase):
        a1 = Embedded(A(1))
        a2 = Embedded(A(1))
        self.assertNotEqual(encode(a1, encode_embedded=id), encode(a2, encode_embedded=id))
-        self.assertEqual(encode(a1, encode_embedded=id), encode(a1, encode_embedded=id))
-        self.assertEqual(ord_(encode(a1, encode_embedded=id)[0]), 0x86)
-        self.assertEqual(ord_(encode(a2, encode_embedded=id)[0]), 0x86)
+        self.assertPreservesEqual(encode(a1, encode_embedded=id), encode(a1, encode_embedded=id))
+        self.assertPreservesEqual(ord_(encode(a1, encode_embedded=id)[0]), 0x86)
+        self.assertPreservesEqual(ord_(encode(a2, encode_embedded=id)[0]), 0x86)

    def test_decode_embedded_absent(self):
        with self.assertRaises(DecodeError):
@ -185,15 +186,15 @@ class BinaryCodecTests(unittest.TestCase):
        def enc(p):
            objects.append(p)
            return len(objects) - 1
-        self.assertEqual(encode([Embedded(object()), Embedded(object())], encode_embedded = enc),
-                         b'\xb5\x86\x90\x86\x91\x84')
+        self.assertPreservesEqual(encode([Embedded(object()), Embedded(object())], encode_embedded = enc),
+                                  b'\xb5\x86\x90\x86\x91\x84')

    def test_decode_embedded(self):
        objects = [123, 234]
        def dec(v):
            return objects[v]
-        self.assertEqual(decode(b'\xb5\x86\x90\x86\x91\x84', decode_embedded = dec),
-                         (Embedded(123), Embedded(234)))
+        self.assertPreservesEqual(decode(b'\xb5\x86\x90\x86\x91\x84', decode_embedded = dec),
+                                  (Embedded(123), Embedded(234)))

 def load_binary_samples():
    with open(os.path.join(os.path.dirname(__file__), 'samples.bin'), 'rb') as f:
@ -203,16 +204,16 @@ def load_text_samples():
    with open(os.path.join(os.path.dirname(__file__), 'samples.pr'), 'rt') as f:
        return Parser(f.read(), include_annotations=True, parse_embedded=lambda x: x).next()

-class TextCodecTests(unittest.TestCase):
+class TextCodecTests(PreservesTestCase):
    def test_samples_bin_eq_txt(self):
        b = load_binary_samples()
        t = load_text_samples()
-        self.assertEqual(b, t)
+        self.assertPreservesEqual(b, t)

    def test_txt_roundtrip(self):
        b = load_binary_samples()
        s = stringify(b, format_embedded=lambda x: x)
-        self.assertEqual(parse(s, include_annotations=True, parse_embedded=lambda x: x), b)
+        self.assertPreservesEqual(parse(s, include_annotations=True, parse_embedded=lambda x: x), b)

 def add_method(d, tName, fn):
    if hasattr(fn, 'func_name'):
@ -254,14 +255,14 @@ def install_test(d, variant, tName, binaryForm, annotatedTextForm):
    entry = get_expected_values(tName, textForm)
    forward = entry['forward']
    back = entry['back']
-    def test_match_expected(self): self.assertEqual(textForm, back)
-    def test_roundtrip(self): self.assertEqual(self.DS(self.E(textForm)), back)
-    def test_forward(self): self.assertEqual(self.DS(self.E(forward)), back)
-    def test_back(self): self.assertEqual(self.DS(binaryForm), back)
-    def test_back_ann(self): self.assertEqual(self.D(self.E(annotatedTextForm)), annotatedTextForm)
-    def test_encode(self): self.assertEqual(self.E(forward), binaryForm)
-    def test_encode_canonical(self): self.assertEqual(self.EC(annotatedTextForm), binaryForm)
-    def test_encode_ann(self): self.assertEqual(self.E(annotatedTextForm), binaryForm)
+    def test_match_expected(self): self.assertPreservesEqual(textForm, back)
+    def test_roundtrip(self): self.assertPreservesEqual(self.DS(self.E(textForm)), back)
+    def test_forward(self): self.assertPreservesEqual(self.DS(self.E(forward)), back)
+    def test_back(self): self.assertPreservesEqual(self.DS(binaryForm), back)
+    def test_back_ann(self): self.assertPreservesEqual(self.D(self.E(annotatedTextForm)), annotatedTextForm)
+    def test_encode(self): self.assertPreservesEqual(self.E(forward), binaryForm)
+    def test_encode_canonical(self): self.assertPreservesEqual(self.EC(annotatedTextForm), binaryForm)
+    def test_encode_ann(self): self.assertPreservesEqual(self.E(annotatedTextForm), binaryForm)
    add_method(d, tName, test_match_expected)
    add_method(d, tName, test_roundtrip)
    add_method(d, tName, test_forward)
@ -284,7 +285,7 @@ def install_exn_test(d, tName, bs, check_proc):
        self.fail('did not fail as expected')
    add_method(d, tName, test_exn)

-class CommonTestSuite(unittest.TestCase):
+class CommonTestSuite(PreservesTestCase):
    TestCases = Record.makeConstructor('TestCases', 'cases')

    samples = load_binary_samples()
@ -325,7 +326,7 @@ class CommonTestSuite(unittest.TestCase):
    def EC(self, v):
        return encode(v, encode_embedded=lambda x: x, canonicalize=True)

-class RecordTests(unittest.TestCase):
+class RecordTests(PreservesTestCase):
    def test_getters(self):
        T = Record.makeConstructor('t', 'x y z')
        T2 = Record.makeConstructor('t', 'x y z')
@ -334,8 +335,8 @@ class RecordTests(unittest.TestCase):
        self.assertTrue(T.isClassOf(t))
        self.assertTrue(T2.isClassOf(t))
        self.assertFalse(U.isClassOf(t))
-        self.assertEqual(T._x(t), 1)
-        self.assertEqual(T2._y(t), 2)
-        self.assertEqual(T._z(t), 3)
+        self.assertPreservesEqual(T._x(t), 1)
+        self.assertPreservesEqual(T2._y(t), 2)
+        self.assertPreservesEqual(T._z(t), 3)
        with self.assertRaises(TypeError):
            U._x(t)
--- a/implementations/python/tests/test_schema.py
+++ b/implementations/python/tests/test_schema.py
@ -1,4 +1,4 @@
-import unittest
+from utils import PreservesTestCase

 from preserves import *
 from preserves.schema import meta, Compiler
@ -8,7 +8,7 @@ def literal_schema(modname, s):
    c.load_schema((Symbol(modname),), preserve(s))
    return c.root

-class BasicSchemaTests(unittest.TestCase):
+class BasicSchemaTests(PreservesTestCase):
    def test_dictionary_literal(self):
        m = literal_schema(
            's',
@ -22,7 +22,7 @@ class BasicSchemaTests(unittest.TestCase):
 }>
 '''))
        self.assertEqual(m.s.C.decode({'core': Symbol('true')}), m.s.C())
-        self.assertEqual(preserve(m.s.C()), {'core': Symbol('true')})
+        self.assertPreservesEqual(preserve(m.s.C()), {'core': Symbol('true')})

    def test_alternation_of_dictionary_literal(self):
        m = literal_schema(
@ -40,6 +40,6 @@ class BasicSchemaTests(unittest.TestCase):
 }>
 '''))
        self.assertEqual(m.s.C.decode({'core': Symbol('true')}), m.s.C.core())
-        self.assertEqual(preserve(m.s.C.core()), {'core': Symbol('true')})
+        self.assertPreservesEqual(preserve(m.s.C.core()), {'core': Symbol('true')})
        self.assertEqual(m.s.C.decode({'notcore': Symbol('true')}), m.s.C.notcore())
-        self.assertEqual(preserve(m.s.C.notcore()), {'notcore': Symbol('true')})
+        self.assertPreservesEqual(preserve(m.s.C.notcore()), {'notcore': Symbol('true')})
--- a/implementations/python/tests/utils.py
+++ b/implementations/python/tests/utils.py
@ -0,0 +1,9 @@
+import unittest
+
+from preserves import cmp
+
+class PreservesTestCase(unittest.TestCase):
+    def assertPreservesEqual(self, a, b, msg=None):
+        if msg is None:
+            msg = 'Expected %s to be Preserves-equal to %s' % (a, b)
+        self.assertTrue(cmp(a, b) == 0, msg)
--- a/implementations/racket/preserves/preserves/float-bytes.rkt
+++ b/implementations/racket/preserves/preserves/float-bytes.rkt
@ -0,0 +1,101 @@
+#lang racket/base
+;; Conversion between binary32 and binary64 big-endian external format (byte-vectors) and
+;; internal double-precision floating-point numbers, with special attention paid to
+;; preservation of the quiet/signaling bit of NaNs, which otherwise is frequently disturbed by
+;; hardware-level conversion between single and double precision.
+
+(provide bytes->float
+         float->bytes
+         bytes->double
+         double->bytes)
+
+(require "float.rkt")
+(require (only-in racket/math nan? infinite?))
+
+;; Bit-level predicates over big-endian IEEE 754 byte strings. Byte 0 holds the sign bit
+;; (bit 7) followed by the seven high-order exponent bits (bits 0-6); the remaining exponent
+;; bits sit at the high-order end of byte 1.
+(module binary racket/base
+  (provide (all-defined-out))
+
+  ;; True when all eight exponent bits of a binary32 value are set, i.e. the value is a NaN
+  ;; or an infinity.
+  (define (binary32-nan-or-inf? bs)
+    (and (= (bitwise-bit-field (bytes-ref bs 0) 0 7) #x7f)
+         (bitwise-bit-set? (bytes-ref bs 1) 7)))
+
+  ;; True when all eleven exponent bits of a binary64 value are set, i.e. the value is a NaN
+  ;; or an infinity.
+  (define (binary64-nan-or-inf? bs)
+    (and (= (bitwise-bit-field (bytes-ref bs 0) 0 7) #x7f)
+         (= (bitwise-bit-field (bytes-ref bs 1) 4 8) #x0f)))
+
+  ;; True when the sign bit is set. `bitwise-bit-set?` counts from the least-significant bit,
+  ;; so the sign is bit 7 of the first byte; bit 0 would instead test an exponent bit.
+  (define (sign-bit-set? bs)
+    (bitwise-bit-set? (bytes-ref bs 0) 7)))
+
+(require (submod "." binary))
+
+;; Decodes four big-endian binary32 bytes and wraps the resulting real with `float`.
+;; NaNs and infinities are widened by hand: the sign and exponent bits are moved to the top
+;; of a 64-bit pattern and the 23 payload bits are placed directly beneath the exponent, so
+;; the payload bits are carried across without going through a numeric conversion.
+(define (bytes->float bs)
+  (if (binary32-nan-or-inf? bs)
+      (let* ((vf (integer-bytes->integer bs #f #t))
+             (signexp (bitwise-bit-field vf 23 32)) ;; sign bit plus the 8 exponent bits
+             (payload (bitwise-bit-field vf 0 23))
+             (vd (bitwise-ior (arithmetic-shift signexp 55)
+                              #x0070000000000000 ;; sets bits 52-54, completing the 11-bit exponent
+                              (arithmetic-shift payload 29)))
+             (dbs (integer->integer-bytes vd 8 #f #t)))
+        (float (floating-point-bytes->real dbs #t 0 8)))
+      (float (floating-point-bytes->real bs #t 0 4))))
+
+;; Encodes the real held in a `float` (read via `float-value`) as four big-endian binary32
+;; bytes. NaNs and infinities are narrowed by hand: bits 55-63 of the double (sign plus the
+;; top eight exponent bits) and bits 29-51 (the top 23 payload bits) are copied into place.
+;; NOTE(review): the low 29 payload bits are dropped, so a NaN whose only set payload bits
+;; lie there would appear to be written as an infinity -- confirm whether that can arise.
+(define (float->bytes v)
+  (let ((v (float-value v)))
+    (if (or (nan? v) (infinite? v))
+        (let* ((dbs (real->floating-point-bytes v 8 #t))
+               (vd (integer-bytes->integer dbs #f #t))
+               (signexp (bitwise-bit-field vd 55 64))
+               (payload (bitwise-bit-field vd 29 52))
+               (vf (bitwise-ior (arithmetic-shift signexp 23)
+                                payload))
+               (bs (integer->integer-bytes vf 4 #f #t)))
+          bs)
+        (real->floating-point-bytes v 4 #t))))
+
+;; Decodes eight big-endian binary64 bytes into a real number.
+(define (bytes->double bs)
+  (floating-point-bytes->real bs #t 0 8))
+
+;; Encodes a real number as eight big-endian binary64 bytes.
+(define (double->bytes v)
+  (real->floating-point-bytes v 8 #t))
+
+;; Round-trip checks: every hex pattern below must decode and re-encode to the same bytes.
+(module+ test
+  (require rackunit)
+  (require file/sha1)
+
+  ;; hex -> bytes -> double -> bytes -> hex must reproduce the input string.
+  (define (check-roundtrip-double hex)
+    (check-equal? (bytes->hex-string (double->bytes (bytes->double (hex-string->bytes hex))))
+                  hex))
+
+  ;; hex -> bytes -> float -> bytes -> hex must reproduce the input string.
+  (define (check-roundtrip-float hex)
+    (check-equal? (bytes->hex-string (float->bytes (bytes->float (hex-string->bytes hex))))
+                  hex))
+
+  ;; One ordinary value, then all-ones-exponent patterns with either sign, with the top
+  ;; mantissa bit clear (7ff0.../fff0...) and set (7ff8.../fff8...).
+  (check-roundtrip-double "0123456789abcdef")
+  (check-roundtrip-double "7ff0000000000321")
+  (check-roundtrip-double "7ff0000000000001")
+  (check-roundtrip-double "7ff0000000000000")
+  (check-roundtrip-double "fff0000000000321")
+  (check-roundtrip-double "fff0000000000001")
+  (check-roundtrip-double "fff0000000000000")
+  (check-roundtrip-double "7ff8000000000321")
+  (check-roundtrip-double "7ff8000000000001")
+  (check-roundtrip-double "7ff8000000000000")
+  (check-roundtrip-double "fff8000000000321")
+  (check-roundtrip-double "fff8000000000001")
+  (check-roundtrip-double "fff8000000000000")
+
+  ;; The same layout for binary32: top mantissa bit clear (7f80.../ff80...) and set
+  ;; (7fc0.../ffc0...).
+  (check-roundtrip-float "01234567")
+  (check-roundtrip-float "7f800321")
+  (check-roundtrip-float "7f800001")
+  (check-roundtrip-float "7f800000")
+  (check-roundtrip-float "ff800321")
+  (check-roundtrip-float "ff800001")
+  (check-roundtrip-float "ff800000")
+  (check-roundtrip-float "7fc00321")
+  (check-roundtrip-float "7fc00001")
+  (check-roundtrip-float "7fc00000")
+  (check-roundtrip-float "ffc00321")
+  (check-roundtrip-float "ffc00001")
+  (check-roundtrip-float "ffc00000")
+  )
--- a/implementations/racket/preserves/preserves/jelly.rkt
+++ b/implementations/racket/preserves/preserves/jelly.rkt
@ -8,8 +8,8 @@
 ;;---------------------------------------------------------------------------
 ;; Representing values

+(require "float.rkt" "float-bytes.rkt")
 (struct record (label fields) #:transparent)
-(struct float (value) #:transparent) ;; a marker for single-precision I/O
 (struct annotated (annotations item) #:transparent)
 (struct embedded (value) #:transparent)

@ -23,8 +23,8 @@
      (match (next-byte)
        [#x80 #f]
        [#x81 #t]
-        [#x82 (float (floating-point-bytes->real (next-bytes 4) #t 0 4))]
-        [#x83 (floating-point-bytes->real (next-bytes 8) #t 0 8)]
+        [#x82 (bytes->float (next-bytes 4))]
+        [#x83 (bytes->double (next-bytes 8))]
        [#x84 '#:end]
        [#x85 (let ((a (next)))
                (match (next)
@ -80,8 +80,8 @@
    (match v
      [#f (write-byte #x80 out-port)]
      [#t (write-byte #x81 out-port)]
-      [(float v) (write-byte #x82 out-port) (output-bytes (real->floating-point-bytes v 4 #t))]
-      [(? flonum?) (write-byte #x83 out-port) (output-bytes (real->floating-point-bytes v 8 #t))]
+      [(float _) (write-byte #x82 out-port) (output-bytes (float->bytes v))]
+      [(? flonum?) (write-byte #x83 out-port) (output-bytes (double->bytes v))]

      [(annotated as v)
       (for [(a (in-list as))] (write-byte #x85 out-port) (output a))
--- a/implementations/racket/preserves/preserves/read-binary.rkt
+++ b/implementations/racket/preserves/preserves/read-binary.rkt
@ -7,6 +7,7 @@
 (require "record.rkt")
 (require "embedded.rkt")
 (require "float.rkt")
+(require "float-bytes.rkt")
 (require "annotation.rkt")
 (require "varint.rkt")
 (require racket/set)
@ -70,8 +71,8 @@
      (match lead-byte
        [#x80 #f]
        [#x81 #t]
-        [#x82 (float (floating-point-bytes->real (next-bytes 4) #t 0 4))]
-        [#x83 (floating-point-bytes->real (next-bytes 8) #t 0 8)]
+        [#x82 (bytes->float (next-bytes 4))]
+        [#x83 (bytes->double (next-bytes 8))]
        [#x84 '#:end]
        [#x85 (let ((a (next)))
                (if read-annotations?
--- a/implementations/racket/preserves/preserves/read-text.rkt
+++ b/implementations/racket/preserves/preserves/read-text.rkt
@ -10,6 +10,7 @@
 (require "read-binary.rkt")
 (require "record.rkt")
 (require "float.rkt")
+(require "float-bytes.rkt")
 (require syntax/readerr)
 (require (only-in file/sha1 hex-string->bytes))
 (require (only-in net/base64 base64-decode))
@ -67,8 +68,6 @@
  (define (next*)
    (skip-whitespace)
    (match (next-char)
-      [#\- (read-intpart (list #\-) (next-char))]
-      [(and c (or #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) (read-intpart '() c)]
      [#\" (read-string #\")]
      [(== PIPE) (string->symbol (read-string PIPE))]

@ -82,21 +81,12 @@
             [#\t #t]
             [#\{ (sequence-fold (set) set-add* values #\})]
             [#\" (read-literal-binary)]
-             [#\x (if (eqv? (next-char) #\")
-                      (read-hex-binary '())
-                      (parse-error "Expected open-quote at start of hex ByteString"))]
+             [#\x (match (next-char)
+                    [#\" (read-hex-binary '())]
+                    [#\f (read-hex-float 'float)]
+                    [#\d (read-hex-float 'double)]
+                    [c (parse-error "Invalid #x syntax: ~v" c)])]
             [#\[ (read-base64-binary '())]
-             [#\= (define bs (read-preserve/text in-port #:read-syntax? #t #:source source))
-                  (when (not (bytes? (annotated-item bs)))
-                    (parse-error "ByteString must follow #="))
-                  (when (not (null? (annotated-annotations bs)))
-                    (parse-error "Annotations not permitted after #="))
-                  (bytes->preserve
-                   (annotated-item bs)
-                   (lambda (message . args)
-                     (apply parse-error (string-append "Inline binary value: " message) args))
-                   #:read-syntax? read-syntax?
-                   #:on-short (lambda () (parse-error "Incomplete inline binary value")))]
             [#\! (embedded (decode-embedded (next)))]
             [c (parse-error "Invalid # syntax: ~v" c)])]

@ -110,7 +100,7 @@
      [#\] (parse-error "Unexpected ]")]
      [#\} (parse-error "Unexpected }")]

-      [c (read-raw-symbol (list c))]))
+      [c (read-raw-symbol-or-number (list c))]))

  (define (set-add* s e)
    (when (set-member? s e) (parse-error "Duplicate set element: ~v" e))
@ -159,49 +149,6 @@
                (annotated '() loc v))))
        (lambda (pos0 v) v)))

-  ;;---------------------------------------------------------------------------
-  ;; Numbers
-
-  (define (read-intpart acc-rev ch)
-    (match ch
-      [#\0 (read-fracexp (cons ch acc-rev))]
-      [_ (read-digit+ acc-rev read-fracexp ch)]))
-
-  (define (read-digit* acc-rev k)
-    (match (peek-char in-port)
-      [(? char? (? char-numeric?)) (read-digit* (cons (read-char in-port) acc-rev) k)]
-      [_ (k acc-rev)]))
-
-  (define (read-digit+ acc-rev k [ch (read-char in-port)])
-    (match ch
-      [(? char? (? char-numeric?)) (read-digit* (cons ch acc-rev) k)]
-      [_ (parse-error "Incomplete number")]))
-
-  (define (read-fracexp acc-rev)
-    (match (peek-char in-port)
-      [#\. (read-digit+ (cons (read-char in-port) acc-rev) read-exp)]
-      [_ (read-exp acc-rev)]))
-
-  (define (read-exp acc-rev)
-    (match (peek-char in-port)
-      [(or #\e #\E) (read-sign-and-exp (cons (read-char in-port) acc-rev))]
-      [_ (finish-number acc-rev)]))
-
-  (define (read-sign-and-exp acc-rev)
-    (match (peek-char in-port)
-      [(or #\+ #\-) (read-digit+ (cons (read-char in-port) acc-rev) finish-number)]
-      [_ (read-digit+ acc-rev finish-number)]))
-
-  (define (finish-number acc-rev)
-    (define s (list->string (reverse acc-rev)))
-    (define n (string->number s 10))
-    (when (not n) (parse-error "Invalid number: ~v" s))
-    (if (flonum? n)
-        (match (peek-char in-port)
-          [(or #\f #\F) (read-char in-port) (float n)]
-          [_ n])
-        n))
-
  ;;---------------------------------------------------------------------------
  ;; String-like things

@ -279,6 +226,19 @@
          [else
           (parse-error "Invalid hex character")]))

+  ;;---------------------------------------------------------------------------
+  ;; Hex-encoded floating point numbers
+
+  ;; Reads the quoted hex body of a `#xf"..."` / `#xd"..."` literal; the caller has already
+  ;; consumed the `f` or `d`. `precision` is 'float (4 bytes expected) or 'double (8 bytes
+  ;; expected). Signals a parse error when the opening quote is missing or the byte count
+  ;; is wrong; otherwise converts the bytes with bytes->float or bytes->double.
+  (define (read-hex-float precision)
+    (unless (eqv? (next-char) #\")
+      (parse-error "Missing open-double-quote in hex-encoded floating-point number"))
+    (define bs (read-hex-binary '()))
+    (unless (= (bytes-length bs) (match precision ['float 4] ['double 8]))
+      (parse-error "Incorrect number of bytes in hex-encoded floating-point number"))
+    (match precision
+      ['float (bytes->float bs)]
+      ['double (bytes->double bs)]))
+
  ;;---------------------------------------------------------------------------
  ;; Base64-encoded ByteStrings

@ -334,16 +294,56 @@
                   #\}))

  ;;---------------------------------------------------------------------------
-  ;; "Raw" symbols
+  ;; "Raw" symbols and numbers

-  (define (read-raw-symbol acc)
+  (define (read-raw-symbol-or-number acc)
    (match (peek-char in-port)
      [(or (? eof-object?)
           (? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\>
                        #\" #\; #\, #\@ #\# #\: (== PIPE)
                        (? char-whitespace?))))
-       (string->symbol (list->string (reverse acc)))]
-      [_ (read-raw-symbol (cons (read-char in-port) acc))]))
+       (let ((input (reverse acc)))
+         (or (analyze-number input)
+             (string->symbol (list->string input))))]
+      [_ (read-raw-symbol-or-number (cons (read-char in-port) acc))]))
+
+  ;; Tries to interpret `input`, a list of characters, as a number. An optional leading
+  ;; `+` or `-` is kept as the first accumulated character; at least one digit must follow.
+  ;; Yields the number, or #f when the characters do not form one.
+  (define (analyze-number input)
+    (match input
+      [(cons (and sign (or #\+ #\-)) input) (read-digit+ (list sign) read-fracexp input)]
+      [_ (read-digit+ (list) read-fracexp input)]))
+
+  ;; Moves zero or more leading `char-numeric?` characters from `input` onto `acc-rev`
+  ;; (kept in reverse order), then calls `k` with the accumulator and the remaining input.
+  (define (read-digit* acc-rev k input)
+    (match input
+      [(cons (? char? (? char-numeric? d)) input) (read-digit* (cons d acc-rev) k input)]
+      [_ (k acc-rev input)]))
+
+  ;; As read-digit*, but requires at least one digit: yields #f when `input` does not
+  ;; start with a `char-numeric?` character.
+  (define (read-digit+ acc-rev k input)
+    (match input
+      [(cons (? char? (? char-numeric? d)) input) (read-digit* (cons d acc-rev) k input)]
+      [_ #f]))
+
+  ;; Optional fractional part: a `.` must be followed by at least one digit. Either way,
+  ;; processing continues with the optional exponent.
+  (define (read-fracexp acc-rev input)
+    (match input
+      [(cons #\. input) (read-digit+ (cons #\. acc-rev) read-exp input)]
+      [_ (read-exp acc-rev input)]))
+
+  ;; Optional exponent: an `e` or `E` hands over to read-sign-and-exp; anything else goes
+  ;; straight to finish-number.
+  (define (read-exp acc-rev input)
+    (match input
+      [(cons (and e (or #\e #\E)) input) (read-sign-and-exp (cons e acc-rev) input)]
+      [_ (finish-number acc-rev input)]))
+
+  ;; After the exponent marker: an optional `+` or `-`, then at least one digit, then
+  ;; finish-number.
+  (define (read-sign-and-exp acc-rev input)
+    (match input
+      [(cons (and sign (or #\+ #\-)) input) (read-digit+ (cons sign acc-rev) finish-number input)]
+      [_ (read-digit+ acc-rev finish-number input)]))
+
+  ;; Converts the accumulated characters with `string->number` (radix 10) and decides the
+  ;; result from what is left in `input`:
+  ;;  - conversion failed: #f
+  ;;  - a flonum followed by exactly one `f` or `F`: the number wrapped with `float`
+  ;;  - nothing left over: the number itself
+  ;;  - any other leftover characters: #f
+  (define (finish-number acc-rev input)
+    (define s (list->string (reverse acc-rev)))
+    (define n (string->number s 10))
+    (cond [(not n) #f]
+          [(and (flonum? n) (member input '((#\f) (#\F)))) (float n)]
+          [(equal? input '()) n]
+          [else #f]))

  ;;---------------------------------------------------------------------------
  ;; Main entry point to parser
--- a/implementations/racket/preserves/preserves/tests/samples.pr
+++ b/implementations/racket/preserves/preserves/tests/samples.pr
@ -74,9 +74,45 @@
  dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
  dict4: @"Unexpected close brace" <ParseError "}">
  dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84">
+  double0: <Test #x"830000000000000000" 0.0>
+  double+0: <Test #x"830000000000000000" +0.0>
+  double-0: <Test #x"838000000000000000" -0.0>
  double1: <Test #x"833ff0000000000000" 1.0>
  double2: <Test #x"83fe3cb7b759bf0426" -1.202e300>
+  double3: <Test #x"83123456789abcdef0" #xd"12 34 56 78  9a bc de f0">
+  double4: @"Fewer than 16 digits" <ParseError "#xd\"12345678\"">
+  double5: @"More than 16 digits" <ParseError "#xd\"123456789abcdef012\"">
+  double6: @"Invalid chars" <ParseError "#xd\"12zz56789abcdef0\"">
+  double7: @"Positive infinity" <Test #x"837ff0000000000000" #xd"7ff0000000000000">
+  double8: @"Negative infinity" <Test #x"83fff0000000000000" #xd"fff0000000000000">
+  double9: @"-qNaN" <Test #x"83fff0000000000001" #xd"fff0000000000001">
+  double10: @"-qNaN" <Test #x"83fff0000000000111" #xd"fff0000000000111">
+  double11: @"+qNaN" <Test #x"837ff0000000000001" #xd"7ff0000000000001">
+  double12: @"+qNaN" <Test #x"837ff0000000000111" #xd"7ff0000000000111">
+  double13: @"Bad spacing" <ParseError "#xd\"12345 6789abcdef0\"">
+  double14: @"-sNaN" <Test #x"83fff8000000000001" #xd"fff8000000000001">
+  double15: @"-sNaN" <Test #x"83fff8000000000111" #xd"fff8000000000111">
+  double16: @"+sNaN" <Test #x"837ff8000000000001" #xd"7ff8000000000001">
+  double17: @"+sNaN" <Test #x"837ff8000000000111" #xd"7ff8000000000111">
+  float0: <Test #x"8200000000" 0.0f>
+  float+0: <Test #x"8200000000" +0.0f>
+  float-0: <Test #x"8280000000" -0.0f>
  float1: <Test #x"823f800000" 1.0f>
+  float2: <Test #x"8212345678" #xf"12 34  56 78">
+  float3: @"Fewer than 8 digits" <ParseError "#xf\"123456\"">
+  float4: @"More than 8 digits" <ParseError "#xf\"123456789a\"">
+  float5: @"Invalid chars" <ParseError "#xf\"12zz5678\"">
+  float6: @"Positive infinity" <Test #x"827f800000" #xf"7f800000">
+  float7: @"Negative infinity" <Test #x"82ff800000" #xf"ff800000">
+  float8: @"+sNaN" <Test #x"827f800001" #xf"7f800001">
+  float9: @"+sNaN" <Test #x"827f800111" #xf"7f800111">
+  float10: @"-sNaN" <Test #x"82ff800001" #xf"ff800001">
+  float11: @"-sNaN" <Test #x"82ff800111" #xf"ff800111">
+  float12: @"Bad spacing" <ParseError "#xf\"12345 678\"">
+  float13: @"+qNaN" <Test #x"827fc00001" #xf"7fc00001">
+  float14: @"+qNaN" <Test #x"827fc00111" #xf"7fc00111">
+  float15: @"-qNaN" <Test #x"82ffc00001" #xf"ffc00001">
+  float16: @"-qNaN" <Test #x"82ffc00111" #xf"ffc00111">
  int-257: <Test #x"a1feff" -257>
  int-256: <Test #x"a1ff00" -256>
  int-255: <Test #x"a1ff01" -255>
@ -89,10 +125,13 @@
  int-2: <Test #x"9e" -2>
  int-1: <Test #x"9f" -1>
  int0: <Test #x"90" 0>
+  int+0: <Test #x"90" +0>
+  int-0: <Test #x"90" -0>
  int1: <Test #x"91" 1>
  int12: <Test #x"9c" 12>
  int13: <Test #x"a00d" 13>
  int127: <Test #x"a07f" 127>
+  int+127: <Test #x"a07f" +127>
  int128: <Test #x"a10080" 128>
  int255: <Test #x"a100ff" 255>
  int256: <Test #x"a10100" 256>
@ -112,6 +151,8 @@
  list8: @"Missing close bracket" <ParseShort "[">
  list9: @"Unexpected close bracket" <ParseError "]">
  list10: @"Missing end byte" <DecodeShort #x"b58080">
+  list11: <Test #x"b59184" [01]>
+  list12: <Test #x"b59c84" [12]>
  noinput0: @"No input at all" <DecodeEOF #x"">
  embed0: <Test #x"8690" #!0>
  embed1: <Test #x"868690" #!#!0>
@ -138,17 +179,22 @@
  string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
  symbol0: <Test #x"b300" ||>
  symbol2: <Test #x"b30568656c6c6f" hello>
+  symbol3: <Test #x"b305312d322d33" 1-2-3>
+  symbol4: <Test #x"b305612d622d63" a-b-c>
+  symbol5: <Test #x"b305612b622b63" a+b+c>
+  symbol6: <Test #x"b3012b" +>
+  symbol7: <Test #x"b3032b2b2b" +++>
+  symbol8: <Test #x"b3012d" ->
+  symbol9: <Test #x"b3032d2d2d" --->
+  symbol10: <Test #x"b3022d61" -a>
+  symbol11: <Test #x"b3042d2d2d61" ---a>
+  symbol12: <Test #x"b3042d2d2d31" ---1>
+  symbol13: <Test #x"b3042b312e78" +1.x>
  tag0: @"Unexpected end tag" <DecodeError #x"84">
  tag1: @"Invalid tag" <DecodeError #x"10">
  tag2: @"Invalid tag" <DecodeError #x"61b10110">
  whitespace0: @"Leading spaces have to eventually yield something" <ParseShort "   ">
  whitespace1: @"No input at all" <ParseEOF "">
-  value1: <Test #"\xB2\x06corymb" #=#"\xB2\x06corymb">
-  value2: <Test #"\x81" #=#"\x81">
-  value3: <Test #"\x81" #=#[gQ]>
-  value4: <Test #"\x81" #=#[gQ==]>
-  value5: <Test #"\x81" #=   #[gQ==]>
-  value6: <Test #x"b591929384" #=#x"b591929384">

  longlist14: <Test #x"b5808080808080808080808080808084"
               [#f #f #f #f #f
--- a/implementations/racket/preserves/preserves/write-binary.rkt
+++ b/implementations/racket/preserves/preserves/write-binary.rkt
@ -8,6 +8,7 @@
 (require "record.rkt")
 (require "embedded.rkt")
 (require "float.rkt")
+(require "float-bytes.rkt")
 (require "annotation.rkt")
 (require "varint.rkt")
 (require "object-id.rkt")
@ -86,12 +87,12 @@
      [#f (output-byte #x80)]
      [#t (output-byte #x81)]

-      [(float v)
+      [(float _)
       (output-byte #x82)
-       (output-bytes (real->floating-point-bytes v 4 #t))]
+       (output-bytes (float->bytes v))]
      [(? flonum?)
       (output-byte #x83)
-       (output-bytes (real->floating-point-bytes v 8 #t))]
+       (output-bytes (double->bytes v))]

      [(annotated as _ v)
       (when write-annotations?
--- a/implementations/racket/preserves/preserves/write-text.rkt
+++ b/implementations/racket/preserves/preserves/write-text.rkt
@ -12,11 +12,14 @@
 (require "embedded.rkt")
 (require "annotation.rkt")
 (require "float.rkt")
+(require "float-bytes.rkt")
 (require "record.rkt")
 (require "object-id.rkt")
 (require racket/dict)
 (require racket/set)
 (require (only-in racket/port with-output-to-string))
+(require (only-in racket/math nan? infinite?))
+(require (only-in file/sha1 bytes->hex-string))

 (define PIPE #\|)

@ -132,6 +135,15 @@
              (write-binary-stringlike v)
              (write-binary-base64 outer-distance v)))))

+  (define (write-float v precision) ; v: the number to print; precision: 'float or 'double
+    (if (or (nan? v) (infinite? v)) ; NaNs and infinities take the #xf"..."/#xd"..." hex form instead of decimal
+        (! "#x~a\"~a\""
+           (match precision ['float "f"] ['double "d"]) ; selects the #xf or #xd prefix
+           (bytes->hex-string (match precision
+                                ['float (float->bytes (float v))] ; v arrives unwrapped; re-wrap it in a `float` for float->bytes
+                                ['double (double->bytes v)])))
+        (! "~v~a" v (match precision ['float "f"] ['double ""])))) ; decimal form; a trailing "f" marks single precision
+
  (define (write-value distance v)
    (match v
      [(annotated annotations _ item)
@ -143,8 +155,8 @@
       (write-value distance item)]
      [#f (! "#f")]
      [#t (! "#t")]
-      [(float v) (! "~vf" v)]
-      [(? flonum?) (! "~v" v)]
+      [(float v) (write-float v 'float)]
+      [(? flonum?) (write-float v 'double)]
      [(? integer? x) (! "~v" v)]
      [(? string?)
       (! "\"")
--- a/implementations/rust/preserves/Cargo.toml
+++ b/implementations/rust/preserves/Cargo.toml
@ -15,6 +15,7 @@ gitlab = { repository = "preserves/preserves" }
 base64 = "0.13"
 dtoa = "0.4"
 num = "0.4"
+lazy_static = "1.4.0"
 regex = "1.5"
 serde = { version = "1.0", features = ["derive"] }
 serde_bytes = "0.11"
--- a/implementations/rust/preserves/src/value/text/reader.rs
+++ b/implementations/rust/preserves/src/value/text/reader.rs
@ -26,8 +26,11 @@ use crate::value::reader::BinarySource;
 use crate::value::reader::ReaderResult;
 use crate::value::repr::Annotations;

+use lazy_static::lazy_static;
+
 use num::bigint::BigInt;

+use std::convert::TryInto;
 use std::io;
 use std::iter::FromIterator;
 use std::marker::PhantomData;
@ -137,86 +140,21 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
        }
    }

-    fn read_intpart<N: NestedValue>(&mut self, mut bs: Vec<u8>, c: u8) -> io::Result<N> {
-        match c {
-            b'0' => {
-                bs.push(c);
-                self.read_fracexp(bs)
-            }
-            _ => {
-                self.read_digit1(&mut bs, c)?;
-                self.read_fracexp(bs)
-            }
+    fn read_hex_float<N: NestedValue>(&mut self, bytecount: usize) -> io::Result<N> {
+        if self.next_byte()? != b'"' {
+            return Err(io_syntax_error("Missing open-double-quote in hex-encoded floating-point number"));
        }
-    }
-
-    fn read_fracexp<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
-        let mut is_float = false;
-        match self.peek() {
-            Ok(b'.') => {
-                is_float = true;
-                bs.push(self.next_byte()?);
-                let c = self.next_byte()?;
-                self.read_digit1(&mut bs, c)?;
-            }
-            _ => ()
+        let bs = self.read_hex_binary()?;
+        if bs.len() != bytecount {
+            return Err(io_syntax_error("Incorrect number of bytes in hex-encoded floating-point number"));
        }
-        match self.peek() {
-            Ok(b'e') | Ok(b'E') => {
-                bs.push(self.next_byte()?);
-                self.read_sign_and_exp(bs)
-            }
-            _ => self.finish_number(bs, is_float)
+        match bytecount {
+            4 => Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()),
+            8 => Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()),
+            _ => Err(io_syntax_error("Unsupported byte count in hex-encoded floating-point number")),
        }
    }

-    fn read_sign_and_exp<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
-        match self.peek()? {
-            b'+' | b'-' => bs.push(self.next_byte()?),
-            _ => (),
-        }
-        let c = self.next_byte()?;
-        self.read_digit1(&mut bs, c)?;
-        self.finish_number(bs, true)
-    }
-
-    fn finish_number<N: NestedValue>(&mut self, bs: Vec<u8>, is_float: bool) -> io::Result<N> {
-        let s = decode_utf8(bs)?;
-        if is_float {
-            match self.peek() {
-                Ok(b'f') | Ok(b'F') => {
-                    self.skip()?;
-                    Ok(N::new(s.parse::<f32>().map_err(
-                        |_| io_syntax_error(&format!(
-                            "Invalid single-precision number: {:?}", s)))?))
-                }
-                _ =>
-                    Ok(N::new(s.parse::<f64>().map_err(
-                        |_| io_syntax_error(&format!(
-                            "Invalid double-precision number: {:?}", s)))?))
-            }
-        } else {
-            Ok(N::new(s.parse::<BigInt>().map_err(
-                |_| io_syntax_error(&format!(
-                    "Invalid signed-integer number: {:?}", s)))?))
-        }
-    }
-
-    fn read_digit1(&mut self, bs: &mut Vec<u8>, c: u8) -> io::Result<()>
-    {
-        if !(c as char).is_digit(10) {
-            return Err(io_syntax_error("Incomplete number"));
-        }
-        bs.push(c);
-        while let Ok(c) = self.peek() {
-            if !(c as char).is_digit(10) {
-                break;
-            }
-            bs.push(self.next_byte()?);
-        }
-        Ok(())
-    }
-
    fn read_stringlike<X, H, R>(
        &mut self,
        mut seed: R,
@ -299,14 +237,13 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
            |bs, r| Ok(bs.push(r.hexnum(2)? as u8)))?[..]))
    }

-    fn read_hex_binary<N: NestedValue>(&mut self) -> io::Result<N> {
+    fn read_hex_binary(&mut self) -> io::Result<Vec<u8>> {
        let mut s = String::new();
        loop {
            self.skip_whitespace();
            let c1 = self.next_byte()? as char;
            if c1 == '"' {
-                let bs = hex::HexParser::Strict.decode(&s).unwrap();
-                return Ok(N::new(&bs[..]));
+                return Ok(hex::HexParser::Strict.decode(&s).unwrap());
            }
            let c2 = self.next_byte()? as char;
            if !(c1.is_digit(16) && c2.is_digit(16)) {
@ -364,7 +301,11 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
        }
    }

-    fn read_raw_symbol<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
+    fn read_raw_symbol_or_number<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
+        lazy_static! { // [0-9], not \d: the regex crate's \d also matches non-ASCII digits, which must remain bare Symbols.
+            static ref NUMBER_RE: regex::Regex = regex::Regex::new(
+                r"^([-+]?[0-9]+)(((\.[0-9]+([eE][-+]?[0-9]+)?)|([eE][-+]?[0-9]+))([fF]?))?$").unwrap();
+        }
        loop {
            let c = match self.peek() {
                Err(e) if is_eof_io_error(&e) => b' ',
@ -374,8 +315,33 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
            };
            match c {
                b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' |
-                b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' =>
-                    return Ok(N::symbol(&decode_utf8(bs)?)),
+                b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => {
+                    let s = decode_utf8(bs)?;
+                    return match NUMBER_RE.captures(&s) {
+                        None => Ok(N::symbol(&s)),
+                        Some(m) => match m.get(2) {
+                            None => Ok(N::new(s.parse::<BigInt>().map_err(
+                                |_| io_syntax_error(&format!(
+                                    "Invalid signed-integer number: {:?}", s)))?)),
+                            Some(_) => {
+                                if let Some(maybe_f) = m.get(7) {
+                                    let s = m[1].to_owned() + &m[3];
+                                    if maybe_f.range().is_empty() {
+                                        Ok(N::new(s.parse::<f64>().map_err(
+                                            |_| io_syntax_error(&format!(
+                                                "Invalid double-precision number: {:?}", s)))?))
+                                    } else {
+                                        Ok(N::new(s.parse::<f32>().map_err(
+                                            |_| io_syntax_error(&format!(
+                                                "Invalid single-precision number: {:?}", s)))?))
+                                    }
+                                } else {
+                                    panic!("Internal error: cannot analyze number {:?}", s)
+                                }
+                            }
+                        }
+                    }
+                }
                c => {
                    self.skip()?;
                    bs.push(c)
@ -396,15 +362,6 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
            Err(e) => return Err(e.into()),
        };
        Ok(Some(match c {
-            b'-' => {
-                self.skip()?;
-                let c1 = self.next_byte()?;
-                self.read_intpart(vec![b'-'], c1)?
-            }
-            b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => {
-                self.skip()?;
-                self.read_intpart(Vec::new(), c)?
-            }
            b'"' => {
                self.skip()?;
                N::new(self.read_string(b'"')?)
@ -435,26 +392,13 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
                    b't' => N::new(true),
                    b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations)?.into_iter())),
                    b'"' => self.read_literal_binary()?,
-                    b'x' => if self.next_byte()? == b'"' {
-                        self.read_hex_binary()?
-                    } else {
-                        return Err(io_syntax_error("Expected open-quote at start of hex ByteString"));
+                    b'x' => match self.next_byte()? {
+                        b'"' => N::new(&self.read_hex_binary()?[..]),
+                        b'f' => self.read_hex_float(4)?,
+                        b'd' => self.read_hex_float(8)?,
+                        _ => return Err(io_syntax_error("Invalid #x syntax")),
                    },
                    b'[' => self.read_base64_binary()?,
-                    b'=' => {
-                        let bs_val: N = self.demand_next(true)?;
-                        if bs_val.annotations().slice().len() > 0 {
-                            return Err(io_syntax_error("Annotations not permitted after #="));
-                        }
-                        match bs_val.value().as_bytestring() {
-                            None =>
-                                return Err(io_syntax_error("ByteString must follow #=")),
-                            Some(bs) =>
-                                crate::value::BytesBinarySource::new(bs)
-                                .packed(ViaCodec::new(&mut self.dec))
-                                .demand_next(read_annotations)?
-                        }
-                    }
                    b'!' => {
                        let v = self.next_iovalue(read_annotations)?;
                        Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
@ -483,7 +427,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
            b'}' => return Err(io_syntax_error("Unexpected }")),
            other => {
                self.skip()?;
-                self.read_raw_symbol(vec![other])?
+                self.read_raw_symbol_or_number(vec![other])?
            }
        }))
    }
--- a/implementations/rust/preserves/src/value/text/writer.rs
+++ b/implementations/rust/preserves/src/value/text/writer.rs
@ -1,3 +1,4 @@
+use crate::hex::HexFormatter;
 use crate::value::DomainEncode;
 use crate::value::IOValue;
 use crate::value::IOValueDomainCodec;
@ -6,6 +7,8 @@ use crate::value::Writer;
 use crate::value::suspendable::Suspendable;
 use crate::value::writer::CompoundWriter;

+use lazy_static::lazy_static;
+
 use num::bigint::BigInt;

 use std::io;
@ -231,13 +234,23 @@ impl<W: io::Write> Writer for TextWriter<W> {
    }

    fn write_f32(&mut self, v: f32) -> io::Result<()> {
-        dtoa::write(&mut *self.w, v)?;
-        write!(self.w, "f")
+        if v.is_nan() || v.is_infinite() {
+            write!(self.w, "#xf\"{}\"",
+                   HexFormatter::Packed.encode(&u32::to_be_bytes(f32::to_bits(v))))
+        } else {
+            dtoa::write(&mut *self.w, v)?;
+            write!(self.w, "f")
+        }
    }

    fn write_f64(&mut self, v: f64) -> io::Result<()> {
-        dtoa::write(&mut *self.w, v)?;
-        Ok(())
+        if v.is_nan() || v.is_infinite() {
+            write!(self.w, "#xd\"{}\"",
+                   HexFormatter::Packed.encode(&u64::to_be_bytes(f64::to_bits(v))))
+        } else {
+            dtoa::write(&mut *self.w, v)?;
+            Ok(())
+        }
    }

    simple_writer_method!(write_i8, i8);
@ -269,9 +282,12 @@ impl<W: io::Write> Writer for TextWriter<W> {
    }

    fn write_symbol(&mut self, v: &str) -> io::Result<()> {
-        // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
-        let re = regex::Regex::new("^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$").unwrap();
-        if re.is_match(v) {
+        lazy_static! {
+            // FIXME: Conservatively correct, but Anglo-chauvinistic. Never matches a number, so number-like symbols get |quoted|.
+            static ref RE: regex::Regex =
+                regex::Regex::new("^([-+]*[a-zA-Z~!$%^&*?_=/.][-a-zA-Z0-9~!$%^&*?_=+/.]*|[-+]+)$").unwrap();
+        }
+        if RE.is_match(v) {
            write!(self.w, "{}", v)
        } else {
            write!(self.w, "|")?;
--- a/preserves-text.md
+++ b/preserves-text.md
@ -40,10 +40,10 @@ Standalone documents may have trailing whitespace.

 Any `Value` may be preceded by whitespace.

-             Value = ws (Record / Collection / Atom / Embedded / Machine)
+             Value = ws (Record / Collection / Atom / Embedded)
        Collection = Sequence / Dictionary / Set
-              Atom = Boolean / Float / Double / SignedInteger /
-                     String / ByteString / Symbol
+              Atom = Boolean / String / ByteString /
+                     QuotedSymbol / SymbolOrNumber

 Each `Record` is an angle-bracket enclosed grouping of its
 label-`Value` followed by its field-`Value`s.
@ -73,55 +73,6 @@ false, respectively.

           Boolean = %s"#t" / %s"#f"

-Numeric data follow the
-[JSON grammar](https://tools.ietf.org/html/rfc8259#section-6), with
-the addition of a trailing “f” distinguishing `Float` from `Double`
-values. `Float`s and `Double`s always have either a fractional part or
-an exponent part, where `SignedInteger`s never have
-either.[^reading-and-writing-floats-accurately]
-[^arbitrary-precision-signedinteger]
-
-             Float = flt %i"f"
-            Double = flt
-     SignedInteger = int
-
-          digit1-9 = %x31-39
-               nat = %x30 / ( digit1-9 *DIGIT )
-               int = ["-"] nat
-              frac = "." 1*DIGIT
-               exp = %i"e" ["-"/"+"] 1*DIGIT
-               flt = int (frac exp / frac / exp)
-
-  [^reading-and-writing-floats-accurately]: **Implementation note.**
-    Your language's standard library likely has a good routine for
-    converting between decimal notation and IEEE 754 floating-point.
-    However, if not, or if you are interested in the challenges of
-    accurately reading and writing floating point numbers, see the
-    excellent matched pair of 1990 papers by Clinger and Steele &
-    White, and a recent follow-up by Jaffer:
-
-    Clinger, William D. ‘How to Read Floating Point Numbers
-    Accurately’. In Proc. PLDI. White Plains, New York, 1990.
-    <https://doi.org/10.1145/93542.93557>.
-
-    Steele, Guy L., Jr., and Jon L. White. ‘How to Print
-    Floating-Point Numbers Accurately’. In Proc. PLDI. White Plains,
-    New York, 1990. <https://doi.org/10.1145/93542.93559>.
-
-    Jaffer, Aubrey. ‘Easy Accurate Reading and Writing of
-    Floating-Point Numbers’. ArXiv:1310.8121 [Cs], 27 October 2013.
-    <http://arxiv.org/abs/1310.8121>.
-
-  [^arbitrary-precision-signedinteger]: **Implementation note.** Be
-    aware when implementing reading and writing of `SignedInteger`s
-    that the data model *requires* arbitrary-precision integers. Your
-    implementation may (but, ideally, should not) truncate precision
-    when reading or writing a `SignedInteger`; however, if it does so,
-    it should (a) signal its client that truncation has occurred, and
-    (b) make it clear to the client that comparing such truncated
-    values for equality or ordering will not yield results that match
-    the expected semantics of the data model.
-
 `String`s are,
 [as in JSON](https://tools.ietf.org/html/rfc8259#section-7), possibly
 escaped text surrounded by double quotes. The escaping rules are the
@ -177,62 +128,109 @@ Base64 characters are allowed.
       ByteString =/ "#[" *(ws / base64char) ws "]"
        base64char = %x41-5A / %x61-7A / %x30-39 / "+" / "/" / "-" / "_" / "="

-A `Symbol` may be written in a “bare” form[^cf-sexp-token] so long as
-it conforms to certain restrictions on the characters appearing in the
-symbol. Alternatively, it may be written in a quoted form. The quoted
-form is much the same as the syntax for `String`s, including embedded
-escape syntax, except using a bar or pipe character (`|`) instead of a
-double quote mark.
+A `Symbol` may be written in either of two forms.

-            Symbol = symstart *symcont / "|" *symchar "|"
-          symstart = ALPHA / sympunct / symustart
-           symcont = ALPHA / sympunct / symustart / symucont / DIGIT / "-"
-          sympunct = "~" / "!" / "$" / "%" / "^" / "&" / "*" /
-                     "?" / "_" / "=" / "+" / "/" / "."
+The first is a quoted form, much the same as the syntax for `String`s,
+including embedded escape syntax, except using a bar or pipe character
+(`|`) instead of a double quote mark.
+
+      QuotedSymbol = "|" *symchar "|"
           symchar = unescaped / %x22 / escape (escaped / %x7C / %s"u" 4HEXDIG)
-         symustart = <any code point greater than 127 whose Unicode
-                      category is Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me,
-                      Pc, Po, Sc, Sm, Sk, So, or Co>
-          symucont = <any code point greater than 127 whose Unicode
-                      category is Nd, Nl, No, or Pd>
+
+Alternatively, a `Symbol` may be written in a “bare” form[^cf-sexp-token].
+The grammar for numeric data is a subset of the grammar for bare `Symbol`s,
+so if a `SymbolOrNumber` also matches the grammar for `Float`, `Double` or
+`SignedInteger`, then it must be interpreted as one of those, and otherwise
+it must be interpreted as a bare `Symbol`.
+
+    SymbolOrNumber = 1*baresymchar
+       baresymchar = ALPHA / DIGIT / sympunct / symuchar
+          sympunct = "~" / "!" / "$" / "%" / "^" / "&" / "*" /
+                     "?" / "_" / "=" / "+" / "-" / "/" / "."
+          symuchar = <any code point greater than 127 whose Unicode
+                      category is Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd,
+                      Nl, No, Pc, Pd, Po, Sc, Sm, Sk, So, or Co>

  [^cf-sexp-token]: Compare with the [SPKI S-expression][sexp.txt]
    definition of “token representation”, and with the
    [R6RS definition of identifiers](http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-7.html#node_sec_4.2.4).

-An `Embedded` is written as a `Value` chosen to represent the denoted
-object, prefixed with `#!`.
+Numeric data follow the [JSON
+grammar](https://tools.ietf.org/html/rfc8259#section-6) except that leading
+zeros are permitted and an optional leading `+` sign is allowed. The
+addition of a trailing “f” distinguishes a `Float` from a `Double` value.
+`Float`s and `Double`s always have either a fractional part or an exponent
+part, where `SignedInteger`s never have
+either.[^reading-and-writing-floats-accurately]
+[^arbitrary-precision-signedinteger]
+
+             Float = flt %i"f"
+            Double = flt
+     SignedInteger = int
+
+               nat = 1*DIGIT
+               int = ["-"/"+"] nat
+              frac = "." 1*DIGIT
+               exp = %i"e" ["-"/"+"] 1*DIGIT
+               flt = int (frac exp / frac / exp)
+
+  [^reading-and-writing-floats-accurately]: **Implementation note.**
+    Your language's standard library likely has a good routine for
+    converting between decimal notation and IEEE 754 floating-point.
+    However, if not, or if you are interested in the challenges of
+    accurately reading and writing floating point numbers, see the
+    excellent matched pair of 1990 papers by Clinger and Steele &
+    White, and a recent follow-up by Jaffer:
+
+    Clinger, William D. ‘How to Read Floating Point Numbers
+    Accurately’. In Proc. PLDI. White Plains, New York, 1990.
+    <https://doi.org/10.1145/93542.93557>.
+
+    Steele, Guy L., Jr., and Jon L. White. ‘How to Print
+    Floating-Point Numbers Accurately’. In Proc. PLDI. White Plains,
+    New York, 1990. <https://doi.org/10.1145/93542.93559>.
+
+    Jaffer, Aubrey. ‘Easy Accurate Reading and Writing of
+    Floating-Point Numbers’. ArXiv:1310.8121 [Cs], 27 October 2013.
+    <http://arxiv.org/abs/1310.8121>.
+
+  [^arbitrary-precision-signedinteger]: **Implementation note.** Be
+    aware when implementing reading and writing of `SignedInteger`s
+    that the data model *requires* arbitrary-precision integers. Your
+    implementation may (but, ideally, should not) truncate precision
+    when reading or writing a `SignedInteger`; however, if it does so,
+    it should (a) signal its client that truncation has occurred, and
+    (b) make it clear to the client that comparing such truncated
+    values for equality or ordering will not yield results that match
+    the expected semantics of the data model.
+
+Some valid IEEE 754 `Float`s and `Double`s are not covered by the grammar
+above, namely, the several million NaNs and the two infinities. These are
+represented as raw hexadecimal strings similar to hexadecimal
+`ByteString`s. Implementations are free to use hexadecimal floating-point
+syntax wherever convenient, even for values representable using the
+grammar above.[^rationale-no-general-machine-syntax]
+
+              Atom =/ HexFloat / HexDouble
+          HexFloat = "#xf" %x22 4(ws 2HEXDIG) ws %x22
+         HexDouble = "#xd" %x22 8(ws 2HEXDIG) ws %x22
+
+  [^rationale-no-general-machine-syntax]: **Rationale.** Previous versions
+    of this specification included an escape to the [machine-oriented
+    binary syntax](preserves-binary.html) by prefixing a `ByteString`
+    containing the binary representation of a `Value` with `#=`. The only
+    true need for this feature was to represent otherwise-unrepresentable
+    floating-point values. Instead, this specification allows such
+    floating-point values to be written directly. Removing the `#=` syntax
+    simplifies implementations (there is no longer any need to support the
+    machine-oriented syntax) and avoids complications around treatment of
+    annotations potentially contained within machine-encoded values.
+
+Finally, an `Embedded` is written as a `Value` chosen to represent the
+denoted object, prefixed with `#!`.

           Embedded = "#!" Value

-Finally, any `Value` may be represented by escaping from the textual
-syntax to the [machine-oriented binary syntax](preserves-binary.html)
-by prefixing a `ByteString` containing the binary representation of the
-`Value` with `#=`.[^rationale-switch-to-binary]
-[^no-literal-binary-in-text] [^machine-value-annotations]
-
-           Machine = "#=" ws ByteString
-
-  [^rationale-switch-to-binary]: **Rationale.** The textual syntax
-    cannot express every `Value`: specifically, it cannot express the
-    several million floating-point NaNs, or the two floating-point
-    Infinities. Since the machine-oriented binary format for `Value`s
-    expresses each `Value` with precision, embedding binary `Value`s
-    solves the problem.
-
-  [^no-literal-binary-in-text]: Every text is ultimately physically
-    stored as bytes; therefore, it might seem possible to escape to the
-    raw form of binary encoding from within a piece of textual syntax.
-    However, while bytes must be involved in any *representation* of
-    text, the text *itself* is logically a sequence of *code points* and
-    is not *intrinsically* a binary structure at all. It would be
-    incoherent to expect to be able to access the representation of the
-    text from within the text itself.
-
-  [^machine-value-annotations]: Any text-syntax annotations preceding
-    the `#` are prepended to any binary-syntax annotations yielded by
-    decoding the `ByteString`.
-
 ## Annotations

 When written down, a `Value` may have an associated sequence of
@ -293,5 +291,22 @@ The text syntax for `Boolean`s, `Symbol`s, and `ByteString`s is
 directly inspired by [Racket](https://racket-lang.org/)'s lexical
 syntax.

+## Appendix. Regular expressions for bare symbols and numbers
+
+When parsing, if a token matches both `SymbolOrNumber` and `Number`, it's a
+number; use `Float`, `Double` and `SignedInteger` to disambiguate. If it
+matches `SymbolOrNumber` but not `Number`, it's a "bare" `Symbol`.
+
+    SymbolOrNumber: ^[-a-zA-Z0-9~!$%^&*?_=+/.]+$
+            Number: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$
+             Float: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))[fF])$
+            Double: ^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+)))$
+     SignedInteger: ^([-+]?\d+)$
+
+When printing, if a symbol matches `Number`, or does not match
+`SymbolOrNumber` at all, it must be quoted (`|...|`). If it matches
+`SymbolOrNumber` but not `Number`, it may be printed as a "bare"
+`Symbol`.
+
 <!-- Heading to visually offset the footnotes from the main document: -->
 ## Notes
--- a/preserves.md
+++ b/preserves.md
@ -220,21 +220,23 @@ The total ordering specified [above](#total-order) means that the following stat
 <!-- TODO: Give some examples of large and small Preserves, perhaps -->
 <!-- translated from various JSON blobs floating around the internet. -->

-| Value                       | Encoded byte sequence                                                           |
-|-----------------------------|---------------------------------------------------------------------------------|
-| `<capture <discard>>`       | B4 B3 07 'c' 'a' 'p' 't' 'u' 'r' 'e' B4 B3 07 'd' 'i' 's' 'c' 'a' 'r' 'd' 84 84 |
-| `[1 2 3 4]`                 | B5 91 92 93 94 84                                                               |
-| `[-2 -1 0 1]`               | B5 9E 9F 90 91 84                                                               |
-| `"hello"` (format B)        | B1 05 'h' 'e' 'l' 'l' 'o'                                                       |
-| `["a" b #"c" [] #{} #t #f]` | B5 B1 01 'a' B3 01 'b' B2 01 'c' B5 84 B6 84 81 80 84                           |
-| `-257`                      | A1 FE FF                                                                        |
-| `-1`                        | 9F                                                                              |
-| `0`                         | 90                                                                              |
-| `1`                         | 91                                                                              |
-| `255`                       | A1 00 FF                                                                        |
-| `1.0f`                      | 82 3F 80 00 00                                                                  |
-| `1.0`                       | 83 3F F0 00 00 00 00 00 00                                                      |
-| `-1.202e300`                | 83 FE 3C B7 B7 59 BF 04 26                                                      |
+| Value                                               | Encoded byte sequence                                                           |
+|-----------------------------------------------------|---------------------------------------------------------------------------------|
+| `<capture <discard>>`                               | B4 B3 07 'c' 'a' 'p' 't' 'u' 'r' 'e' B4 B3 07 'd' 'i' 's' 'c' 'a' 'r' 'd' 84 84 |
+| `[1 2 3 4]`                                         | B5 91 92 93 94 84                                                               |
+| `[-2 -1 0 1]`                                       | B5 9E 9F 90 91 84                                                               |
+| `"hello"` (format B)                                | B1 05 'h' 'e' 'l' 'l' 'o'                                                       |
+| `["a" b #"c" [] #{} #t #f]`                         | B5 B1 01 'a' B3 01 'b' B2 01 'c' B5 84 B6 84 81 80 84                           |
+| `-257`                                              | A1 FE FF                                                                        |
+| `-1`                                                | 9F                                                                              |
+| `0`                                                 | 90                                                                              |
+| `1`                                                 | 91                                                                              |
+| `255`                                               | A1 00 FF                                                                        |
+| `1.0f`                                              | 82 3F 80 00 00                                                                  |
+| `1.0`                                               | 83 3F F0 00 00 00 00 00 00                                                      |
+| `-1.202e300`                                        | 83 FE 3C B7 B7 59 BF 04 26                                                      |
+| `#xf"7f800000"`, positive `Float` infinity          | 82 7F 80 00 00                                                                  |
+| `#xd"fff0000000000000"`, negative `Double` infinity | 83 FF F0 00 00 00 00 00 00                                                      |

 The next example uses a non-`Symbol` label for a record.[^extensibility2] The `Record`

--- a/tests/samples.bin
+++ b/tests/samples.bin
--- a/tests/samples.pr
+++ b/tests/samples.pr
@ -74,9 +74,45 @@
  dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
  dict4: @"Unexpected close brace" <ParseError "}">
  dict5: @"Missing value" <DecodeError #x"b7 91 92 93 84">
+  double0: <Test #x"830000000000000000" 0.0>
+  double+0: <Test #x"830000000000000000" +0.0>
+  double-0: <Test #x"838000000000000000" -0.0>
  double1: <Test #x"833ff0000000000000" 1.0>
  double2: <Test #x"83fe3cb7b759bf0426" -1.202e300>
+  double3: <Test #x"83123456789abcdef0" #xd"12 34 56 78  9a bc de f0">
+  double4: @"Fewer than 16 digits" <ParseError "#xd\"12345678\"">
+  double5: @"More than 16 digits" <ParseError "#xd\"123456789abcdef012\"">
+  double6: @"Invalid chars" <ParseError "#xd\"12zz56789abcdef0\"">
+  double7: @"Positive infinity" <Test #x"837ff0000000000000" #xd"7ff0000000000000">
+  double8: @"Negative infinity" <Test #x"83fff0000000000000" #xd"fff0000000000000">
+  double9: @"-sNaN" <Test #x"83fff0000000000001" #xd"fff0000000000001">
+  double10: @"-sNaN" <Test #x"83fff0000000000111" #xd"fff0000000000111">
+  double11: @"+sNaN" <Test #x"837ff0000000000001" #xd"7ff0000000000001">
+  double12: @"+sNaN" <Test #x"837ff0000000000111" #xd"7ff0000000000111">
+  double13: @"Bad spacing" <ParseError "#xd\"12345 6789abcdef0\"">
+  double14: @"-qNaN" <Test #x"83fff8000000000001" #xd"fff8000000000001">
+  double15: @"-qNaN" <Test #x"83fff8000000000111" #xd"fff8000000000111">
+  double16: @"+qNaN" <Test #x"837ff8000000000001" #xd"7ff8000000000001">
+  double17: @"+qNaN" <Test #x"837ff8000000000111" #xd"7ff8000000000111">
+  float0: <Test #x"8200000000" 0.0f>
+  float+0: <Test #x"8200000000" +0.0f>
+  float-0: <Test #x"8280000000" -0.0f>
  float1: <Test #x"823f800000" 1.0f>
+  float2: <Test #x"8212345678" #xf"12 34  56 78">
+  float3: @"Fewer than 8 digits" <ParseError "#xf\"123456\"">
+  float4: @"More than 8 digits" <ParseError "#xf\"123456789a\"">
+  float5: @"Invalid chars" <ParseError "#xf\"12zz5678\"">
+  float6: @"Positive infinity" <Test #x"827f800000" #xf"7f800000">
+  float7: @"Negative infinity" <Test #x"82ff800000" #xf"ff800000">
+  float8: @"+sNaN" <Test #x"827f800001" #xf"7f800001">
+  float9: @"+sNaN" <Test #x"827f800111" #xf"7f800111">
+  float10: @"-sNaN" <Test #x"82ff800001" #xf"ff800001">
+  float11: @"-sNaN" <Test #x"82ff800111" #xf"ff800111">
+  float12: @"Bad spacing" <ParseError "#xf\"12345 678\"">
+  float13: @"+qNaN" <Test #x"827fc00001" #xf"7fc00001">
+  float14: @"+qNaN" <Test #x"827fc00111" #xf"7fc00111">
+  float15: @"-qNaN" <Test #x"82ffc00001" #xf"ffc00001">
+  float16: @"-qNaN" <Test #x"82ffc00111" #xf"ffc00111">
  int-257: <Test #x"a1feff" -257>
  int-256: <Test #x"a1ff00" -256>
  int-255: <Test #x"a1ff01" -255>
@ -89,10 +125,13 @@
  int-2: <Test #x"9e" -2>
  int-1: <Test #x"9f" -1>
  int0: <Test #x"90" 0>
+  int+0: <Test #x"90" +0>
+  int-0: <Test #x"90" -0>
  int1: <Test #x"91" 1>
  int12: <Test #x"9c" 12>
  int13: <Test #x"a00d" 13>
  int127: <Test #x"a07f" 127>
+  int+127: <Test #x"a07f" +127>
  int128: <Test #x"a10080" 128>
  int255: <Test #x"a100ff" 255>
  int256: <Test #x"a10100" 256>
@ -112,6 +151,8 @@
  list8: @"Missing close bracket" <ParseShort "[">
  list9: @"Unexpected close bracket" <ParseError "]">
  list10: @"Missing end byte" <DecodeShort #x"b58080">
+  list11: <Test #x"b59184" [01]>
+  list12: <Test #x"b59c84" [12]>
  noinput0: @"No input at all" <DecodeEOF #x"">
  embed0: <Test #x"8690" #!0>
  embed1: <Test #x"868690" #!#!0>
@ -138,17 +179,22 @@
  string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
  symbol0: <Test #x"b300" ||>
  symbol2: <Test #x"b30568656c6c6f" hello>
+  symbol3: <Test #x"b305312d322d33" 1-2-3>
+  symbol4: <Test #x"b305612d622d63" a-b-c>
+  symbol5: <Test #x"b305612b622b63" a+b+c>
+  symbol6: <Test #x"b3012b" +>
+  symbol7: <Test #x"b3032b2b2b" +++>
+  symbol8: <Test #x"b3012d" ->
+  symbol9: <Test #x"b3032d2d2d" --->
+  symbol10: <Test #x"b3022d61" -a>
+  symbol11: <Test #x"b3042d2d2d61" ---a>
+  symbol12: <Test #x"b3042d2d2d31" ---1>
+  symbol13: <Test #x"b3042b312e78" +1.x>
  tag0: @"Unexpected end tag" <DecodeError #x"84">
  tag1: @"Invalid tag" <DecodeError #x"10">
  tag2: @"Invalid tag" <DecodeError #x"61b10110">
  whitespace0: @"Leading spaces have to eventually yield something" <ParseShort "   ">
  whitespace1: @"No input at all" <ParseEOF "">
-  value1: <Test #"\xB2\x06corymb" #=#"\xB2\x06corymb">
-  value2: <Test #"\x81" #=#"\x81">
-  value3: <Test #"\x81" #=#[gQ]>
-  value4: <Test #"\x81" #=#[gQ==]>
-  value5: <Test #"\x81" #=   #[gQ==]>
-  value6: <Test #x"b591929384" #=#x"b591929384">

  longlist14: <Test #x"b5808080808080808080808080808084"
               [#f #f #f #f #f