Autodetectability of binary vs text; documented test case schema a little

This commit is contained in:
Tony Garnock-Jones 2020-05-13 12:55:55 +02:00
parent ebbd268166
commit 8e0ab95d82
13 changed files with 291 additions and 148 deletions

View File

@ -157,52 +157,59 @@ class Decoder {
}
next() {
const [major, minor, arg] = this.nextop();
switch (major) {
case 0:
switch (minor) {
case 0:
switch (arg) {
case 0: return this.wrap(false);
case 1: return this.wrap(true);
case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false)));
case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false)));
case 4: throw new DecodeError("Unexpected end-of-stream marker");
case 5: {
const a = this.next();
const v = this.next();
return this.unshiftAnnotation(a, v);
while (true) { // we loop because we may need to consume an arbitrary number of no-ops
const [major, minor, arg] = this.nextop();
switch (major) {
case 0:
switch (minor) {
case 0:
switch (arg) {
case 0: return this.wrap(false);
case 1: return this.wrap(true);
case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false)));
case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false)));
case 4: throw new DecodeError("Unexpected end-of-stream marker");
case 5: {
const a = this.next();
const v = this.next();
return this.unshiftAnnotation(a, v);
}
default: throw new DecodeError("Illegal format A lead byte");
}
default: throw new DecodeError("Illegal format A lead byte");
case 1: {
const n = this.wirelength(arg);
const v = this.placeholders.get(n, void 0);
if (typeof v === 'undefined') {
const e = new DecodeError("Invalid Preserves placeholder");
e.irritant = n;
throw e;
}
return this.wrap(v);
}
case 1: {
const n = this.wirelength(arg);
const v = this.placeholders.get(n, void 0);
if (typeof v === 'undefined') {
const e = new DecodeError("Invalid Preserves placeholder");
e.irritant = n;
throw e;
case 2: {
const t = arg >> 2;
const n = arg & 3;
switch (t) {
case 1: return this.wrap(this.binarystream(n));
case 2: return this.wrap(this.valuestream(n));
default: throw new DecodeError("Invalid format C start byte");
}
}
return this.wrap(v);
case 3:
return this.wrap((arg > 12) ? arg - 16 : arg);
}
case 2: {
const t = arg >> 2;
const n = arg & 3;
switch (t) {
case 1: return this.wrap(this.binarystream(n));
case 2: return this.wrap(this.valuestream(n));
default: throw new DecodeError("Invalid format C start byte");
}
case 1:
return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg)))));
case 2:
return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg))));
case 3:
if (minor === 3 && arg === 15) {
// no-op.
continue;
} else {
throw new DecodeError("Invalid lead byte (major 3)");
}
case 3:
return this.wrap((arg > 12) ? arg - 16 : arg);
}
case 1:
return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg)))));
case 2:
return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg))));
case 3:
throw new DecodeError("Invalid lead byte (major 3)");
}
}
}
@ -315,6 +322,10 @@ class Encoder {
this.header(0, 0, 4);
}
// Writes a lead byte with major 3, minor 3, arg 15 -- the combination the
// Preserves spec assigns to the 0xFF no-op byte.
encodenoop() {
this.leadbyte(3, 3, 15);
}
push(v) {
const placeholder = this.placeholders.get(v, void 0);
if (typeof placeholder !== 'undefined') {

View File

@ -165,12 +165,12 @@ describe('common test suite', () => {
it('should go back', () => assert(is(DS(binaryForm), back)));
it('should go back with annotations',
() => assert(is(D(E(annotatedTextForm)), annotatedTextForm)));
if (variety !== 'nondeterministic') {
if (variety !== 'decode' && variety !== 'nondeterministic') {
it('should encode correctly',
() => assert(is(E(forward), binaryForm),
E(forward) + ' ' + binaryForm));
}
if (variety !== 'nondeterministic' && variety !== 'streaming') {
if (variety !== 'decode' && variety !== 'nondeterministic' && variety !== 'streaming') {
it('should encode correctly with annotations',
() => assert(is(E(annotatedTextForm), binaryForm),
E(annotatedTextForm) + ' ' + binaryForm));
@ -192,6 +192,9 @@ describe('common test suite', () => {
case Symbol.for('NondeterministicTest'):
runTestCase('nondeterministic', tName, t.get(0).strip(), t.get(1));
break;
case Symbol.for('DecodeTest'):
runTestCase('decode', tName, t.get(0).strip(), t.get(1));
break;
case Symbol.for('DecodeError'):
describe(tName, () => {
it('should fail with DecodeError', () => {

View File

@ -405,39 +405,44 @@ class Decoder(Codec):
return v
def next(self):
(major, minor, arg) = self.nextop()
if major == 0:
if minor == 0:
if arg == 0: return self.wrap(False)
if arg == 1: return self.wrap(True)
if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0]))
if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
if arg == 4: raise DecodeError('Unexpected end-of-stream marker')
if arg == 5:
a = self.next()
v = self.next()
return self.unshift_annotation(a, v)
raise DecodeError('Invalid format A encoding')
elif minor == 1:
n = self.wirelength(arg)
v = self.placeholders.get(n, None)
if v is None:
raise DecodeError('Invalid Preserves placeholder')
return self.wrap(v)
elif minor == 2:
t = arg >> 2
n = arg & 3
if t == 1: return self.wrap(self.binarystream(n))
if t == 2: return self.wrap(self.valuestream(n))
raise DecodeError('Invalid format C start byte')
else: # minor == 3
return self.wrap(arg - 16 if arg > 12 else arg)
elif major == 1:
return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg))))
elif major == 2:
return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg))))
else: # major == 3
raise DecodeError('Invalid lead byte (major 3)')
while True: # we loop because we may need to consume an arbitrary number of no-ops
(major, minor, arg) = self.nextop()
if major == 0:
if minor == 0:
if arg == 0: return self.wrap(False)
if arg == 1: return self.wrap(True)
if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0]))
if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
if arg == 4: raise DecodeError('Unexpected end-of-stream marker')
if arg == 5:
a = self.next()
v = self.next()
return self.unshift_annotation(a, v)
raise DecodeError('Invalid format A encoding')
elif minor == 1:
n = self.wirelength(arg)
v = self.placeholders.get(n, None)
if v is None:
raise DecodeError('Invalid Preserves placeholder')
return self.wrap(v)
elif minor == 2:
t = arg >> 2
n = arg & 3
if t == 1: return self.wrap(self.binarystream(n))
if t == 2: return self.wrap(self.valuestream(n))
raise DecodeError('Invalid format C start byte')
else: # minor == 3
return self.wrap(arg - 16 if arg > 12 else arg)
elif major == 1:
return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg))))
elif major == 2:
return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg))))
else: # major == 3
if minor == 3 and arg == 15:
# no-op.
continue
else:
raise DecodeError('Invalid lead byte (major 3)')
def try_next(self):
start = self.index
@ -499,6 +504,9 @@ class Encoder(Codec):
for i in items: self.append(i)
self.leadbyte(0, 0, 4)
# Write a lead byte with major 3, minor 3, arg 15 -- the combination the
# Preserves spec assigns to the 0xFF no-op byte.
def encodenoop(self):
self.leadbyte(3, 3, 15)
def append(self, v):
try:
placeholder = self.placeholders.get(v, None)

View File

@ -249,9 +249,9 @@ def install_test(d, variant, tName, binaryForm, annotatedTextForm):
add_method(d, tName, test_forward)
add_method(d, tName, test_back)
add_method(d, tName, test_back_ann)
if variant not in ['nondeterministic']:
if variant not in ['decode', 'nondeterministic']:
add_method(d, tName, test_encode)
if variant not in ['nondeterministic', 'streaming']:
if variant not in ['decode', 'nondeterministic', 'streaming']:
add_method(d, tName, test_encode_ann)
def install_exn_test(d, tName, bs, check_proc):
@ -287,6 +287,8 @@ class CommonTestSuite(unittest.TestCase):
install_test(locals(), 'streaming', tName, t[0].strip(), t[1])
elif t.key == Symbol('NondeterministicTest'):
install_test(locals(), 'nondeterministic', tName, t[0].strip(), t[1])
elif t.key == Symbol('DecodeTest'):
install_test(locals(), 'decode', tName, t[0].strip(), t[1])
elif t.key == Symbol('DecodeError'):
def expected_err(self, e):
self.assertIsInstance(e, DecodeError)

View File

@ -20,6 +20,7 @@
preserve->string
current-value->placeholder
current-placeholder->value
prepend-noop
encode
decode
decode-syntax
@ -115,6 +116,9 @@
(define current-value->placeholder (make-parameter (lambda (v) #f)))
(define current-placeholder->value (make-parameter (lambda (v) (void))))
;; Returns `encoded-value` with a single #xFF (binary-syntax no-op) byte
;; placed in front of it.
(define (prepend-noop encoded-value)
(bit-string-append #"\xff" encoded-value))
(define (encode v)
(bit-string->bytes (bit-string (v :: (wire-value)))))
@ -370,6 +374,9 @@
(decode-compound minor fields rest (nil-annotation ks bs) kf))
kf))
([ (= #b11111111 :: bits 8) (rest :: binary) ]
(decode-one rest ks kf))
(else (kf))))
(decode-one input ks kf))
@ -1110,25 +1117,28 @@
(match (hash-ref samples-txt-expected t-name text-form)
[(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations
[v (values v v #t)]))
(check-equal? text-form back loc)
(check-equal? (d-strip (encode text-form)) back loc)
(check-equal? (d-strip (encode forward)) back loc)
(check-equal? (d-strip binary-form) back loc)
(check-equal? (d binary-form) annotated-text-form loc)
(check-equal? (d (encode annotated-text-form)) annotated-text-form loc)
(check-equal? (string->preserve (preserve->string text-form)) back loc)
(check-equal? (string->preserve (preserve->string forward)) back loc)
(check-equal? (string->preserve-syntax (preserve->string annotated-text-form))
(check-equal? text-form back loc) ;; expectation 1
(check-equal? (d-strip (encode text-form)) back loc) ;; expectation 2
(check-equal? (d-strip (encode forward)) back loc) ;; expectation 3
(check-equal? (d-strip binary-form) back loc) ;; expectation 4
(check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5
(check-equal? (d (encode annotated-text-form)) annotated-text-form loc) ;; expectation 6
(check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7
(check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8
(check-equal? (string->preserve-syntax (preserve->string annotated-text-form)) ;; similar to 8
annotated-text-form
loc)
(when (or (not (memq variety '(nondeterministic)))
(and can-execute-nondet-with-canonicalization?))
(when (and (not (memq variety '(decode)))
(or (not (memq variety '(nondeterministic)))
(and can-execute-nondet-with-canonicalization?)))
;; expectations 9 and 10
(parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f)))
(check-equal? (encode forward) binary-form loc)))
(unless (memq variety '(nondeterministic streaming))
(unless (memq variety '(decode nondeterministic streaming))
;; expectation 11
(check-equal? (encode annotated-text-form) binary-form loc)))
(define-runtime-path tests-path "../../../tests")
(define-runtime-path tests-path "../../../../tests")
(let* ((path (build-path tests-path "samples.txt"))
(testfile (call-with-input-file path
(lambda (p)
@ -1158,6 +1168,8 @@
(run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)]
[`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form)
(run-test-case 'streaming t-name loc binary-form annotated-text-form)]
[`#s(DecodeTest ,(strip-annotations binary-form) ,annotated-text-form)
(run-test-case 'decode t-name loc binary-form annotated-text-form)]
[`#s(ParseError ,(strip-annotations str))
(with-handlers [(exn:fail:read:eof?
(lambda (e) (fail-test "Unexpected EOF: ~e" e)))

View File

@ -307,6 +307,7 @@ mod samples_tests {
Test(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
NondeterministicTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
StreamingTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
DecodeTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
ParseError(String),
ParseShort(String),
DecodeError(#[serde(with = "serde_bytes")] Vec<u8>),
@ -341,6 +342,10 @@ mod samples_tests {
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
assert_eq!(&codec.decode(&mut &bin[..])?, val);
}
TestCase::DecodeTest(ref bin, ref val) => {
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
assert_eq!(&codec.decode(&mut &bin[..])?, val);
}
TestCase::ParseError(_) => (),
TestCase::ParseShort(_) => (),
TestCase::DecodeError(ref bin) => {

View File

@ -6,6 +6,7 @@ pub enum Op {
Misc(u8),
Atom(AtomMinor),
Compound(CompoundMinor),
Reserved(u8),
}
#[derive(Debug, PartialEq, Eq)]
@ -18,6 +19,7 @@ impl TryFrom<u8> for Op {
0 => Ok(Self::Misc(v & 3)),
1 => Ok(Self::Atom(AtomMinor::try_from(v & 3).unwrap())),
2 => Ok(Self::Compound(CompoundMinor::try_from(v & 3).unwrap())),
3 => Ok(Self::Reserved(v & 3)),
_ => Err(InvalidOp),
}
}
@ -29,6 +31,7 @@ impl From<Op> for u8 {
Op::Misc(minor) => minor & 3,
Op::Atom(minor) => (1 << 2) | ((minor as u8) & 3),
Op::Compound(minor) => (2 << 2) | ((minor as u8) & 3),
Op::Reserved(minor) => (3 << 2) | (minor & 3),
}
}
}

View File

@ -246,59 +246,63 @@ impl<'a, 'b, R: Read, N: NestedValue<D>, D: Domain> Decoder<'a, 'b, R, N, D> {
}
pub fn next(&mut self) -> Result<N> {
match self.nextop()? {
(Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
(Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
(Op::Misc(0), 2) => {
let bs: &[u8] = &self.readbytes(4)?;
Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
}
(Op::Misc(0), 3) => {
let bs: &[u8] = &self.readbytes(8)?;
Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
}
(Op::Misc(0), 5) => {
if self.read_annotations {
let mut annotations = vec![self.next()?];
while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
self.skip()?;
annotations.push(self.next()?);
loop {
return match self.nextop()? {
(Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
(Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
(Op::Misc(0), 2) => {
let bs: &[u8] = &self.readbytes(4)?;
Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
}
(Op::Misc(0), 3) => {
let bs: &[u8] = &self.readbytes(8)?;
Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
}
(Op::Misc(0), 5) => {
if self.read_annotations {
let mut annotations = vec![self.next()?];
while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
self.skip()?;
annotations.push(self.next()?);
}
let v = self.next()?;
assert!(v.annotations().is_empty());
Ok(N::wrap_ann(annotations, v.value_owned()))
} else {
self.next()?;
self.next()
}
let v = self.next()?;
assert!(v.annotations().is_empty());
Ok(N::wrap_ann(annotations, v.value_owned()))
} else {
self.next()?;
self.next()
}
}
(Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
(Op::Misc(1), arg) => {
let n = self.wirelength(arg)?;
match self.placeholders.and_then(|m| m.get(&n)) {
Some(v) => Ok(v.clone().wrap()),
None => Err(Error::Syntax("Invalid Preserves placeholder")),
(Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
(Op::Misc(1), arg) => {
let n = self.wirelength(arg)?;
match self.placeholders.and_then(|m| m.get(&n)) {
Some(v) => Ok(v.clone().wrap()),
None => Err(Error::Syntax("Invalid Preserves placeholder")),
}
}
}
(Op::Misc(2), arg) => {
match Op::try_from(arg)? {
Op::Atom(minor) => self.binarystream(minor),
Op::Compound(minor) => self.valuestream(minor),
_ => Err(Error::Syntax("Invalid format C start byte")),
(Op::Misc(2), arg) => {
match Op::try_from(arg)? {
Op::Atom(minor) => self.binarystream(minor),
Op::Compound(minor) => self.valuestream(minor),
_ => Err(Error::Syntax("Invalid format C start byte")),
}
}
}
(Op::Misc(3), arg) => {
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
Ok(Value::from(n).wrap())
}
(Op::Misc(_), _) => unreachable!(),
(Op::Atom(minor), arg) => {
let count = self.wirelength(arg)?;
Self::decodebinary(minor, self.readbytes(count)?)
}
(Op::Compound(minor), arg) => {
let count = self.wirelength(arg)?;
Self::decodecompound(minor, self.readvalues(count)?)
(Op::Misc(3), arg) => {
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
Ok(Value::from(n).wrap())
}
(Op::Misc(_), _) => unreachable!(),
(Op::Atom(minor), arg) => {
let count = self.wirelength(arg)?;
Self::decodebinary(minor, self.readbytes(count)?)
}
(Op::Compound(minor), arg) => {
let count = self.wirelength(arg)?;
Self::decodecompound(minor, self.readvalues(count)?)
}
(Op::Reserved(3), 15) => continue,
(Op::Reserved(_), _) => Err(InvalidOp.into()),
}
}
}

View File

@ -70,6 +70,10 @@ impl<'a, 'b, W: Write, N: NestedValue<D>, D: Domain> Encoder<'a, 'b, W, N, D> {
self.write_all(bs)
}
/// Writes the no-op op, `Op::Reserved(3)` with argument 15, and returns
/// the result of the underlying `write_op` call.
pub fn write_noop(&mut self) -> Result {
self.write_op(Op::Reserved(3), 15)
}
pub fn write(&mut self, v: &N) -> Result {
for ann in v.annotations() {
self.write_header(Op::Misc(0), 5)?;

View File

@ -22,6 +22,10 @@ pre, code { background-color: #eee; font-family: "DejaVu Sans Mono", monospace;
code { font-size: 75%; }
pre { padding: 0.33rem; line-height: 1; overflow-x: auto; }
p, ul, table {
margin: 1em 0;
}
body {
counter-reset: section 0 subsection 0 appendix 0;
}

View File

@ -4,7 +4,7 @@ title: "Preserves: an Expressive Data Language"
---
Tony Garnock-Jones <tonyg@leastfixedpoint.com>
August 2019. Version 0.0.6.
May 2020. Version 0.0.7.
[sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt
[spki]: http://world.std.com/~cme/html/spki.html
@ -470,11 +470,15 @@ representation.[^some-encodings-unused]
| 0 | 3 | | (format A) Certain small `SignedInteger`s |
| 1 | | | (format B) An `Atom` with variable-length binary representation |
| 2 | | | (format B) A `Compound` with variable-length representation |
| 3 | 3 | 15 | (format A) 0xFF byte; no-op |
#### Encoding data of type-specific length (format A).
Each type of data defines its own rules for this format.
Of particular note is lead byte `0xFF`, which is a no-op byte acting
as a kind of pseudo-whitespace in a binary-syntax encoding.
#### Encoding data of known length (format B).
Format B is used where the length `l` of the `Value` to be encoded is
@ -896,10 +900,11 @@ endless sequence of zero length chunks, appearing to make progress but
not actually doing so. Implementations *MUST* reject zero length
chunks when decoding, and *MUST NOT* produce them when encoding.
**Whitespace.** Similarly, the textual format for `Value`s allows
arbitrary whitespace in many positions. In streaming transfer
situations, consider optional restrictions on the amount of
consecutive whitespace that may appear in a serialized `Value`.
**Whitespace and no-ops.** Similarly, the binary format allows `0xFF`
no-ops and the textual format allows arbitrary whitespace in many
positions. In streaming transfer situations, consider optional
restrictions on the amount of consecutive whitespace or the number of
consecutive no-ops that may appear.
**Annotations.** Also similarly, in modes where a `Value` is being
read while annotations are skipped, an endless sequence of annotations
@ -922,6 +927,24 @@ The text syntax for `Boolean`s, `Symbol`s, and `ByteString`s is
directly inspired by [Racket](https://racket-lang.org/)'s lexical
syntax.
## Appendix. Autodetection of textual or binary syntax
Whitespace characters `0x09` (ASCII HT (tab)), `0x0A` (LF), `0x0D`
(CR), `0x20` (space) and `0x2C` (comma) are ignored at the start of a
textual-syntax Preserves `Document`, and their UTF-8 encodings are
reserved lead byte values in binary-syntax Preserves.
The byte `0xFF`, signifying a no-op in binary-syntax Preserves, has no
meaning in either 7-bit ASCII or UTF-8, and therefore cannot appear in
a valid textual-syntax Preserves `Document`.
If applications prefix their textual-syntax documents with e.g. a
space or newline character, and their binary-syntax documents with a
`0xFF` byte, consumers of these documents may reliably autodetect the
syntax being used. In a network protocol supporting this kind of
autodetection, clients may transmit LF or `0xFF` to select text or
binary syntax, respectively.
## Appendix. Table of lead byte values
00 - False
@ -930,9 +953,9 @@ syntax.
03 - Double
04 - End stream
05 - Annotation
(0x) RESERVED 06-0F
(0x) RESERVED 06-0F (NB. 09, 0A, 0D specially reserved)
1x - Placeholder
2x - Start Stream
2x - Start Stream (NB. 20, 2C specially reserved)
3x - Small integers 0..12,-3..-1
4x - SignedInteger
@ -948,7 +971,8 @@ syntax.
(Cx) RESERVED C0-CF
(Dx) RESERVED D0-DF
(Ex) RESERVED E0-EF
(Fx) RESERVED F0-FF
(Fx) RESERVED F0-FE
FF No-op
## Appendix. Bit fields within lead byte values
@ -962,13 +986,25 @@ syntax.
00 00 0100 End Stream (to match a previous Start Stream)
00 00 0101 Annotation; two more Reprs follow
00 00 1001 (ASCII HT (tab)) \
00 00 1010 (ASCII LF) |- Reserved: may be used to indicate
00 00 1101 (ASCII CR) / use of text encoding
00 01 mmmm Placeholder; m is the placeholder number
00 10 ttnn Start Stream <tt,nn>
When tt = 00 --> error
When nn = 00 --> (ASCII space)
Reserved: may be used to indicate
use of text encoding
otherwise --> error
01 --> each chunk is a ByteString
10 --> each chunk is a single encoded Value
11 --> error (RESERVED)
When nn = 00 --> (ASCII comma)
Reserved: may be used to indicate
use of text encoding
otherwise --> error
00 11 xxxx Small integers 0..12,-3..-1
@ -983,6 +1019,7 @@ syntax.
10 11 mmmm Dictionary
11 nn mmmm error, RESERVED
11 11 1111 no-op; unambiguous indication of binary Preserves format
Where `mmmm` appears, interpret it as an unsigned 4-bit number `m`. If
`m`<15, let `l`=`m`. Otherwise, `m`=15; let `l` be the result of

Binary file not shown.

View File

@ -1,4 +1,46 @@
@<EmacsMode "-*- preserves -*-">
@<Documentation [
"Individual test cases may be any of the following record types:"
<TestCaseTypes {
Test: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 9 11}}
NondeterministicTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 10 11}}
StreamingTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 9 }}
DecodeTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8}}
ParseError: {fields: [text] expectations: {12}}
ParseShort: {fields: [text] expectations: {13}}
DecodeError: {fields: [bytes] expectations: {14}}
DecodeShort: {fields: [bytes] expectations: {15}}
}>
"In each test, let value = strip(annotatedValue),",
" forward = value,",
" back = value,"
"except where test-case-specific values of `forward` and/or `back` are provided"
"by the executing harness (of particular importance for `StreamingTest`s),"
"and check the following numbered expectations according to the table above:"
<TestCaseExpectations {
1: "value = back"
2: "strip(decodeBinary(encodeBinary(value))) = back"
3: "strip(decodeBinary(encodeBinary(forward))) = back"
4: "strip(decodeBinary(binary)) = back"
5: "decodeBinary(binary) = annotatedValue"
6: "decodeBinary(encodeBinary(annotatedValue)) = annotatedValue"
7: "decodeText(encodeText(value)) = back"
8: "decodeText(encodeText(forward)) = back"
9: "encodeBinary(forward) = binary"
10: "canonicallyEncodeBinary(forward) = binary"
11: "encodeBinary(annotatedValue) = binary"
12: "decodeText(text) fails with a syntax error (NB. never with premature EOF)"
13: "decodeText(text) fails signalling premature EOF (NB. never with a syntax error)"
14: "decodeBinary(bytes) fails with a syntax error (NB. never with premature EOF)"
15: "decodeBinary(bytes) fails signalling premature EOF (NB. never with a syntax error)"
}>
"Each `StreamingTest` will need to have an implementation-specific `forward`"
"supplied that encodes to the specific format C byte sequences in `binary`."
"Alternatively, implementations may choose to skip expectation 9 for"
"`StreamingTest`s, treating them like `DecodeTest`s."
]>
<TestCases
<ExpectedPlaceholderMapping {
0: discard
@ -76,6 +118,12 @@
list7: <Test #hex{93 73616263 732e2e2e 73646566} [abc ... def]>
list8: @"Missing close bracket" <ParseShort "[">
list9: @"Unexpected close bracket" <ParseError "]">
noop0: <DecodeTest #hex{ff10} discard>
noop1: <DecodeTest #hex{ff31} 1>
noop2: <DecodeTest #hex{ffffff42ff00} -256>
noop3: <DecodeTest #hex{ff05ff53616263ff42ff00} @"abc" -256>
noop4: @"No-ops must be followed by something" <DecodeShort #hex{ffffff}>
noop5: @"No input at all is considered short" <DecodeShort #hex{}>
placeholder0: <Test #hex{10} discard>
placeholder1: <Test #hex{11} capture>
placeholder2: <Test #hex{12} observe>
@ -115,6 +163,8 @@
symbol0: <Test #hex{70} ||>
symbol1: <StreamingTest #hex{27626865626c6c616f04} hello>
symbol2: <Test #hex{7568656c6c6f} hello>
whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " ">
whitespace1: @"No input at all is considered short" <ParseShort "">
value1: <Test #"\x66corymb" #value#"fcorymb">
value2: <Test #"\x01" #value#"\x01">
value3: <Test #"\x01" #value#base64{AQ}>