Autodetectability of binary vs text; documented test case schema a little

2020-05-13 12:55:55 +02:00 · 2020-05-13 12:55:55 +02:00 · 8e0ab95d82
parent ebbd268166
commit 8e0ab95d82
13 changed files with 291 additions and 148 deletions
--- a/implementations/javascript/src/codec.js
+++ b/implementations/javascript/src/codec.js
@ -157,52 +157,59 @@ class Decoder {
  }

  next() {
-    const [major, minor, arg] = this.nextop();
-    switch (major) {
-      case 0:
-        switch (minor) {
-          case 0:
-            switch (arg) {
-              case 0: return this.wrap(false);
-              case 1: return this.wrap(true);
-              case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false)));
-              case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false)));
-              case 4: throw new DecodeError("Unexpected end-of-stream marker");
-              case 5: {
-                const a = this.next();
-                const v = this.next();
-                return this.unshiftAnnotation(a, v);
+    while (true) { // we loop because we may need to consume an arbitrary number of no-ops
+      const [major, minor, arg] = this.nextop();
+      switch (major) {
+        case 0:
+          switch (minor) {
+            case 0:
+              switch (arg) {
+                case 0: return this.wrap(false);
+                case 1: return this.wrap(true);
+                case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false)));
+                case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false)));
+                case 4: throw new DecodeError("Unexpected end-of-stream marker");
+                case 5: {
+                  const a = this.next();
+                  const v = this.next();
+                  return this.unshiftAnnotation(a, v);
+                }
+                default: throw new DecodeError("Illegal format A lead byte");
              }
-              default: throw new DecodeError("Illegal format A lead byte");
+            case 1: {
+              const n = this.wirelength(arg);
+              const v = this.placeholders.get(n, void 0);
+              if (typeof v === 'undefined') {
+                const e = new DecodeError("Invalid Preserves placeholder");
+                e.irritant = n;
+                throw e;
+              }
+              return this.wrap(v);
            }
-          case 1: {
-            const n = this.wirelength(arg);
-            const v = this.placeholders.get(n, void 0);
-            if (typeof v === 'undefined') {
-              const e = new DecodeError("Invalid Preserves placeholder");
-              e.irritant = n;
-              throw e;
+            case 2: {
+              const t = arg >> 2;
+              const n = arg & 3;
+              switch (t) {
+                case 1: return this.wrap(this.binarystream(n));
+                case 2: return this.wrap(this.valuestream(n));
+                default: throw new DecodeError("Invalid format C start byte");
+              }
            }
-            return this.wrap(v);
+            case 3:
+              return this.wrap((arg > 12) ? arg - 16 : arg);
          }
-          case 2: {
-            const t = arg >> 2;
-            const n = arg & 3;
-            switch (t) {
-              case 1: return this.wrap(this.binarystream(n));
-              case 2: return this.wrap(this.valuestream(n));
-              default: throw new DecodeError("Invalid format C start byte");
-            }
+        case 1:
+          return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg)))));
+        case 2:
+          return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg))));
+        case 3:
+          if (minor === 3 && arg === 15) {
+            // no-op.
+            continue;
+          } else {
+            throw new DecodeError("Invalid lead byte (major 3)");
          }
-          case 3:
-            return this.wrap((arg > 12) ? arg - 16 : arg);
-        }
-      case 1:
-        return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg)))));
-      case 2:
-        return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg))));
-      case 3:
-        throw new DecodeError("Invalid lead byte (major 3)");
+      }
    }
  }

@ -315,6 +322,10 @@ class Encoder {
    this.header(0, 0, 4);
  }

+  encodenoop() {
+    this.leadbyte(3, 3, 15);
+  }
+
  push(v) {
    const placeholder = this.placeholders.get(v, void 0);
    if (typeof placeholder !== 'undefined') {
--- a/implementations/javascript/test/test-codec.js
+++ b/implementations/javascript/test/test-codec.js
@ -165,12 +165,12 @@ describe('common test suite', () => {
      it('should go back', () => assert(is(DS(binaryForm), back)));
      it('should go back with annotations',
         () => assert(is(D(E(annotatedTextForm)), annotatedTextForm)));
-      if (variety !== 'nondeterministic') {
+      if (variety !== 'decode' && variety !== 'nondeterministic') {
        it('should encode correctly',
           () => assert(is(E(forward), binaryForm),
                        E(forward) + ' ' + binaryForm));
      }
-      if (variety !== 'nondeterministic' && variety !== 'streaming') {
+      if (variety !== 'decode' && variety !== 'nondeterministic' && variety !== 'streaming') {
        it('should encode correctly with annotations',
           () => assert(is(E(annotatedTextForm), binaryForm),
                        E(annotatedTextForm) + ' ' + binaryForm));
@ -192,6 +192,9 @@ describe('common test suite', () => {
      case Symbol.for('NondeterministicTest'):
        runTestCase('nondeterministic', tName, t.get(0).strip(), t.get(1));
        break;
+      case Symbol.for('DecodeTest'):
+        runTestCase('decode', tName, t.get(0).strip(), t.get(1));
+        break;
      case Symbol.for('DecodeError'):
        describe(tName, () => {
          it('should fail with DecodeError', () => {
--- a/implementations/python/preserves/preserves.py
+++ b/implementations/python/preserves/preserves.py
@ -405,39 +405,44 @@ class Decoder(Codec):
        return v

    def next(self):
-        (major, minor, arg) = self.nextop()
-        if major == 0:
-            if minor == 0:
-                if arg == 0: return self.wrap(False)
-                if arg == 1: return self.wrap(True)
-                if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0]))
-                if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
-                if arg == 4: raise DecodeError('Unexpected end-of-stream marker')
-                if arg == 5:
-                    a = self.next()
-                    v = self.next()
-                    return self.unshift_annotation(a, v)
-                raise DecodeError('Invalid format A encoding')
-            elif minor == 1:
-                n = self.wirelength(arg)
-                v = self.placeholders.get(n, None)
-                if v is None:
-                    raise DecodeError('Invalid Preserves placeholder')
-                return self.wrap(v)
-            elif minor == 2:
-                t = arg >> 2
-                n = arg & 3
-                if t == 1: return self.wrap(self.binarystream(n))
-                if t == 2: return self.wrap(self.valuestream(n))
-                raise DecodeError('Invalid format C start byte')
-            else: # minor == 3
-                return self.wrap(arg - 16 if arg > 12 else arg)
-        elif major == 1:
-            return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg))))
-        elif major == 2:
-            return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg))))
-        else: # major == 3
-            raise DecodeError('Invalid lead byte (major 3)')
+        while True: # we loop because we may need to consume an arbitrary number of no-ops
+            (major, minor, arg) = self.nextop()
+            if major == 0:
+                if minor == 0:
+                    if arg == 0: return self.wrap(False)
+                    if arg == 1: return self.wrap(True)
+                    if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0]))
+                    if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
+                    if arg == 4: raise DecodeError('Unexpected end-of-stream marker')
+                    if arg == 5:
+                        a = self.next()
+                        v = self.next()
+                        return self.unshift_annotation(a, v)
+                    raise DecodeError('Invalid format A encoding')
+                elif minor == 1:
+                    n = self.wirelength(arg)
+                    v = self.placeholders.get(n, None)
+                    if v is None:
+                        raise DecodeError('Invalid Preserves placeholder')
+                    return self.wrap(v)
+                elif minor == 2:
+                    t = arg >> 2
+                    n = arg & 3
+                    if t == 1: return self.wrap(self.binarystream(n))
+                    if t == 2: return self.wrap(self.valuestream(n))
+                    raise DecodeError('Invalid format C start byte')
+                else: # minor == 3
+                    return self.wrap(arg - 16 if arg > 12 else arg)
+            elif major == 1:
+                return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg))))
+            elif major == 2:
+                return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg))))
+            else: # major == 3
+                if minor == 3 and arg == 15:
+                    # no-op.
+                    continue
+                else:
+                    raise DecodeError('Invalid lead byte (major 3)')

    def try_next(self):
        start = self.index
@ -499,6 +504,9 @@ class Encoder(Codec):
        for i in items: self.append(i)
        self.leadbyte(0, 0, 4)

+    def encodenoop(self):
+        self.leadbyte(3, 3, 15)
+
    def append(self, v):
        try:
            placeholder = self.placeholders.get(v, None)
--- a/implementations/python/preserves/test_preserves.py
+++ b/implementations/python/preserves/test_preserves.py
@ -249,9 +249,9 @@ def install_test(d, variant, tName, binaryForm, annotatedTextForm):
    add_method(d, tName, test_forward)
    add_method(d, tName, test_back)
    add_method(d, tName, test_back_ann)
-    if variant not in ['nondeterministic']:
+    if variant not in ['decode', 'nondeterministic']:
        add_method(d, tName, test_encode)
-    if variant not in ['nondeterministic', 'streaming']:
+    if variant not in ['decode', 'nondeterministic', 'streaming']:
        add_method(d, tName, test_encode_ann)

 def install_exn_test(d, tName, bs, check_proc):
@ -287,6 +287,8 @@ class CommonTestSuite(unittest.TestCase):
            install_test(locals(), 'streaming', tName, t[0].strip(), t[1])
        elif t.key == Symbol('NondeterministicTest'):
            install_test(locals(), 'nondeterministic', tName, t[0].strip(), t[1])
+        elif t.key == Symbol('DecodeTest'):
+            install_test(locals(), 'decode', tName, t[0].strip(), t[1])
        elif t.key == Symbol('DecodeError'):
            def expected_err(self, e):
                self.assertIsInstance(e, DecodeError)
--- a/implementations/racket/preserves/preserves/main.rkt
+++ b/implementations/racket/preserves/preserves/main.rkt
@ -20,6 +20,7 @@
         preserve->string
         current-value->placeholder
         current-placeholder->value
+         prepend-noop
         encode
         decode
         decode-syntax
@ -115,6 +116,9 @@
 (define current-value->placeholder (make-parameter (lambda (v) #f)))
 (define current-placeholder->value (make-parameter (lambda (v) (void))))

+(define (prepend-noop encoded-value)
+  (bit-string-append #"\xff" encoded-value))
+
 (define (encode v)
  (bit-string->bytes (bit-string (v :: (wire-value)))))

@ -370,6 +374,9 @@
                        (decode-compound minor fields rest (nil-annotation ks bs) kf))
                      kf))

+      ([ (= #b11111111 :: bits 8) (rest :: binary) ]
+       (decode-one rest ks kf))
+
      (else (kf))))

  (decode-one input ks kf))
@ -1110,25 +1117,28 @@
      (match (hash-ref samples-txt-expected t-name text-form)
        [(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations
        [v (values v v #t)]))
-    (check-equal? text-form back loc)
-    (check-equal? (d-strip (encode text-form)) back loc)
-    (check-equal? (d-strip (encode forward)) back loc)
-    (check-equal? (d-strip binary-form) back loc)
-    (check-equal? (d binary-form) annotated-text-form loc)
-    (check-equal? (d (encode annotated-text-form)) annotated-text-form loc)
-    (check-equal? (string->preserve (preserve->string text-form)) back loc)
-    (check-equal? (string->preserve (preserve->string forward)) back loc)
-    (check-equal? (string->preserve-syntax (preserve->string annotated-text-form))
+    (check-equal? text-form back loc)                                              ;; expectation 1
+    (check-equal? (d-strip (encode text-form)) back loc)                           ;; expectation 2
+    (check-equal? (d-strip (encode forward)) back loc)                             ;; expectation 3
+    (check-equal? (d-strip binary-form) back loc)                                  ;; expectation 4
+    (check-equal? (d binary-form) annotated-text-form loc)                         ;; expectation 5
+    (check-equal? (d (encode annotated-text-form)) annotated-text-form loc)        ;; expectation 6
+    (check-equal? (string->preserve (preserve->string text-form)) back loc)        ;; expectation 7
+    (check-equal? (string->preserve (preserve->string forward)) back loc)          ;; expectation 8
+    (check-equal? (string->preserve-syntax (preserve->string annotated-text-form)) ;; similar to 8
                  annotated-text-form
                  loc)
-    (when (or (not (memq variety '(nondeterministic)))
-              (and can-execute-nondet-with-canonicalization?))
+    (when (and (not (memq variety '(decode)))
+               (or (not (memq variety '(nondeterministic)))
+                   (and can-execute-nondet-with-canonicalization?)))
+      ;; expectations 9 and 10
      (parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f)))
        (check-equal? (encode forward) binary-form loc)))
-    (unless (memq variety '(nondeterministic streaming))
+    (unless (memq variety '(decode nondeterministic streaming))
+      ;; expectation 11
      (check-equal? (encode annotated-text-form) binary-form loc)))

-  (define-runtime-path tests-path "../../../tests")
+  (define-runtime-path tests-path "../../../../tests")
  (let* ((path (build-path tests-path "samples.txt"))
         (testfile (call-with-input-file path
                     (lambda (p)
@ -1158,6 +1168,8 @@
           (run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)]
          [`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form)
           (run-test-case 'streaming t-name loc binary-form annotated-text-form)]
+          [`#s(DecodeTest ,(strip-annotations binary-form) ,annotated-text-form)
+           (run-test-case 'decode t-name loc binary-form annotated-text-form)]
          [`#s(ParseError ,(strip-annotations str))
           (with-handlers [(exn:fail:read:eof?
                            (lambda (e) (fail-test "Unexpected EOF: ~e" e)))
--- a/implementations/rust/src/lib.rs
+++ b/implementations/rust/src/lib.rs
@ -307,6 +307,7 @@ mod samples_tests {
        Test(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
        NondeterministicTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
        StreamingTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
+        DecodeTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
        ParseError(String),
        ParseShort(String),
        DecodeError(#[serde(with = "serde_bytes")] Vec<u8>),
@ -341,6 +342,10 @@ mod samples_tests {
                    assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
                    assert_eq!(&codec.decode(&mut &bin[..])?, val);
                }
+                TestCase::DecodeTest(ref bin, ref val) => {
+                    assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
+                    assert_eq!(&codec.decode(&mut &bin[..])?, val);
+                }
                TestCase::ParseError(_) => (),
                TestCase::ParseShort(_) => (),
                TestCase::DecodeError(ref bin) => {
--- a/implementations/rust/src/value/constants.rs
+++ b/implementations/rust/src/value/constants.rs
@ -6,6 +6,7 @@ pub enum Op {
    Misc(u8),
    Atom(AtomMinor),
    Compound(CompoundMinor),
+    Reserved(u8),
 }

 #[derive(Debug, PartialEq, Eq)]
@ -18,6 +19,7 @@ impl TryFrom<u8> for Op {
            0 => Ok(Self::Misc(v & 3)),
            1 => Ok(Self::Atom(AtomMinor::try_from(v & 3).unwrap())),
            2 => Ok(Self::Compound(CompoundMinor::try_from(v & 3).unwrap())),
+            3 => Ok(Self::Reserved(v & 3)),
            _ => Err(InvalidOp),
        }
    }
@ -29,6 +31,7 @@ impl From<Op> for u8 {
            Op::Misc(minor) => minor & 3,
            Op::Atom(minor) => (1 << 2) | ((minor as u8) & 3),
            Op::Compound(minor) => (2 << 2) | ((minor as u8) & 3),
+            Op::Reserved(minor) => (3 << 2) | (minor & 3),
        }
    }
 }
--- a/implementations/rust/src/value/decoder.rs
+++ b/implementations/rust/src/value/decoder.rs
@ -246,59 +246,63 @@ impl<'a, 'b, R: Read, N: NestedValue<D>, D: Domain> Decoder<'a, 'b, R, N, D> {
    }

    pub fn next(&mut self) -> Result<N> {
-        match self.nextop()? {
-            (Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
-            (Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
-            (Op::Misc(0), 2) => {
-                let bs: &[u8] = &self.readbytes(4)?;
-                Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
-            }
-            (Op::Misc(0), 3) => {
-                let bs: &[u8] = &self.readbytes(8)?;
-                Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
-            }
-            (Op::Misc(0), 5) => {
-                if self.read_annotations {
-                    let mut annotations = vec![self.next()?];
-                    while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
-                        self.skip()?;
-                        annotations.push(self.next()?);
+        loop {
+            return match self.nextop()? {
+                (Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
+                (Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
+                (Op::Misc(0), 2) => {
+                    let bs: &[u8] = &self.readbytes(4)?;
+                    Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
+                }
+                (Op::Misc(0), 3) => {
+                    let bs: &[u8] = &self.readbytes(8)?;
+                    Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
+                }
+                (Op::Misc(0), 5) => {
+                    if self.read_annotations {
+                        let mut annotations = vec![self.next()?];
+                        while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
+                            self.skip()?;
+                            annotations.push(self.next()?);
+                        }
+                        let v = self.next()?;
+                        assert!(v.annotations().is_empty());
+                        Ok(N::wrap_ann(annotations, v.value_owned()))
+                    } else {
+                        self.next()?;
+                        self.next()
                    }
-                    let v = self.next()?;
-                    assert!(v.annotations().is_empty());
-                    Ok(N::wrap_ann(annotations, v.value_owned()))
-                } else {
-                    self.next()?;
-                    self.next()
                }
-            }
-            (Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
-            (Op::Misc(1), arg) => {
-                let n = self.wirelength(arg)?;
-                match self.placeholders.and_then(|m| m.get(&n)) {
-                    Some(v) => Ok(v.clone().wrap()),
-                    None => Err(Error::Syntax("Invalid Preserves placeholder")),
+                (Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
+                (Op::Misc(1), arg) => {
+                    let n = self.wirelength(arg)?;
+                    match self.placeholders.and_then(|m| m.get(&n)) {
+                        Some(v) => Ok(v.clone().wrap()),
+                        None => Err(Error::Syntax("Invalid Preserves placeholder")),
+                    }
                }
-            }
-            (Op::Misc(2), arg) => {
-                match Op::try_from(arg)? {
-                    Op::Atom(minor) => self.binarystream(minor),
-                    Op::Compound(minor) => self.valuestream(minor),
-                    _ => Err(Error::Syntax("Invalid format C start byte")),
+                (Op::Misc(2), arg) => {
+                    match Op::try_from(arg)? {
+                        Op::Atom(minor) => self.binarystream(minor),
+                        Op::Compound(minor) => self.valuestream(minor),
+                        _ => Err(Error::Syntax("Invalid format C start byte")),
+                    }
                }
-            }
-            (Op::Misc(3), arg) => {
-                let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
-                Ok(Value::from(n).wrap())
-            }
-            (Op::Misc(_), _) => unreachable!(),
-            (Op::Atom(minor), arg) => {
-                let count = self.wirelength(arg)?;
-                Self::decodebinary(minor, self.readbytes(count)?)
-            }
-            (Op::Compound(minor), arg) => {
-                let count = self.wirelength(arg)?;
-                Self::decodecompound(minor, self.readvalues(count)?)
+                (Op::Misc(3), arg) => {
+                    let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
+                    Ok(Value::from(n).wrap())
+                }
+                (Op::Misc(_), _) => unreachable!(),
+                (Op::Atom(minor), arg) => {
+                    let count = self.wirelength(arg)?;
+                    Self::decodebinary(minor, self.readbytes(count)?)
+                }
+                (Op::Compound(minor), arg) => {
+                    let count = self.wirelength(arg)?;
+                    Self::decodecompound(minor, self.readvalues(count)?)
+                }
+                (Op::Reserved(3), 15) => continue,
+                (Op::Reserved(_), _) => Err(InvalidOp.into()),
            }
        }
    }
--- a/implementations/rust/src/value/encoder.rs
+++ b/implementations/rust/src/value/encoder.rs
@ -70,6 +70,10 @@ impl<'a, 'b, W: Write, N: NestedValue<D>, D: Domain> Encoder<'a, 'b, W, N, D> {
        self.write_all(bs)
    }

+    pub fn write_noop(&mut self) -> Result {
+        self.write_op(Op::Reserved(3), 15)
+    }
+
    pub fn write(&mut self, v: &N) -> Result {
        for ann in v.annotations() {
            self.write_header(Op::Misc(0), 5)?;
--- a/preserves.css
+++ b/preserves.css
@ -22,6 +22,10 @@ pre, code { background-color: #eee; font-family: "DejaVu Sans Mono", monospace;
 code { font-size: 75%; }
 pre { padding: 0.33rem; line-height: 1; overflow-x: auto; }

+p, ul, table {
+    margin: 1em 0;
+}
+
 body {
  counter-reset: section 0 subsection 0 appendix 0;
 }
--- a/preserves.md
+++ b/preserves.md
@ -4,7 +4,7 @@ title: "Preserves: an Expressive Data Language"
 ---

 Tony Garnock-Jones <tonyg@leastfixedpoint.com>  
-August 2019. Version 0.0.6.
+May 2020. Version 0.0.7.

  [sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt
  [spki]: http://world.std.com/~cme/html/spki.html
@ -470,11 +470,15 @@ representation.[^some-encodings-unused]
 |  0  |  3  |     | (format A) Certain small `SignedInteger`s |
 |  1  |     |     | (format B) An `Atom` with variable-length binary representation |
 |  2  |     |     | (format B) A `Compound` with variable-length representation |
+|  3  |  3  | 15  | (format A) 0xFF byte; no-op |

 #### Encoding data of type-specific length (format A).

 Each type of data defines its own rules for this format.

+Of particular note is lead byte `0xFF`, which is a no-op byte acting
+as a kind of pseudo-whitespace in a binary-syntax encoding.
+
 #### Encoding data of known length (format B).

 Format B is used where the length `l` of the `Value` to be encoded is
@ -896,10 +900,11 @@ endless sequence of zero length chunks, appearing to make progress but
 not actually doing so. Implementations *MUST* reject zero length
 chunks when decoding, and *MUST NOT* produce them when encoding.

-**Whitespace.** Similarly, the textual format for `Value`s allows
-arbitrary whitespace in many positions. In streaming transfer
-situations, consider optional restrictions on the amount of
-consecutive whitespace that may appear in a serialized `Value`.
+**Whitespace and no-ops.** Similarly, the binary format allows `0xFF`
+no-ops and the textual format allows arbitrary whitespace in many
+positions. In streaming transfer situations, consider optional
+restrictions on the amount of consecutive whitespace or the number of
+consecutive no-ops that may appear.

 **Annotations.** Also similarly, in modes where a `Value` is being
 read while annotations are skipped, an endless sequence of annotations
@ -922,6 +927,24 @@ The text syntax for `Boolean`s, `Symbol`s, and `ByteString`s is
 directly inspired by [Racket](https://racket-lang.org/)'s lexical
 syntax.

+## Appendix. Autodetection of textual or binary syntax
+
+Whitespace characters `0x09` (ASCII HT (tab)), `0x0A` (LF), `0x0D`
+(CR), `0x20` (space) and `0x2C` (comma) are ignored at the start of a
+textual-syntax Preserves `Document`, and their UTF-8 encodings are
+reserved lead byte values in binary-syntax Preserves.
+
+The byte `0xFF`, signifying a no-op in binary-syntax Preserves, has no
+meaning in either 7-bit ASCII or UTF-8, and therefore cannot appear in
+a valid textual-syntax Preserves `Document`.
+
+If applications prefix their textual-syntax documents with e.g. a
+space or newline character, and their binary-syntax documents with a
+`0xFF` byte, consumers of these documents may reliably autodetect the
+syntax being used. In a network protocol supporting this kind of
+autodetection, clients may transmit LF or `0xFF` to select text or
+binary syntax, respectively.
+
 ## Appendix. Table of lead byte values

     00 - False
@ -930,9 +953,9 @@ syntax.
     03 - Double
     04 - End stream
     05 - Annotation
-    (0x)  RESERVED 06-0F
+    (0x)  RESERVED 06-0F (NB. 09, 0A, 0D specially reserved)
     1x - Placeholder
-     2x - Start Stream
+     2x - Start Stream (NB. 20, 2C specially reserved)
     3x - Small integers 0..12,-3..-1

     4x - SignedInteger
@ -948,7 +971,8 @@ syntax.
    (Cx)  RESERVED C0-CF
    (Dx)  RESERVED D0-DF
    (Ex)  RESERVED E0-EF
-    (Fx)  RESERVED F0-FF
+    (Fx)  RESERVED F0-FE
+     FF   No-op

 ## Appendix. Bit fields within lead byte values

@ -962,13 +986,25 @@ syntax.
     00 00 0100  End Stream (to match a previous Start Stream)
     00 00 0101  Annotation; two more Reprs follow

+     00 00 1001  (ASCII HT (tab))  \
+     00 00 1010  (ASCII LF)        |- Reserved: may be used to indicate
+     00 00 1101  (ASCII CR)        /    use of text encoding
+
     00 01 mmmm  Placeholder; m is the placeholder number

     00 10 ttnn  Start Stream <tt,nn>
                   When tt = 00 --> error
+                               When nn = 00 --> (ASCII space)
+                                           Reserved: may be used to indicate
+                                             use of text encoding
+                                         otherwise --> error
                             01 --> each chunk is a ByteString
                             10 --> each chunk is a single encoded Value
                             11 --> error (RESERVED)
+                               When nn = 00 --> (ASCII comma)
+                                           Reserved: may be used to indicate
+                                             use of text encoding
+                                         otherwise --> error

     00 11 xxxx  Small integers 0..12,-3..-1

@ -983,6 +1019,7 @@ syntax.
     10 11 mmmm  Dictionary

     11 nn mmmm  error, RESERVED
+     11 11 1111  no-op; unambiguous indication of binary Preserves format

 Where `mmmm` appears, interpret it as an unsigned 4-bit number `m`. If
 `m`<15, let `l`=`m`. Otherwise, `m`=15; let `l` be the result of
--- a/tests/samples.bin
+++ b/tests/samples.bin
--- a/tests/samples.txt
+++ b/tests/samples.txt
@ -1,4 +1,46 @@
@<EmacsMode "-*- preserves -*-">
+@<Documentation [
+  "Individual test cases may be any of the following record types:"
+  <TestCaseTypes {
+    Test:                 {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 9    11}}
+    NondeterministicTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8   10   }}
+    StreamingTest:        {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 9      }}
+    DecodeTest:           {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8}}
+    ParseError:           {fields: [text]                  expectations: {12}}
+    ParseShort:           {fields: [text]                  expectations: {13}}
+    DecodeError:          {fields: [bytes]                 expectations: {14}}
+    DecodeShort:          {fields: [bytes]                 expectations: {15}}
+  }>
+  "In each test, let value = strip(annotatedValue),",
+  "                  forward = value,",
+  "                  back = value,"
+  "except where test-case-specific values of `forward` and/or `back` are provided"
+  "by the executing harness (of particular importance for `StreamingTest`s),"
+  "and check the following numbered expectations according to the table above:"
+  <TestCaseExpectations {
+     1: "value = back"
+     2: "strip(decodeBinary(encodeBinary(value))) = back"
+     3: "strip(decodeBinary(encodeBinary(forward))) = back"
+     4: "strip(decodeBinary(binary)) = back"
+     5: "decodeBinary(binary) = annotatedValue"
+     6: "decodeBinary(encodeBinary(annotatedValue)) = annotatedValue"
+     7: "decodeText(encodeText(value)) = back"
+     8: "decodeText(encodeText(forward)) = back"
+     9: "encodeBinary(forward) = binary"
+    10: "canonicallyEncodeBinary(forward) = binary"
+    11: "encodeBinary(annotatedValue) = binary"
+
+    12: "decodeText(text) fails with a syntax error (NB. never with premature EOF)"
+    13: "decodeText(text) fails signalling premature EOF (NB. never with a syntax error)"
+
+    14: "decodeBinary(bytes) fails with a syntax error (NB. never with premature EOF)"
+    15: "decodeBinary(bytes) fails signalling premature EOF (NB. never with a syntax error)"
+  }>
+  "Each `StreamingTest` will need to have an implementation-specific `forward`"
+  "supplied that encodes to the specific format C byte sequences in `binary`."
+  "Alternatively, implementations may choose to skip expectation 9 for"
+  "`StreamingTest`s, treating them like `DecodeTest`s."
+]>
 <TestCases
 <ExpectedPlaceholderMapping {
   0: discard
@ -76,6 +118,12 @@
   list7: <Test #hex{93 73616263 732e2e2e 73646566} [abc ... def]>
   list8: @"Missing close bracket" <ParseShort "[">
   list9: @"Unexpected close bracket" <ParseError "]">
+   noop0: <DecodeTest #hex{ff10} discard>
+   noop1: <DecodeTest #hex{ff31} 1>
+   noop2: <DecodeTest #hex{ffffff42ff00} -256>
+   noop3: <DecodeTest #hex{ff05ff53616263ff42ff00} @"abc" -256>
+   noop4: @"No-ops must be followed by something" <DecodeShort #hex{ffffff}>
+   noop5: @"No input at all is considered short" <DecodeShort #hex{}>
   placeholder0: <Test #hex{10} discard>
   placeholder1: <Test #hex{11} capture>
   placeholder2: <Test #hex{12} observe>
@ -115,6 +163,8 @@
   symbol0: <Test #hex{70} ||>
   symbol1: <StreamingTest #hex{27626865626c6c616f04} hello>
   symbol2: <Test #hex{7568656c6c6f} hello>
+   whitespace0: @"Leading spaces have to eventually yield something" <ParseShort "   ">
+   whitespace1: @"No input at all is considered short" <ParseShort "">
   value1: <Test #"\x66corymb" #value#"fcorymb">
   value2: <Test #"\x01" #value#"\x01">
   value3: <Test #"\x01" #value#base64{AQ}>