diff --git a/implementations/javascript/src/codec.js b/implementations/javascript/src/codec.js index 53c1cb5..523c63a 100644 --- a/implementations/javascript/src/codec.js +++ b/implementations/javascript/src/codec.js @@ -157,52 +157,59 @@ class Decoder { } next() { - const [major, minor, arg] = this.nextop(); - switch (major) { - case 0: - switch (minor) { - case 0: - switch (arg) { - case 0: return this.wrap(false); - case 1: return this.wrap(true); - case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false))); - case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false))); - case 4: throw new DecodeError("Unexpected end-of-stream marker"); - case 5: { - const a = this.next(); - const v = this.next(); - return this.unshiftAnnotation(a, v); + while (true) { // we loop because we may need to consume an arbitrary number of no-ops + const [major, minor, arg] = this.nextop(); + switch (major) { + case 0: + switch (minor) { + case 0: + switch (arg) { + case 0: return this.wrap(false); + case 1: return this.wrap(true); + case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false))); + case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false))); + case 4: throw new DecodeError("Unexpected end-of-stream marker"); + case 5: { + const a = this.next(); + const v = this.next(); + return this.unshiftAnnotation(a, v); + } + default: throw new DecodeError("Illegal format A lead byte"); } - default: throw new DecodeError("Illegal format A lead byte"); + case 1: { + const n = this.wirelength(arg); + const v = this.placeholders.get(n, void 0); + if (typeof v === 'undefined') { + const e = new DecodeError("Invalid Preserves placeholder"); + e.irritant = n; + throw e; + } + return this.wrap(v); } - case 1: { - const n = this.wirelength(arg); - const v = this.placeholders.get(n, void 0); - if (typeof v === 'undefined') { - const e = new DecodeError("Invalid Preserves placeholder"); - e.irritant = n; - throw e; + case 2: { + const t = arg >> 2; + const n = arg & 3; + switch (t) { + case 1: return this.wrap(this.binarystream(n)); + case 2: return this.wrap(this.valuestream(n)); + default: throw new DecodeError("Invalid format C start byte"); + } } - return this.wrap(v); + case 3: + return this.wrap((arg > 12) ? arg - 16 : arg); } - case 2: { - const t = arg >> 2; - const n = arg & 3; - switch (t) { - case 1: return this.wrap(this.binarystream(n)); - case 2: return this.wrap(this.valuestream(n)); - default: throw new DecodeError("Invalid format C start byte"); - } + case 1: + return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg))))); + case 2: + return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg)))); + case 3: + if (minor === 3 && arg === 15) { + // no-op. + continue; + } else { + throw new DecodeError("Invalid lead byte (major 3)"); } - case 3: - return this.wrap((arg > 12) ? arg - 16 : arg); - } - case 1: - return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg))))); - case 2: - return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg)))); - case 3: - throw new DecodeError("Invalid lead byte (major 3)"); + } } } @@ -315,6 +322,10 @@ class Encoder { this.header(0, 0, 4); } + encodenoop() { + this.leadbyte(3, 3, 15); + } + push(v) { const placeholder = this.placeholders.get(v, void 0); if (typeof placeholder !== 'undefined') { diff --git a/implementations/javascript/test/test-codec.js b/implementations/javascript/test/test-codec.js index 024f20b..124ea73 100644 --- a/implementations/javascript/test/test-codec.js +++ b/implementations/javascript/test/test-codec.js @@ -165,12 +165,12 @@ describe('common test suite', () => { it('should go back', () => assert(is(DS(binaryForm), back))); it('should go back with annotations', () => assert(is(D(E(annotatedTextForm)), annotatedTextForm))); - if (variety !== 'nondeterministic') { + if (variety !== 'decode' && variety !== 'nondeterministic') { it('should encode correctly', () => assert(is(E(forward), binaryForm), E(forward) + ' ' + binaryForm)); } - if (variety !== 'nondeterministic' && variety !== 'streaming') { + if (variety !== 'decode' && variety !== 'nondeterministic' && variety !== 'streaming') { it('should encode correctly with annotations', () => assert(is(E(annotatedTextForm), binaryForm), E(annotatedTextForm) + ' ' + binaryForm)); @@ -192,6 +192,9 @@ describe('common test suite', () => { case Symbol.for('NondeterministicTest'): runTestCase('nondeterministic', tName, t.get(0).strip(), t.get(1)); break; + case Symbol.for('DecodeTest'): + runTestCase('decode', tName, t.get(0).strip(), t.get(1)); + break; case Symbol.for('DecodeError'): describe(tName, () => { it('should fail with DecodeError', () => { diff --git a/implementations/python/preserves/preserves.py b/implementations/python/preserves/preserves.py index 43e6d2e..80d0b9b 100644 --- a/implementations/python/preserves/preserves.py +++ b/implementations/python/preserves/preserves.py @@ -405,39 +405,44 @@ class Decoder(Codec): return v def next(self): - (major, minor, arg) = self.nextop() - if major == 0: - if minor == 0: - if arg == 0: return self.wrap(False) - if arg == 1: return self.wrap(True) - if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0])) - if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0]) - if arg == 4: raise DecodeError('Unexpected end-of-stream marker') - if arg == 5: - a = self.next() - v = self.next() - return self.unshift_annotation(a, v) - raise DecodeError('Invalid format A encoding') - elif minor == 1: - n = self.wirelength(arg) - v = self.placeholders.get(n, None) - if v is None: - raise DecodeError('Invalid Preserves placeholder') - return self.wrap(v) - elif minor == 2: - t = arg >> 2 - n = arg & 3 - if t == 1: return self.wrap(self.binarystream(n)) - if t == 2: return self.wrap(self.valuestream(n)) - raise DecodeError('Invalid format C start byte') - else: # minor == 3 - return self.wrap(arg - 16 if arg > 12 else arg) - elif major == 1: - return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg)))) - elif major == 2: - return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg)))) - else: # major == 3 - raise DecodeError('Invalid lead byte (major 3)') + while True: # we loop because we may need to consume an arbitrary number of no-ops + (major, minor, arg) = self.nextop() + if major == 0: + if minor == 0: + if arg == 0: return self.wrap(False) + if arg == 1: return self.wrap(True) + if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0])) + if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0]) + if arg == 4: raise DecodeError('Unexpected end-of-stream marker') + if arg == 5: + a = self.next() + v = self.next() + return self.unshift_annotation(a, v) + raise DecodeError('Invalid format A encoding') + elif minor == 1: + n = self.wirelength(arg) + v = self.placeholders.get(n, None) + if v is None: + raise DecodeError('Invalid Preserves placeholder') + return self.wrap(v) + elif minor == 2: + t = arg >> 2 + n = arg & 3 + if t == 1: return self.wrap(self.binarystream(n)) + if t == 2: return self.wrap(self.valuestream(n)) + raise DecodeError('Invalid format C start byte') + else: # minor == 3 + return self.wrap(arg - 16 if arg > 12 else arg) + elif major == 1: + return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg)))) + elif major == 2: + return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg)))) + else: # major == 3 + if minor == 3 and arg == 15: + # no-op. + continue + else: + raise DecodeError('Invalid lead byte (major 3)') def try_next(self): start = self.index @@ -499,6 +504,9 @@ class Encoder(Codec): for i in items: self.append(i) self.leadbyte(0, 0, 4) + def encodenoop(self): + self.leadbyte(3, 3, 15) + def append(self, v): try: placeholder = self.placeholders.get(v, None) diff --git a/implementations/python/preserves/test_preserves.py b/implementations/python/preserves/test_preserves.py index 2d61439..1e08ab1 100644 --- a/implementations/python/preserves/test_preserves.py +++ b/implementations/python/preserves/test_preserves.py @@ -249,9 +249,9 @@ def install_test(d, variant, tName, binaryForm, annotatedTextForm): add_method(d, tName, test_forward) add_method(d, tName, test_back) add_method(d, tName, test_back_ann) - if variant not in ['nondeterministic']: + if variant not in ['decode', 'nondeterministic']: add_method(d, tName, test_encode) - if variant not in ['nondeterministic', 'streaming']: + if variant not in ['decode', 'nondeterministic', 'streaming']: add_method(d, tName, test_encode_ann) def install_exn_test(d, tName, bs, check_proc): @@ -287,6 +287,8 @@ class CommonTestSuite(unittest.TestCase): install_test(locals(), 'streaming', tName, t[0].strip(), t[1]) elif t.key == Symbol('NondeterministicTest'): install_test(locals(), 'nondeterministic', tName, t[0].strip(), t[1]) + elif t.key == Symbol('DecodeTest'): + install_test(locals(), 'decode', tName, t[0].strip(), t[1]) elif t.key == Symbol('DecodeError'): def expected_err(self, e): self.assertIsInstance(e, DecodeError) diff --git a/implementations/racket/preserves/preserves/main.rkt b/implementations/racket/preserves/preserves/main.rkt index b77236b..04836eb 100644 --- a/implementations/racket/preserves/preserves/main.rkt +++ b/implementations/racket/preserves/preserves/main.rkt @@ -20,6 +20,7 @@ preserve->string current-value->placeholder current-placeholder->value + prepend-noop encode decode decode-syntax @@ -115,6 +116,9 @@ (define current-value->placeholder (make-parameter (lambda (v) #f))) (define current-placeholder->value (make-parameter (lambda (v) (void)))) +(define (prepend-noop encoded-value) + (bit-string-append #"\xff" encoded-value)) + (define (encode v) (bit-string->bytes (bit-string (v :: (wire-value))))) @@ -370,6 +374,9 @@ (decode-compound minor fields rest (nil-annotation ks bs) kf)) kf)) + ([ (= #b11111111 :: bits 8) (rest :: binary) ] + (decode-one rest ks kf)) + (else (kf)))) (decode-one input ks kf)) @@ -1110,25 +1117,28 @@ (match (hash-ref samples-txt-expected t-name text-form) [(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations [v (values v v #t)])) - (check-equal? text-form back loc) - (check-equal? (d-strip (encode text-form)) back loc) - (check-equal? (d-strip (encode forward)) back loc) - (check-equal? (d-strip binary-form) back loc) - (check-equal? (d binary-form) annotated-text-form loc) - (check-equal? (d (encode annotated-text-form)) annotated-text-form loc) - (check-equal? (string->preserve (preserve->string text-form)) back loc) - (check-equal? (string->preserve (preserve->string forward)) back loc) - (check-equal? (string->preserve-syntax (preserve->string annotated-text-form)) + (check-equal? text-form back loc) ;; expectation 1 + (check-equal? (d-strip (encode text-form)) back loc) ;; expectation 2 + (check-equal? (d-strip (encode forward)) back loc) ;; expectation 3 + (check-equal? (d-strip binary-form) back loc) ;; expectation 4 + (check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5 + (check-equal? (d (encode annotated-text-form)) annotated-text-form loc) ;; expectation 6 + (check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7 + (check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8 + (check-equal? (string->preserve-syntax (preserve->string annotated-text-form)) ;; similar to 8 annotated-text-form loc) - (when (or (not (memq variety '(nondeterministic))) - (and can-execute-nondet-with-canonicalization?)) + (when (and (not (memq variety '(decode))) + (or (not (memq variety '(nondeterministic))) + (and can-execute-nondet-with-canonicalization?))) + ;; expectations 9 and 10 (parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f))) (check-equal? (encode forward) binary-form loc))) - (unless (memq variety '(nondeterministic streaming)) + (unless (memq variety '(decode nondeterministic streaming)) + ;; expectation 11 (check-equal? (encode annotated-text-form) binary-form loc))) - (define-runtime-path tests-path "../../../tests") + (define-runtime-path tests-path "../../../../tests") (let* ((path (build-path tests-path "samples.txt")) (testfile (call-with-input-file path (lambda (p) @@ -1158,6 +1168,8 @@ (run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)] [`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form) (run-test-case 'streaming t-name loc binary-form annotated-text-form)] + [`#s(DecodeTest ,(strip-annotations binary-form) ,annotated-text-form) + (run-test-case 'decode t-name loc binary-form annotated-text-form)] [`#s(ParseError ,(strip-annotations str)) (with-handlers [(exn:fail:read:eof? (lambda (e) (fail-test "Unexpected EOF: ~e" e))) diff --git a/implementations/rust/src/lib.rs b/implementations/rust/src/lib.rs index 210248c..24c6790 100644 --- a/implementations/rust/src/lib.rs +++ b/implementations/rust/src/lib.rs @@ -307,6 +307,7 @@ mod samples_tests { Test(#[serde(with = "serde_bytes")] Vec, PlainValue), NondeterministicTest(#[serde(with = "serde_bytes")] Vec, PlainValue), StreamingTest(#[serde(with = "serde_bytes")] Vec, PlainValue), + DecodeTest(#[serde(with = "serde_bytes")] Vec, PlainValue), ParseError(String), ParseShort(String), DecodeError(#[serde(with = "serde_bytes")] Vec), @@ -341,6 +342,10 @@ mod samples_tests { assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val); assert_eq!(&codec.decode(&mut &bin[..])?, val); } + TestCase::DecodeTest(ref bin, ref val) => { + assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val); + assert_eq!(&codec.decode(&mut &bin[..])?, val); + } TestCase::ParseError(_) => (), TestCase::ParseShort(_) => (), TestCase::DecodeError(ref bin) => { diff --git a/implementations/rust/src/value/constants.rs b/implementations/rust/src/value/constants.rs index c833a04..c304f72 100644 --- a/implementations/rust/src/value/constants.rs +++ b/implementations/rust/src/value/constants.rs @@ -6,6 +6,7 @@ pub enum Op { Misc(u8), Atom(AtomMinor), Compound(CompoundMinor), + Reserved(u8), } #[derive(Debug, PartialEq, Eq)] @@ -18,6 +19,7 @@ impl TryFrom for Op { 0 => Ok(Self::Misc(v & 3)), 1 => Ok(Self::Atom(AtomMinor::try_from(v & 3).unwrap())), 2 => Ok(Self::Compound(CompoundMinor::try_from(v & 3).unwrap())), + 3 => Ok(Self::Reserved(v & 3)), _ => Err(InvalidOp), } } @@ -29,6 +31,7 @@ impl From for u8 { Op::Misc(minor) => minor & 3, Op::Atom(minor) => (1 << 2) | ((minor as u8) & 3), Op::Compound(minor) => (2 << 2) | ((minor as u8) & 3), + Op::Reserved(minor) => (3 << 2) | (minor & 3), } } } diff --git a/implementations/rust/src/value/decoder.rs b/implementations/rust/src/value/decoder.rs index e76e96a..ee118df 100644 --- a/implementations/rust/src/value/decoder.rs +++ b/implementations/rust/src/value/decoder.rs @@ -246,59 +246,63 @@ impl<'a, 'b, R: Read, N: NestedValue, D: Domain> Decoder<'a, 'b, R, N, D> { } pub fn next(&mut self) -> Result { - match self.nextop()? { - (Op::Misc(0), 0) => Ok(Value::from(false).wrap()), - (Op::Misc(0), 1) => Ok(Value::from(true).wrap()), - (Op::Misc(0), 2) => { - let bs: &[u8] = &self.readbytes(4)?; - Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()) - } - (Op::Misc(0), 3) => { - let bs: &[u8] = &self.readbytes(8)?; - Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()) - } - (Op::Misc(0), 5) => { - if self.read_annotations { - let mut annotations = vec![self.next()?]; - while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) { - self.skip()?; - annotations.push(self.next()?); + loop { + return match self.nextop()? { + (Op::Misc(0), 0) => Ok(Value::from(false).wrap()), + (Op::Misc(0), 1) => Ok(Value::from(true).wrap()), + (Op::Misc(0), 2) => { + let bs: &[u8] = &self.readbytes(4)?; + Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()) + } + (Op::Misc(0), 3) => { + let bs: &[u8] = &self.readbytes(8)?; + Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()) + } + (Op::Misc(0), 5) => { + if self.read_annotations { + let mut annotations = vec![self.next()?]; + while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) { + self.skip()?; + annotations.push(self.next()?); + } + let v = self.next()?; + assert!(v.annotations().is_empty()); + Ok(N::wrap_ann(annotations, v.value_owned())) + } else { + self.next()?; + self.next() } - let v = self.next()?; - assert!(v.annotations().is_empty()); - Ok(N::wrap_ann(annotations, v.value_owned())) - } else { - self.next()?; - self.next() } - } - (Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")), - (Op::Misc(1), arg) => { - let n = self.wirelength(arg)?; - match self.placeholders.and_then(|m| m.get(&n)) { - Some(v) => Ok(v.clone().wrap()), - None => Err(Error::Syntax("Invalid Preserves placeholder")), + (Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")), + (Op::Misc(1), arg) => { + let n = self.wirelength(arg)?; + match self.placeholders.and_then(|m| m.get(&n)) { + Some(v) => Ok(v.clone().wrap()), + None => Err(Error::Syntax("Invalid Preserves placeholder")), + } } - } - (Op::Misc(2), arg) => { - match Op::try_from(arg)? { - Op::Atom(minor) => self.binarystream(minor), - Op::Compound(minor) => self.valuestream(minor), - _ => Err(Error::Syntax("Invalid format C start byte")), + (Op::Misc(2), arg) => { + match Op::try_from(arg)? { + Op::Atom(minor) => self.binarystream(minor), + Op::Compound(minor) => self.valuestream(minor), + _ => Err(Error::Syntax("Invalid format C start byte")), + } } - } - (Op::Misc(3), arg) => { - let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) }; - Ok(Value::from(n).wrap()) - } - (Op::Misc(_), _) => unreachable!(), - (Op::Atom(minor), arg) => { - let count = self.wirelength(arg)?; - Self::decodebinary(minor, self.readbytes(count)?) - } - (Op::Compound(minor), arg) => { - let count = self.wirelength(arg)?; - Self::decodecompound(minor, self.readvalues(count)?) + (Op::Misc(3), arg) => { + let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) }; + Ok(Value::from(n).wrap()) + } + (Op::Misc(_), _) => unreachable!(), + (Op::Atom(minor), arg) => { + let count = self.wirelength(arg)?; + Self::decodebinary(minor, self.readbytes(count)?) + } + (Op::Compound(minor), arg) => { + let count = self.wirelength(arg)?; + Self::decodecompound(minor, self.readvalues(count)?) + } + (Op::Reserved(3), 15) => continue, + (Op::Reserved(_), _) => Err(InvalidOp.into()), } } } diff --git a/implementations/rust/src/value/encoder.rs b/implementations/rust/src/value/encoder.rs index 55b102e..6dd82d9 100644 --- a/implementations/rust/src/value/encoder.rs +++ b/implementations/rust/src/value/encoder.rs @@ -70,6 +70,10 @@ impl<'a, 'b, W: Write, N: NestedValue, D: Domain> Encoder<'a, 'b, W, N, D> { self.write_all(bs) } + pub fn write_noop(&mut self) -> Result { + self.write_op(Op::Reserved(3), 15) + } + pub fn write(&mut self, v: &N) -> Result { for ann in v.annotations() { self.write_header(Op::Misc(0), 5)?; diff --git a/preserves.css b/preserves.css index ea3486c..da64c0c 100644 --- a/preserves.css +++ b/preserves.css @@ -22,6 +22,10 @@ pre, code { background-color: #eee; font-family: "DejaVu Sans Mono", monospace; code { font-size: 75%; } pre { padding: 0.33rem; line-height: 1; overflow-x: auto; } +p, ul, table { + margin: 1em 0; +} + body { counter-reset: section 0 subsection 0 appendix 0; } diff --git a/preserves.md b/preserves.md index fb361c7..0f107a5 100644 --- a/preserves.md +++ b/preserves.md @@ -4,7 +4,7 @@ title: "Preserves: an Expressive Data Language" --- Tony Garnock-Jones -August 2019. Version 0.0.6. +May 2020. Version 0.0.7. [sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt [spki]: http://world.std.com/~cme/html/spki.html @@ -470,11 +470,15 @@ representation.[^some-encodings-unused] | 0 | 3 | | (format A) Certain small `SignedInteger`s | | 1 | | | (format B) An `Atom` with variable-length binary representation | | 2 | | | (format B) A `Compound` with variable-length representation | +| 3 | 3 | 15 | (format A) 0xFF byte; no-op | #### Encoding data of type-specific length (format A). Each type of data defines its own rules for this format. +Of particular note is lead byte `0xFF`, which is a no-op byte acting +as a kind of pseudo-whitespace in a binary-syntax encoding. + #### Encoding data of known length (format B). Format B is used where the length `l` of the `Value` to be encoded is @@ -896,10 +900,11 @@ endless sequence of zero length chunks, appearing to make progress but not actually doing so. Implementations *MUST* reject zero length chunks when decoding, and *MUST NOT* produce them when encoding. -**Whitespace.** Similarly, the textual format for `Value`s allows -arbitrary whitespace in many positions. In streaming transfer -situations, consider optional restrictions on the amount of -consecutive whitespace that may appear in a serialized `Value`. +**Whitespace and no-ops.** Similarly, the binary format allows `0xFF` +no-ops and the textual format allows arbitrary whitespace in many +positions. In streaming transfer situations, consider optional +restrictions on the amount of consecutive whitespace or the number of +consecutive no-ops that may appear. **Annotations.** Also similarly, in modes where a `Value` is being read while annotations are skipped, an endless sequence of annotations @@ -922,6 +927,24 @@ The text syntax for `Boolean`s, `Symbol`s, and `ByteString`s is directly inspired by [Racket](https://racket-lang.org/)'s lexical syntax. +## Appendix. Autodetection of textual or binary syntax + +Whitespace characters `0x09` (ASCII HT (tab)), `0x0A` (LF), `0x0D` +(CR), `0x20` (space) and `0x2C` (comma) are ignored at the start of a +textual-syntax Preserves `Document`, and their UTF-8 encodings are +reserved lead byte values in binary-syntax Preserves. + +The byte `0xFF`, signifying a no-op in binary-syntax Preserves, has no +meaning in either 7-bit ASCII or UTF-8, and therefore cannot appear in +a valid textual-syntax Preserves `Document`. + +If applications prefix their textual-syntax documents with e.g. a +space or newline character, and their binary-syntax documents with a +`0xFF` byte, consumers of these documents may reliably autodetect the +syntax being used. In a network protocol supporting this kind of +autodetection, clients may transmit LF or `0xFF` to select text or +binary syntax, respectively. + ## Appendix. Table of lead byte values 00 - False @@ -930,9 +953,9 @@ syntax. 03 - Double 04 - End stream 05 - Annotation - (0x) RESERVED 06-0F + (0x) RESERVED 06-0F (NB. 09, 0A, 0D specially reserved) 1x - Placeholder - 2x - Start Stream + 2x - Start Stream (NB. 20, 2C specially reserved) 3x - Small integers 0..12,-3..-1 4x - SignedInteger @@ -948,7 +971,8 @@ syntax. (Cx) RESERVED C0-CF (Dx) RESERVED D0-DF (Ex) RESERVED E0-EF - (Fx) RESERVED F0-FF + (Fx) RESERVED F0-FE + FF No-op ## Appendix. Bit fields within lead byte values @@ -962,13 +986,25 @@ syntax. 00 00 0100 End Stream (to match a previous Start Stream) 00 00 0101 Annotation; two more Reprs follow + 00 00 1001 (ASCII HT (tab)) \ + 00 00 1010 (ASCII LF) |- Reserved: may be used to indicate + 00 00 1101 (ASCII CR) / use of text encoding + 00 01 mmmm Placeholder; m is the placeholder number 00 10 ttnn Start Stream When tt = 00 --> error + When nn = 00 --> (ASCII space) + Reserved: may be used to indicate + use of text encoding + otherwise --> error 01 --> each chunk is a ByteString 10 --> each chunk is a single encoded Value 11 --> error (RESERVED) + When nn = 00 --> (ASCII comma) + Reserved: may be used to indicate + use of text encoding + otherwise --> error 00 11 xxxx Small integers 0..12,-3..-1 @@ -983,6 +1019,7 @@ syntax. 10 11 mmmm Dictionary 11 nn mmmm error, RESERVED + 11 11 1111 no-op; unambiguous indication of binary Preserves format Where `mmmm` appears, interpret it as an unsigned 4-bit number `m`. If `m`<15, let `l`=`m`. Otherwise, `m`=15; let `l` be the result of diff --git a/tests/samples.bin b/tests/samples.bin index cf9435d..4339755 100644 Binary files a/tests/samples.bin and b/tests/samples.bin differ diff --git a/tests/samples.txt b/tests/samples.txt index e97253a..683a73a 100644 --- a/tests/samples.txt +++ b/tests/samples.txt @@ -1,4 +1,46 @@ @ +@ + "In each test, let value = strip(annotatedValue),", + " forward = value,", + " back = value," + "except where test-case-specific values of `forward` and/or `back` are provided" + "by the executing harness (of particular importance for `StreamingTest`s)," + "and check the following numbered expectations according to the table above:" + + "Each `StreamingTest` will need to have an implementation-specific `forward`" + "supplied that encodes to the specific format C byte sequences in `binary`." + "Alternatively, implementations may choose to skip expectation 11 for" + "`StreamingTest`s, treating them like `DecodeTest`s." +]> list8: @"Missing close bracket" list9: @"Unexpected close bracket" + noop0: + noop1: + noop2: + noop3: + noop4: @"No-ops must be followed by something" + noop5: @"No input at all is considered short" placeholder0: placeholder1: placeholder2: @@ -115,6 +163,8 @@ symbol0: symbol1: symbol2: + whitespace0: @"Leading spaces have to eventually yield something" + whitespace1: @"No input at all is considered short" value1: value2: value3: