Autodetectability of binary vs text; documented test case schema a little

This commit is contained in:
Tony Garnock-Jones 2020-05-13 12:55:55 +02:00
parent ebbd268166
commit 8e0ab95d82
13 changed files with 291 additions and 148 deletions

View File

@ -157,52 +157,59 @@ class Decoder {
} }
next() { next() {
const [major, minor, arg] = this.nextop(); while (true) { // we loop because we may need to consume an arbitrary number of no-ops
switch (major) { const [major, minor, arg] = this.nextop();
case 0: switch (major) {
switch (minor) { case 0:
case 0: switch (minor) {
switch (arg) { case 0:
case 0: return this.wrap(false); switch (arg) {
case 1: return this.wrap(true); case 0: return this.wrap(false);
case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false))); case 1: return this.wrap(true);
case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false))); case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false)));
case 4: throw new DecodeError("Unexpected end-of-stream marker"); case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false)));
case 5: { case 4: throw new DecodeError("Unexpected end-of-stream marker");
const a = this.next(); case 5: {
const v = this.next(); const a = this.next();
return this.unshiftAnnotation(a, v); const v = this.next();
return this.unshiftAnnotation(a, v);
}
default: throw new DecodeError("Illegal format A lead byte");
} }
default: throw new DecodeError("Illegal format A lead byte"); case 1: {
const n = this.wirelength(arg);
const v = this.placeholders.get(n, void 0);
if (typeof v === 'undefined') {
const e = new DecodeError("Invalid Preserves placeholder");
e.irritant = n;
throw e;
}
return this.wrap(v);
} }
case 1: { case 2: {
const n = this.wirelength(arg); const t = arg >> 2;
const v = this.placeholders.get(n, void 0); const n = arg & 3;
if (typeof v === 'undefined') { switch (t) {
const e = new DecodeError("Invalid Preserves placeholder"); case 1: return this.wrap(this.binarystream(n));
e.irritant = n; case 2: return this.wrap(this.valuestream(n));
throw e; default: throw new DecodeError("Invalid format C start byte");
}
} }
return this.wrap(v); case 3:
return this.wrap((arg > 12) ? arg - 16 : arg);
} }
case 2: { case 1:
const t = arg >> 2; return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg)))));
const n = arg & 3; case 2:
switch (t) { return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg))));
case 1: return this.wrap(this.binarystream(n)); case 3:
case 2: return this.wrap(this.valuestream(n)); if (minor === 3 && arg === 15) {
default: throw new DecodeError("Invalid format C start byte"); // no-op.
} continue;
} else {
throw new DecodeError("Invalid lead byte (major 3)");
} }
case 3: }
return this.wrap((arg > 12) ? arg - 16 : arg);
}
case 1:
return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg)))));
case 2:
return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg))));
case 3:
throw new DecodeError("Invalid lead byte (major 3)");
} }
} }
@ -315,6 +322,10 @@ class Encoder {
this.header(0, 0, 4); this.header(0, 0, 4);
} }
// Emit a no-op marker: a lead byte with major 3, minor 3, arg 15, which is the
// combination the decoder's next() loop consumes and skips before reading a value.
// NOTE(review): presumably leadbyte() packs these fields into the single byte 0xFF;
// confirm against its definition.
encodenoop() {
this.leadbyte(3, 3, 15);
}
push(v) { push(v) {
const placeholder = this.placeholders.get(v, void 0); const placeholder = this.placeholders.get(v, void 0);
if (typeof placeholder !== 'undefined') { if (typeof placeholder !== 'undefined') {

View File

@ -165,12 +165,12 @@ describe('common test suite', () => {
it('should go back', () => assert(is(DS(binaryForm), back))); it('should go back', () => assert(is(DS(binaryForm), back)));
it('should go back with annotations', it('should go back with annotations',
() => assert(is(D(E(annotatedTextForm)), annotatedTextForm))); () => assert(is(D(E(annotatedTextForm)), annotatedTextForm)));
if (variety !== 'nondeterministic') { if (variety !== 'decode' && variety !== 'nondeterministic') {
it('should encode correctly', it('should encode correctly',
() => assert(is(E(forward), binaryForm), () => assert(is(E(forward), binaryForm),
E(forward) + ' ' + binaryForm)); E(forward) + ' ' + binaryForm));
} }
if (variety !== 'nondeterministic' && variety !== 'streaming') { if (variety !== 'decode' && variety !== 'nondeterministic' && variety !== 'streaming') {
it('should encode correctly with annotations', it('should encode correctly with annotations',
() => assert(is(E(annotatedTextForm), binaryForm), () => assert(is(E(annotatedTextForm), binaryForm),
E(annotatedTextForm) + ' ' + binaryForm)); E(annotatedTextForm) + ' ' + binaryForm));
@ -192,6 +192,9 @@ describe('common test suite', () => {
case Symbol.for('NondeterministicTest'): case Symbol.for('NondeterministicTest'):
runTestCase('nondeterministic', tName, t.get(0).strip(), t.get(1)); runTestCase('nondeterministic', tName, t.get(0).strip(), t.get(1));
break; break;
case Symbol.for('DecodeTest'):
runTestCase('decode', tName, t.get(0).strip(), t.get(1));
break;
case Symbol.for('DecodeError'): case Symbol.for('DecodeError'):
describe(tName, () => { describe(tName, () => {
it('should fail with DecodeError', () => { it('should fail with DecodeError', () => {

View File

@ -405,39 +405,44 @@ class Decoder(Codec):
return v return v
def next(self): def next(self):
(major, minor, arg) = self.nextop() while True: # we loop because we may need to consume an arbitrary number of no-ops
if major == 0: (major, minor, arg) = self.nextop()
if minor == 0: if major == 0:
if arg == 0: return self.wrap(False) if minor == 0:
if arg == 1: return self.wrap(True) if arg == 0: return self.wrap(False)
if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0])) if arg == 1: return self.wrap(True)
if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0]) if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0]))
if arg == 4: raise DecodeError('Unexpected end-of-stream marker') if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
if arg == 5: if arg == 4: raise DecodeError('Unexpected end-of-stream marker')
a = self.next() if arg == 5:
v = self.next() a = self.next()
return self.unshift_annotation(a, v) v = self.next()
raise DecodeError('Invalid format A encoding') return self.unshift_annotation(a, v)
elif minor == 1: raise DecodeError('Invalid format A encoding')
n = self.wirelength(arg) elif minor == 1:
v = self.placeholders.get(n, None) n = self.wirelength(arg)
if v is None: v = self.placeholders.get(n, None)
raise DecodeError('Invalid Preserves placeholder') if v is None:
return self.wrap(v) raise DecodeError('Invalid Preserves placeholder')
elif minor == 2: return self.wrap(v)
t = arg >> 2 elif minor == 2:
n = arg & 3 t = arg >> 2
if t == 1: return self.wrap(self.binarystream(n)) n = arg & 3
if t == 2: return self.wrap(self.valuestream(n)) if t == 1: return self.wrap(self.binarystream(n))
raise DecodeError('Invalid format C start byte') if t == 2: return self.wrap(self.valuestream(n))
else: # minor == 3 raise DecodeError('Invalid format C start byte')
return self.wrap(arg - 16 if arg > 12 else arg) else: # minor == 3
elif major == 1: return self.wrap(arg - 16 if arg > 12 else arg)
return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg)))) elif major == 1:
elif major == 2: return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg))))
return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg)))) elif major == 2:
else: # major == 3 return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg))))
raise DecodeError('Invalid lead byte (major 3)') else: # major == 3
if minor == 3 and arg == 15:
# no-op.
continue
else:
raise DecodeError('Invalid lead byte (major 3)')
def try_next(self): def try_next(self):
start = self.index start = self.index
@ -499,6 +504,9 @@ class Encoder(Codec):
for i in items: self.append(i) for i in items: self.append(i)
self.leadbyte(0, 0, 4) self.leadbyte(0, 0, 4)
# Emit a no-op marker: a lead byte with major 3, minor 3, arg 15, which is the
# combination Decoder.next() consumes and skips before reading a value.
# NOTE(review): presumably leadbyte() packs these fields into the single byte 0xFF;
# confirm against its definition.
def encodenoop(self):
self.leadbyte(3, 3, 15)
def append(self, v): def append(self, v):
try: try:
placeholder = self.placeholders.get(v, None) placeholder = self.placeholders.get(v, None)

View File

@ -249,9 +249,9 @@ def install_test(d, variant, tName, binaryForm, annotatedTextForm):
add_method(d, tName, test_forward) add_method(d, tName, test_forward)
add_method(d, tName, test_back) add_method(d, tName, test_back)
add_method(d, tName, test_back_ann) add_method(d, tName, test_back_ann)
if variant not in ['nondeterministic']: if variant not in ['decode', 'nondeterministic']:
add_method(d, tName, test_encode) add_method(d, tName, test_encode)
if variant not in ['nondeterministic', 'streaming']: if variant not in ['decode', 'nondeterministic', 'streaming']:
add_method(d, tName, test_encode_ann) add_method(d, tName, test_encode_ann)
def install_exn_test(d, tName, bs, check_proc): def install_exn_test(d, tName, bs, check_proc):
@ -287,6 +287,8 @@ class CommonTestSuite(unittest.TestCase):
install_test(locals(), 'streaming', tName, t[0].strip(), t[1]) install_test(locals(), 'streaming', tName, t[0].strip(), t[1])
elif t.key == Symbol('NondeterministicTest'): elif t.key == Symbol('NondeterministicTest'):
install_test(locals(), 'nondeterministic', tName, t[0].strip(), t[1]) install_test(locals(), 'nondeterministic', tName, t[0].strip(), t[1])
elif t.key == Symbol('DecodeTest'):
install_test(locals(), 'decode', tName, t[0].strip(), t[1])
elif t.key == Symbol('DecodeError'): elif t.key == Symbol('DecodeError'):
def expected_err(self, e): def expected_err(self, e):
self.assertIsInstance(e, DecodeError) self.assertIsInstance(e, DecodeError)

View File

@ -20,6 +20,7 @@
preserve->string preserve->string
current-value->placeholder current-value->placeholder
current-placeholder->value current-placeholder->value
prepend-noop
encode encode
decode decode
decode-syntax decode-syntax
@ -115,6 +116,9 @@
(define current-value->placeholder (make-parameter (lambda (v) #f))) (define current-value->placeholder (make-parameter (lambda (v) #f)))
(define current-placeholder->value (make-parameter (lambda (v) (void)))) (define current-placeholder->value (make-parameter (lambda (v) (void))))
;; Returns `encoded-value` with a single 0xFF byte prepended. The decoder treats a
;; leading #b11111111 byte as a no-op and carries on decoding the remaining input,
;; so the result decodes to the same value as `encoded-value` itself.
(define (prepend-noop encoded-value)
(bit-string-append #"\xff" encoded-value))
(define (encode v) (define (encode v)
(bit-string->bytes (bit-string (v :: (wire-value))))) (bit-string->bytes (bit-string (v :: (wire-value)))))
@ -370,6 +374,9 @@
(decode-compound minor fields rest (nil-annotation ks bs) kf)) (decode-compound minor fields rest (nil-annotation ks bs) kf))
kf)) kf))
([ (= #b11111111 :: bits 8) (rest :: binary) ]
(decode-one rest ks kf))
(else (kf)))) (else (kf))))
(decode-one input ks kf)) (decode-one input ks kf))
@ -1110,25 +1117,28 @@
(match (hash-ref samples-txt-expected t-name text-form) (match (hash-ref samples-txt-expected t-name text-form)
[(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations [(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations
[v (values v v #t)])) [v (values v v #t)]))
(check-equal? text-form back loc) (check-equal? text-form back loc) ;; expectation 1
(check-equal? (d-strip (encode text-form)) back loc) (check-equal? (d-strip (encode text-form)) back loc) ;; expectation 2
(check-equal? (d-strip (encode forward)) back loc) (check-equal? (d-strip (encode forward)) back loc) ;; expectation 3
(check-equal? (d-strip binary-form) back loc) (check-equal? (d-strip binary-form) back loc) ;; expectation 4
(check-equal? (d binary-form) annotated-text-form loc) (check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5
(check-equal? (d (encode annotated-text-form)) annotated-text-form loc) (check-equal? (d (encode annotated-text-form)) annotated-text-form loc) ;; expectation 6
(check-equal? (string->preserve (preserve->string text-form)) back loc) (check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7
(check-equal? (string->preserve (preserve->string forward)) back loc) (check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8
(check-equal? (string->preserve-syntax (preserve->string annotated-text-form)) (check-equal? (string->preserve-syntax (preserve->string annotated-text-form)) ;; similar to 8
annotated-text-form annotated-text-form
loc) loc)
(when (or (not (memq variety '(nondeterministic))) (when (and (not (memq variety '(decode)))
(and can-execute-nondet-with-canonicalization?)) (or (not (memq variety '(nondeterministic)))
(and can-execute-nondet-with-canonicalization?)))
;; expectations 9 and 10
(parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f))) (parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f)))
(check-equal? (encode forward) binary-form loc))) (check-equal? (encode forward) binary-form loc)))
(unless (memq variety '(nondeterministic streaming)) (unless (memq variety '(decode nondeterministic streaming))
;; expectation 11
(check-equal? (encode annotated-text-form) binary-form loc))) (check-equal? (encode annotated-text-form) binary-form loc)))
(define-runtime-path tests-path "../../../tests") (define-runtime-path tests-path "../../../../tests")
(let* ((path (build-path tests-path "samples.txt")) (let* ((path (build-path tests-path "samples.txt"))
(testfile (call-with-input-file path (testfile (call-with-input-file path
(lambda (p) (lambda (p)
@ -1158,6 +1168,8 @@
(run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)] (run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)]
[`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form) [`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form)
(run-test-case 'streaming t-name loc binary-form annotated-text-form)] (run-test-case 'streaming t-name loc binary-form annotated-text-form)]
[`#s(DecodeTest ,(strip-annotations binary-form) ,annotated-text-form)
(run-test-case 'decode t-name loc binary-form annotated-text-form)]
[`#s(ParseError ,(strip-annotations str)) [`#s(ParseError ,(strip-annotations str))
(with-handlers [(exn:fail:read:eof? (with-handlers [(exn:fail:read:eof?
(lambda (e) (fail-test "Unexpected EOF: ~e" e))) (lambda (e) (fail-test "Unexpected EOF: ~e" e)))

View File

@ -307,6 +307,7 @@ mod samples_tests {
Test(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>), Test(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
NondeterministicTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>), NondeterministicTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
StreamingTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>), StreamingTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
DecodeTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
ParseError(String), ParseError(String),
ParseShort(String), ParseShort(String),
DecodeError(#[serde(with = "serde_bytes")] Vec<u8>), DecodeError(#[serde(with = "serde_bytes")] Vec<u8>),
@ -341,6 +342,10 @@ mod samples_tests {
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val); assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
assert_eq!(&codec.decode(&mut &bin[..])?, val); assert_eq!(&codec.decode(&mut &bin[..])?, val);
} }
TestCase::DecodeTest(ref bin, ref val) => {
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
assert_eq!(&codec.decode(&mut &bin[..])?, val);
}
TestCase::ParseError(_) => (), TestCase::ParseError(_) => (),
TestCase::ParseShort(_) => (), TestCase::ParseShort(_) => (),
TestCase::DecodeError(ref bin) => { TestCase::DecodeError(ref bin) => {

View File

@ -6,6 +6,7 @@ pub enum Op {
Misc(u8), Misc(u8),
Atom(AtomMinor), Atom(AtomMinor),
Compound(CompoundMinor), Compound(CompoundMinor),
Reserved(u8),
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
@ -18,6 +19,7 @@ impl TryFrom<u8> for Op {
0 => Ok(Self::Misc(v & 3)), 0 => Ok(Self::Misc(v & 3)),
1 => Ok(Self::Atom(AtomMinor::try_from(v & 3).unwrap())), 1 => Ok(Self::Atom(AtomMinor::try_from(v & 3).unwrap())),
2 => Ok(Self::Compound(CompoundMinor::try_from(v & 3).unwrap())), 2 => Ok(Self::Compound(CompoundMinor::try_from(v & 3).unwrap())),
3 => Ok(Self::Reserved(v & 3)),
_ => Err(InvalidOp), _ => Err(InvalidOp),
} }
} }
@ -29,6 +31,7 @@ impl From<Op> for u8 {
Op::Misc(minor) => minor & 3, Op::Misc(minor) => minor & 3,
Op::Atom(minor) => (1 << 2) | ((minor as u8) & 3), Op::Atom(minor) => (1 << 2) | ((minor as u8) & 3),
Op::Compound(minor) => (2 << 2) | ((minor as u8) & 3), Op::Compound(minor) => (2 << 2) | ((minor as u8) & 3),
Op::Reserved(minor) => (3 << 2) | (minor & 3),
} }
} }
} }

View File

@ -246,59 +246,63 @@ impl<'a, 'b, R: Read, N: NestedValue<D>, D: Domain> Decoder<'a, 'b, R, N, D> {
} }
pub fn next(&mut self) -> Result<N> { pub fn next(&mut self) -> Result<N> {
match self.nextop()? { loop {
(Op::Misc(0), 0) => Ok(Value::from(false).wrap()), return match self.nextop()? {
(Op::Misc(0), 1) => Ok(Value::from(true).wrap()), (Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
(Op::Misc(0), 2) => { (Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
let bs: &[u8] = &self.readbytes(4)?; (Op::Misc(0), 2) => {
Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()) let bs: &[u8] = &self.readbytes(4)?;
} Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
(Op::Misc(0), 3) => { }
let bs: &[u8] = &self.readbytes(8)?; (Op::Misc(0), 3) => {
Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()) let bs: &[u8] = &self.readbytes(8)?;
} Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
(Op::Misc(0), 5) => { }
if self.read_annotations { (Op::Misc(0), 5) => {
let mut annotations = vec![self.next()?]; if self.read_annotations {
while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) { let mut annotations = vec![self.next()?];
self.skip()?; while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
annotations.push(self.next()?); self.skip()?;
annotations.push(self.next()?);
}
let v = self.next()?;
assert!(v.annotations().is_empty());
Ok(N::wrap_ann(annotations, v.value_owned()))
} else {
self.next()?;
self.next()
} }
let v = self.next()?;
assert!(v.annotations().is_empty());
Ok(N::wrap_ann(annotations, v.value_owned()))
} else {
self.next()?;
self.next()
} }
} (Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
(Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")), (Op::Misc(1), arg) => {
(Op::Misc(1), arg) => { let n = self.wirelength(arg)?;
let n = self.wirelength(arg)?; match self.placeholders.and_then(|m| m.get(&n)) {
match self.placeholders.and_then(|m| m.get(&n)) { Some(v) => Ok(v.clone().wrap()),
Some(v) => Ok(v.clone().wrap()), None => Err(Error::Syntax("Invalid Preserves placeholder")),
None => Err(Error::Syntax("Invalid Preserves placeholder")), }
} }
} (Op::Misc(2), arg) => {
(Op::Misc(2), arg) => { match Op::try_from(arg)? {
match Op::try_from(arg)? { Op::Atom(minor) => self.binarystream(minor),
Op::Atom(minor) => self.binarystream(minor), Op::Compound(minor) => self.valuestream(minor),
Op::Compound(minor) => self.valuestream(minor), _ => Err(Error::Syntax("Invalid format C start byte")),
_ => Err(Error::Syntax("Invalid format C start byte")), }
} }
} (Op::Misc(3), arg) => {
(Op::Misc(3), arg) => { let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) }; Ok(Value::from(n).wrap())
Ok(Value::from(n).wrap()) }
} (Op::Misc(_), _) => unreachable!(),
(Op::Misc(_), _) => unreachable!(), (Op::Atom(minor), arg) => {
(Op::Atom(minor), arg) => { let count = self.wirelength(arg)?;
let count = self.wirelength(arg)?; Self::decodebinary(minor, self.readbytes(count)?)
Self::decodebinary(minor, self.readbytes(count)?) }
} (Op::Compound(minor), arg) => {
(Op::Compound(minor), arg) => { let count = self.wirelength(arg)?;
let count = self.wirelength(arg)?; Self::decodecompound(minor, self.readvalues(count)?)
Self::decodecompound(minor, self.readvalues(count)?) }
(Op::Reserved(3), 15) => continue,
(Op::Reserved(_), _) => Err(InvalidOp.into()),
} }
} }
} }

View File

@ -70,6 +70,10 @@ impl<'a, 'b, W: Write, N: NestedValue<D>, D: Domain> Encoder<'a, 'b, W, N, D> {
self.write_all(bs) self.write_all(bs)
} }
/// Writes a no-op marker by emitting the op `Op::Reserved(3)` with argument 15,
/// the pair that `Decoder::next` recognises as a no-op and skips over.
///
/// Returns whatever `write_op` returns.
/// NOTE(review): presumably this yields the single byte 0xFF on the wire; confirm
/// against `write_op`.
pub fn write_noop(&mut self) -> Result {
self.write_op(Op::Reserved(3), 15)
}
pub fn write(&mut self, v: &N) -> Result { pub fn write(&mut self, v: &N) -> Result {
for ann in v.annotations() { for ann in v.annotations() {
self.write_header(Op::Misc(0), 5)?; self.write_header(Op::Misc(0), 5)?;

View File

@ -22,6 +22,10 @@ pre, code { background-color: #eee; font-family: "DejaVu Sans Mono", monospace;
code { font-size: 75%; } code { font-size: 75%; }
pre { padding: 0.33rem; line-height: 1; overflow-x: auto; } pre { padding: 0.33rem; line-height: 1; overflow-x: auto; }
p, ul, table {
margin: 1em 0;
}
body { body {
counter-reset: section 0 subsection 0 appendix 0; counter-reset: section 0 subsection 0 appendix 0;
} }

View File

@ -4,7 +4,7 @@ title: "Preserves: an Expressive Data Language"
--- ---
Tony Garnock-Jones <tonyg@leastfixedpoint.com> Tony Garnock-Jones <tonyg@leastfixedpoint.com>
August 2019. Version 0.0.6. May 2020. Version 0.0.7.
[sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt [sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt
[spki]: http://world.std.com/~cme/html/spki.html [spki]: http://world.std.com/~cme/html/spki.html
@ -470,11 +470,15 @@ representation.[^some-encodings-unused]
| 0 | 3 | | (format A) Certain small `SignedInteger`s | | 0 | 3 | | (format A) Certain small `SignedInteger`s |
| 1 | | | (format B) An `Atom` with variable-length binary representation | | 1 | | | (format B) An `Atom` with variable-length binary representation |
| 2 | | | (format B) A `Compound` with variable-length representation | | 2 | | | (format B) A `Compound` with variable-length representation |
| 3 | 3 | 15 | (format A) 0xFF byte; no-op |
#### Encoding data of type-specific length (format A). #### Encoding data of type-specific length (format A).
Each type of data defines its own rules for this format. Each type of data defines its own rules for this format.
Of particular note is lead byte `0xFF`, which is a no-op byte acting
as a kind of pseudo-whitespace in a binary-syntax encoding.
#### Encoding data of known length (format B). #### Encoding data of known length (format B).
Format B is used where the length `l` of the `Value` to be encoded is Format B is used where the length `l` of the `Value` to be encoded is
@ -896,10 +900,11 @@ endless sequence of zero length chunks, appearing to make progress but
not actually doing so. Implementations *MUST* reject zero length not actually doing so. Implementations *MUST* reject zero length
chunks when decoding, and *MUST NOT* produce them when encoding. chunks when decoding, and *MUST NOT* produce them when encoding.
**Whitespace.** Similarly, the textual format for `Value`s allows **Whitespace and no-ops.** Similarly, the binary format allows `0xFF`
arbitrary whitespace in many positions. In streaming transfer no-ops and the textual format allows arbitrary whitespace in many
situations, consider optional restrictions on the amount of positions. In streaming transfer situations, consider optional
consecutive whitespace that may appear in a serialized `Value`. restrictions on the amount of consecutive whitespace or the number of
consecutive no-ops that may appear.
**Annotations.** Also similarly, in modes where a `Value` is being **Annotations.** Also similarly, in modes where a `Value` is being
read while annotations are skipped, an endless sequence of annotations read while annotations are skipped, an endless sequence of annotations
@ -922,6 +927,24 @@ The text syntax for `Boolean`s, `Symbol`s, and `ByteString`s is
directly inspired by [Racket](https://racket-lang.org/)'s lexical directly inspired by [Racket](https://racket-lang.org/)'s lexical
syntax. syntax.
## Appendix. Autodetection of textual or binary syntax
Whitespace characters `0x09` (ASCII HT (tab)), `0x0A` (LF), `0x0D`
(CR), `0x20` (space) and `0x2C` (comma) are ignored at the start of a
textual-syntax Preserves `Document`, and their UTF-8 encodings are
reserved lead byte values in binary-syntax Preserves.
The byte `0xFF`, signifying a no-op in binary-syntax Preserves, has no
meaning in either 7-bit ASCII or UTF-8, and therefore cannot appear in
a valid textual-syntax Preserves `Document`.
If applications prefix their textual-syntax documents with e.g. a
space or newline character, and their binary-syntax documents with a
`0xFF` byte, consumers of these documents may reliably autodetect the
syntax being used. In a network protocol supporting this kind of
autodetection, clients may transmit LF or `0xFF` to select text or
binary syntax, respectively.
## Appendix. Table of lead byte values ## Appendix. Table of lead byte values
00 - False 00 - False
@ -930,9 +953,9 @@ syntax.
03 - Double 03 - Double
04 - End stream 04 - End stream
05 - Annotation 05 - Annotation
(0x) RESERVED 06-0F (0x) RESERVED 06-0F (NB. 09, 0A, 0D specially reserved)
1x - Placeholder 1x - Placeholder
2x - Start Stream 2x - Start Stream (NB. 20, 2C specially reserved)
3x - Small integers 0..12,-3..-1 3x - Small integers 0..12,-3..-1
4x - SignedInteger 4x - SignedInteger
@ -948,7 +971,8 @@ syntax.
(Cx) RESERVED C0-CF (Cx) RESERVED C0-CF
(Dx) RESERVED D0-DF (Dx) RESERVED D0-DF
(Ex) RESERVED E0-EF (Ex) RESERVED E0-EF
(Fx) RESERVED F0-FF (Fx) RESERVED F0-FE
FF No-op
## Appendix. Bit fields within lead byte values ## Appendix. Bit fields within lead byte values
@ -962,13 +986,25 @@ syntax.
00 00 0100 End Stream (to match a previous Start Stream) 00 00 0100 End Stream (to match a previous Start Stream)
00 00 0101 Annotation; two more Reprs follow 00 00 0101 Annotation; two more Reprs follow
00 00 1001 (ASCII HT (tab)) \
00 00 1010 (ASCII LF) |- Reserved: may be used to indicate
00 00 1101 (ASCII CR) / use of text encoding
00 01 mmmm Placeholder; m is the placeholder number 00 01 mmmm Placeholder; m is the placeholder number
00 10 ttnn Start Stream <tt,nn> 00 10 ttnn Start Stream <tt,nn>
When tt = 00 --> error When tt = 00 --> error
When nn = 00 --> (ASCII space)
Reserved: may be used to indicate
use of text encoding
otherwise --> error
01 --> each chunk is a ByteString 01 --> each chunk is a ByteString
10 --> each chunk is a single encoded Value 10 --> each chunk is a single encoded Value
11 --> error (RESERVED) 11 --> error (RESERVED)
When nn = 00 --> (ASCII comma)
Reserved: may be used to indicate
use of text encoding
otherwise --> error
00 11 xxxx Small integers 0..12,-3..-1 00 11 xxxx Small integers 0..12,-3..-1
@ -983,6 +1019,7 @@ syntax.
10 11 mmmm Dictionary 10 11 mmmm Dictionary
11 nn mmmm error, RESERVED 11 nn mmmm error, RESERVED
11 11 1111 no-op; unambiguous indication of binary Preserves format
Where `mmmm` appears, interpret it as an unsigned 4-bit number `m`. If Where `mmmm` appears, interpret it as an unsigned 4-bit number `m`. If
`m`<15, let `l`=`m`. Otherwise, `m`=15; let `l` be the result of `m`<15, let `l`=`m`. Otherwise, `m`=15; let `l` be the result of

Binary file not shown.

View File

@ -1,4 +1,46 @@
@<EmacsMode "-*- preserves -*-"> @<EmacsMode "-*- preserves -*-">
@<Documentation [
"Individual test cases may be any of the following record types:"
<TestCaseTypes {
Test: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 9 11}}
NondeterministicTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 10}}
StreamingTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 9 }}
DecodeTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8}}
ParseError: {fields: [text] expectations: {12}}
ParseShort: {fields: [text] expectations: {13}}
DecodeError: {fields: [bytes] expectations: {14}}
DecodeShort: {fields: [bytes] expectations: {15}}
}>
"In each test, let value = strip(annotatedValue),",
" forward = value,",
" back = value,"
"except where test-case-specific values of `forward` and/or `back` are provided"
"by the executing harness (of particular importance for `StreamingTest`s),"
"and check the following numbered expectations according to the table above:"
<TestCaseExpectations {
1: "value = back"
2: "strip(decodeBinary(encodeBinary(value))) = back"
3: "strip(decodeBinary(encodeBinary(forward))) = back"
4: "strip(decodeBinary(binary)) = back"
5: "decodeBinary(binary) = annotatedValue"
6: "decodeBinary(encodeBinary(annotatedValue)) = annotatedValue"
7: "decodeText(encodeText(value)) = back"
8: "decodeText(encodeText(forward)) = back"
9: "encodeBinary(forward) = binary"
10: "canonicallyEncodeBinary(forward) = binary"
11: "encodeBinary(annotatedValue) = binary"
12: "decodeText(text) fails with a syntax error (NB. never with premature EOF)"
13: "decodeText(text) fails signalling premature EOF (NB. never with a syntax error)"
14: "decodeBinary(bytes) fails with a syntax error (NB. never with premature EOF)"
15: "decodeBinary(bytes) fails signalling premature EOF (NB. never with a syntax error)"
}>
"Each `StreamingTest` will need to have an implementation-specific `forward`"
"supplied that encodes to the specific format C byte sequences in `binary`."
"Alternatively, implementations may choose to skip expectation 9 for"
"`StreamingTest`s, treating them like `DecodeTest`s."
]>
<TestCases <TestCases
<ExpectedPlaceholderMapping { <ExpectedPlaceholderMapping {
0: discard 0: discard
@ -76,6 +118,12 @@
list7: <Test #hex{93 73616263 732e2e2e 73646566} [abc ... def]> list7: <Test #hex{93 73616263 732e2e2e 73646566} [abc ... def]>
list8: @"Missing close bracket" <ParseShort "["> list8: @"Missing close bracket" <ParseShort "[">
list9: @"Unexpected close bracket" <ParseError "]"> list9: @"Unexpected close bracket" <ParseError "]">
noop0: <DecodeTest #hex{ff10} discard>
noop1: <DecodeTest #hex{ff31} 1>
noop2: <DecodeTest #hex{ffffff42ff00} -256>
noop3: <DecodeTest #hex{ff05ff53616263ff42ff00} @"abc" -256>
noop4: @"No-ops must be followed by something" <DecodeShort #hex{ffffff}>
noop5: @"No input at all is considered short" <DecodeShort #hex{}>
placeholder0: <Test #hex{10} discard> placeholder0: <Test #hex{10} discard>
placeholder1: <Test #hex{11} capture> placeholder1: <Test #hex{11} capture>
placeholder2: <Test #hex{12} observe> placeholder2: <Test #hex{12} observe>
@ -115,6 +163,8 @@
symbol0: <Test #hex{70} ||> symbol0: <Test #hex{70} ||>
symbol1: <StreamingTest #hex{27626865626c6c616f04} hello> symbol1: <StreamingTest #hex{27626865626c6c616f04} hello>
symbol2: <Test #hex{7568656c6c6f} hello> symbol2: <Test #hex{7568656c6c6f} hello>
whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " ">
whitespace1: @"No input at all is considered short" <ParseShort "">
value1: <Test #"\x66corymb" #value#"fcorymb"> value1: <Test #"\x66corymb" #value#"fcorymb">
value2: <Test #"\x01" #value#"\x01"> value2: <Test #"\x01" #value#"\x01">
value3: <Test #"\x01" #value#base64{AQ}> value3: <Test #"\x01" #value#base64{AQ}>