Autodetectability of binary vs text; documented test case schema a little
This commit is contained in:
parent
ebbd268166
commit
8e0ab95d82
|
@ -157,52 +157,59 @@ class Decoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
next() {
|
next() {
|
||||||
const [major, minor, arg] = this.nextop();
|
while (true) { // we loop because we may need to consume an arbitrary number of no-ops
|
||||||
switch (major) {
|
const [major, minor, arg] = this.nextop();
|
||||||
case 0:
|
switch (major) {
|
||||||
switch (minor) {
|
case 0:
|
||||||
case 0:
|
switch (minor) {
|
||||||
switch (arg) {
|
case 0:
|
||||||
case 0: return this.wrap(false);
|
switch (arg) {
|
||||||
case 1: return this.wrap(true);
|
case 0: return this.wrap(false);
|
||||||
case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false)));
|
case 1: return this.wrap(true);
|
||||||
case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false)));
|
case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false)));
|
||||||
case 4: throw new DecodeError("Unexpected end-of-stream marker");
|
case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false)));
|
||||||
case 5: {
|
case 4: throw new DecodeError("Unexpected end-of-stream marker");
|
||||||
const a = this.next();
|
case 5: {
|
||||||
const v = this.next();
|
const a = this.next();
|
||||||
return this.unshiftAnnotation(a, v);
|
const v = this.next();
|
||||||
|
return this.unshiftAnnotation(a, v);
|
||||||
|
}
|
||||||
|
default: throw new DecodeError("Illegal format A lead byte");
|
||||||
}
|
}
|
||||||
default: throw new DecodeError("Illegal format A lead byte");
|
case 1: {
|
||||||
|
const n = this.wirelength(arg);
|
||||||
|
const v = this.placeholders.get(n, void 0);
|
||||||
|
if (typeof v === 'undefined') {
|
||||||
|
const e = new DecodeError("Invalid Preserves placeholder");
|
||||||
|
e.irritant = n;
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
return this.wrap(v);
|
||||||
}
|
}
|
||||||
case 1: {
|
case 2: {
|
||||||
const n = this.wirelength(arg);
|
const t = arg >> 2;
|
||||||
const v = this.placeholders.get(n, void 0);
|
const n = arg & 3;
|
||||||
if (typeof v === 'undefined') {
|
switch (t) {
|
||||||
const e = new DecodeError("Invalid Preserves placeholder");
|
case 1: return this.wrap(this.binarystream(n));
|
||||||
e.irritant = n;
|
case 2: return this.wrap(this.valuestream(n));
|
||||||
throw e;
|
default: throw new DecodeError("Invalid format C start byte");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return this.wrap(v);
|
case 3:
|
||||||
|
return this.wrap((arg > 12) ? arg - 16 : arg);
|
||||||
}
|
}
|
||||||
case 2: {
|
case 1:
|
||||||
const t = arg >> 2;
|
return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg)))));
|
||||||
const n = arg & 3;
|
case 2:
|
||||||
switch (t) {
|
return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg))));
|
||||||
case 1: return this.wrap(this.binarystream(n));
|
case 3:
|
||||||
case 2: return this.wrap(this.valuestream(n));
|
if (minor === 3 && arg === 15) {
|
||||||
default: throw new DecodeError("Invalid format C start byte");
|
// no-op.
|
||||||
}
|
continue;
|
||||||
|
} else {
|
||||||
|
throw new DecodeError("Invalid lead byte (major 3)");
|
||||||
}
|
}
|
||||||
case 3:
|
}
|
||||||
return this.wrap((arg > 12) ? arg - 16 : arg);
|
|
||||||
}
|
|
||||||
case 1:
|
|
||||||
return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg)))));
|
|
||||||
case 2:
|
|
||||||
return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg))));
|
|
||||||
case 3:
|
|
||||||
throw new DecodeError("Invalid lead byte (major 3)");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -315,6 +322,10 @@ class Encoder {
|
||||||
this.header(0, 0, 4);
|
this.header(0, 0, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
encodenoop() {
|
||||||
|
this.leadbyte(3, 3, 15);
|
||||||
|
}
|
||||||
|
|
||||||
push(v) {
|
push(v) {
|
||||||
const placeholder = this.placeholders.get(v, void 0);
|
const placeholder = this.placeholders.get(v, void 0);
|
||||||
if (typeof placeholder !== 'undefined') {
|
if (typeof placeholder !== 'undefined') {
|
||||||
|
|
|
@ -165,12 +165,12 @@ describe('common test suite', () => {
|
||||||
it('should go back', () => assert(is(DS(binaryForm), back)));
|
it('should go back', () => assert(is(DS(binaryForm), back)));
|
||||||
it('should go back with annotations',
|
it('should go back with annotations',
|
||||||
() => assert(is(D(E(annotatedTextForm)), annotatedTextForm)));
|
() => assert(is(D(E(annotatedTextForm)), annotatedTextForm)));
|
||||||
if (variety !== 'nondeterministic') {
|
if (variety !== 'decode' && variety !== 'nondeterministic') {
|
||||||
it('should encode correctly',
|
it('should encode correctly',
|
||||||
() => assert(is(E(forward), binaryForm),
|
() => assert(is(E(forward), binaryForm),
|
||||||
E(forward) + ' ' + binaryForm));
|
E(forward) + ' ' + binaryForm));
|
||||||
}
|
}
|
||||||
if (variety !== 'nondeterministic' && variety !== 'streaming') {
|
if (variety !== 'decode' && variety !== 'nondeterministic' && variety !== 'streaming') {
|
||||||
it('should encode correctly with annotations',
|
it('should encode correctly with annotations',
|
||||||
() => assert(is(E(annotatedTextForm), binaryForm),
|
() => assert(is(E(annotatedTextForm), binaryForm),
|
||||||
E(annotatedTextForm) + ' ' + binaryForm));
|
E(annotatedTextForm) + ' ' + binaryForm));
|
||||||
|
@ -192,6 +192,9 @@ describe('common test suite', () => {
|
||||||
case Symbol.for('NondeterministicTest'):
|
case Symbol.for('NondeterministicTest'):
|
||||||
runTestCase('nondeterministic', tName, t.get(0).strip(), t.get(1));
|
runTestCase('nondeterministic', tName, t.get(0).strip(), t.get(1));
|
||||||
break;
|
break;
|
||||||
|
case Symbol.for('DecodeTest'):
|
||||||
|
runTestCase('decode', tName, t.get(0).strip(), t.get(1));
|
||||||
|
break;
|
||||||
case Symbol.for('DecodeError'):
|
case Symbol.for('DecodeError'):
|
||||||
describe(tName, () => {
|
describe(tName, () => {
|
||||||
it('should fail with DecodeError', () => {
|
it('should fail with DecodeError', () => {
|
||||||
|
|
|
@ -405,39 +405,44 @@ class Decoder(Codec):
|
||||||
return v
|
return v
|
||||||
|
|
||||||
def next(self):
|
def next(self):
|
||||||
(major, minor, arg) = self.nextop()
|
while True: # we loop because we may need to consume an arbitrary number of no-ops
|
||||||
if major == 0:
|
(major, minor, arg) = self.nextop()
|
||||||
if minor == 0:
|
if major == 0:
|
||||||
if arg == 0: return self.wrap(False)
|
if minor == 0:
|
||||||
if arg == 1: return self.wrap(True)
|
if arg == 0: return self.wrap(False)
|
||||||
if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0]))
|
if arg == 1: return self.wrap(True)
|
||||||
if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
|
if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0]))
|
||||||
if arg == 4: raise DecodeError('Unexpected end-of-stream marker')
|
if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
|
||||||
if arg == 5:
|
if arg == 4: raise DecodeError('Unexpected end-of-stream marker')
|
||||||
a = self.next()
|
if arg == 5:
|
||||||
v = self.next()
|
a = self.next()
|
||||||
return self.unshift_annotation(a, v)
|
v = self.next()
|
||||||
raise DecodeError('Invalid format A encoding')
|
return self.unshift_annotation(a, v)
|
||||||
elif minor == 1:
|
raise DecodeError('Invalid format A encoding')
|
||||||
n = self.wirelength(arg)
|
elif minor == 1:
|
||||||
v = self.placeholders.get(n, None)
|
n = self.wirelength(arg)
|
||||||
if v is None:
|
v = self.placeholders.get(n, None)
|
||||||
raise DecodeError('Invalid Preserves placeholder')
|
if v is None:
|
||||||
return self.wrap(v)
|
raise DecodeError('Invalid Preserves placeholder')
|
||||||
elif minor == 2:
|
return self.wrap(v)
|
||||||
t = arg >> 2
|
elif minor == 2:
|
||||||
n = arg & 3
|
t = arg >> 2
|
||||||
if t == 1: return self.wrap(self.binarystream(n))
|
n = arg & 3
|
||||||
if t == 2: return self.wrap(self.valuestream(n))
|
if t == 1: return self.wrap(self.binarystream(n))
|
||||||
raise DecodeError('Invalid format C start byte')
|
if t == 2: return self.wrap(self.valuestream(n))
|
||||||
else: # minor == 3
|
raise DecodeError('Invalid format C start byte')
|
||||||
return self.wrap(arg - 16 if arg > 12 else arg)
|
else: # minor == 3
|
||||||
elif major == 1:
|
return self.wrap(arg - 16 if arg > 12 else arg)
|
||||||
return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg))))
|
elif major == 1:
|
||||||
elif major == 2:
|
return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg))))
|
||||||
return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg))))
|
elif major == 2:
|
||||||
else: # major == 3
|
return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg))))
|
||||||
raise DecodeError('Invalid lead byte (major 3)')
|
else: # major == 3
|
||||||
|
if minor == 3 and arg == 15:
|
||||||
|
# no-op.
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
raise DecodeError('Invalid lead byte (major 3)')
|
||||||
|
|
||||||
def try_next(self):
|
def try_next(self):
|
||||||
start = self.index
|
start = self.index
|
||||||
|
@ -499,6 +504,9 @@ class Encoder(Codec):
|
||||||
for i in items: self.append(i)
|
for i in items: self.append(i)
|
||||||
self.leadbyte(0, 0, 4)
|
self.leadbyte(0, 0, 4)
|
||||||
|
|
||||||
|
def encodenoop(self):
|
||||||
|
self.leadbyte(3, 3, 15)
|
||||||
|
|
||||||
def append(self, v):
|
def append(self, v):
|
||||||
try:
|
try:
|
||||||
placeholder = self.placeholders.get(v, None)
|
placeholder = self.placeholders.get(v, None)
|
||||||
|
|
|
@ -249,9 +249,9 @@ def install_test(d, variant, tName, binaryForm, annotatedTextForm):
|
||||||
add_method(d, tName, test_forward)
|
add_method(d, tName, test_forward)
|
||||||
add_method(d, tName, test_back)
|
add_method(d, tName, test_back)
|
||||||
add_method(d, tName, test_back_ann)
|
add_method(d, tName, test_back_ann)
|
||||||
if variant not in ['nondeterministic']:
|
if variant not in ['decode', 'nondeterministic']:
|
||||||
add_method(d, tName, test_encode)
|
add_method(d, tName, test_encode)
|
||||||
if variant not in ['nondeterministic', 'streaming']:
|
if variant not in ['decode', 'nondeterministic', 'streaming']:
|
||||||
add_method(d, tName, test_encode_ann)
|
add_method(d, tName, test_encode_ann)
|
||||||
|
|
||||||
def install_exn_test(d, tName, bs, check_proc):
|
def install_exn_test(d, tName, bs, check_proc):
|
||||||
|
@ -287,6 +287,8 @@ class CommonTestSuite(unittest.TestCase):
|
||||||
install_test(locals(), 'streaming', tName, t[0].strip(), t[1])
|
install_test(locals(), 'streaming', tName, t[0].strip(), t[1])
|
||||||
elif t.key == Symbol('NondeterministicTest'):
|
elif t.key == Symbol('NondeterministicTest'):
|
||||||
install_test(locals(), 'nondeterministic', tName, t[0].strip(), t[1])
|
install_test(locals(), 'nondeterministic', tName, t[0].strip(), t[1])
|
||||||
|
elif t.key == Symbol('DecodeTest'):
|
||||||
|
install_test(locals(), 'decode', tName, t[0].strip(), t[1])
|
||||||
elif t.key == Symbol('DecodeError'):
|
elif t.key == Symbol('DecodeError'):
|
||||||
def expected_err(self, e):
|
def expected_err(self, e):
|
||||||
self.assertIsInstance(e, DecodeError)
|
self.assertIsInstance(e, DecodeError)
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
preserve->string
|
preserve->string
|
||||||
current-value->placeholder
|
current-value->placeholder
|
||||||
current-placeholder->value
|
current-placeholder->value
|
||||||
|
prepend-noop
|
||||||
encode
|
encode
|
||||||
decode
|
decode
|
||||||
decode-syntax
|
decode-syntax
|
||||||
|
@ -115,6 +116,9 @@
|
||||||
(define current-value->placeholder (make-parameter (lambda (v) #f)))
|
(define current-value->placeholder (make-parameter (lambda (v) #f)))
|
||||||
(define current-placeholder->value (make-parameter (lambda (v) (void))))
|
(define current-placeholder->value (make-parameter (lambda (v) (void))))
|
||||||
|
|
||||||
|
(define (prepend-noop encoded-value)
|
||||||
|
(bit-string-append #"\xff" encoded-value))
|
||||||
|
|
||||||
(define (encode v)
|
(define (encode v)
|
||||||
(bit-string->bytes (bit-string (v :: (wire-value)))))
|
(bit-string->bytes (bit-string (v :: (wire-value)))))
|
||||||
|
|
||||||
|
@ -370,6 +374,9 @@
|
||||||
(decode-compound minor fields rest (nil-annotation ks bs) kf))
|
(decode-compound minor fields rest (nil-annotation ks bs) kf))
|
||||||
kf))
|
kf))
|
||||||
|
|
||||||
|
([ (= #b11111111 :: bits 8) (rest :: binary) ]
|
||||||
|
(decode-one rest ks kf))
|
||||||
|
|
||||||
(else (kf))))
|
(else (kf))))
|
||||||
|
|
||||||
(decode-one input ks kf))
|
(decode-one input ks kf))
|
||||||
|
@ -1110,25 +1117,28 @@
|
||||||
(match (hash-ref samples-txt-expected t-name text-form)
|
(match (hash-ref samples-txt-expected t-name text-form)
|
||||||
[(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations
|
[(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations
|
||||||
[v (values v v #t)]))
|
[v (values v v #t)]))
|
||||||
(check-equal? text-form back loc)
|
(check-equal? text-form back loc) ;; expectation 1
|
||||||
(check-equal? (d-strip (encode text-form)) back loc)
|
(check-equal? (d-strip (encode text-form)) back loc) ;; expectation 2
|
||||||
(check-equal? (d-strip (encode forward)) back loc)
|
(check-equal? (d-strip (encode forward)) back loc) ;; expectation 3
|
||||||
(check-equal? (d-strip binary-form) back loc)
|
(check-equal? (d-strip binary-form) back loc) ;; expectation 4
|
||||||
(check-equal? (d binary-form) annotated-text-form loc)
|
(check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5
|
||||||
(check-equal? (d (encode annotated-text-form)) annotated-text-form loc)
|
(check-equal? (d (encode annotated-text-form)) annotated-text-form loc) ;; expectation 6
|
||||||
(check-equal? (string->preserve (preserve->string text-form)) back loc)
|
(check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7
|
||||||
(check-equal? (string->preserve (preserve->string forward)) back loc)
|
(check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8
|
||||||
(check-equal? (string->preserve-syntax (preserve->string annotated-text-form))
|
(check-equal? (string->preserve-syntax (preserve->string annotated-text-form)) ;; similar to 8
|
||||||
annotated-text-form
|
annotated-text-form
|
||||||
loc)
|
loc)
|
||||||
(when (or (not (memq variety '(nondeterministic)))
|
(when (and (not (memq variety '(decode)))
|
||||||
(and can-execute-nondet-with-canonicalization?))
|
(or (not (memq variety '(nondeterministic)))
|
||||||
|
(and can-execute-nondet-with-canonicalization?)))
|
||||||
|
;; expectations 9 and 10
|
||||||
(parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f)))
|
(parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f)))
|
||||||
(check-equal? (encode forward) binary-form loc)))
|
(check-equal? (encode forward) binary-form loc)))
|
||||||
(unless (memq variety '(nondeterministic streaming))
|
(unless (memq variety '(decode nondeterministic streaming))
|
||||||
|
;; expectation 11
|
||||||
(check-equal? (encode annotated-text-form) binary-form loc)))
|
(check-equal? (encode annotated-text-form) binary-form loc)))
|
||||||
|
|
||||||
(define-runtime-path tests-path "../../../tests")
|
(define-runtime-path tests-path "../../../../tests")
|
||||||
(let* ((path (build-path tests-path "samples.txt"))
|
(let* ((path (build-path tests-path "samples.txt"))
|
||||||
(testfile (call-with-input-file path
|
(testfile (call-with-input-file path
|
||||||
(lambda (p)
|
(lambda (p)
|
||||||
|
@ -1158,6 +1168,8 @@
|
||||||
(run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)]
|
(run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)]
|
||||||
[`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form)
|
[`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form)
|
||||||
(run-test-case 'streaming t-name loc binary-form annotated-text-form)]
|
(run-test-case 'streaming t-name loc binary-form annotated-text-form)]
|
||||||
|
[`#s(DecodeTest ,(strip-annotations binary-form) ,annotated-text-form)
|
||||||
|
(run-test-case 'decode t-name loc binary-form annotated-text-form)]
|
||||||
[`#s(ParseError ,(strip-annotations str))
|
[`#s(ParseError ,(strip-annotations str))
|
||||||
(with-handlers [(exn:fail:read:eof?
|
(with-handlers [(exn:fail:read:eof?
|
||||||
(lambda (e) (fail-test "Unexpected EOF: ~e" e)))
|
(lambda (e) (fail-test "Unexpected EOF: ~e" e)))
|
||||||
|
|
|
@ -307,6 +307,7 @@ mod samples_tests {
|
||||||
Test(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
|
Test(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
|
||||||
NondeterministicTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
|
NondeterministicTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
|
||||||
StreamingTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
|
StreamingTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
|
||||||
|
DecodeTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
|
||||||
ParseError(String),
|
ParseError(String),
|
||||||
ParseShort(String),
|
ParseShort(String),
|
||||||
DecodeError(#[serde(with = "serde_bytes")] Vec<u8>),
|
DecodeError(#[serde(with = "serde_bytes")] Vec<u8>),
|
||||||
|
@ -341,6 +342,10 @@ mod samples_tests {
|
||||||
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
|
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
|
||||||
assert_eq!(&codec.decode(&mut &bin[..])?, val);
|
assert_eq!(&codec.decode(&mut &bin[..])?, val);
|
||||||
}
|
}
|
||||||
|
TestCase::DecodeTest(ref bin, ref val) => {
|
||||||
|
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
|
||||||
|
assert_eq!(&codec.decode(&mut &bin[..])?, val);
|
||||||
|
}
|
||||||
TestCase::ParseError(_) => (),
|
TestCase::ParseError(_) => (),
|
||||||
TestCase::ParseShort(_) => (),
|
TestCase::ParseShort(_) => (),
|
||||||
TestCase::DecodeError(ref bin) => {
|
TestCase::DecodeError(ref bin) => {
|
||||||
|
|
|
@ -6,6 +6,7 @@ pub enum Op {
|
||||||
Misc(u8),
|
Misc(u8),
|
||||||
Atom(AtomMinor),
|
Atom(AtomMinor),
|
||||||
Compound(CompoundMinor),
|
Compound(CompoundMinor),
|
||||||
|
Reserved(u8),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
@ -18,6 +19,7 @@ impl TryFrom<u8> for Op {
|
||||||
0 => Ok(Self::Misc(v & 3)),
|
0 => Ok(Self::Misc(v & 3)),
|
||||||
1 => Ok(Self::Atom(AtomMinor::try_from(v & 3).unwrap())),
|
1 => Ok(Self::Atom(AtomMinor::try_from(v & 3).unwrap())),
|
||||||
2 => Ok(Self::Compound(CompoundMinor::try_from(v & 3).unwrap())),
|
2 => Ok(Self::Compound(CompoundMinor::try_from(v & 3).unwrap())),
|
||||||
|
3 => Ok(Self::Reserved(v & 3)),
|
||||||
_ => Err(InvalidOp),
|
_ => Err(InvalidOp),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -29,6 +31,7 @@ impl From<Op> for u8 {
|
||||||
Op::Misc(minor) => minor & 3,
|
Op::Misc(minor) => minor & 3,
|
||||||
Op::Atom(minor) => (1 << 2) | ((minor as u8) & 3),
|
Op::Atom(minor) => (1 << 2) | ((minor as u8) & 3),
|
||||||
Op::Compound(minor) => (2 << 2) | ((minor as u8) & 3),
|
Op::Compound(minor) => (2 << 2) | ((minor as u8) & 3),
|
||||||
|
Op::Reserved(minor) => (3 << 2) | (minor & 3),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -246,59 +246,63 @@ impl<'a, 'b, R: Read, N: NestedValue<D>, D: Domain> Decoder<'a, 'b, R, N, D> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn next(&mut self) -> Result<N> {
|
pub fn next(&mut self) -> Result<N> {
|
||||||
match self.nextop()? {
|
loop {
|
||||||
(Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
|
return match self.nextop()? {
|
||||||
(Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
|
(Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
|
||||||
(Op::Misc(0), 2) => {
|
(Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
|
||||||
let bs: &[u8] = &self.readbytes(4)?;
|
(Op::Misc(0), 2) => {
|
||||||
Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
|
let bs: &[u8] = &self.readbytes(4)?;
|
||||||
}
|
Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
|
||||||
(Op::Misc(0), 3) => {
|
}
|
||||||
let bs: &[u8] = &self.readbytes(8)?;
|
(Op::Misc(0), 3) => {
|
||||||
Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
|
let bs: &[u8] = &self.readbytes(8)?;
|
||||||
}
|
Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
|
||||||
(Op::Misc(0), 5) => {
|
}
|
||||||
if self.read_annotations {
|
(Op::Misc(0), 5) => {
|
||||||
let mut annotations = vec![self.next()?];
|
if self.read_annotations {
|
||||||
while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
|
let mut annotations = vec![self.next()?];
|
||||||
self.skip()?;
|
while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
|
||||||
annotations.push(self.next()?);
|
self.skip()?;
|
||||||
|
annotations.push(self.next()?);
|
||||||
|
}
|
||||||
|
let v = self.next()?;
|
||||||
|
assert!(v.annotations().is_empty());
|
||||||
|
Ok(N::wrap_ann(annotations, v.value_owned()))
|
||||||
|
} else {
|
||||||
|
self.next()?;
|
||||||
|
self.next()
|
||||||
}
|
}
|
||||||
let v = self.next()?;
|
|
||||||
assert!(v.annotations().is_empty());
|
|
||||||
Ok(N::wrap_ann(annotations, v.value_owned()))
|
|
||||||
} else {
|
|
||||||
self.next()?;
|
|
||||||
self.next()
|
|
||||||
}
|
}
|
||||||
}
|
(Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
|
||||||
(Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
|
(Op::Misc(1), arg) => {
|
||||||
(Op::Misc(1), arg) => {
|
let n = self.wirelength(arg)?;
|
||||||
let n = self.wirelength(arg)?;
|
match self.placeholders.and_then(|m| m.get(&n)) {
|
||||||
match self.placeholders.and_then(|m| m.get(&n)) {
|
Some(v) => Ok(v.clone().wrap()),
|
||||||
Some(v) => Ok(v.clone().wrap()),
|
None => Err(Error::Syntax("Invalid Preserves placeholder")),
|
||||||
None => Err(Error::Syntax("Invalid Preserves placeholder")),
|
}
|
||||||
}
|
}
|
||||||
}
|
(Op::Misc(2), arg) => {
|
||||||
(Op::Misc(2), arg) => {
|
match Op::try_from(arg)? {
|
||||||
match Op::try_from(arg)? {
|
Op::Atom(minor) => self.binarystream(minor),
|
||||||
Op::Atom(minor) => self.binarystream(minor),
|
Op::Compound(minor) => self.valuestream(minor),
|
||||||
Op::Compound(minor) => self.valuestream(minor),
|
_ => Err(Error::Syntax("Invalid format C start byte")),
|
||||||
_ => Err(Error::Syntax("Invalid format C start byte")),
|
}
|
||||||
}
|
}
|
||||||
}
|
(Op::Misc(3), arg) => {
|
||||||
(Op::Misc(3), arg) => {
|
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
|
||||||
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
|
Ok(Value::from(n).wrap())
|
||||||
Ok(Value::from(n).wrap())
|
}
|
||||||
}
|
(Op::Misc(_), _) => unreachable!(),
|
||||||
(Op::Misc(_), _) => unreachable!(),
|
(Op::Atom(minor), arg) => {
|
||||||
(Op::Atom(minor), arg) => {
|
let count = self.wirelength(arg)?;
|
||||||
let count = self.wirelength(arg)?;
|
Self::decodebinary(minor, self.readbytes(count)?)
|
||||||
Self::decodebinary(minor, self.readbytes(count)?)
|
}
|
||||||
}
|
(Op::Compound(minor), arg) => {
|
||||||
(Op::Compound(minor), arg) => {
|
let count = self.wirelength(arg)?;
|
||||||
let count = self.wirelength(arg)?;
|
Self::decodecompound(minor, self.readvalues(count)?)
|
||||||
Self::decodecompound(minor, self.readvalues(count)?)
|
}
|
||||||
|
(Op::Reserved(3), 15) => continue,
|
||||||
|
(Op::Reserved(_), _) => Err(InvalidOp.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,6 +70,10 @@ impl<'a, 'b, W: Write, N: NestedValue<D>, D: Domain> Encoder<'a, 'b, W, N, D> {
|
||||||
self.write_all(bs)
|
self.write_all(bs)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn write_noop(&mut self) -> Result {
|
||||||
|
self.write_op(Op::Reserved(3), 15)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn write(&mut self, v: &N) -> Result {
|
pub fn write(&mut self, v: &N) -> Result {
|
||||||
for ann in v.annotations() {
|
for ann in v.annotations() {
|
||||||
self.write_header(Op::Misc(0), 5)?;
|
self.write_header(Op::Misc(0), 5)?;
|
||||||
|
|
|
@ -22,6 +22,10 @@ pre, code { background-color: #eee; font-family: "DejaVu Sans Mono", monospace;
|
||||||
code { font-size: 75%; }
|
code { font-size: 75%; }
|
||||||
pre { padding: 0.33rem; line-height: 1; overflow-x: auto; }
|
pre { padding: 0.33rem; line-height: 1; overflow-x: auto; }
|
||||||
|
|
||||||
|
p, ul, table {
|
||||||
|
margin: 1em 0;
|
||||||
|
}
|
||||||
|
|
||||||
body {
|
body {
|
||||||
counter-reset: section 0 subsection 0 appendix 0;
|
counter-reset: section 0 subsection 0 appendix 0;
|
||||||
}
|
}
|
||||||
|
|
53
preserves.md
53
preserves.md
|
@ -4,7 +4,7 @@ title: "Preserves: an Expressive Data Language"
|
||||||
---
|
---
|
||||||
|
|
||||||
Tony Garnock-Jones <tonyg@leastfixedpoint.com>
|
Tony Garnock-Jones <tonyg@leastfixedpoint.com>
|
||||||
August 2019. Version 0.0.6.
|
May 2020. Version 0.0.7.
|
||||||
|
|
||||||
[sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt
|
[sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt
|
||||||
[spki]: http://world.std.com/~cme/html/spki.html
|
[spki]: http://world.std.com/~cme/html/spki.html
|
||||||
|
@ -470,11 +470,15 @@ representation.[^some-encodings-unused]
|
||||||
| 0 | 3 | | (format A) Certain small `SignedInteger`s |
|
| 0 | 3 | | (format A) Certain small `SignedInteger`s |
|
||||||
| 1 | | | (format B) An `Atom` with variable-length binary representation |
|
| 1 | | | (format B) An `Atom` with variable-length binary representation |
|
||||||
| 2 | | | (format B) A `Compound` with variable-length representation |
|
| 2 | | | (format B) A `Compound` with variable-length representation |
|
||||||
|
| 3 | 3 | 15 | (format A) 0xFF byte; no-op |
|
||||||
|
|
||||||
#### Encoding data of type-specific length (format A).
|
#### Encoding data of type-specific length (format A).
|
||||||
|
|
||||||
Each type of data defines its own rules for this format.
|
Each type of data defines its own rules for this format.
|
||||||
|
|
||||||
|
Of particular note is lead byte `0xFF`, which is a no-op byte acting
|
||||||
|
as a kind of pseudo-whitespace in a binary-syntax encoding.
|
||||||
|
|
||||||
#### Encoding data of known length (format B).
|
#### Encoding data of known length (format B).
|
||||||
|
|
||||||
Format B is used where the length `l` of the `Value` to be encoded is
|
Format B is used where the length `l` of the `Value` to be encoded is
|
||||||
|
@ -896,10 +900,11 @@ endless sequence of zero length chunks, appearing to make progress but
|
||||||
not actually doing so. Implementations *MUST* reject zero length
|
not actually doing so. Implementations *MUST* reject zero length
|
||||||
chunks when decoding, and *MUST NOT* produce them when encoding.
|
chunks when decoding, and *MUST NOT* produce them when encoding.
|
||||||
|
|
||||||
**Whitespace.** Similarly, the textual format for `Value`s allows
|
**Whitespace and no-ops.** Similarly, the binary format allows `0xFF`
|
||||||
arbitrary whitespace in many positions. In streaming transfer
|
no-ops and the textual format allows arbitrary whitespace in many
|
||||||
situations, consider optional restrictions on the amount of
|
positions. In streaming transfer situations, consider optional
|
||||||
consecutive whitespace that may appear in a serialized `Value`.
|
restrictions on the amount of consecutive whitespace or the number of
|
||||||
|
consecutive no-ops that may appear.
|
||||||
|
|
||||||
**Annotations.** Also similarly, in modes where a `Value` is being
|
**Annotations.** Also similarly, in modes where a `Value` is being
|
||||||
read while annotations are skipped, an endless sequence of annotations
|
read while annotations are skipped, an endless sequence of annotations
|
||||||
|
@ -922,6 +927,24 @@ The text syntax for `Boolean`s, `Symbol`s, and `ByteString`s is
|
||||||
directly inspired by [Racket](https://racket-lang.org/)'s lexical
|
directly inspired by [Racket](https://racket-lang.org/)'s lexical
|
||||||
syntax.
|
syntax.
|
||||||
|
|
||||||
|
## Appendix. Autodetection of textual or binary syntax
|
||||||
|
|
||||||
|
Whitespace characters `0x09` (ASCII HT (tab)), `0x0A` (LF), `0x0D`
|
||||||
|
(CR), `0x20` (space) and `0x2C` (comma) are ignored at the start of a
|
||||||
|
textual-syntax Preserves `Document`, and their UTF-8 encodings are
|
||||||
|
reserved lead byte values in binary-syntax Preserves.
|
||||||
|
|
||||||
|
The byte `0xFF`, signifying a no-op in binary-syntax Preserves, has no
|
||||||
|
meaning in either 7-bit ASCII or UTF-8, and therefore cannot appear in
|
||||||
|
a valid textual-syntax Preserves `Document`.
|
||||||
|
|
||||||
|
If applications prefix their textual-syntax documents with e.g. a
|
||||||
|
space or newline character, and their binary-syntax documents with a
|
||||||
|
`0xFF` byte, consumers of these documents may reliably autodetect the
|
||||||
|
syntax being used. In a network protocol supporting this kind of
|
||||||
|
autodetection, clients may transmit LF or `0xFF` to select text or
|
||||||
|
binary syntax, respectively.
|
||||||
|
|
||||||
## Appendix. Table of lead byte values
|
## Appendix. Table of lead byte values
|
||||||
|
|
||||||
00 - False
|
00 - False
|
||||||
|
@ -930,9 +953,9 @@ syntax.
|
||||||
03 - Double
|
03 - Double
|
||||||
04 - End stream
|
04 - End stream
|
||||||
05 - Annotation
|
05 - Annotation
|
||||||
(0x) RESERVED 06-0F
|
(0x) RESERVED 06-0F (NB. 09, 0A, 0D specially reserved)
|
||||||
1x - Placeholder
|
1x - Placeholder
|
||||||
2x - Start Stream
|
2x - Start Stream (NB. 20, 2C specially reserved)
|
||||||
3x - Small integers 0..12,-3..-1
|
3x - Small integers 0..12,-3..-1
|
||||||
|
|
||||||
4x - SignedInteger
|
4x - SignedInteger
|
||||||
|
@ -948,7 +971,8 @@ syntax.
|
||||||
(Cx) RESERVED C0-CF
|
(Cx) RESERVED C0-CF
|
||||||
(Dx) RESERVED D0-DF
|
(Dx) RESERVED D0-DF
|
||||||
(Ex) RESERVED E0-EF
|
(Ex) RESERVED E0-EF
|
||||||
(Fx) RESERVED F0-FF
|
(Fx) RESERVED F0-FE
|
||||||
|
FF No-op
|
||||||
|
|
||||||
## Appendix. Bit fields within lead byte values
|
## Appendix. Bit fields within lead byte values
|
||||||
|
|
||||||
|
@ -962,13 +986,25 @@ syntax.
|
||||||
00 00 0100 End Stream (to match a previous Start Stream)
|
00 00 0100 End Stream (to match a previous Start Stream)
|
||||||
00 00 0101 Annotation; two more Reprs follow
|
00 00 0101 Annotation; two more Reprs follow
|
||||||
|
|
||||||
|
00 00 1001 (ASCII HT (tab)) \
|
||||||
|
00 00 1010 (ASCII LF) |- Reserved: may be used to indicate
|
||||||
|
00 00 1101 (ASCII CR) / use of text encoding
|
||||||
|
|
||||||
00 01 mmmm Placeholder; m is the placeholder number
|
00 01 mmmm Placeholder; m is the placeholder number
|
||||||
|
|
||||||
00 10 ttnn Start Stream <tt,nn>
|
00 10 ttnn Start Stream <tt,nn>
|
||||||
When tt = 00 --> error
|
When tt = 00 --> error
|
||||||
|
When nn = 00 --> (ASCII space)
|
||||||
|
Reserved: may be used to indicate
|
||||||
|
use of text encoding
|
||||||
|
otherwise --> error
|
||||||
01 --> each chunk is a ByteString
|
01 --> each chunk is a ByteString
|
||||||
10 --> each chunk is a single encoded Value
|
10 --> each chunk is a single encoded Value
|
||||||
11 --> error (RESERVED)
|
11 --> error (RESERVED)
|
||||||
|
When nn = 00 --> (ASCII comma)
|
||||||
|
Reserved: may be used to indicate
|
||||||
|
use of text encoding
|
||||||
|
otherwise --> error
|
||||||
|
|
||||||
00 11 xxxx Small integers 0..12,-3..-1
|
00 11 xxxx Small integers 0..12,-3..-1
|
||||||
|
|
||||||
|
@ -983,6 +1019,7 @@ syntax.
|
||||||
10 11 mmmm Dictionary
|
10 11 mmmm Dictionary
|
||||||
|
|
||||||
11 nn mmmm error, RESERVED
|
11 nn mmmm error, RESERVED
|
||||||
|
11 11 1111 no-op; unambiguous indication of binary Preserves format
|
||||||
|
|
||||||
Where `mmmm` appears, interpret it as an unsigned 4-bit number `m`. If
|
Where `mmmm` appears, interpret it as an unsigned 4-bit number `m`. If
|
||||||
`m`<15, let `l`=`m`. Otherwise, `m`=15; let `l` be the result of
|
`m`<15, let `l`=`m`. Otherwise, `m`=15; let `l` be the result of
|
||||||
|
|
Binary file not shown.
|
@ -1,4 +1,46 @@
|
||||||
@<EmacsMode "-*- preserves -*-">
|
@<EmacsMode "-*- preserves -*-">
|
||||||
|
@<Documentation [
|
||||||
|
"Individual test cases may be any of the following record types:"
|
||||||
|
<TestCaseTypes {
|
||||||
|
Test: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 9 11}}
|
||||||
|
NondeterministicTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 10 11}}
|
||||||
|
StreamingTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 9 }}
|
||||||
|
DecodeTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8}}
|
||||||
|
ParseError: {fields: [text] expectations: {12}}
|
||||||
|
ParseShort: {fields: [text] expectations: {13}}
|
||||||
|
DecodeError: {fields: [bytes] expectations: {14}}
|
||||||
|
DecodeShort: {fields: [bytes] expectations: {15}}
|
||||||
|
}>
|
||||||
|
"In each test, let value = strip(annotatedValue),",
|
||||||
|
" forward = value,",
|
||||||
|
" back = value,"
|
||||||
|
"except where test-case-specific values of `forward` and/or `back` are provided"
|
||||||
|
"by the executing harness (of particular importance for `StreamingTest`s),"
|
||||||
|
"and check the following numbered expectations according to the table above:"
|
||||||
|
<TestCaseExpectations {
|
||||||
|
1: "value = back"
|
||||||
|
2: "strip(decodeBinary(encodeBinary(value))) = back"
|
||||||
|
3: "strip(decodeBinary(encodeBinary(forward))) = back"
|
||||||
|
4: "strip(decodeBinary(binary)) = back"
|
||||||
|
5: "decodeBinary(binary) = annotatedValue"
|
||||||
|
6: "decodeBinary(encodeBinary(annotatedValue)) = annotatedValue"
|
||||||
|
7: "decodeText(encodeText(value)) = back"
|
||||||
|
8: "decodeText(encodeText(forward)) = back"
|
||||||
|
9: "encodeBinary(forward) = binary"
|
||||||
|
10: "canonicallyEncodeBinary(forward) = binary"
|
||||||
|
11: "encodeBinary(annotatedValue) = binary"
|
||||||
|
|
||||||
|
12: "decodeText(text) fails with a syntax error (NB. never with premature EOF)"
|
||||||
|
13: "decodeText(text) fails signalling premature EOF (NB. never with a syntax error)"
|
||||||
|
|
||||||
|
14: "decodeBinary(bytes) fails with a syntax error (NB. never with premature EOF)"
|
||||||
|
15: "decodeBinary(bytes) fails signalling premature EOF (NB. never with a syntax error)"
|
||||||
|
}>
|
||||||
|
"Each `StreamingTest` will need to have an implementation-specific `forward`"
|
||||||
|
"supplied that encodes to the specific format C byte sequences in `binary`."
|
||||||
|
"Alternatively, implementations may choose to skip expectation 9 for"
|
||||||
|
"`StreamingTest`s, treating them like `DecodeTest`s."
|
||||||
|
]>
|
||||||
<TestCases
|
<TestCases
|
||||||
<ExpectedPlaceholderMapping {
|
<ExpectedPlaceholderMapping {
|
||||||
0: discard
|
0: discard
|
||||||
|
@ -76,6 +118,12 @@
|
||||||
list7: <Test #hex{93 73616263 732e2e2e 73646566} [abc ... def]>
|
list7: <Test #hex{93 73616263 732e2e2e 73646566} [abc ... def]>
|
||||||
list8: @"Missing close bracket" <ParseShort "[">
|
list8: @"Missing close bracket" <ParseShort "[">
|
||||||
list9: @"Unexpected close bracket" <ParseError "]">
|
list9: @"Unexpected close bracket" <ParseError "]">
|
||||||
|
noop0: <DecodeTest #hex{ff10} discard>
|
||||||
|
noop1: <DecodeTest #hex{ff31} 1>
|
||||||
|
noop2: <DecodeTest #hex{ffffff42ff00} -256>
|
||||||
|
noop3: <DecodeTest #hex{ff05ff53616263ff42ff00} @"abc" -256>
|
||||||
|
noop4: @"No-ops must be followed by something" <DecodeShort #hex{ffffff}>
|
||||||
|
noop5: @"No input at all is considered short" <DecodeShort #hex{}>
|
||||||
placeholder0: <Test #hex{10} discard>
|
placeholder0: <Test #hex{10} discard>
|
||||||
placeholder1: <Test #hex{11} capture>
|
placeholder1: <Test #hex{11} capture>
|
||||||
placeholder2: <Test #hex{12} observe>
|
placeholder2: <Test #hex{12} observe>
|
||||||
|
@ -115,6 +163,8 @@
|
||||||
symbol0: <Test #hex{70} ||>
|
symbol0: <Test #hex{70} ||>
|
||||||
symbol1: <StreamingTest #hex{27626865626c6c616f04} hello>
|
symbol1: <StreamingTest #hex{27626865626c6c616f04} hello>
|
||||||
symbol2: <Test #hex{7568656c6c6f} hello>
|
symbol2: <Test #hex{7568656c6c6f} hello>
|
||||||
|
whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " ">
|
||||||
|
whitespace1: @"No input at all is considered short" <ParseShort "">
|
||||||
value1: <Test #"\x66corymb" #value#"fcorymb">
|
value1: <Test #"\x66corymb" #value#"fcorymb">
|
||||||
value2: <Test #"\x01" #value#"\x01">
|
value2: <Test #"\x01" #value#"\x01">
|
||||||
value3: <Test #"\x01" #value#base64{AQ}>
|
value3: <Test #"\x01" #value#base64{AQ}>
|
||||||
|
|
Loading…
Reference in New Issue