forked from syndicate-lang/preserves
Autodetectability of binary vs text; documented test case schema a little
This commit is contained in:
parent
ebbd268166
commit
8e0ab95d82
|
@ -157,52 +157,59 @@ class Decoder {
|
|||
}
|
||||
|
||||
next() {
|
||||
const [major, minor, arg] = this.nextop();
|
||||
switch (major) {
|
||||
case 0:
|
||||
switch (minor) {
|
||||
case 0:
|
||||
switch (arg) {
|
||||
case 0: return this.wrap(false);
|
||||
case 1: return this.wrap(true);
|
||||
case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false)));
|
||||
case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false)));
|
||||
case 4: throw new DecodeError("Unexpected end-of-stream marker");
|
||||
case 5: {
|
||||
const a = this.next();
|
||||
const v = this.next();
|
||||
return this.unshiftAnnotation(a, v);
|
||||
while (true) { // we loop because we may need to consume an arbitrary number of no-ops
|
||||
const [major, minor, arg] = this.nextop();
|
||||
switch (major) {
|
||||
case 0:
|
||||
switch (minor) {
|
||||
case 0:
|
||||
switch (arg) {
|
||||
case 0: return this.wrap(false);
|
||||
case 1: return this.wrap(true);
|
||||
case 2: return this.wrap(Single(this.nextbytes(4).getFloat32(0, false)));
|
||||
case 3: return this.wrap(Double(this.nextbytes(8).getFloat64(0, false)));
|
||||
case 4: throw new DecodeError("Unexpected end-of-stream marker");
|
||||
case 5: {
|
||||
const a = this.next();
|
||||
const v = this.next();
|
||||
return this.unshiftAnnotation(a, v);
|
||||
}
|
||||
default: throw new DecodeError("Illegal format A lead byte");
|
||||
}
|
||||
default: throw new DecodeError("Illegal format A lead byte");
|
||||
case 1: {
|
||||
const n = this.wirelength(arg);
|
||||
const v = this.placeholders.get(n, void 0);
|
||||
if (typeof v === 'undefined') {
|
||||
const e = new DecodeError("Invalid Preserves placeholder");
|
||||
e.irritant = n;
|
||||
throw e;
|
||||
}
|
||||
return this.wrap(v);
|
||||
}
|
||||
case 1: {
|
||||
const n = this.wirelength(arg);
|
||||
const v = this.placeholders.get(n, void 0);
|
||||
if (typeof v === 'undefined') {
|
||||
const e = new DecodeError("Invalid Preserves placeholder");
|
||||
e.irritant = n;
|
||||
throw e;
|
||||
case 2: {
|
||||
const t = arg >> 2;
|
||||
const n = arg & 3;
|
||||
switch (t) {
|
||||
case 1: return this.wrap(this.binarystream(n));
|
||||
case 2: return this.wrap(this.valuestream(n));
|
||||
default: throw new DecodeError("Invalid format C start byte");
|
||||
}
|
||||
}
|
||||
return this.wrap(v);
|
||||
case 3:
|
||||
return this.wrap((arg > 12) ? arg - 16 : arg);
|
||||
}
|
||||
case 2: {
|
||||
const t = arg >> 2;
|
||||
const n = arg & 3;
|
||||
switch (t) {
|
||||
case 1: return this.wrap(this.binarystream(n));
|
||||
case 2: return this.wrap(this.valuestream(n));
|
||||
default: throw new DecodeError("Invalid format C start byte");
|
||||
}
|
||||
case 1:
|
||||
return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg)))));
|
||||
case 2:
|
||||
return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg))));
|
||||
case 3:
|
||||
if (minor === 3 && arg === 15) {
|
||||
// no-op.
|
||||
continue;
|
||||
} else {
|
||||
throw new DecodeError("Invalid lead byte (major 3)");
|
||||
}
|
||||
case 3:
|
||||
return this.wrap((arg > 12) ? arg - 16 : arg);
|
||||
}
|
||||
case 1:
|
||||
return this.wrap(this.decodebinary(minor, Bytes.from(this.nextbytes(this.wirelength(arg)))));
|
||||
case 2:
|
||||
return this.wrap(this.decodecompound(minor, this.nextvalues(this.wirelength(arg))));
|
||||
case 3:
|
||||
throw new DecodeError("Invalid lead byte (major 3)");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -315,6 +322,10 @@ class Encoder {
|
|||
this.header(0, 0, 4);
|
||||
}
|
||||
|
||||
// Emits a no-op lead byte: major 3, minor 3, argument 15 (all lead-byte bits set, i.e. 0xFF).
// Decoder.next() skips this byte and keeps reading until it reaches a real value.
encodenoop() {
|
||||
this.leadbyte(3, 3, 15);
|
||||
}
|
||||
|
||||
push(v) {
|
||||
const placeholder = this.placeholders.get(v, void 0);
|
||||
if (typeof placeholder !== 'undefined') {
|
||||
|
|
|
@ -165,12 +165,12 @@ describe('common test suite', () => {
|
|||
it('should go back', () => assert(is(DS(binaryForm), back)));
|
||||
it('should go back with annotations',
|
||||
() => assert(is(D(E(annotatedTextForm)), annotatedTextForm)));
|
||||
if (variety !== 'nondeterministic') {
|
||||
if (variety !== 'decode' && variety !== 'nondeterministic') {
|
||||
it('should encode correctly',
|
||||
() => assert(is(E(forward), binaryForm),
|
||||
E(forward) + ' ' + binaryForm));
|
||||
}
|
||||
if (variety !== 'nondeterministic' && variety !== 'streaming') {
|
||||
if (variety !== 'decode' && variety !== 'nondeterministic' && variety !== 'streaming') {
|
||||
it('should encode correctly with annotations',
|
||||
() => assert(is(E(annotatedTextForm), binaryForm),
|
||||
E(annotatedTextForm) + ' ' + binaryForm));
|
||||
|
@ -192,6 +192,9 @@ describe('common test suite', () => {
|
|||
case Symbol.for('NondeterministicTest'):
|
||||
runTestCase('nondeterministic', tName, t.get(0).strip(), t.get(1));
|
||||
break;
|
||||
case Symbol.for('DecodeTest'):
|
||||
runTestCase('decode', tName, t.get(0).strip(), t.get(1));
|
||||
break;
|
||||
case Symbol.for('DecodeError'):
|
||||
describe(tName, () => {
|
||||
it('should fail with DecodeError', () => {
|
||||
|
|
|
@ -405,39 +405,44 @@ class Decoder(Codec):
|
|||
return v
|
||||
|
||||
def next(self):
|
||||
(major, minor, arg) = self.nextop()
|
||||
if major == 0:
|
||||
if minor == 0:
|
||||
if arg == 0: return self.wrap(False)
|
||||
if arg == 1: return self.wrap(True)
|
||||
if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0]))
|
||||
if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
|
||||
if arg == 4: raise DecodeError('Unexpected end-of-stream marker')
|
||||
if arg == 5:
|
||||
a = self.next()
|
||||
v = self.next()
|
||||
return self.unshift_annotation(a, v)
|
||||
raise DecodeError('Invalid format A encoding')
|
||||
elif minor == 1:
|
||||
n = self.wirelength(arg)
|
||||
v = self.placeholders.get(n, None)
|
||||
if v is None:
|
||||
raise DecodeError('Invalid Preserves placeholder')
|
||||
return self.wrap(v)
|
||||
elif minor == 2:
|
||||
t = arg >> 2
|
||||
n = arg & 3
|
||||
if t == 1: return self.wrap(self.binarystream(n))
|
||||
if t == 2: return self.wrap(self.valuestream(n))
|
||||
raise DecodeError('Invalid format C start byte')
|
||||
else: # minor == 3
|
||||
return self.wrap(arg - 16 if arg > 12 else arg)
|
||||
elif major == 1:
|
||||
return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg))))
|
||||
elif major == 2:
|
||||
return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg))))
|
||||
else: # major == 3
|
||||
raise DecodeError('Invalid lead byte (major 3)')
|
||||
while True: # we loop because we may need to consume an arbitrary number of no-ops
|
||||
(major, minor, arg) = self.nextop()
|
||||
if major == 0:
|
||||
if minor == 0:
|
||||
if arg == 0: return self.wrap(False)
|
||||
if arg == 1: return self.wrap(True)
|
||||
if arg == 2: return self.wrap(Float(struct.unpack('>f', self.nextbytes(4))[0]))
|
||||
if arg == 3: return self.wrap(struct.unpack('>d', self.nextbytes(8))[0])
|
||||
if arg == 4: raise DecodeError('Unexpected end-of-stream marker')
|
||||
if arg == 5:
|
||||
a = self.next()
|
||||
v = self.next()
|
||||
return self.unshift_annotation(a, v)
|
||||
raise DecodeError('Invalid format A encoding')
|
||||
elif minor == 1:
|
||||
n = self.wirelength(arg)
|
||||
v = self.placeholders.get(n, None)
|
||||
if v is None:
|
||||
raise DecodeError('Invalid Preserves placeholder')
|
||||
return self.wrap(v)
|
||||
elif minor == 2:
|
||||
t = arg >> 2
|
||||
n = arg & 3
|
||||
if t == 1: return self.wrap(self.binarystream(n))
|
||||
if t == 2: return self.wrap(self.valuestream(n))
|
||||
raise DecodeError('Invalid format C start byte')
|
||||
else: # minor == 3
|
||||
return self.wrap(arg - 16 if arg > 12 else arg)
|
||||
elif major == 1:
|
||||
return self.wrap(self.decodebinary(minor, self.nextbytes(self.wirelength(arg))))
|
||||
elif major == 2:
|
||||
return self.wrap(self.decodecompound(minor, self.nextvalues(self.wirelength(arg))))
|
||||
else: # major == 3
|
||||
if minor == 3 and arg == 15:
|
||||
# no-op.
|
||||
continue
|
||||
else:
|
||||
raise DecodeError('Invalid lead byte (major 3)')
|
||||
|
||||
def try_next(self):
|
||||
start = self.index
|
||||
|
@ -499,6 +504,9 @@ class Encoder(Codec):
|
|||
for i in items: self.append(i)
|
||||
self.leadbyte(0, 0, 4)
|
||||
|
||||
# Emit a no-op lead byte (major 3, minor 3, argument 15, i.e. 0xFF).
# Decoder.next() consumes any number of these before decoding a value.
def encodenoop(self):
|
||||
self.leadbyte(3, 3, 15)
|
||||
|
||||
def append(self, v):
|
||||
try:
|
||||
placeholder = self.placeholders.get(v, None)
|
||||
|
|
|
@ -249,9 +249,9 @@ def install_test(d, variant, tName, binaryForm, annotatedTextForm):
|
|||
add_method(d, tName, test_forward)
|
||||
add_method(d, tName, test_back)
|
||||
add_method(d, tName, test_back_ann)
|
||||
if variant not in ['nondeterministic']:
|
||||
if variant not in ['decode', 'nondeterministic']:
|
||||
add_method(d, tName, test_encode)
|
||||
if variant not in ['nondeterministic', 'streaming']:
|
||||
if variant not in ['decode', 'nondeterministic', 'streaming']:
|
||||
add_method(d, tName, test_encode_ann)
|
||||
|
||||
def install_exn_test(d, tName, bs, check_proc):
|
||||
|
@ -287,6 +287,8 @@ class CommonTestSuite(unittest.TestCase):
|
|||
install_test(locals(), 'streaming', tName, t[0].strip(), t[1])
|
||||
elif t.key == Symbol('NondeterministicTest'):
|
||||
install_test(locals(), 'nondeterministic', tName, t[0].strip(), t[1])
|
||||
elif t.key == Symbol('DecodeTest'):
|
||||
install_test(locals(), 'decode', tName, t[0].strip(), t[1])
|
||||
elif t.key == Symbol('DecodeError'):
|
||||
def expected_err(self, e):
|
||||
self.assertIsInstance(e, DecodeError)
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
preserve->string
|
||||
current-value->placeholder
|
||||
current-placeholder->value
|
||||
prepend-noop
|
||||
encode
|
||||
decode
|
||||
decode-syntax
|
||||
|
@ -115,6 +116,9 @@
|
|||
(define current-value->placeholder (make-parameter (lambda (v) #f)))
|
||||
(define current-placeholder->value (make-parameter (lambda (v) (void))))
|
||||
|
||||
;; Returns ENCODED-VALUE with one 0xFF no-op byte prepended. The decoder
;; skips a leading 0xFF byte and continues with the rest of the input.
(define (prepend-noop encoded-value)
|
||||
(bit-string-append #"\xff" encoded-value))
|
||||
|
||||
(define (encode v)
|
||||
(bit-string->bytes (bit-string (v :: (wire-value)))))
|
||||
|
||||
|
@ -370,6 +374,9 @@
|
|||
(decode-compound minor fields rest (nil-annotation ks bs) kf))
|
||||
kf))
|
||||
|
||||
([ (= #b11111111 :: bits 8) (rest :: binary) ]
|
||||
(decode-one rest ks kf))
|
||||
|
||||
(else (kf))))
|
||||
|
||||
(decode-one input ks kf))
|
||||
|
@ -1110,25 +1117,28 @@
|
|||
(match (hash-ref samples-txt-expected t-name text-form)
|
||||
[(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations
|
||||
[v (values v v #t)]))
|
||||
(check-equal? text-form back loc)
|
||||
(check-equal? (d-strip (encode text-form)) back loc)
|
||||
(check-equal? (d-strip (encode forward)) back loc)
|
||||
(check-equal? (d-strip binary-form) back loc)
|
||||
(check-equal? (d binary-form) annotated-text-form loc)
|
||||
(check-equal? (d (encode annotated-text-form)) annotated-text-form loc)
|
||||
(check-equal? (string->preserve (preserve->string text-form)) back loc)
|
||||
(check-equal? (string->preserve (preserve->string forward)) back loc)
|
||||
(check-equal? (string->preserve-syntax (preserve->string annotated-text-form))
|
||||
(check-equal? text-form back loc) ;; expectation 1
|
||||
(check-equal? (d-strip (encode text-form)) back loc) ;; expectation 2
|
||||
(check-equal? (d-strip (encode forward)) back loc) ;; expectation 3
|
||||
(check-equal? (d-strip binary-form) back loc) ;; expectation 4
|
||||
(check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5
|
||||
(check-equal? (d (encode annotated-text-form)) annotated-text-form loc) ;; expectation 6
|
||||
(check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7
|
||||
(check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8
|
||||
(check-equal? (string->preserve-syntax (preserve->string annotated-text-form)) ;; similar to 8
|
||||
annotated-text-form
|
||||
loc)
|
||||
(when (or (not (memq variety '(nondeterministic)))
|
||||
(and can-execute-nondet-with-canonicalization?))
|
||||
(when (and (not (memq variety '(decode)))
|
||||
(or (not (memq variety '(nondeterministic)))
|
||||
(and can-execute-nondet-with-canonicalization?)))
|
||||
;; expectations 9 and 10
|
||||
(parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f)))
|
||||
(check-equal? (encode forward) binary-form loc)))
|
||||
(unless (memq variety '(nondeterministic streaming))
|
||||
(unless (memq variety '(decode nondeterministic streaming))
|
||||
;; expectation 11
|
||||
(check-equal? (encode annotated-text-form) binary-form loc)))
|
||||
|
||||
(define-runtime-path tests-path "../../../tests")
|
||||
(define-runtime-path tests-path "../../../../tests")
|
||||
(let* ((path (build-path tests-path "samples.txt"))
|
||||
(testfile (call-with-input-file path
|
||||
(lambda (p)
|
||||
|
@ -1158,6 +1168,8 @@
|
|||
(run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)]
|
||||
[`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form)
|
||||
(run-test-case 'streaming t-name loc binary-form annotated-text-form)]
|
||||
[`#s(DecodeTest ,(strip-annotations binary-form) ,annotated-text-form)
|
||||
(run-test-case 'decode t-name loc binary-form annotated-text-form)]
|
||||
[`#s(ParseError ,(strip-annotations str))
|
||||
(with-handlers [(exn:fail:read:eof?
|
||||
(lambda (e) (fail-test "Unexpected EOF: ~e" e)))
|
||||
|
|
|
@ -307,6 +307,7 @@ mod samples_tests {
|
|||
Test(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
|
||||
NondeterministicTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
|
||||
StreamingTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
|
||||
DecodeTest(#[serde(with = "serde_bytes")] Vec<u8>, PlainValue<Dom>),
|
||||
ParseError(String),
|
||||
ParseShort(String),
|
||||
DecodeError(#[serde(with = "serde_bytes")] Vec<u8>),
|
||||
|
@ -341,6 +342,10 @@ mod samples_tests {
|
|||
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
|
||||
assert_eq!(&codec.decode(&mut &bin[..])?, val);
|
||||
}
|
||||
TestCase::DecodeTest(ref bin, ref val) => {
|
||||
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
|
||||
assert_eq!(&codec.decode(&mut &bin[..])?, val);
|
||||
}
|
||||
TestCase::ParseError(_) => (),
|
||||
TestCase::ParseShort(_) => (),
|
||||
TestCase::DecodeError(ref bin) => {
|
||||
|
|
|
@ -6,6 +6,7 @@ pub enum Op {
|
|||
Misc(u8),
|
||||
Atom(AtomMinor),
|
||||
Compound(CompoundMinor),
|
||||
Reserved(u8),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
@ -18,6 +19,7 @@ impl TryFrom<u8> for Op {
|
|||
0 => Ok(Self::Misc(v & 3)),
|
||||
1 => Ok(Self::Atom(AtomMinor::try_from(v & 3).unwrap())),
|
||||
2 => Ok(Self::Compound(CompoundMinor::try_from(v & 3).unwrap())),
|
||||
3 => Ok(Self::Reserved(v & 3)),
|
||||
_ => Err(InvalidOp),
|
||||
}
|
||||
}
|
||||
|
@ -29,6 +31,7 @@ impl From<Op> for u8 {
|
|||
Op::Misc(minor) => minor & 3,
|
||||
Op::Atom(minor) => (1 << 2) | ((minor as u8) & 3),
|
||||
Op::Compound(minor) => (2 << 2) | ((minor as u8) & 3),
|
||||
Op::Reserved(minor) => (3 << 2) | (minor & 3),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -246,59 +246,63 @@ impl<'a, 'b, R: Read, N: NestedValue<D>, D: Domain> Decoder<'a, 'b, R, N, D> {
|
|||
}
|
||||
|
||||
pub fn next(&mut self) -> Result<N> {
|
||||
match self.nextop()? {
|
||||
(Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
|
||||
(Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
|
||||
(Op::Misc(0), 2) => {
|
||||
let bs: &[u8] = &self.readbytes(4)?;
|
||||
Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
|
||||
}
|
||||
(Op::Misc(0), 3) => {
|
||||
let bs: &[u8] = &self.readbytes(8)?;
|
||||
Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
|
||||
}
|
||||
(Op::Misc(0), 5) => {
|
||||
if self.read_annotations {
|
||||
let mut annotations = vec![self.next()?];
|
||||
while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
|
||||
self.skip()?;
|
||||
annotations.push(self.next()?);
|
||||
loop {
|
||||
return match self.nextop()? {
|
||||
(Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
|
||||
(Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
|
||||
(Op::Misc(0), 2) => {
|
||||
let bs: &[u8] = &self.readbytes(4)?;
|
||||
Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
|
||||
}
|
||||
(Op::Misc(0), 3) => {
|
||||
let bs: &[u8] = &self.readbytes(8)?;
|
||||
Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
|
||||
}
|
||||
(Op::Misc(0), 5) => {
|
||||
if self.read_annotations {
|
||||
let mut annotations = vec![self.next()?];
|
||||
while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
|
||||
self.skip()?;
|
||||
annotations.push(self.next()?);
|
||||
}
|
||||
let v = self.next()?;
|
||||
assert!(v.annotations().is_empty());
|
||||
Ok(N::wrap_ann(annotations, v.value_owned()))
|
||||
} else {
|
||||
self.next()?;
|
||||
self.next()
|
||||
}
|
||||
let v = self.next()?;
|
||||
assert!(v.annotations().is_empty());
|
||||
Ok(N::wrap_ann(annotations, v.value_owned()))
|
||||
} else {
|
||||
self.next()?;
|
||||
self.next()
|
||||
}
|
||||
}
|
||||
(Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
|
||||
(Op::Misc(1), arg) => {
|
||||
let n = self.wirelength(arg)?;
|
||||
match self.placeholders.and_then(|m| m.get(&n)) {
|
||||
Some(v) => Ok(v.clone().wrap()),
|
||||
None => Err(Error::Syntax("Invalid Preserves placeholder")),
|
||||
(Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
|
||||
(Op::Misc(1), arg) => {
|
||||
let n = self.wirelength(arg)?;
|
||||
match self.placeholders.and_then(|m| m.get(&n)) {
|
||||
Some(v) => Ok(v.clone().wrap()),
|
||||
None => Err(Error::Syntax("Invalid Preserves placeholder")),
|
||||
}
|
||||
}
|
||||
}
|
||||
(Op::Misc(2), arg) => {
|
||||
match Op::try_from(arg)? {
|
||||
Op::Atom(minor) => self.binarystream(minor),
|
||||
Op::Compound(minor) => self.valuestream(minor),
|
||||
_ => Err(Error::Syntax("Invalid format C start byte")),
|
||||
(Op::Misc(2), arg) => {
|
||||
match Op::try_from(arg)? {
|
||||
Op::Atom(minor) => self.binarystream(minor),
|
||||
Op::Compound(minor) => self.valuestream(minor),
|
||||
_ => Err(Error::Syntax("Invalid format C start byte")),
|
||||
}
|
||||
}
|
||||
}
|
||||
(Op::Misc(3), arg) => {
|
||||
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
|
||||
Ok(Value::from(n).wrap())
|
||||
}
|
||||
(Op::Misc(_), _) => unreachable!(),
|
||||
(Op::Atom(minor), arg) => {
|
||||
let count = self.wirelength(arg)?;
|
||||
Self::decodebinary(minor, self.readbytes(count)?)
|
||||
}
|
||||
(Op::Compound(minor), arg) => {
|
||||
let count = self.wirelength(arg)?;
|
||||
Self::decodecompound(minor, self.readvalues(count)?)
|
||||
(Op::Misc(3), arg) => {
|
||||
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
|
||||
Ok(Value::from(n).wrap())
|
||||
}
|
||||
(Op::Misc(_), _) => unreachable!(),
|
||||
(Op::Atom(minor), arg) => {
|
||||
let count = self.wirelength(arg)?;
|
||||
Self::decodebinary(minor, self.readbytes(count)?)
|
||||
}
|
||||
(Op::Compound(minor), arg) => {
|
||||
let count = self.wirelength(arg)?;
|
||||
Self::decodecompound(minor, self.readvalues(count)?)
|
||||
}
|
||||
(Op::Reserved(3), 15) => continue,
|
||||
(Op::Reserved(_), _) => Err(InvalidOp.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -70,6 +70,10 @@ impl<'a, 'b, W: Write, N: NestedValue<D>, D: Domain> Encoder<'a, 'b, W, N, D> {
|
|||
self.write_all(bs)
|
||||
}
|
||||
|
||||
/// Writes a no-op: `Op::Reserved(3)` with argument 15, which the lead-byte
/// table lists as `0xFF`. Returns the result of the underlying `write_op` call.
pub fn write_noop(&mut self) -> Result {
|
||||
self.write_op(Op::Reserved(3), 15)
|
||||
}
|
||||
|
||||
pub fn write(&mut self, v: &N) -> Result {
|
||||
for ann in v.annotations() {
|
||||
self.write_header(Op::Misc(0), 5)?;
|
||||
|
|
|
@ -22,6 +22,10 @@ pre, code { background-color: #eee; font-family: "DejaVu Sans Mono", monospace;
|
|||
code { font-size: 75%; }
|
||||
pre { padding: 0.33rem; line-height: 1; overflow-x: auto; }
|
||||
|
||||
p, ul, table {
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
body {
|
||||
counter-reset: section 0 subsection 0 appendix 0;
|
||||
}
|
||||
|
|
53
preserves.md
53
preserves.md
|
@ -4,7 +4,7 @@ title: "Preserves: an Expressive Data Language"
|
|||
---
|
||||
|
||||
Tony Garnock-Jones <tonyg@leastfixedpoint.com>
|
||||
August 2019. Version 0.0.6.
|
||||
May 2020. Version 0.0.7.
|
||||
|
||||
[sexp.txt]: http://people.csail.mit.edu/rivest/Sexp.txt
|
||||
[spki]: http://world.std.com/~cme/html/spki.html
|
||||
|
@ -470,11 +470,15 @@ representation.[^some-encodings-unused]
|
|||
| 0 | 3 | | (format A) Certain small `SignedInteger`s |
|
||||
| 1 | | | (format B) An `Atom` with variable-length binary representation |
|
||||
| 2 | | | (format B) A `Compound` with variable-length representation |
|
||||
| 3 | 3 | 15 | (format A) 0xFF byte; no-op |
|
||||
|
||||
#### Encoding data of type-specific length (format A).
|
||||
|
||||
Each type of data defines its own rules for this format.
|
||||
|
||||
Of particular note is lead byte `0xFF`, which is a no-op byte acting
|
||||
as a kind of pseudo-whitespace in a binary-syntax encoding.
|
||||
|
||||
#### Encoding data of known length (format B).
|
||||
|
||||
Format B is used where the length `l` of the `Value` to be encoded is
|
||||
|
@ -896,10 +900,11 @@ endless sequence of zero length chunks, appearing to make progress but
|
|||
not actually doing so. Implementations *MUST* reject zero length
|
||||
chunks when decoding, and *MUST NOT* produce them when encoding.
|
||||
|
||||
**Whitespace.** Similarly, the textual format for `Value`s allows
|
||||
arbitrary whitespace in many positions. In streaming transfer
|
||||
situations, consider optional restrictions on the amount of
|
||||
consecutive whitespace that may appear in a serialized `Value`.
|
||||
**Whitespace and no-ops.** Similarly, the binary format allows `0xFF`
|
||||
no-ops and the textual format allows arbitrary whitespace in many
|
||||
positions. In streaming transfer situations, consider optional
|
||||
restrictions on the amount of consecutive whitespace or the number of
|
||||
consecutive no-ops that may appear.
|
||||
|
||||
**Annotations.** Also similarly, in modes where a `Value` is being
|
||||
read while annotations are skipped, an endless sequence of annotations
|
||||
|
@ -922,6 +927,24 @@ The text syntax for `Boolean`s, `Symbol`s, and `ByteString`s is
|
|||
directly inspired by [Racket](https://racket-lang.org/)'s lexical
|
||||
syntax.
|
||||
|
||||
## Appendix. Autodetection of textual or binary syntax
|
||||
|
||||
Whitespace characters `0x09` (ASCII HT (tab)), `0x0A` (LF), `0x0D`
|
||||
(CR), `0x20` (space) and `0x2C` (comma) are ignored at the start of a
|
||||
textual-syntax Preserves `Document`, and their UTF-8 encodings are
|
||||
reserved lead byte values in binary-syntax Preserves.
|
||||
|
||||
The byte `0xFF`, signifying a no-op in binary-syntax Preserves, has no
|
||||
meaning in either 7-bit ASCII or UTF-8, and therefore cannot appear in
|
||||
a valid textual-syntax Preserves `Document`.
|
||||
|
||||
If applications prefix their textual-syntax documents with e.g. a
|
||||
space or newline character, and their binary-syntax documents with a
|
||||
`0xFF` byte, consumers of these documents may reliably autodetect the
|
||||
syntax being used. In a network protocol supporting this kind of
|
||||
autodetection, clients may transmit LF or `0xFF` to select text or
|
||||
binary syntax, respectively.
|
||||
|
||||
## Appendix. Table of lead byte values
|
||||
|
||||
00 - False
|
||||
|
@ -930,9 +953,9 @@ syntax.
|
|||
03 - Double
|
||||
04 - End stream
|
||||
05 - Annotation
|
||||
(0x) RESERVED 06-0F
|
||||
(0x) RESERVED 06-0F (NB. 09, 0A, 0D specially reserved)
|
||||
1x - Placeholder
|
||||
2x - Start Stream
|
||||
2x - Start Stream (NB. 20, 2C specially reserved)
|
||||
3x - Small integers 0..12,-3..-1
|
||||
|
||||
4x - SignedInteger
|
||||
|
@ -948,7 +971,8 @@ syntax.
|
|||
(Cx) RESERVED C0-CF
|
||||
(Dx) RESERVED D0-DF
|
||||
(Ex) RESERVED E0-EF
|
||||
(Fx) RESERVED F0-FF
|
||||
(Fx) RESERVED F0-FE
|
||||
FF No-op
|
||||
|
||||
## Appendix. Bit fields within lead byte values
|
||||
|
||||
|
@ -962,13 +986,25 @@ syntax.
|
|||
00 00 0100 End Stream (to match a previous Start Stream)
|
||||
00 00 0101 Annotation; two more Reprs follow
|
||||
|
||||
00 00 1001 (ASCII HT (tab)) \
|
||||
00 00 1010 (ASCII LF) |- Reserved: may be used to indicate
|
||||
00 00 1101 (ASCII CR) / use of text encoding
|
||||
|
||||
00 01 mmmm Placeholder; m is the placeholder number
|
||||
|
||||
00 10 ttnn Start Stream <tt,nn>
|
||||
When tt = 00 --> error
|
||||
When nn = 00 --> (ASCII space)
|
||||
Reserved: may be used to indicate
|
||||
use of text encoding
|
||||
otherwise --> error
|
||||
01 --> each chunk is a ByteString
|
||||
10 --> each chunk is a single encoded Value
|
||||
11 --> error (RESERVED)
|
||||
When nn = 00 --> (ASCII comma)
|
||||
Reserved: may be used to indicate
|
||||
use of text encoding
|
||||
otherwise --> error
|
||||
|
||||
00 11 xxxx Small integers 0..12,-3..-1
|
||||
|
||||
|
@ -983,6 +1019,7 @@ syntax.
|
|||
10 11 mmmm Dictionary
|
||||
|
||||
11 nn mmmm error, RESERVED
|
||||
11 11 1111 no-op; unambiguous indication of binary Preserves format
|
||||
|
||||
Where `mmmm` appears, interpret it as an unsigned 4-bit number `m`. If
|
||||
`m`<15, let `l`=`m`. Otherwise, `m`=15; let `l` be the result of
|
||||
|
|
Binary file not shown.
|
@ -1,4 +1,46 @@
|
|||
@<EmacsMode "-*- preserves -*-">
|
||||
@<Documentation [
|
||||
"Individual test cases may be any of the following record types:"
|
||||
<TestCaseTypes {
|
||||
Test: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 9 11}}
|
||||
NondeterministicTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 10 11}}
|
||||
StreamingTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8 9 }}
|
||||
DecodeTest: {fields: [binary annotatedValue] expectations: {1 2 3 4 5 6 7 8}}
|
||||
ParseError: {fields: [text] expectations: {12}}
|
||||
ParseShort: {fields: [text] expectations: {13}}
|
||||
DecodeError: {fields: [bytes] expectations: {14}}
|
||||
DecodeShort: {fields: [bytes] expectations: {15}}
|
||||
}>
|
||||
"In each test, let value = strip(annotatedValue),",
|
||||
" forward = value,",
|
||||
" back = value,"
|
||||
"except where test-case-specific values of `forward` and/or `back` are provided"
|
||||
"by the executing harness (of particular importance for `StreamingTest`s),"
|
||||
"and check the following numbered expectations according to the table above:"
|
||||
<TestCaseExpectations {
|
||||
1: "value = back"
|
||||
2: "strip(decodeBinary(encodeBinary(value))) = back"
|
||||
3: "strip(decodeBinary(encodeBinary(forward))) = back"
|
||||
4: "strip(decodeBinary(binary)) = back"
|
||||
5: "decodeBinary(binary) = annotatedValue"
|
||||
6: "decodeBinary(encodeBinary(annotatedValue)) = annotatedValue"
|
||||
7: "decodeText(encodeText(value)) = back"
|
||||
8: "decodeText(encodeText(forward)) = back"
|
||||
9: "encodeBinary(forward) = binary"
|
||||
10: "canonicallyEncodeBinary(forward) = binary"
|
||||
11: "encodeBinary(annotatedValue) = binary"
|
||||
|
||||
12: "decodeText(text) fails with a syntax error (NB. never with premature EOF)"
|
||||
13: "decodeText(text) fails signalling premature EOF (NB. never with a syntax error)"
|
||||
|
||||
14: "decodeBinary(bytes) fails with a syntax error (NB. never with premature EOF)"
|
||||
15: "decodeBinary(bytes) fails signalling premature EOF (NB. never with a syntax error)"
|
||||
}>
|
||||
"Each `StreamingTest` will need to have an implementation-specific `forward`"
|
||||
"supplied that encodes to the specific format C byte sequences in `binary`."
|
||||
"Alternatively, implementations may choose to skip expectation 9 for"
|
||||
"`StreamingTest`s, treating them like `DecodeTest`s."
|
||||
]>
|
||||
<TestCases
|
||||
<ExpectedPlaceholderMapping {
|
||||
0: discard
|
||||
|
@ -76,6 +118,12 @@
|
|||
list7: <Test #hex{93 73616263 732e2e2e 73646566} [abc ... def]>
|
||||
list8: @"Missing close bracket" <ParseShort "[">
|
||||
list9: @"Unexpected close bracket" <ParseError "]">
|
||||
noop0: <DecodeTest #hex{ff10} discard>
|
||||
noop1: <DecodeTest #hex{ff31} 1>
|
||||
noop2: <DecodeTest #hex{ffffff42ff00} -256>
|
||||
noop3: <DecodeTest #hex{ff05ff53616263ff42ff00} @"abc" -256>
|
||||
noop4: @"No-ops must be followed by something" <DecodeShort #hex{ffffff}>
|
||||
noop5: @"No input at all is considered short" <DecodeShort #hex{}>
|
||||
placeholder0: <Test #hex{10} discard>
|
||||
placeholder1: <Test #hex{11} capture>
|
||||
placeholder2: <Test #hex{12} observe>
|
||||
|
@ -115,6 +163,8 @@
|
|||
symbol0: <Test #hex{70} ||>
|
||||
symbol1: <StreamingTest #hex{27626865626c6c616f04} hello>
|
||||
symbol2: <Test #hex{7568656c6c6f} hello>
|
||||
whitespace0: @"Leading spaces have to eventually yield something" <ParseShort " ">
|
||||
whitespace1: @"No input at all is considered short" <ParseShort "">
|
||||
value1: <Test #"\x66corymb" #value#"fcorymb">
|
||||
value2: <Test #"\x01" #value#"\x01">
|
||||
value3: <Test #"\x01" #value#base64{AQ}>
|
||||
|
|
Loading…
Reference in New Issue