diff --git a/implementations/rust/src/value/decoder.rs b/implementations/rust/src/value/decoder.rs index 1efe0ba..824552b 100644 --- a/implementations/rust/src/value/decoder.rs +++ b/implementations/rust/src/value/decoder.rs @@ -1,15 +1,11 @@ -use bytes::BytesMut; -use super::reader::{Reader, Token, err, is_eof_error, decodebinary}; -use super::value::{Value, NestedValue, Set, Map, Domain}; -use super::constants::{AtomMinor, CompoundMinor}; +use super::reader::{Reader, is_eof_error}; +use super::value::{NestedValue, Domain}; -pub use super::reader::{Error, Result}; +pub use super::reader::{Error, Result, DecodePlaceholderMap}; -pub type DecodePlaceholderMap = Map>; - -pub struct Decoder<'a, R: Reader, N: NestedValue, D: Domain> { +pub struct Decoder<'a, R: Reader, N: NestedValue, Dom: Domain> { pub read: R, - placeholders: Option<&'a DecodePlaceholderMap>, + placeholders: Option<&'a DecodePlaceholderMap>, read_annotations: bool, } @@ -17,7 +13,7 @@ impl<'a, R: Reader, N: NestedValue, D: Domain> Decoder<'a, R, N, D> { pub fn new(read: R, placeholders: Option<&'a DecodePlaceholderMap>) -> Self { - Decoder{ + Decoder { read, placeholders, read_annotations: true, @@ -29,106 +25,7 @@ impl<'a, R: Reader, N: NestedValue, D: Domain> Decoder<'a, R, N, D> { } pub fn next_or_err(&mut self) -> Result { - let t = self.read.next_token()?; - self.next_inner(t) - } - - pub fn next_inner(&mut self, mut token: Token) -> Result { - loop { - return match token { - Token::Annotation => - if self.read_annotations { - let mut annotations = vec![self.next_or_err()?]; - loop { - match self.read.next_token()? { - Token::Annotation => - annotations.push(self.next_or_err()?), - other => { - token = other; - break; - } - } - } - let v = self.next_inner(token)?; - assert!(v.annotations().is_empty()); - Ok(N::wrap_ann(annotations, v.value_owned())) - } else { - self.next_or_err()?; - token = self.read.next_token()?; - continue; - } - Token::PlaceholderRef(n) => - match self.placeholders.and_then(|m| m.get(&n)) { - Some(v) => Ok(v.clone().wrap()), - None => Err(err("Invalid Preserves placeholder")), - } - Token::Noop => { - token = self.read.next_token()?; - continue; - } - - Token::Boolean(b) => Ok(Value::from(b).wrap()), - Token::Float(f) => Ok(Value::from(f).wrap()), - Token::Double(d) => Ok(Value::from(d).wrap()), - Token::SignedInteger(i) => Ok(Value::from(i).wrap()), - Token::String(s) => Ok(Value::from(s).wrap()), - Token::ByteString(bs) => Ok(Value::ByteString(bs.to_vec()).wrap()), - Token::Symbol(s) => Ok(Value::symbol(&s).wrap()), - - Token::OpenAtom(minor) => self.binarystream(minor), - Token::CloseAtom(minor) => Err(err(&format!("Unexpected {:?} close", minor))), - - Token::OpenCompound(paren, mut limit) => self.decodecompound(paren, &mut limit), - Token::CloseCompound(paren) => Err(err(&format!("Unexpected {:?} close", paren))), - } - } - } - - pub fn binarystream(&mut self, minor: AtomMinor) -> Result { - let mut bs = BytesMut::with_capacity(256); - while !self.read.at_atom_end(minor)? { - match self.next_or_err()?.value().as_bytestring() { - Some(chunk) => bs.extend_from_slice(chunk), - None => return Err(err("Unexpected non-binary chunk")), - } - } - // We know it'll be a SignedInteger, String, ByteString, or - // Symbol, so the recursion is safe. - self.next_inner(decodebinary(minor, bs)?) - } - - pub fn decodecompound(&mut self, paren: CompoundMinor, limit: &mut Option) -> - Result - { - match paren { - CompoundMinor::Record => - match I(self, paren, limit).next() { - None => Err(err("Too few elements in encoded record")), - Some(labelres) => { - let label = labelres?; - Ok(Value::record(label, I(self, paren, limit).collect::>>()?).wrap()) - } - } - CompoundMinor::Sequence => { - Ok(Value::Sequence(I(self, paren, limit).collect::>>()?).wrap()) - } - CompoundMinor::Set => { - let mut s = Set::new(); - for res in I(self, paren, limit) { s.insert(res?); } - Ok(Value::Set(s).wrap()) - } - CompoundMinor::Dictionary => { - let mut d = Map::new(); - while let Some(kres) = I(self, paren, limit).next() { - let k = kres?; - match I(self, paren, limit).next() { - Some(vres) => { d.insert(k, vres?); } - None => return Err(err("Missing dictionary value")), - } - } - Ok(Value::Dictionary(d).wrap()) - } - } + self.read.next(self.placeholders, self.read_annotations) } } @@ -141,30 +38,3 @@ impl<'a, R: Reader, N: NestedValue, D: Domain> std::iter::Iterator for Decode } } } - -struct I<'f, 'a, R: Reader, N: NestedValue, D: Domain>( - &'f mut Decoder<'a, R, N, D>, - CompoundMinor, - &'f mut Option, -); - -impl<'f, 'a, R: Reader, N: NestedValue, D: Domain> Iterator for I<'f, 'a, R, N, D> { - type Item = Result; - fn next(&mut self) -> Option { - let I(d, paren, count) = self; - match count { - Some(0) => None, - Some(n) => { - *self.2 = Some(*n - 1); - Some(d.next_or_err()) - }, - None => { - match d.read.at_compound_end(*paren) { - Ok(true) => None, - Ok(false) => Some(d.next_or_err()), - Err(e) => Some(Err(e)), - } - } - } - } -} diff --git a/implementations/rust/src/value/mod.rs b/implementations/rust/src/value/mod.rs index 622dceb..3680efb 100644 --- a/implementations/rust/src/value/mod.rs +++ b/implementations/rust/src/value/mod.rs @@ -12,11 +12,11 @@ pub mod writer; pub use codec::Codec; pub use de::Deserializer; pub use de::from_value; -pub use decoder::DecodePlaceholderMap; pub use decoder::Decoder; pub use encoder::EncodePlaceholderMap; pub use encoder::Encoder; pub use reader::BinaryReader; +pub use reader::DecodePlaceholderMap; pub use reader::Reader; pub use reader::is_eof_error; pub use reader::is_syntax_error; diff --git a/implementations/rust/src/value/reader.rs b/implementations/rust/src/value/reader.rs index c4b7dbf..b3e2a58 100644 --- a/implementations/rust/src/value/reader.rs +++ b/implementations/rust/src/value/reader.rs @@ -4,6 +4,7 @@ use std::convert::TryFrom; use std::convert::TryInto; use std::io::Read; use super::constants::{Op, InvalidOp, AtomMinor, CompoundMinor}; +use super::value::{Value, Domain, NestedValue, Map, Set}; pub type Error = std::io::Error; pub type Result = std::result::Result; @@ -14,32 +15,30 @@ enum PeekState { Buffer(BytesMut), } -#[derive(Debug, Clone)] -pub enum Token { - Annotation, - PlaceholderRef(usize), - Noop, - - Boolean(bool), - Float(f32), - Double(f64), - SignedInteger(BigInt), - String(String), - ByteString(BytesMut), - Symbol(String), - - OpenAtom(AtomMinor), - CloseAtom(AtomMinor), - - OpenCompound(CompoundMinor, Option), - CloseCompound(CompoundMinor), -} +pub type DecodePlaceholderMap = Map>; pub trait Reader { - fn next_token(&mut self) -> Result; + fn next, Dom: Domain>( + &mut self, + placeholders: Option<&DecodePlaceholderMap>, + read_annotations: bool, + ) -> Result; + fn buffered_len(&mut self) -> Result; - fn at_atom_end(&mut self, minor: AtomMinor) -> Result; - fn at_compound_end(&mut self, paren: CompoundMinor) -> Result; +} + +impl<'re, R: Reader> Reader for &'re mut R { + fn next, Dom: Domain>( + &mut self, + placeholders: Option<&DecodePlaceholderMap>, + read_annotations: bool, + ) -> Result { + (*self).next(placeholders, read_annotations) + } + + fn buffered_len(&mut self) -> Result { + (*self).buffered_len() + } } pub struct BinaryReader<'a, R: Read> { @@ -48,6 +47,45 @@ pub struct BinaryReader<'a, R: Read> { chunksize: usize, } +struct ConfiguredBinaryReader<'de, 'pl, 'a, R: Read, N: NestedValue, Dom: Domain> { + reader: &'de mut BinaryReader<'a, R>, + placeholders: Option<&'pl DecodePlaceholderMap>, + read_annotations: bool, +} + +struct CountedStream<'de, 'pl, 'a, R: Read, N: NestedValue, Dom: Domain> { + reader: ConfiguredBinaryReader<'de, 'pl, 'a, R, N, Dom>, + count: usize, +} + +impl<'de, 'pl, 'a, R: Read, N: NestedValue, Dom: Domain> Iterator + for CountedStream<'de, 'pl, 'a, R, N, Dom> +{ + type Item = Result; + fn next(&mut self) -> Option { + if self.count == 0 { return None } + self.count -= 1; + Some(self.reader.reader.next(self.reader.placeholders, self.reader.read_annotations)) + } +} + +struct DelimitedStream<'de, 'pl, 'a, R: Read, N: NestedValue, Dom: Domain> { + reader: ConfiguredBinaryReader<'de, 'pl, 'a, R, N, Dom>, +} + +impl<'de, 'pl, 'a, R: Read, N: NestedValue, Dom: Domain> Iterator + for DelimitedStream<'de, 'pl, 'a, R, N, Dom> +{ + type Item = Result; + fn next(&mut self) -> Option { + match self.reader.reader.peekend() { + Err(e) => Some(Err(e)), + Ok(true) => None, + Ok(false) => Some(self.reader.reader.next(self.reader.placeholders, self.reader.read_annotations)), + } + } +} + pub fn decodeop(b: u8) -> Result<(Op, u8)> { Ok((Op::try_from(b >> 4)?, b & 15)) } @@ -60,12 +98,53 @@ pub fn decodestr(bs: &[u8]) -> Result<&str> { std::str::from_utf8(bs).map_err(|_| err("Invalid UTF-8")) } -pub fn decodebinary(minor: AtomMinor, bs: BytesMut) -> Result { +pub fn decodebinary, Dom: Domain>(minor: AtomMinor, bs: BytesMut) -> Result { match minor { - AtomMinor::SignedInteger => Ok(Token::SignedInteger(decodeint(&bs))), - AtomMinor::String => Ok(Token::String(decodestr(&bs)?.into())), - AtomMinor::ByteString => Ok(Token::ByteString(bs)), - AtomMinor::Symbol => Ok(Token::Symbol(decodestr(&bs)?.into())), + AtomMinor::SignedInteger => Ok(Value::from(decodeint(&bs)).wrap()), + AtomMinor::String => Ok(Value::from(decodestr(&bs)?).wrap()), + AtomMinor::ByteString => Ok(Value::ByteString(bs.to_vec()).wrap()), + AtomMinor::Symbol => Ok(Value::symbol(decodestr(&bs)?).wrap()), + } +} + + +pub fn decodecompound, Dom: Domain, I: Iterator>>( + minor: CompoundMinor, + mut iter: I, +) -> + Result +{ + match minor { + CompoundMinor::Record => + match iter.next() { + None => Err(err("Too few elements in encoded record")), + Some(labelres) => { + let label = labelres?; + Ok(Value::record(label, iter.collect::>>()?).wrap()) + } + } + CompoundMinor::Sequence => { + Ok(Value::Sequence(iter.collect::>>()?).wrap()) + } + CompoundMinor::Set => { + let mut s = Set::new(); + for res in iter { s.insert(res?); } + Ok(Value::Set(s).wrap()) + } + CompoundMinor::Dictionary => { + let mut d = Map::new(); + while let Some(kres) = iter.next() { + let k = kres?; + match iter.next() { + Some(vres) => { + let v = vres?; + d.insert(k, v); + } + None => return Err(err("Missing dictionary value")), + } + } + Ok(Value::Dictionary(d).wrap()) + } } } @@ -105,7 +184,7 @@ impl<'a, R: Read> BinaryReader<'a, R> { BinaryReader { read, buf: PeekState::Buffer(BytesMut::new()), - chunksize: 1, + chunksize: 64, } } @@ -161,10 +240,6 @@ impl<'a, R: Read> BinaryReader<'a, R> { Ok(buf.split_to(req)) } - pub fn nextop(&mut self) -> Result<(Op, u8)> { - decodeop(self.read()?) - } - pub fn varint(&mut self) -> Result { let v = self.read()?; if v < 128 { @@ -192,61 +267,95 @@ impl<'a, R: Read> BinaryReader<'a, R> { } } -impl<'re, 'a, R: Read> Reader for &'re mut BinaryReader<'a, R> { - fn next_token(&mut self) -> Result { - (*self).next_token() - } - - fn buffered_len(&mut self) -> Result { - (*self).buffered_len() - } - - fn at_atom_end(&mut self, minor: AtomMinor) -> Result { - (*self).at_atom_end(minor) - } - - fn at_compound_end(&mut self, paren: CompoundMinor) -> Result { - (*self).at_compound_end(paren) - } -} - impl<'re, 'a, R: Read> Reader for BinaryReader<'a, R> { - fn next_token(&mut self) -> Result { - match self.nextop()? { - (Op::Misc(0), 0) => Ok(Token::Boolean(false)), - (Op::Misc(0), 1) => Ok(Token::Boolean(true)), - (Op::Misc(0), 2) => { - let mut bs = [0; 4]; - bs.copy_from_slice(&self.readbytes(4)?); - Ok(Token::Float(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap())))) + fn next, Dom: Domain>( + &mut self, + placeholders: Option<&DecodePlaceholderMap>, + read_annotations: bool + ) -> + Result + { + loop { + return match decodeop(self.read()?)? { + (Op::Misc(0), 0) => Ok(Value::from(false).wrap()), + (Op::Misc(0), 1) => Ok(Value::from(true).wrap()), + (Op::Misc(0), 2) => { + let mut bs = [0; 4]; + bs.copy_from_slice(&self.readbytes(4)?); + Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()) + } + (Op::Misc(0), 3) => { + let mut bs = [0; 8]; + bs.copy_from_slice(&self.readbytes(8)?); + Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()) + } + (Op::Misc(0), 5) => { + if read_annotations { + let mut annotations = vec![self.next(placeholders, read_annotations)?]; + while decodeop(self.peek()?)? == (Op::Misc(0), 5) { + self.skip()?; + annotations.push(self.next(placeholders, read_annotations)?); + } + let v = self.next(placeholders, read_annotations)?; + assert!(v.annotations().is_empty()); + Ok(N::wrap_ann(annotations, v.value_owned())) + } else { + let _ = self.next(placeholders, read_annotations)?; + continue; + } + } + (Op::Misc(0), _) => Err(err("Invalid format A encoding")), + (Op::Misc(1), arg) => { + let n = self.wirelength(arg)?; + match placeholders.and_then(|m| m.get(&n)) { + Some(v) => Ok(v.clone().wrap()), + None => Err(err("Invalid Preserves placeholder")), + } + } + (Op::Misc(2), arg) => match Op::try_from(arg)? { + Op::Atom(minor) => { + let mut bs = BytesMut::with_capacity(256); + while !self.peekend()? { + match self.next(placeholders, false)?.value().as_bytestring() { + Some(chunk) => bs.extend_from_slice(chunk), + None => return Err(err("Unexpected non-binary chunk")), + } + } + decodebinary(minor, bs) + } + Op::Compound(minor) => decodecompound(minor, DelimitedStream { + reader: ConfiguredBinaryReader { + reader: self, + placeholders, + read_annotations, + }, + }), + _ => Err(err("Invalid format C start byte")), + } + (Op::Misc(3), arg) => { + let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) }; + Ok(Value::from(n).wrap()) + } + (Op::Misc(_), _) => unreachable!(), + (Op::Atom(minor), arg) => { + let count = self.wirelength(arg)?; + let bs = self.readbytes(count)?; + decodebinary(minor, bs) + } + (Op::Compound(minor), arg) => { + let count = self.wirelength(arg)?; + decodecompound(minor, CountedStream { + reader: ConfiguredBinaryReader { + reader: self, + placeholders, + read_annotations, + }, + count, + }) + } + (Op::Reserved(3), 15) => continue, + (Op::Reserved(_), _) => Err(InvalidOp.into()), } - (Op::Misc(0), 3) => { - let mut bs = [0; 8]; - bs.copy_from_slice(&self.readbytes(8)?); - Ok(Token::Double(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap())))) - } - (Op::Misc(0), 5) => Ok(Token::Annotation), - (Op::Misc(0), _) => Err(err("Invalid format A encoding")), - (Op::Misc(1), arg) => Ok(Token::PlaceholderRef(self.wirelength(arg)?)), - (Op::Misc(2), arg) => match Op::try_from(arg)? { - Op::Atom(minor) => Ok(Token::OpenAtom(minor)), - Op::Compound(minor) => Ok(Token::OpenCompound(minor, None)), - _ => Err(err("Invalid format C start byte")), - } - (Op::Misc(3), arg) => { - let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) }; - Ok(Token::SignedInteger(BigInt::from(n))) - } - (Op::Misc(_), _) => unreachable!(), - (Op::Atom(minor), arg) => { - let count = self.wirelength(arg)?; - let bs = self.readbytes(count)?; - decodebinary(minor, bs) - } - (Op::Compound(minor), arg) => - Ok(Token::OpenCompound(minor, Some(self.wirelength(arg)?))), - (Op::Reserved(3), 15) => Ok(Token::Noop), - (Op::Reserved(_), _) => Err(InvalidOp.into()), } } @@ -257,12 +366,4 @@ impl<'re, 'a, R: Read> Reader for BinaryReader<'a, R> { PeekState::Buffer(ref b) => Ok(b.remaining()), } } - - fn at_atom_end(&mut self, _minor: AtomMinor) -> Result { - self.peekend() - } - - fn at_compound_end(&mut self, _paren: CompoundMinor) -> Result { - self.peekend() - } }