diff --git a/implementations/rust/src/lib.rs b/implementations/rust/src/lib.rs index 9681839..ded2c0d 100644 --- a/implementations/rust/src/lib.rs +++ b/implementations/rust/src/lib.rs @@ -234,23 +234,15 @@ mod value_tests { #[cfg(test)] mod decoder_tests { - use crate::value::Decoder; + use crate::value::{Decoder, BinaryReader, Reader}; use crate::value::{Value, PlainValue, NestedValue}; use super::dom::Dom; - #[test] fn read_123() { - let mut buf = &b"abc"[..]; - let mut d = Decoder::<_, PlainValue, Dom>::new(&mut buf, None); - assert_eq!(d.read().ok(), Some(97)); - assert_eq!(d.read().ok(), Some(98)); - assert_eq!(d.read().ok(), Some(99)); - assert!(d.read().err().unwrap().is_eof()) - } - #[test] fn skip_annotations_noskip() { let mut buf = &b"\x0521"[..]; - let mut d = Decoder::<_, PlainValue, Dom>::new(&mut buf, None); - let v = d.next().unwrap(); + let r = BinaryReader::new(&mut buf); + let mut d = Decoder::<_, PlainValue, Dom>::new(r, None); + let v = d.next_or_err().unwrap(); assert_eq!(v.annotations().len(), 1); assert_eq!(v.annotations()[0], Value::from(2).wrap()); assert_eq!(v.value(), &Value::from(1)); @@ -258,9 +250,10 @@ mod decoder_tests { #[test] fn skip_annotations_skip() { let mut buf = &b"\x0521"[..]; - let mut d = Decoder::<_, PlainValue, Dom>::new(&mut buf, None); + let r = BinaryReader::new(&mut buf); + let mut d = Decoder::<_, PlainValue, Dom>::new(r, None); d.set_read_annotations(false); - let v = d.next().unwrap(); + let v = d.next_or_err().unwrap(); assert_eq!(v.annotations().len(), 0); assert_eq!(v.value(), &Value::from(1)); } @@ -268,25 +261,32 @@ mod decoder_tests { #[test] fn two_values_at_once() { let mut buf = &b"\x81tPing\x81tPong"[..]; assert_eq!(buf.len(), 12); - let mut d = Decoder::<_, PlainValue, Dom>::new(&mut buf, None); - assert_eq!(d.next().unwrap().value(), &Value::simple_record("Ping", vec![])); - assert_eq!(d.next().unwrap().value(), &Value::simple_record("Pong", vec![])); + let r = BinaryReader::new(&mut buf); + let mut d = Decoder::<_, PlainValue, Dom>::new(r, None); + assert_eq!(d.next_or_err().unwrap().value(), &Value::simple_record("Ping", vec![])); + assert_eq!(d.next_or_err().unwrap().value(), &Value::simple_record("Pong", vec![])); assert_eq!(buf.len(), 0); } #[test] fn buf_advanced() { let mut buf = &b"\x81tPing\x81tPong"[..]; assert_eq!(buf.len(), 12); - let mut d = Decoder::<_, PlainValue, Dom>::new(&mut buf, None); - assert_eq!(d.next().unwrap().value(), &Value::simple_record("Ping", vec![])); - assert_eq!(buf.len(), 6); + let mut r = BinaryReader::new(&mut buf); + let mut d = Decoder::<_, PlainValue, Dom>::new(&mut r, None); + assert_eq!(d.next_or_err().unwrap().value(), &Value::simple_record("Ping", vec![])); + assert!(r.buffered_len().unwrap() > 0); + let mut d = Decoder::<_, PlainValue, Dom>::new(&mut r, None); + assert_eq!(d.next_or_err().unwrap().value(), &Value::simple_record("Pong", vec![])); + assert!(r.buffered_len().unwrap() == 0); + assert_eq!(buf.len(), 0); } } #[cfg(test)] mod samples_tests { use crate::symbol::Symbol; - use crate::value::{Codec, Decoder, decoder::Error}; + use crate::value::{Codec, Decoder, BinaryReader}; + use crate::value::reader::is_syntax_error; use crate::value::{Value, PlainValue, Map}; use crate::value::DecodePlaceholderMap; use crate::value::to_value; @@ -316,8 +316,9 @@ mod samples_tests { #[test] fn run() -> std::io::Result<()> { let mut fh = std::fs::File::open("../../tests/samples.bin").unwrap(); - let mut d = Decoder::<_, PlainValue, Dom>::new(&mut fh, None); - let tests: TestCases = from_value(&d.next().unwrap()).unwrap(); + let r = BinaryReader::new(&mut fh); + let mut d = Decoder::<_, PlainValue, Dom>::new(r, None); + let tests: TestCases = from_value(&d.next_or_err().unwrap()).unwrap(); // println!("{:#?}", tests); let codec = Codec::new(tests.decode_placeholders.0); @@ -325,8 +326,8 @@ mod samples_tests { println!("{:?} ==> {:?}", name, case); match case { TestCase::Test(ref bin, ref val) => { - assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val); - assert_eq!(&codec.decode(&mut &bin[..])?, val); + assert_eq!(&codec.decode_all(&mut &codec.encode_bytes(val)?[..])?, &[val.clone()]); + assert_eq!(&codec.decode_all(&mut &bin[..])?, &[val.clone()]); assert_eq!(&codec.encode_bytes(val)?, bin); } TestCase::NondeterministicTest(ref bin, ref val) => { @@ -334,33 +335,32 @@ mod samples_tests { // written so that while strictly // "nondeterministic", the order of keys in // dictionaries follows Preserves order. - assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val); - assert_eq!(&codec.decode(&mut &bin[..])?, val); + assert_eq!(&codec.decode_all(&mut &codec.encode_bytes(val)?[..])?, &[val.clone()]); + assert_eq!(&codec.decode_all(&mut &bin[..])?, &[val.clone()]); assert_eq!(&codec.encode_bytes(val)?, bin); } TestCase::StreamingTest(ref bin, ref val) => { - assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val); - assert_eq!(&codec.decode(&mut &bin[..])?, val); + assert_eq!(&codec.decode_all(&mut &codec.encode_bytes(val)?[..])?, &[val.clone()]); + assert_eq!(&codec.decode_all(&mut &bin[..])?, &[val.clone()]); } TestCase::DecodeTest(ref bin, ref val) => { - assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val); - assert_eq!(&codec.decode(&mut &bin[..])?, val); + assert_eq!(&codec.decode_all(&mut &codec.encode_bytes(val)?[..])?, &[val.clone()]); + assert_eq!(&codec.decode_all(&mut &bin[..])?, &[val.clone()]); } TestCase::ParseError(_) => (), TestCase::ParseShort(_) => (), TestCase::DecodeError(ref bin) => { - match codec.decode(&mut &bin[..]) { + match codec.decode_all(&mut &bin[..]) { Ok(_) => panic!("Unexpected success"), - Err(Error::Syntax(_)) => (), - Err(e) => panic!("Unexpected error {:?}", e), + Err(e) => if is_syntax_error(&e) { + () + } else { + panic!("Unexpected error {:?}", e) + } } } TestCase::DecodeShort(ref bin) => { - match codec.decode(&mut &bin[..]) { - Ok(_) => panic!("Unexpected success"), - Err(Error::Eof) => (), - Err(e) => panic!("Unexpected error {:?}", e), - } + assert_eq!(codec.decode_all(&mut &bin[..])?.len(), 0); } } } diff --git a/implementations/rust/src/value/codec.rs b/implementations/rust/src/value/codec.rs index 55d5ac9..a873b23 100644 --- a/implementations/rust/src/value/codec.rs +++ b/implementations/rust/src/value/codec.rs @@ -1,7 +1,13 @@ -use crate::value::{decoder, encoder, invert_map, NestedValue, Domain}; -use decoder::{Decoder, DecodePlaceholderMap}; -use encoder::{Encoder, EncodePlaceholderMap}; use std::io::{Read, Write}; +use super::{ + decoder::{self, Decoder, DecodePlaceholderMap}, + encoder::{Encoder, EncodePlaceholderMap}, + invert_map, + reader::{Reader, BinaryReader, Error}, + value::{ + NestedValue, Domain, + }, +}; pub struct Codec, D: Domain> { pub decode_placeholders: Option>, @@ -18,16 +24,21 @@ impl, D: Domain> Codec { Codec { decode_placeholders: None, encode_placeholders: None } } - pub fn decoder<'a, 'r, R: Read>(&'a self, read: &'r mut R) -> Decoder<'r, 'a, R, N, D> { - Decoder::new(read, self.decode_placeholders.as_ref()) + pub fn decoder<'a, 'r, R: Read>(&'a self, read: &'r mut R) -> Decoder<'a, BinaryReader<'r, R>, N, D> { + Decoder::new(BinaryReader::new(read), self.decode_placeholders.as_ref()) } pub fn encoder<'a, 'w, W: Write>(&'a self, write: &'w mut W) -> Encoder<'w, 'a, W, N, D> { Encoder::new(write, self.encode_placeholders.as_ref()) } - pub fn decode<'r, R: Read>(&self, read: &'r mut R) -> decoder::Result { - self.decoder(read).next() + pub fn decode_all<'r, R: Read>(&self, read: &'r mut R) -> decoder::Result> { + let mut r = BinaryReader::new(read); + let vs: Vec = Decoder::new(&mut r, self.decode_placeholders.as_ref()).collect::>>()?; + match r.buffered_len()? { + 0 => Ok(vs), + count => Err(Error::new(std::io::ErrorKind::Other, format!("{} trailing bytes", count))) + } } pub fn encode_bytes(&self, v: &N) -> std::io::Result> { diff --git a/implementations/rust/src/value/constants.rs b/implementations/rust/src/value/constants.rs index c304f72..5cfa039 100644 --- a/implementations/rust/src/value/constants.rs +++ b/implementations/rust/src/value/constants.rs @@ -12,6 +12,13 @@ pub enum Op { #[derive(Debug, PartialEq, Eq)] pub struct InvalidOp; +impl From for std::io::Error { + fn from(_v: InvalidOp) -> Self { + std::io::Error::new(std::io::ErrorKind::InvalidData, + "Invalid Preserves lead byte major value") + } +} + impl TryFrom for Op { type Error = InvalidOp; fn try_from(v: u8) -> Result { @@ -36,7 +43,7 @@ impl From for u8 { } } -#[derive(Debug, TryFromPrimitive, PartialEq, Eq)] +#[derive(Debug, TryFromPrimitive, PartialEq, Eq, Clone, Copy)] #[repr(u8)] pub enum AtomMinor { SignedInteger = 0, @@ -45,7 +52,7 @@ pub enum AtomMinor { Symbol = 3, } -#[derive(Debug, TryFromPrimitive, PartialEq, Eq)] +#[derive(Debug, TryFromPrimitive, PartialEq, Eq, Clone, Copy)] #[repr(u8)] pub enum CompoundMinor { Record = 0, diff --git a/implementations/rust/src/value/decoder.rs b/implementations/rust/src/value/decoder.rs index 795c2a6..1efe0ba 100644 --- a/implementations/rust/src/value/decoder.rs +++ b/implementations/rust/src/value/decoder.rs @@ -1,109 +1,24 @@ -use std::io::{Read, ErrorKind}; -use std::convert::TryInto; -use std::convert::TryFrom; -use crate::value::{Value, NestedValue, Set, Map, Domain}; -use num::bigint::BigInt; -use crate::value::constants::{Op, InvalidOp, AtomMinor, CompoundMinor}; +use bytes::BytesMut; +use super::reader::{Reader, Token, err, is_eof_error, decodebinary}; +use super::value::{Value, NestedValue, Set, Map, Domain}; +use super::constants::{AtomMinor, CompoundMinor}; -pub type Result = std::result::Result; - -#[derive(Debug)] -pub enum Error { - Io(std::io::Error), - Syntax(&'static str), - Eof, -} - -impl From for std::io::Error { - fn from(v: Error) -> Self { - match v { - Error::Io(e) => e, - Error::Syntax(msg) => Self::new(ErrorKind::InvalidData, msg), - Error::Eof => Self::new(ErrorKind::UnexpectedEof, "Unexpected EOF"), - } - } -} - -impl From for Error { - fn from(v: std::io::Error) -> Self { - Error::Io(v) - } -} - -impl From for Error { - fn from(_v: std::str::Utf8Error) -> Self { - Error::Syntax("Invalid UTF-8") - } -} - -impl From for Error { - fn from(_v: InvalidOp) -> Self { - Error::Syntax("Invalid lead byte major value") - } -} - -impl Error { - pub fn is_io(&self) -> bool { if let Error::Io(_) = *self { true } else { false } } - pub fn is_syntax(&self) -> bool { if let Error::Syntax(_) = *self { true } else { false } } - pub fn is_eof(&self) -> bool { if let Error::Eof = *self { true } else { false } } -} +pub use super::reader::{Error, Result}; pub type DecodePlaceholderMap = Map>; -#[derive(PartialEq, Eq)] -enum PeekState { - Empty, - Eof, - Full(u8), -} - -pub struct Decoder<'a, 'b, R: Read, N: NestedValue, D: Domain> { - read: &'a mut R, - index: usize, - buf: Box, - placeholders: Option<&'b DecodePlaceholderMap>, +pub struct Decoder<'a, R: Reader, N: NestedValue, D: Domain> { + pub read: R, + placeholders: Option<&'a DecodePlaceholderMap>, read_annotations: bool, } -struct CountedStream<'de, 'a, 'b, R: Read, N: NestedValue, D: Domain> { - count: usize, - decoder: &'de mut Decoder<'a, 'b, R, N, D>, -} - -impl<'de, 'a, 'b, R: Read, N: NestedValue, D: Domain> Iterator for CountedStream<'de, 'a, 'b, R, N, D> { - type Item = Result; - fn next(&mut self) -> Option { - if self.count == 0 { return None } - self.count -= 1; - Some(self.decoder.next()) - } -} - -struct DelimitedStream<'de, 'a, 'b, R: Read, N: NestedValue, D: Domain> { - decoder: &'de mut Decoder<'a, 'b, R, N, D>, -} - -impl<'de, 'a, 'b, R: Read, N: NestedValue, D: Domain> Iterator for DelimitedStream<'de, 'a, 'b, R, N, D> { - type Item = Result; - fn next(&mut self) -> Option { - match self.decoder.peekend() { - Err(e) => Some(Err(e)), - Ok(true) => None, - Ok(false) => Some(self.decoder.next()), - } - } -} - -fn decodeint(bs: &[u8]) -> BigInt { - BigInt::from_signed_bytes_be(bs) -} - -impl<'a, 'b, R: Read, N: NestedValue, D: Domain> Decoder<'a, 'b, R, N, D> { - pub fn new(read: &'a mut R, placeholders: Option<&'b DecodePlaceholderMap>) -> Self { +impl<'a, R: Reader, N: NestedValue, D: Domain> Decoder<'a, R, N, D> { + pub fn new(read: R, placeholders: Option<&'a DecodePlaceholderMap>) -> + Self + { Decoder{ read, - index: 0, - buf: Box::new(PeekState::Empty), placeholders, read_annotations: true, } @@ -113,218 +28,142 @@ impl<'a, 'b, R: Read, N: NestedValue, D: Domain> Decoder<'a, 'b, R, N, D> { self.read_annotations = read_annotations } - fn prime(&mut self) -> Result<()> { - match *self.buf { - PeekState::Empty => { - let bs = &mut [0]; - *self.buf = match self.read.read(bs)? { - 0 => PeekState::Eof, - 1 => { - self.index += 1; - PeekState::Full(bs[0]) - }, - _ => panic!("buffer overrun") + pub fn next_or_err(&mut self) -> Result { + let t = self.read.next_token()?; + self.next_inner(t) + } + + pub fn next_inner(&mut self, mut token: Token) -> Result { + loop { + return match token { + Token::Annotation => + if self.read_annotations { + let mut annotations = vec![self.next_or_err()?]; + loop { + match self.read.next_token()? { + Token::Annotation => + annotations.push(self.next_or_err()?), + other => { + token = other; + break; + } + } + } + let v = self.next_inner(token)?; + assert!(v.annotations().is_empty()); + Ok(N::wrap_ann(annotations, v.value_owned())) + } else { + self.next_or_err()?; + token = self.read.next_token()?; + continue; + } + Token::PlaceholderRef(n) => + match self.placeholders.and_then(|m| m.get(&n)) { + Some(v) => Ok(v.clone().wrap()), + None => Err(err("Invalid Preserves placeholder")), + } + Token::Noop => { + token = self.read.next_token()?; + continue; } + + Token::Boolean(b) => Ok(Value::from(b).wrap()), + Token::Float(f) => Ok(Value::from(f).wrap()), + Token::Double(d) => Ok(Value::from(d).wrap()), + Token::SignedInteger(i) => Ok(Value::from(i).wrap()), + Token::String(s) => Ok(Value::from(s).wrap()), + Token::ByteString(bs) => Ok(Value::ByteString(bs.to_vec()).wrap()), + Token::Symbol(s) => Ok(Value::symbol(&s).wrap()), + + Token::OpenAtom(minor) => self.binarystream(minor), + Token::CloseAtom(minor) => Err(err(&format!("Unexpected {:?} close", minor))), + + Token::OpenCompound(paren, mut limit) => self.decodecompound(paren, &mut limit), + Token::CloseCompound(paren) => Err(err(&format!("Unexpected {:?} close", paren))), } - PeekState::Eof => (), - PeekState::Full(_) => (), - } - Ok(()) - } - - pub fn skip(&mut self) -> Result<()> { - self.prime()?; - *self.buf = PeekState::Empty; - Ok(()) - } - - pub fn peek(&mut self) -> Result { - self.prime()?; - match *self.buf { - PeekState::Full(v) => Ok(v), - PeekState::Eof => Err(Error::Eof), - PeekState::Empty => unreachable!() } } - pub fn read(&mut self) -> Result { - let v = self.peek()?; - self.skip()?; - Ok(v) - } - - pub fn readbytes(&mut self, bs: &mut [u8]) -> Result<()> { - if *self.buf != PeekState::Empty { - unreachable!(); - } - match self.read.read_exact(bs) { - Ok(()) => { - self.index += bs.len(); - Ok(()) + pub fn binarystream(&mut self, minor: AtomMinor) -> Result { + let mut bs = BytesMut::with_capacity(256); + while !self.read.at_atom_end(minor)? { + match self.next_or_err()?.value().as_bytestring() { + Some(chunk) => bs.extend_from_slice(chunk), + None => return Err(err("Unexpected non-binary chunk")), } - Err(e) => - if e.kind() == std::io::ErrorKind::UnexpectedEof { - Err(Error::Eof) - } else { - Err(Error::from(e)) - } } + // We know it'll be a SignedInteger, String, ByteString, or + // Symbol, so the recursion is safe. + self.next_inner(decodebinary(minor, bs)?) } - pub fn decodeop(b: u8) -> Result<(Op, u8)> { - Ok((Op::try_from(b >> 4)?, b & 15)) - } - - pub fn nextop(&mut self) -> Result<(Op, u8)> { - Self::decodeop(self.read()?) - } - - pub fn varint(&mut self) -> Result { - let v = self.read()?; - if v < 128 { - Ok(usize::from(v)) - } else { - Ok(self.varint()? * 128 + usize::from(v - 128)) - } - } - - pub fn wirelength(&mut self, arg: u8) -> Result { - if arg < 15 { - Ok(usize::from(arg)) - } else { - self.varint() - } - } - - pub fn peekend(&mut self) -> Result { - if self.peek()? == 4 { - self.skip()?; - Ok(true) - } else { - Ok(false) - } - } - - pub fn decodebinary(minor: AtomMinor, bs: Vec) -> Result { - Ok(match minor { - AtomMinor::SignedInteger => Value::from(decodeint(&bs)).wrap(), - AtomMinor::String => Value::from(std::str::from_utf8(&bs)?).wrap(), - AtomMinor::ByteString => Value::from(&bs as &[u8]).wrap(), - AtomMinor::Symbol => Value::symbol(std::str::from_utf8(&bs)?).wrap(), - }) - } - - pub fn decodecompound>>(minor: CompoundMinor, mut pieces: I) -> + pub fn decodecompound(&mut self, paren: CompoundMinor, limit: &mut Option) -> Result { - match minor { + match paren { CompoundMinor::Record => - match pieces.next() { - None => Err(Error::Syntax("Too few elements in encoded record")), + match I(self, paren, limit).next() { + None => Err(err("Too few elements in encoded record")), Some(labelres) => { let label = labelres?; - Ok(Value::record(label, pieces.collect::>>()?).wrap()) + Ok(Value::record(label, I(self, paren, limit).collect::>>()?).wrap()) } - }, + } CompoundMinor::Sequence => { - Ok(Value::Sequence(pieces.collect::>>()?).wrap()) + Ok(Value::Sequence(I(self, paren, limit).collect::>>()?).wrap()) } CompoundMinor::Set => { let mut s = Set::new(); - for res in pieces { s.insert(res?); } + for res in I(self, paren, limit) { s.insert(res?); } Ok(Value::Set(s).wrap()) } CompoundMinor::Dictionary => { let mut d = Map::new(); - while let Some(kres) = pieces.next() { + while let Some(kres) = I(self, paren, limit).next() { let k = kres?; - match pieces.next() { + match I(self, paren, limit).next() { Some(vres) => { d.insert(k, vres?); } - None => return Err(Error::Syntax("Missing dictionary value")), + None => return Err(err("Missing dictionary value")), } } Ok(Value::Dictionary(d).wrap()) } } } +} - pub fn binarystream(&mut self, minor: AtomMinor) -> Result { - let mut bs: Vec = Vec::new(); - while !self.peekend()? { - match self.next()?.value().as_bytestring() { - Some(chunk) => bs.extend_from_slice(chunk), - None => return Err(Error::Syntax("Unexpected non-binary chunk")), - } +impl<'a, R: Reader, N: NestedValue, D: Domain> std::iter::Iterator for Decoder<'a, R, N, D> { + type Item = Result; + fn next(&mut self) -> Option { + match self.next_or_err() { + Err(e) if is_eof_error(&e) => None, + other => Some(other) } - Self::decodebinary(minor, bs) } +} - pub fn valuestream(&mut self, minor: CompoundMinor) -> Result { - Self::decodecompound(minor, DelimitedStream { decoder: self }) - } +struct I<'f, 'a, R: Reader, N: NestedValue, D: Domain>( + &'f mut Decoder<'a, R, N, D>, + CompoundMinor, + &'f mut Option, +); - pub fn next(&mut self) -> Result { - loop { - return match self.nextop()? { - (Op::Misc(0), 0) => Ok(Value::from(false).wrap()), - (Op::Misc(0), 1) => Ok(Value::from(true).wrap()), - (Op::Misc(0), 2) => { - let mut bs: [u8; 4] = Default::default(); - self.readbytes(&mut bs)?; - Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()) +impl<'f, 'a, R: Reader, N: NestedValue, D: Domain> Iterator for I<'f, 'a, R, N, D> { + type Item = Result; + fn next(&mut self) -> Option { + let I(d, paren, count) = self; + match count { + Some(0) => None, + Some(n) => { + *self.2 = Some(*n - 1); + Some(d.next_or_err()) + }, + None => { + match d.read.at_compound_end(*paren) { + Ok(true) => None, + Ok(false) => Some(d.next_or_err()), + Err(e) => Some(Err(e)), } - (Op::Misc(0), 3) => { - let mut bs: [u8; 8] = Default::default(); - self.readbytes(&mut bs)?; - Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()) - } - (Op::Misc(0), 5) => { - if self.read_annotations { - let mut annotations = vec![self.next()?]; - while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) { - self.skip()?; - annotations.push(self.next()?); - } - let v = self.next()?; - assert!(v.annotations().is_empty()); - Ok(N::wrap_ann(annotations, v.value_owned())) - } else { - self.next()?; - self.next() - } - } - (Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")), - (Op::Misc(1), arg) => { - let n = self.wirelength(arg)?; - match self.placeholders.and_then(|m| m.get(&n)) { - Some(v) => Ok(v.clone().wrap()), - None => Err(Error::Syntax("Invalid Preserves placeholder")), - } - } - (Op::Misc(2), arg) => { - match Op::try_from(arg)? { - Op::Atom(minor) => self.binarystream(minor), - Op::Compound(minor) => self.valuestream(minor), - _ => Err(Error::Syntax("Invalid format C start byte")), - } - } - (Op::Misc(3), arg) => { - let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) }; - Ok(Value::from(n).wrap()) - } - (Op::Misc(_), _) => unreachable!(), - (Op::Atom(minor), arg) => { - let count = self.wirelength(arg)?; - let mut bs = vec![0; count]; - self.readbytes(&mut bs)?; - Self::decodebinary(minor, bs) - } - (Op::Compound(minor), arg) => { - let count = self.wirelength(arg)?; - Self::decodecompound(minor, CountedStream { count, decoder: self }) - } - (Op::Reserved(3), 15) => continue, - (Op::Reserved(_), _) => Err(InvalidOp.into()), } } } diff --git a/implementations/rust/src/value/mod.rs b/implementations/rust/src/value/mod.rs index 0501055..622dceb 100644 --- a/implementations/rust/src/value/mod.rs +++ b/implementations/rust/src/value/mod.rs @@ -4,6 +4,7 @@ pub mod de; pub mod decoder; pub mod encoder; pub mod error; +pub mod reader; pub mod ser; pub mod value; pub mod writer; @@ -15,6 +16,10 @@ pub use decoder::DecodePlaceholderMap; pub use decoder::Decoder; pub use encoder::EncodePlaceholderMap; pub use encoder::Encoder; +pub use reader::BinaryReader; +pub use reader::Reader; +pub use reader::is_eof_error; +pub use reader::is_syntax_error; pub use ser::Serializer; pub use ser::to_value; pub use value::Domain; @@ -26,6 +31,7 @@ pub use value::RcValue; pub use value::ArcValue; pub use value::Set; pub use value::Map; +pub use writer::Writer; pub fn invert_map(m: &Map) -> Map where A: Clone, B: Clone + Ord diff --git a/implementations/rust/src/value/reader.rs b/implementations/rust/src/value/reader.rs new file mode 100644 index 0000000..c4b7dbf --- /dev/null +++ b/implementations/rust/src/value/reader.rs @@ -0,0 +1,268 @@ +use bytes::{Buf, BufMut, BytesMut}; +use num::bigint::BigInt; +use std::convert::TryFrom; +use std::convert::TryInto; +use std::io::Read; +use super::constants::{Op, InvalidOp, AtomMinor, CompoundMinor}; + +pub type Error = std::io::Error; +pub type Result = std::result::Result; + +#[derive(Debug)] +enum PeekState { + Eof, + Buffer(BytesMut), +} + +#[derive(Debug, Clone)] +pub enum Token { + Annotation, + PlaceholderRef(usize), + Noop, + + Boolean(bool), + Float(f32), + Double(f64), + SignedInteger(BigInt), + String(String), + ByteString(BytesMut), + Symbol(String), + + OpenAtom(AtomMinor), + CloseAtom(AtomMinor), + + OpenCompound(CompoundMinor, Option), + CloseCompound(CompoundMinor), +} + +pub trait Reader { + fn next_token(&mut self) -> Result; + fn buffered_len(&mut self) -> Result; + fn at_atom_end(&mut self, minor: AtomMinor) -> Result; + fn at_compound_end(&mut self, paren: CompoundMinor) -> Result; +} + +pub struct BinaryReader<'a, R: Read> { + read: &'a mut R, + buf: PeekState, + chunksize: usize, +} + +pub fn decodeop(b: u8) -> Result<(Op, u8)> { + Ok((Op::try_from(b >> 4)?, b & 15)) +} + +pub fn decodeint(bs: &[u8]) -> BigInt { + BigInt::from_signed_bytes_be(bs) +} + +pub fn decodestr(bs: &[u8]) -> Result<&str> { + std::str::from_utf8(bs).map_err(|_| err("Invalid UTF-8")) +} + +pub fn decodebinary(minor: AtomMinor, bs: BytesMut) -> Result { + match minor { + AtomMinor::SignedInteger => Ok(Token::SignedInteger(decodeint(&bs))), + AtomMinor::String => Ok(Token::String(decodestr(&bs)?.into())), + AtomMinor::ByteString => Ok(Token::ByteString(bs)), + AtomMinor::Symbol => Ok(Token::Symbol(decodestr(&bs)?.into())), + } +} + +pub fn eof() -> Error { + Error::new(std::io::ErrorKind::UnexpectedEof, "EOF") +} + +pub fn err(s: &str) -> Error { + Error::new(std::io::ErrorKind::InvalidData, s) +} + +pub fn is_syntax_error(e: &Error) -> bool { + match e.kind() { + std::io::ErrorKind::InvalidData => true, + _ => false, + } +} + +pub fn is_eof_error(e: &Error) -> bool { + match e.kind() { + std::io::ErrorKind::UnexpectedEof => true, + _ => false, + } +} + +fn read_buffer(buf: &mut BytesMut, count: usize) -> &mut [u8] { + buf.reserve(count); + unsafe { + let m = &mut buf.bytes_mut()[..count]; + core::ptr::write_bytes(m.as_mut_ptr(), 0, count); + &mut *(m as *mut [core::mem::MaybeUninit] as *mut [u8]) + } +} + +impl<'a, R: Read> BinaryReader<'a, R> { + pub fn new(read: &'a mut R) -> Self { + BinaryReader { + read, + buf: PeekState::Buffer(BytesMut::new()), + chunksize: 1, + } + } + + fn prime(&mut self) -> Result<()> { + if let PeekState::Buffer(ref mut buf) = self.buf { + if buf.remaining() == 0 { + let nbytes = self.read.read(read_buffer(buf, self.chunksize))?; + if nbytes == 0 { + self.buf = PeekState::Eof; + } else { + unsafe { buf.advance_mut(nbytes); } + } + } + } + Ok(()) + } + + pub fn skip(&mut self) -> Result<()> { + self.prime()?; + if let PeekState::Buffer(ref mut buf) = self.buf { + buf.advance(1); + } + Ok(()) + } + + pub fn peek(&mut self) -> Result { + self.prime()?; + match self.buf { + PeekState::Eof => Err(eof()), + PeekState::Buffer(ref mut buf) => Ok(buf[0]), + } + } + + pub fn read(&mut self) -> Result { + let v = self.peek()?; + if let PeekState::Buffer(ref mut buf) = self.buf { + buf.advance(1); + } + Ok(v) + } + + pub fn readbytes(&mut self, req: usize) -> Result { + let buf = match self.buf { + PeekState::Eof => unreachable!(), + PeekState::Buffer(ref mut buf) => buf, + }; + let avail = buf.remaining(); + if avail < req { + let count = req - avail; + self.read.read_exact(read_buffer(buf, count))?; + unsafe { buf.advance_mut(count); } + } + Ok(buf.split_to(req)) + } + + pub fn nextop(&mut self) -> Result<(Op, u8)> { + decodeop(self.read()?) + } + + pub fn varint(&mut self) -> Result { + let v = self.read()?; + if v < 128 { + Ok(usize::from(v)) + } else { + Ok(self.varint()? * 128 + usize::from(v - 128)) + } + } + + pub fn wirelength(&mut self, arg: u8) -> Result { + if arg < 15 { + Ok(usize::from(arg)) + } else { + self.varint() + } + } + + pub fn peekend(&mut self) -> Result { + if self.peek()? == 4 { + self.skip()?; + Ok(true) + } else { + Ok(false) + } + } +} + +impl<'re, 'a, R: Read> Reader for &'re mut BinaryReader<'a, R> { + fn next_token(&mut self) -> Result { + (*self).next_token() + } + + fn buffered_len(&mut self) -> Result { + (*self).buffered_len() + } + + fn at_atom_end(&mut self, minor: AtomMinor) -> Result { + (*self).at_atom_end(minor) + } + + fn at_compound_end(&mut self, paren: CompoundMinor) -> Result { + (*self).at_compound_end(paren) + } +} + +impl<'re, 'a, R: Read> Reader for BinaryReader<'a, R> { + fn next_token(&mut self) -> Result { + match self.nextop()? { + (Op::Misc(0), 0) => Ok(Token::Boolean(false)), + (Op::Misc(0), 1) => Ok(Token::Boolean(true)), + (Op::Misc(0), 2) => { + let mut bs = [0; 4]; + bs.copy_from_slice(&self.readbytes(4)?); + Ok(Token::Float(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap())))) + } + (Op::Misc(0), 3) => { + let mut bs = [0; 8]; + bs.copy_from_slice(&self.readbytes(8)?); + Ok(Token::Double(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap())))) + } + (Op::Misc(0), 5) => Ok(Token::Annotation), + (Op::Misc(0), _) => Err(err("Invalid format A encoding")), + (Op::Misc(1), arg) => Ok(Token::PlaceholderRef(self.wirelength(arg)?)), + (Op::Misc(2), arg) => match Op::try_from(arg)? { + Op::Atom(minor) => Ok(Token::OpenAtom(minor)), + Op::Compound(minor) => Ok(Token::OpenCompound(minor, None)), + _ => Err(err("Invalid format C start byte")), + } + (Op::Misc(3), arg) => { + let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) }; + Ok(Token::SignedInteger(BigInt::from(n))) + } + (Op::Misc(_), _) => unreachable!(), + (Op::Atom(minor), arg) => { + let count = self.wirelength(arg)?; + let bs = self.readbytes(count)?; + decodebinary(minor, bs) + } + (Op::Compound(minor), arg) => + Ok(Token::OpenCompound(minor, Some(self.wirelength(arg)?))), + (Op::Reserved(3), 15) => Ok(Token::Noop), + (Op::Reserved(_), _) => Err(InvalidOp.into()), + } + } + + fn buffered_len(&mut self) -> Result { + self.prime()?; + match self.buf { + PeekState::Eof => Ok(0), + PeekState::Buffer(ref b) => Ok(b.remaining()), + } + } + + fn at_atom_end(&mut self, _minor: AtomMinor) -> Result { + self.peekend() + } + + fn at_compound_end(&mut self, _paren: CompoundMinor) -> Result { + self.peekend() + } +}