diff --git a/implementations/rust/src/lib.rs b/implementations/rust/src/lib.rs index c336852..46d58dd 100644 --- a/implementations/rust/src/lib.rs +++ b/implementations/rust/src/lib.rs @@ -172,3 +172,21 @@ mod value_tests { assert_eq!(r, s); } } + +#[cfg(test)] +mod decoder_tests { + use crate::value::codec::Decoder; + + #[test] fn read_123() { + let mut d = Decoder::new(&b"abc"[..], None); + assert_eq!(d.read().ok(), Some(97)); + assert_eq!(d.read().ok(), Some(98)); + assert_eq!(d.read().ok(), Some(99)); + assert!(d.read().err().unwrap().is_eof()) + } + + #[test] fn read_samples() { + let mut d = Decoder::new(std::fs::File::open("../../tests/samples.bin").unwrap(), None); + println!("{:?}", d.next().ok().unwrap()); + } +} diff --git a/implementations/rust/src/value/codec.rs b/implementations/rust/src/value/codec.rs new file mode 100644 index 0000000..a9c0613 --- /dev/null +++ b/implementations/rust/src/value/codec.rs @@ -0,0 +1,267 @@ +use std::io::Read; +use std::convert::TryInto; +use crate::value::value::{Value, AValue, Set, Dictionary}; +use num::bigint::BigInt; + +pub type Result = std::result::Result; + +#[derive(Debug)] +pub enum Error { + Io(std::io::Error), + Syntax(&'static str), + Eof, +} + +impl From for Error { + fn from(v: std::io::Error) -> Self { + Error::Io(v) + } +} + +impl From for Error { + fn from(_v: std::str::Utf8Error) -> Self { + Error::Syntax("Invalid UTF-8") + } +} + +impl Error { + pub fn is_io(&self) -> bool { if let Error::Io(_) = *self { true } else { false } } + pub fn is_syntax(&self) -> bool { if let Error::Syntax(_) = *self { true } else { false } } + pub fn is_eof(&self) -> bool { if let Error::Eof = *self { true } else { false } } +} + +pub type PlaceholderMap = std::collections::BTreeMap; + +pub struct Decoder { + read: R, + index: usize, + buf: Box>>, + placeholders: PlaceholderMap, +} + +impl Decoder { + pub fn new(read: R, placeholders: Option) -> Self { + Decoder{ + read, + index: 0, + buf: Box::new(None), + placeholders: placeholders.unwrap_or(PlaceholderMap::new()) + } + } + + fn prime_if_possible(&mut self) -> Result<()> { + match *self.buf { + None => { + let bs = &mut [0]; + *self.buf = Some(match self.read.read(bs)? { + 0 => None, + 1 => { + self.index = self.index + 1; + Some(bs[0]) + }, + _ => panic!("buffer overrun") + }) + } + Some(_) => () + } + Ok(()) + } + + pub fn skip(&mut self) -> Result<()> { + self.prime_if_possible()?; + *self.buf = None; + Ok(()) + } + + pub fn peek(&mut self) -> Result { + self.prime_if_possible()?; + match *self.buf { + Some(Some(v)) => Ok(v), + Some(None) => Err(Error::Eof), + None => panic!() + } + } + + pub fn read(&mut self) -> Result { + let v = self.peek()?; + self.skip()?; + Ok(v) + } + + pub fn readbytes(&mut self, n: usize) -> Result> { + if (*self.buf).is_some() { + panic!(); + } + let mut bs = vec![0; n]; + match self.read.read_exact(&mut bs) { + Ok(()) => Ok(bs), + Err(e) => + if e.kind() == std::io::ErrorKind::UnexpectedEof { + Err(Error::Eof) + } else { + Err(Error::from(e)) + } + } + } + + pub fn readvalues(&mut self, mut count: usize) -> Result> { + let mut pieces: Vec = Vec::new(); + while count > 0 { + pieces.push(self.next()?); + count = count - 1; + } + Ok(pieces) + } + + pub fn nextop(&mut self) -> Result<(u8, u8, u8)> { + let b = self.read()?; + let major = b >> 6; + let minor = (b >> 4) & 3; + let arg = b & 15; + Ok((major, minor, arg)) + } + + pub fn varint(&mut self) -> Result { + let v = self.read()?; + if v < 128 { + Ok(usize::from(v)) + } else { + Ok(self.varint()? * 128 + usize::from(v - 128)) + } + } + + pub fn wirelength(&mut self, arg: u8) -> Result { + if arg < 15 { + Ok(usize::from(arg)) + } else { + self.varint() + } + } + + pub fn peekend(&mut self) -> Result { + if self.peek()? == 4 { + self.skip()?; + Ok(true) + } else { + Ok(false) + } + } + + pub fn decodeint(bs: &[u8]) -> BigInt { + BigInt::from_signed_bytes_be(bs) + } + + pub fn decodebinary(minor: u8, bs: Vec) -> Result { + Ok(match minor { + 0 => Value::from(Self::decodeint(&bs)).wrap(), + 1 => Value::from(std::str::from_utf8(&bs)?).wrap(), + 2 => Value::from(bs).wrap(), + 3 => Value::symbol(std::str::from_utf8(&bs)?).wrap(), + _ => panic!() + }) + } + + pub fn decodecompound(minor: u8, mut pieces: Vec) -> Result { + match minor { + 0 => + if pieces.len() == 0 { + Err(Error::Syntax("Too few elements in encoded record")) + } else { + let label = pieces.remove(0).rc(); + Ok(Value::record(&label, pieces).wrap()) + }, + 1 => Ok(Value::from(pieces).wrap()), + 2 => { + let mut s = Set::new(); + while let Some(v) = pieces.pop() { + s.insert(v); + } + Ok(Value::Set(s).wrap()) + } + 3 => + if pieces.len() % 2 != 0 { + Err(Error::Syntax("Missing dictionary value")) + } else { + let mut d = Dictionary::new(); + while let Some(v) = pieces.pop() { + let k = pieces.pop().unwrap(); + d.insert(k, v); + } + Ok(Value::Dictionary(d).wrap()) + }, + _ => panic!() + } + } + + pub fn binarystream(&mut self, minor: u8) -> Result { + let mut bs: Vec = Vec::new(); + while !self.peekend()? { + match self.next()?.1.as_bytestring() { + Some(chunk) => bs.extend_from_slice(chunk), + None => return Err(Error::Syntax("Unexpected non-binary chunk")), + } + } + Self::decodebinary(minor, bs) + } + + pub fn valuestream(&mut self, minor: u8) -> Result { + let mut pieces: Vec = Vec::new(); + while !self.peekend()? { + pieces.push(self.next()?); + } + Self::decodecompound(minor, pieces) + } + + pub fn next(&mut self) -> Result { + match self.nextop()? { + (0, 0, 0) => Ok(Value::from(false).wrap()), + (0, 0, 1) => Ok(Value::from(true).wrap()), + (0, 0, 2) => { + let bs: &[u8] = &self.readbytes(4)?; + Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()) + } + (0, 0, 3) => { + let bs: &[u8] = &self.readbytes(8)?; + Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()) + } + (0, 0, 5) => { + let a = self.next()?; + let mut v = self.next()?; + v.annotations_mut().push(a); + Ok(v) + } + (0, 0, _) => Err(Error::Syntax("Invalid format A encoding")), + (0, 1, arg) => { + let n = self.wirelength(arg)?; + match self.placeholders.get(&n) { + Some(v) => Ok(v.clone().wrap()), + None => Err(Error::Syntax("Invalid Preserves placeholder")), + } + } + (0, 2, arg) => { + let t = arg >> 2; + let n = arg & 3; + match t { + 1 => self.binarystream(n), + 2 => self.valuestream(n), + _ => Err(Error::Syntax("Invalid format C start byte")), + } + } + (0, 3, arg) => { + let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) }; + Ok(Value::from(n).wrap()) + } + (0, _, _) => panic!(), + (1, minor, arg) => { + let n = self.wirelength(arg)?; + Self::decodebinary(minor, self.readbytes(n)?) + } + (2, minor, arg) => { + let n = self.wirelength(arg)?; + Self::decodecompound(minor, self.readvalues(n)?) + } + (3, _, _) => Err(Error::Syntax("Invalid lead byte (major 3)")), + (_, _, _) => panic!(), + } + } +} diff --git a/implementations/rust/src/value/mod.rs b/implementations/rust/src/value/mod.rs index fc45e21..6ee32f0 100644 --- a/implementations/rust/src/value/mod.rs +++ b/implementations/rust/src/value/mod.rs @@ -1 +1,2 @@ pub mod value; +pub mod codec; diff --git a/implementations/rust/src/value/value.rs b/implementations/rust/src/value/value.rs index 54bdc07..28c7def 100644 --- a/implementations/rust/src/value/value.rs +++ b/implementations/rust/src/value/value.rs @@ -21,26 +21,30 @@ pub enum Value { Symbol(String), Record(Record), Sequence(Vec), - Set(BTreeSet), - Dictionary(BTreeMap), + Set(Set), + Dictionary(Dictionary), } /// An possibly-annotated Value, with annotations (themselves /// possibly-annotated) in order of appearance. #[derive(Clone, Debug)] -pub struct AValue(Vec, Value); +pub struct AValue(pub Vec, pub Value); /// Single-precision IEEE 754 Value #[derive(Clone, Debug)] -pub struct Float(f32); +pub struct Float(pub f32); /// Double-precision IEEE 754 Value #[derive(Clone, Debug)] -pub struct Double(f64); +pub struct Double(pub f64); /// A Record `Value` pub type Record = (Rc, Vec); +pub type Set = BTreeSet; + +pub type Dictionary = BTreeMap; + impl From for Float { fn from(v: f32) -> Self { Float(v) @@ -174,6 +178,8 @@ impl From for Value { fn from(v: String) -> Self { Value::String(v) } } impl From<&[u8]> for Value { fn from(v: &[u8]) -> Self { Value::ByteString(Vec::from(v)) } } impl From> for Value { fn from(v: Vec) -> Self { Value::ByteString(v) } } +impl From> for Value { fn from(v: Vec) -> Self { Value::Sequence(v) } } + //--------------------------------------------------------------------------- impl AValue { @@ -395,7 +401,7 @@ impl Value { self.as_set().is_some() } - pub fn as_set(&self) -> Option<&BTreeSet> { + pub fn as_set(&self) -> Option<&Set> { if let Value::Set(ref s) = *self { Some(s) } else { @@ -403,7 +409,7 @@ impl Value { } } - pub fn as_set_mut(&mut self) -> Option<&mut BTreeSet> { + pub fn as_set_mut(&mut self) -> Option<&mut Set> { if let Value::Set(ref mut s) = *self { Some(s) } else { @@ -415,7 +421,7 @@ impl Value { self.as_dictionary().is_some() } - pub fn as_dictionary(&self) -> Option<&BTreeMap> { + pub fn as_dictionary(&self) -> Option<&Dictionary> { if let Value::Dictionary(ref s) = *self { Some(s) } else { @@ -423,7 +429,7 @@ impl Value { } } - pub fn as_dictionary_mut(&mut self) -> Option<&mut BTreeMap> { + pub fn as_dictionary_mut(&mut self) -> Option<&mut Dictionary> { if let Value::Dictionary(ref mut s) = *self { Some(s) } else {