// preserves/implementations/rust/src/value/decoder.rs
//
// 310 lines
// 9.8 KiB
// Rust

use std::io::{Read, ErrorKind};
use std::convert::TryInto;
use std::convert::TryFrom;
use crate::value::{Value, NestedValue, Set, Map, Domain};
use num::bigint::BigInt;
use crate::value::constants::{Op, InvalidOp, AtomMinor, CompoundMinor};
/// Shorthand for results produced by this module: the error side is always
/// this module's `Error`.
pub type Result<V> = std::result::Result<V, Error>;
/// Failures that can occur while decoding.
///
/// The type implements [`std::fmt::Display`] and [`std::error::Error`] so it
/// can be printed, boxed as `dyn Error`, and propagated with `?` by callers
/// that use generic error types.
#[derive(Debug)]
pub enum Error {
    /// The underlying reader reported an I/O error.
    Io(std::io::Error),
    /// The input was readable but malformed; the payload is a fixed
    /// description of what was wrong.
    Syntax(&'static str),
    /// The input ended where more bytes were required.
    Eof,
}

impl std::fmt::Display for Error {
    /// Writes the same text that the conversion into `std::io::Error` uses
    /// for each variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::Io(e) => std::fmt::Display::fmt(e, f),
            Error::Syntax(msg) => f.write_str(msg),
            Error::Eof => f.write_str("Unexpected EOF"),
        }
    }
}

impl std::error::Error for Error {
    /// Exposes the wrapped I/O error (if any) as the underlying cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Io(e) => Some(e),
            Error::Syntax(_) | Error::Eof => None,
        }
    }
}
impl From<Error> for std::io::Error {
fn from(v: Error) -> Self {
match v {
Error::Io(e) => e,
Error::Syntax(msg) => Self::new(ErrorKind::InvalidData, msg),
Error::Eof => Self::new(ErrorKind::UnexpectedEof, "Unexpected EOF"),
}
}
}
impl From<std::io::Error> for Error {
fn from(v: std::io::Error) -> Self {
Error::Io(v)
}
}
impl From<std::str::Utf8Error> for Error {
fn from(_v: std::str::Utf8Error) -> Self {
Error::Syntax("Invalid UTF-8")
}
}
impl From<InvalidOp> for Error {
fn from(_v: InvalidOp) -> Self {
Error::Syntax("Invalid lead byte major value")
}
}
impl Error {
pub fn is_io(&self) -> bool { if let Error::Io(_) = *self { true } else { false } }
pub fn is_syntax(&self) -> bool { if let Error::Syntax(_) = *self { true } else { false } }
pub fn is_eof(&self) -> bool { if let Error::Eof = *self { true } else { false } }
}
/// Table mapping a placeholder number to the value it stands for. A decoder
/// may be given a reference to one of these at construction time.
pub type DecodePlaceholderMap<N, D> = Map<usize, Value<N, D>>;
/// State of the decoder's one-byte lookahead slot.
#[derive(PartialEq, Eq)]
enum PeekState {
// Nothing has been read ahead; the next peek must pull a byte from the reader.
Empty,
// The reader returned zero bytes when the slot was last filled.
Eof,
// A byte has been read from the reader but not yet consumed.
Full(u8),
}
/// Streaming decoder that pulls bytes from a borrowed reader and produces
/// values of type `N`.
pub struct Decoder<'a, 'b, R: Read, N: NestedValue<D>, D: Domain> {
// Byte source; borrowed mutably for the decoder's lifetime.
read: &'a mut R,
// Running count of bytes pulled from `read`.
index: usize,
// One-byte lookahead slot.
// NOTE(review): boxing this small enum looks unnecessary — confirm before changing.
buf: Box<PeekState>,
// Optional table used to resolve placeholder ops; `None` means every
// placeholder is rejected.
placeholders: Option<&'b DecodePlaceholderMap<N, D>>,
// When false, annotations in the input are read and thrown away.
read_annotations: bool,
}
/// Interprets `bs` as a big-endian signed integer by delegating to
/// `BigInt::from_signed_bytes_be`.
fn decodeint(bs: &[u8]) -> BigInt {
BigInt::from_signed_bytes_be(bs)
}
impl<'a, 'b, R: Read, N: NestedValue<D>, D: Domain> Decoder<'a, 'b, R, N, D> {
pub fn new(read: &'a mut R, placeholders: Option<&'b DecodePlaceholderMap<N, D>>) -> Self {
Decoder{
read,
index: 0,
buf: Box::new(PeekState::Empty),
placeholders,
read_annotations: true,
}
}
pub fn set_read_annotations(&mut self, read_annotations: bool) {
self.read_annotations = read_annotations
}
fn prime(&mut self) -> Result<()> {
match *self.buf {
PeekState::Empty => {
let bs = &mut [0];
*self.buf = match self.read.read(bs)? {
0 => PeekState::Eof,
1 => {
self.index += 1;
PeekState::Full(bs[0])
},
_ => panic!("buffer overrun")
}
}
PeekState::Eof => (),
PeekState::Full(_) => (),
}
Ok(())
}
pub fn skip(&mut self) -> Result<()> {
self.prime()?;
*self.buf = PeekState::Empty;
Ok(())
}
pub fn peek(&mut self) -> Result<u8> {
self.prime()?;
match *self.buf {
PeekState::Full(v) => Ok(v),
PeekState::Eof => Err(Error::Eof),
PeekState::Empty => unreachable!()
}
}
pub fn read(&mut self) -> Result<u8> {
let v = self.peek()?;
self.skip()?;
Ok(v)
}
pub fn readbytes(&mut self, n: usize) -> Result<Vec<u8>> {
if *self.buf != PeekState::Empty {
unreachable!();
}
let mut bs = vec![0; n];
match self.read.read_exact(&mut bs) {
Ok(()) => {
self.index += n;
Ok(bs)
}
Err(e) =>
if e.kind() == std::io::ErrorKind::UnexpectedEof {
Err(Error::Eof)
} else {
Err(Error::from(e))
}
}
}
pub fn readvalues(&mut self, mut count: usize) -> Result<Vec<N>> {
let mut pieces: Vec<N> = Vec::with_capacity(count);
while count > 0 {
pieces.push(self.next()?);
count -= 1;
}
Ok(pieces)
}
pub fn decodeop(b: u8) -> Result<(Op, u8)> {
Ok((Op::try_from(b >> 4)?, b & 15))
}
pub fn nextop(&mut self) -> Result<(Op, u8)> {
Self::decodeop(self.read()?)
}
pub fn varint(&mut self) -> Result<usize> {
let v = self.read()?;
if v < 128 {
Ok(usize::from(v))
} else {
Ok(self.varint()? * 128 + usize::from(v - 128))
}
}
pub fn wirelength(&mut self, arg: u8) -> Result<usize> {
if arg < 15 {
Ok(usize::from(arg))
} else {
self.varint()
}
}
pub fn peekend(&mut self) -> Result<bool> {
if self.peek()? == 4 {
self.skip()?;
Ok(true)
} else {
Ok(false)
}
}
pub fn decodebinary(minor: AtomMinor, bs: Vec<u8>) -> Result<N> {
Ok(match minor {
AtomMinor::SignedInteger => Value::from(decodeint(&bs)).wrap(),
AtomMinor::String => Value::from(std::str::from_utf8(&bs)?).wrap(),
AtomMinor::ByteString => Value::from(&bs as &[u8]).wrap(),
AtomMinor::Symbol => Value::symbol(std::str::from_utf8(&bs)?).wrap(),
})
}
pub fn decodecompound(minor: CompoundMinor, mut pieces: Vec<N>) -> Result<N> {
match minor {
CompoundMinor::Record =>
if pieces.is_empty() {
Err(Error::Syntax("Too few elements in encoded record"))
} else {
let label = pieces.remove(0);
Ok(Value::record(label, pieces).wrap())
},
CompoundMinor::Sequence => Ok(Value::Sequence(pieces).wrap()),
CompoundMinor::Set => {
let mut s = Set::new();
while let Some(v) = pieces.pop() {
s.insert(v);
}
Ok(Value::Set(s).wrap())
}
CompoundMinor::Dictionary =>
if pieces.len() % 2 != 0 {
Err(Error::Syntax("Missing dictionary value"))
} else {
let mut d = Map::new();
while let Some(v) = pieces.pop() {
let k = pieces.pop().unwrap();
d.insert(k, v);
}
Ok(Value::Dictionary(d).wrap())
},
}
}
pub fn binarystream(&mut self, minor: AtomMinor) -> Result<N> {
let mut bs: Vec<u8> = Vec::new();
while !self.peekend()? {
match self.next()?.value().as_bytestring() {
Some(chunk) => bs.extend_from_slice(chunk),
None => return Err(Error::Syntax("Unexpected non-binary chunk")),
}
}
Self::decodebinary(minor, bs)
}
pub fn valuestream(&mut self, minor: CompoundMinor) -> Result<N> {
let mut pieces: Vec<N> = Vec::new();
while !self.peekend()? {
pieces.push(self.next()?);
}
Self::decodecompound(minor, pieces)
}
pub fn next(&mut self) -> Result<N> {
loop {
return match self.nextop()? {
(Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
(Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
(Op::Misc(0), 2) => {
let bs: &[u8] = &self.readbytes(4)?;
Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
}
(Op::Misc(0), 3) => {
let bs: &[u8] = &self.readbytes(8)?;
Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
}
(Op::Misc(0), 5) => {
if self.read_annotations {
let mut annotations = vec![self.next()?];
while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
self.skip()?;
annotations.push(self.next()?);
}
let v = self.next()?;
assert!(v.annotations().is_empty());
Ok(N::wrap_ann(annotations, v.value_owned()))
} else {
self.next()?;
self.next()
}
}
(Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
(Op::Misc(1), arg) => {
let n = self.wirelength(arg)?;
match self.placeholders.and_then(|m| m.get(&n)) {
Some(v) => Ok(v.clone().wrap()),
None => Err(Error::Syntax("Invalid Preserves placeholder")),
}
}
(Op::Misc(2), arg) => {
match Op::try_from(arg)? {
Op::Atom(minor) => self.binarystream(minor),
Op::Compound(minor) => self.valuestream(minor),
_ => Err(Error::Syntax("Invalid format C start byte")),
}
}
(Op::Misc(3), arg) => {
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
Ok(Value::from(n).wrap())
}
(Op::Misc(_), _) => unreachable!(),
(Op::Atom(minor), arg) => {
let count = self.wirelength(arg)?;
Self::decodebinary(minor, self.readbytes(count)?)
}
(Op::Compound(minor), arg) => {
let count = self.wirelength(arg)?;
Self::decodecompound(minor, self.readvalues(count)?)
}
(Op::Reserved(3), 15) => continue,
(Op::Reserved(_), _) => Err(InvalidOp.into()),
}
}
}
}