Simplify and speed up decoding

This commit is contained in:
Tony Garnock-Jones 2020-05-25 15:00:58 +02:00
parent 6ad1707870
commit f07d2e6a5d
3 changed files with 203 additions and 232 deletions

View File

@ -1,15 +1,11 @@
use bytes::BytesMut;
use super::reader::{Reader, Token, err, is_eof_error, decodebinary};
use super::value::{Value, NestedValue, Set, Map, Domain};
use super::constants::{AtomMinor, CompoundMinor};
use super::reader::{Reader, is_eof_error};
use super::value::{NestedValue, Domain};
pub use super::reader::{Error, Result};
pub use super::reader::{Error, Result, DecodePlaceholderMap};
pub type DecodePlaceholderMap<N, D> = Map<usize, Value<N, D>>;
pub struct Decoder<'a, R: Reader, N: NestedValue<D>, D: Domain> {
pub struct Decoder<'a, R: Reader, N: NestedValue<Dom>, Dom: Domain> {
pub read: R,
placeholders: Option<&'a DecodePlaceholderMap<N, D>>,
placeholders: Option<&'a DecodePlaceholderMap<N, Dom>>,
read_annotations: bool,
}
@ -17,7 +13,7 @@ impl<'a, R: Reader, N: NestedValue<D>, D: Domain> Decoder<'a, R, N, D> {
pub fn new(read: R, placeholders: Option<&'a DecodePlaceholderMap<N, D>>) ->
Self
{
Decoder{
Decoder {
read,
placeholders,
read_annotations: true,
@ -29,106 +25,7 @@ impl<'a, R: Reader, N: NestedValue<D>, D: Domain> Decoder<'a, R, N, D> {
}
pub fn next_or_err(&mut self) -> Result<N> {
let t = self.read.next_token()?;
self.next_inner(t)
}
/// Builds one value starting from `token`, pulling further tokens from
/// `self.read` whenever the current token alone does not complete a value.
///
/// The outer `loop` exists only so that arms which consume a token without
/// producing a value (skipped annotations, no-ops) can fetch the next token
/// and `continue`; every other arm returns immediately.
///
/// Returns an error for an unknown placeholder reference and for stray
/// close tokens, and propagates any error from the underlying reader.
pub fn next_inner(&mut self, mut token: Token) -> Result<N> {
loop {
return match token {
Token::Annotation =>
if self.read_annotations {
// Collect the whole run of consecutive annotations; the first
// non-annotation token that ends the run is kept in `token`.
let mut annotations = vec![self.next_or_err()?];
loop {
match self.read.next_token()? {
Token::Annotation =>
annotations.push(self.next_or_err()?),
other => {
token = other;
break;
}
}
}
let v = self.next_inner(token)?;
// NOTE(review): `token` is never `Token::Annotation` here, but it can be
// `Token::Noop`, whose arm below fetches another token. If that token
// is an annotation, `v` comes back annotated and this assert panics on
// input data instead of returning an `Err` -- confirm and consider an
// error return.
assert!(v.annotations().is_empty());
Ok(N::wrap_ann(annotations, v.value_owned()))
} else {
// Annotations are not wanted: read and discard the annotation value,
// then carry on with whatever token follows it.
self.next_or_err()?;
token = self.read.next_token()?;
continue;
}
Token::PlaceholderRef(n) =>
match self.placeholders.and_then(|m| m.get(&n)) {
Some(v) => Ok(v.clone().wrap()),
None => Err(err("Invalid Preserves placeholder")),
}
Token::Noop => {
// A no-op carries no value; skip it.
token = self.read.next_token()?;
continue;
}
Token::Boolean(b) => Ok(Value::from(b).wrap()),
Token::Float(f) => Ok(Value::from(f).wrap()),
Token::Double(d) => Ok(Value::from(d).wrap()),
Token::SignedInteger(i) => Ok(Value::from(i).wrap()),
Token::String(s) => Ok(Value::from(s).wrap()),
Token::ByteString(bs) => Ok(Value::ByteString(bs.to_vec()).wrap()),
Token::Symbol(s) => Ok(Value::symbol(&s).wrap()),
Token::OpenAtom(minor) => self.binarystream(minor),
Token::CloseAtom(minor) => Err(err(&format!("Unexpected {:?} close", minor))),
Token::OpenCompound(paren, mut limit) => self.decodecompound(paren, &mut limit),
Token::CloseCompound(paren) => Err(err(&format!("Unexpected {:?} close", paren))),
}
}
}
/// Reads a streamed atom: gathers byte-string chunks until the reader
/// reports the end of the atom, then decodes the concatenated bytes as the
/// atom kind named by `minor`.
///
/// Fails with "Unexpected non-binary chunk" if any chunk is not a byte
/// string, and propagates reader and decoding errors.
pub fn binarystream(&mut self, minor: AtomMinor) -> Result<N> {
    let mut collected = BytesMut::with_capacity(256);
    loop {
        if self.read.at_atom_end(minor)? {
            break;
        }
        let piece = self.next_or_err()?;
        match piece.value().as_bytestring() {
            None => return Err(err("Unexpected non-binary chunk")),
            Some(bytes) => collected.extend_from_slice(bytes),
        }
    }
    // `decodebinary` only yields SignedInteger, String, ByteString or Symbol
    // tokens, so handing the result to `next_inner` cannot recurse further.
    let token = decodebinary(minor, collected)?;
    self.next_inner(token)
}
pub fn decodecompound(&mut self, paren: CompoundMinor, limit: &mut Option<usize>) ->
Result<N>
{
match paren {
CompoundMinor::Record =>
match I(self, paren, limit).next() {
None => Err(err("Too few elements in encoded record")),
Some(labelres) => {
let label = labelres?;
Ok(Value::record(label, I(self, paren, limit).collect::<Result<Vec<N>>>()?).wrap())
}
}
CompoundMinor::Sequence => {
Ok(Value::Sequence(I(self, paren, limit).collect::<Result<Vec<N>>>()?).wrap())
}
CompoundMinor::Set => {
let mut s = Set::new();
for res in I(self, paren, limit) { s.insert(res?); }
Ok(Value::Set(s).wrap())
}
CompoundMinor::Dictionary => {
let mut d = Map::new();
while let Some(kres) = I(self, paren, limit).next() {
let k = kres?;
match I(self, paren, limit).next() {
Some(vres) => { d.insert(k, vres?); }
None => return Err(err("Missing dictionary value")),
}
}
Ok(Value::Dictionary(d).wrap())
}
}
self.read.next(self.placeholders, self.read_annotations)
}
}
@ -141,30 +38,3 @@ impl<'a, R: Reader, N: NestedValue<D>, D: Domain> std::iter::Iterator for Decode
}
}
}
/// Private iterator over the elements of one compound value being decoded.
struct I<'f, 'a, R: Reader, N: NestedValue<D>, D: Domain>(
// Decoder that each element is read from.
&'f mut Decoder<'a, R, N, D>,
// Kind of compound being read; passed to `at_compound_end` when no count is known.
CompoundMinor,
// `Some(n)`: `n` elements remain to be yielded; `None`: read until the
// reader reports the end of the compound.
&'f mut Option<usize>,
);
impl<'f, 'a, R: Reader, N: NestedValue<D>, D: Domain> Iterator for I<'f, 'a, R, N, D> {
    type Item = Result<N>;

    /// Yields the next element of the compound.
    ///
    /// With a known count, decrements it and stops at zero. Without one,
    /// asks the reader whether the compound has ended; an error from that
    /// check is yielded as an item.
    fn next(&mut self) -> Option<Self::Item> {
        let remaining = *self.2;
        match remaining {
            Some(0) => return None,
            Some(left) => *self.2 = Some(left - 1),
            None => match self.0.read.at_compound_end(self.1) {
                Ok(false) => {}
                Ok(true) => return None,
                Err(e) => return Some(Err(e)),
            },
        }
        Some(self.0.next_or_err())
    }
}

View File

@ -12,11 +12,11 @@ pub mod writer;
pub use codec::Codec;
pub use de::Deserializer;
pub use de::from_value;
pub use decoder::DecodePlaceholderMap;
pub use decoder::Decoder;
pub use encoder::EncodePlaceholderMap;
pub use encoder::Encoder;
pub use reader::BinaryReader;
pub use reader::DecodePlaceholderMap;
pub use reader::Reader;
pub use reader::is_eof_error;
pub use reader::is_syntax_error;

View File

@ -4,6 +4,7 @@ use std::convert::TryFrom;
use std::convert::TryInto;
use std::io::Read;
use super::constants::{Op, InvalidOp, AtomMinor, CompoundMinor};
use super::value::{Value, Domain, NestedValue, Map, Set};
pub type Error = std::io::Error;
pub type Result<T> = std::result::Result<T, Error>;
@ -14,32 +15,30 @@ enum PeekState {
Buffer(BytesMut),
}
/// One lexical unit produced by `Reader::next_token`.
#[derive(Debug, Clone)]
pub enum Token {
// Marks that the next value read is an annotation.
Annotation,
// Reference into the placeholder table by index.
PlaceholderRef(usize),
// Carries no value; consumers skip it.
Noop,
Boolean(bool),
Float(f32),
Double(f64),
SignedInteger(BigInt),
String(String),
ByteString(BytesMut),
Symbol(String),
// Start of a streamed atom of the given kind.
OpenAtom(AtomMinor),
CloseAtom(AtomMinor),
// Start of a compound; `Some(n)` when the element count is known,
// `None` when the compound is streamed.
OpenCompound(CompoundMinor, Option<usize>),
CloseCompound(CompoundMinor),
}
pub type DecodePlaceholderMap<N, D> = Map<usize, Value<N, D>>;
/// Source of decoded input for the decoder.
///
/// NOTE(review): this view of the trait shows both the token-level methods
/// (`next_token`, `at_atom_end`, `at_compound_end`) and the value-level
/// `next`; they belong to different revisions of the trait.
pub trait Reader {
// Reads a single token.
fn next_token(&mut self) -> Result<Token>;
// Reads one complete value. `placeholders` resolves placeholder
// references; `read_annotations` selects whether annotations are kept
// on the result or read and dropped.
fn next<N: NestedValue<Dom>, Dom: Domain>(
&mut self,
placeholders: Option<&DecodePlaceholderMap<N, Dom>>,
read_annotations: bool,
) -> Result<N>;
// Presumably the number of bytes currently buffered -- TODO confirm against implementors.
fn buffered_len(&mut self) -> Result<usize>;
// Whether the reader is positioned at the end of a streamed atom.
fn at_atom_end(&mut self, minor: AtomMinor) -> Result<bool>;
// Whether the reader is positioned at the end of a streamed compound.
fn at_compound_end(&mut self, paren: CompoundMinor) -> Result<bool>;
}
/// Lets a mutable reference to any `Reader` be used as a `Reader` itself;
/// every method forwards to the referenced reader.
impl<'re, R: Reader> Reader for &'re mut R {
    fn next<N: NestedValue<Dom>, Dom: Domain>(
        &mut self,
        placeholders: Option<&DecodePlaceholderMap<N, Dom>>,
        read_annotations: bool,
    ) -> Result<N> {
        // Name the target impl explicitly so the call cannot resolve back
        // to this forwarding impl.
        R::next(&mut **self, placeholders, read_annotations)
    }

    fn buffered_len(&mut self) -> Result<usize> {
        R::buffered_len(&mut **self)
    }
}
pub struct BinaryReader<'a, R: Read> {
@ -48,6 +47,45 @@ pub struct BinaryReader<'a, R: Read> {
chunksize: usize,
}
/// Bundles a mutable borrow of a `BinaryReader` with the two settings that
/// are passed along on every `next` call made through it.
struct ConfiguredBinaryReader<'de, 'pl, 'a, R: Read, N: NestedValue<Dom>, Dom: Domain> {
// Underlying reader that values are decoded from.
reader: &'de mut BinaryReader<'a, R>,
// Optional placeholder table, forwarded as the `placeholders` argument of `next`.
placeholders: Option<&'pl DecodePlaceholderMap<N, Dom>>,
// Forwarded as the `read_annotations` argument of `next`.
read_annotations: bool,
}
/// Iterator state for reading a fixed number of values from a reader.
struct CountedStream<'de, 'pl, 'a, R: Read, N: NestedValue<Dom>, Dom: Domain> {
// Reader plus decoding settings used for each element.
reader: ConfiguredBinaryReader<'de, 'pl, 'a, R, N, Dom>,
// Number of elements still to be yielded.
count: usize,
}
impl<'de, 'pl, 'a, R: Read, N: NestedValue<Dom>, Dom: Domain> Iterator
    for CountedStream<'de, 'pl, 'a, R, N, Dom>
{
    type Item = Result<N>;

    /// Yields the next element while the remaining count is non-zero,
    /// decrementing it before each read; yields `None` once it reaches zero.
    fn next(&mut self) -> Option<Self::Item> {
        // `checked_sub` is `None` exactly when no elements remain.
        self.count = self.count.checked_sub(1)?;
        let cfg = &mut self.reader;
        Some(cfg.reader.next(cfg.placeholders, cfg.read_annotations))
    }
}
/// Iterator state for reading values until the reader's `peekend` reports
/// the end, rather than counting elements.
struct DelimitedStream<'de, 'pl, 'a, R: Read, N: NestedValue<Dom>, Dom: Domain> {
// Reader plus decoding settings used for each element.
reader: ConfiguredBinaryReader<'de, 'pl, 'a, R, N, Dom>,
}
impl<'de, 'pl, 'a, R: Read, N: NestedValue<Dom>, Dom: Domain> Iterator
    for DelimitedStream<'de, 'pl, 'a, R, N, Dom>
{
    type Item = Result<N>;

    /// Yields the next element unless `peekend` reports the end of the
    /// stream. A failure of the end check itself is yielded as an `Err` item.
    fn next(&mut self) -> Option<Self::Item> {
        let cfg = &mut self.reader;
        let finished = match cfg.reader.peekend() {
            Ok(flag) => flag,
            Err(e) => return Some(Err(e)),
        };
        if finished {
            None
        } else {
            Some(cfg.reader.next(cfg.placeholders, cfg.read_annotations))
        }
    }
}
/// Splits a lead byte into its operation and argument.
///
/// The upper four bits are converted with `Op::try_from` (whose error is
/// propagated); the lower four bits are returned unchanged.
pub fn decodeop(b: u8) -> Result<(Op, u8)> {
    let high_nibble = b >> 4;
    let low_nibble = b & 0x0f;
    let op = Op::try_from(high_nibble)?;
    Ok((op, low_nibble))
}
@ -60,12 +98,53 @@ pub fn decodestr(bs: &[u8]) -> Result<&str> {
std::str::from_utf8(bs).map_err(|_| err("Invalid UTF-8"))
}
/// Interprets the raw bytes `bs` of an atom according to `minor`.
///
/// Integers go through `decodeint`; strings and symbols go through
/// `decodestr`, so its error is propagated for them; byte strings are
/// taken as-is.
///
/// NOTE(review): two signatures and two sets of match arms appear below --
/// one producing a `Token`, one producing a wrapped value `N`. They are
/// alternative revisions of the same function, not a single body.
pub fn decodebinary(minor: AtomMinor, bs: BytesMut) -> Result<Token> {
pub fn decodebinary<N: NestedValue<Dom>, Dom: Domain>(minor: AtomMinor, bs: BytesMut) -> Result<N> {
match minor {
AtomMinor::SignedInteger => Ok(Token::SignedInteger(decodeint(&bs))),
AtomMinor::String => Ok(Token::String(decodestr(&bs)?.into())),
AtomMinor::ByteString => Ok(Token::ByteString(bs)),
AtomMinor::Symbol => Ok(Token::Symbol(decodestr(&bs)?.into())),
AtomMinor::SignedInteger => Ok(Value::from(decodeint(&bs)).wrap()),
AtomMinor::String => Ok(Value::from(decodestr(&bs)?).wrap()),
// Copies the buffer into an owned `Vec<u8>`.
AtomMinor::ByteString => Ok(Value::ByteString(bs.to_vec()).wrap()),
AtomMinor::Symbol => Ok(Value::symbol(decodestr(&bs)?).wrap()),
}
}
/// Assembles a compound value of kind `minor` from the element results
/// produced by `iter`.
///
/// * `Record`: the first element is the label, the rest are the fields;
///   an empty iterator is an error.
/// * `Sequence`: all elements, in order.
/// * `Set`: all elements inserted into a set.
/// * `Dictionary`: elements are taken pairwise as key then value; a key
///   without a following value is an error.
///
/// The first `Err` item yielded by `iter` aborts decoding and is returned.
pub fn decodecompound<N: NestedValue<Dom>, Dom: Domain, I: Iterator<Item = Result<N>>>(
    minor: CompoundMinor,
    mut iter: I,
) -> Result<N> {
    match minor {
        CompoundMinor::Record => {
            let label = match iter.next() {
                Some(first) => first?,
                None => return Err(err("Too few elements in encoded record")),
            };
            let fields = iter.collect::<Result<Vec<N>>>()?;
            Ok(Value::record(label, fields).wrap())
        }
        CompoundMinor::Sequence => {
            let items = iter.collect::<Result<Vec<N>>>()?;
            Ok(Value::Sequence(items).wrap())
        }
        CompoundMinor::Set => {
            let mut members = Set::new();
            while let Some(member) = iter.next() {
                members.insert(member?);
            }
            Ok(Value::Set(members).wrap())
        }
        CompoundMinor::Dictionary => {
            let mut entries = Map::new();
            loop {
                // A clean end of input is only allowed where a key is expected.
                let key = match iter.next() {
                    Some(k) => k?,
                    None => break,
                };
                let value = match iter.next() {
                    Some(v) => v?,
                    None => return Err(err("Missing dictionary value")),
                };
                entries.insert(key, value);
            }
            Ok(Value::Dictionary(entries).wrap())
        }
    }
}
@ -105,7 +184,7 @@ impl<'a, R: Read> BinaryReader<'a, R> {
BinaryReader {
read,
buf: PeekState::Buffer(BytesMut::new()),
chunksize: 1,
chunksize: 64,
}
}
@ -161,10 +240,6 @@ impl<'a, R: Read> BinaryReader<'a, R> {
Ok(buf.split_to(req))
}
/// Reads one byte with `read` and splits it into `(Op, argument)` via `decodeop`.
pub fn nextop(&mut self) -> Result<(Op, u8)> {
decodeop(self.read()?)
}
pub fn varint(&mut self) -> Result<usize> {
let v = self.read()?;
if v < 128 {
@ -192,61 +267,95 @@ impl<'a, R: Read> BinaryReader<'a, R> {
}
}
/// Lets a mutable reference to a `BinaryReader` be used wherever a `Reader`
/// is expected; each method forwards to the same-named method on the
/// borrowed reader.
impl<'re, 'a, R: Read> Reader for &'re mut BinaryReader<'a, R> {
fn next_token(&mut self) -> Result<Token> {
(*self).next_token()
}
fn buffered_len(&mut self) -> Result<usize> {
(*self).buffered_len()
}
fn at_atom_end(&mut self, minor: AtomMinor) -> Result<bool> {
(*self).at_atom_end(minor)
}
fn at_compound_end(&mut self, paren: CompoundMinor) -> Result<bool> {
(*self).at_compound_end(paren)
}
}
impl<'re, 'a, R: Read> Reader for BinaryReader<'a, R> {
// NOTE(review): the lines below interleave two revisions of this method --
// the token-producing `next_token` and the value-producing `next` that
// decodes a complete value in one call. Read the `Token::…` arms and the
// `Value::…` arms as alternatives, not as one body.
//
// `next` reads a lead byte, splits it with `decodeop`, and dispatches on
// the resulting `(Op, argument)` pair. The enclosing `loop` lets arms that
// produce no value (skipped annotations, the no-op byte) `continue` to the
// next lead byte.
fn next_token(&mut self) -> Result<Token> {
match self.nextop()? {
(Op::Misc(0), 0) => Ok(Token::Boolean(false)),
(Op::Misc(0), 1) => Ok(Token::Boolean(true)),
(Op::Misc(0), 2) => {
let mut bs = [0; 4];
bs.copy_from_slice(&self.readbytes(4)?);
Ok(Token::Float(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))))
fn next<N: NestedValue<Dom>, Dom: Domain>(
&mut self,
placeholders: Option<&DecodePlaceholderMap<N, Dom>>,
read_annotations: bool
) ->
Result<N>
{
loop {
return match decodeop(self.read()?)? {
(Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
(Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
(Op::Misc(0), 2) => {
// 4-byte big-endian IEEE-754 single.
let mut bs = [0; 4];
bs.copy_from_slice(&self.readbytes(4)?);
Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
}
(Op::Misc(0), 3) => {
// 8-byte big-endian IEEE-754 double.
let mut bs = [0; 8];
bs.copy_from_slice(&self.readbytes(8)?);
Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
}
(Op::Misc(0), 5) => {
if read_annotations {
// Gather the run of consecutive annotations by peeking for further
// annotation lead bytes, then read the value they annotate.
let mut annotations = vec![self.next(placeholders, read_annotations)?];
while decodeop(self.peek()?)? == (Op::Misc(0), 5) {
self.skip()?;
annotations.push(self.next(placeholders, read_annotations)?);
}
let v = self.next(placeholders, read_annotations)?;
// NOTE(review): the peek loop above stops at any non-annotation byte,
// including the no-op byte that `next` silently skips. If an annotation
// marker follows such a no-op, `v` comes back annotated and this assert
// appears to panic on malformed input instead of returning an `Err` --
// confirm and consider an error return.
assert!(v.annotations().is_empty());
Ok(N::wrap_ann(annotations, v.value_owned()))
} else {
// Annotations are not wanted: decode and drop the annotation, then
// loop to read the value that follows it.
let _ = self.next(placeholders, read_annotations)?;
continue;
}
}
(Op::Misc(0), _) => Err(err("Invalid format A encoding")),
(Op::Misc(1), arg) => {
let n = self.wirelength(arg)?;
match placeholders.and_then(|m| m.get(&n)) {
Some(v) => Ok(v.clone().wrap()),
None => Err(err("Invalid Preserves placeholder")),
}
}
(Op::Misc(2), arg) => match Op::try_from(arg)? {
Op::Atom(minor) => {
// Streamed atom: concatenate byte-string chunks until `peekend`
// reports the end. Chunks are read with `read_annotations = false`.
let mut bs = BytesMut::with_capacity(256);
while !self.peekend()? {
match self.next(placeholders, false)?.value().as_bytestring() {
Some(chunk) => bs.extend_from_slice(chunk),
None => return Err(err("Unexpected non-binary chunk")),
}
}
decodebinary(minor, bs)
}
// Streamed compound: elements are read until `peekend` reports the end.
Op::Compound(minor) => decodecompound(minor, DelimitedStream {
reader: ConfiguredBinaryReader {
reader: self,
placeholders,
read_annotations,
},
}),
_ => Err(err("Invalid format C start byte")),
}
(Op::Misc(3), arg) => {
// Small integer stored in the argument nibble: 13..=15 map to -3..=-1.
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
Ok(Value::from(n).wrap())
}
// Presumably `Op::try_from` only yields `Misc(0..=3)` -- TODO confirm.
(Op::Misc(_), _) => unreachable!(),
(Op::Atom(minor), arg) => {
let count = self.wirelength(arg)?;
let bs = self.readbytes(count)?;
decodebinary(minor, bs)
}
(Op::Compound(minor), arg) => {
// Counted compound: exactly `count` elements follow.
let count = self.wirelength(arg)?;
decodecompound(minor, CountedStream {
reader: ConfiguredBinaryReader {
reader: self,
placeholders,
read_annotations,
},
count,
})
}
// No-op byte: carries no value, read the next lead byte.
(Op::Reserved(3), 15) => continue,
(Op::Reserved(_), _) => Err(InvalidOp.into()),
}
(Op::Misc(0), 3) => {
let mut bs = [0; 8];
bs.copy_from_slice(&self.readbytes(8)?);
Ok(Token::Double(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))))
}
(Op::Misc(0), 5) => Ok(Token::Annotation),
(Op::Misc(0), _) => Err(err("Invalid format A encoding")),
(Op::Misc(1), arg) => Ok(Token::PlaceholderRef(self.wirelength(arg)?)),
(Op::Misc(2), arg) => match Op::try_from(arg)? {
Op::Atom(minor) => Ok(Token::OpenAtom(minor)),
Op::Compound(minor) => Ok(Token::OpenCompound(minor, None)),
_ => Err(err("Invalid format C start byte")),
}
(Op::Misc(3), arg) => {
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
Ok(Token::SignedInteger(BigInt::from(n)))
}
(Op::Misc(_), _) => unreachable!(),
(Op::Atom(minor), arg) => {
let count = self.wirelength(arg)?;
let bs = self.readbytes(count)?;
decodebinary(minor, bs)
}
(Op::Compound(minor), arg) =>
Ok(Token::OpenCompound(minor, Some(self.wirelength(arg)?))),
(Op::Reserved(3), 15) => Ok(Token::Noop),
(Op::Reserved(_), _) => Err(InvalidOp.into()),
}
}
@ -257,12 +366,4 @@ impl<'re, 'a, R: Read> Reader for BinaryReader<'a, R> {
PeekState::Buffer(ref b) => Ok(b.remaining()),
}
}
// Delegates to `peekend`; the `_minor` argument is unused.
fn at_atom_end(&mut self, _minor: AtomMinor) -> Result<bool> {
self.peekend()
}
// Delegates to `peekend`; the `_paren` argument is unused.
fn at_compound_end(&mut self, _paren: CompoundMinor) -> Result<bool> {
self.peekend()
}
}