First stab at factoring out Reader trait

This commit is contained in:
Tony Garnock-Jones 2020-05-25 13:42:06 +02:00
parent 92463c37d4
commit 6ad1707870
6 changed files with 451 additions and 320 deletions

View File

@ -234,23 +234,15 @@ mod value_tests {
#[cfg(test)]
mod decoder_tests {
use crate::value::Decoder;
use crate::value::{Decoder, BinaryReader, Reader};
use crate::value::{Value, PlainValue, NestedValue};
use super::dom::Dom;
#[test] fn read_123() {
let mut buf = &b"abc"[..];
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(&mut buf, None);
assert_eq!(d.read().ok(), Some(97));
assert_eq!(d.read().ok(), Some(98));
assert_eq!(d.read().ok(), Some(99));
assert!(d.read().err().unwrap().is_eof())
}
#[test] fn skip_annotations_noskip() {
let mut buf = &b"\x0521"[..];
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(&mut buf, None);
let v = d.next().unwrap();
let r = BinaryReader::new(&mut buf);
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(r, None);
let v = d.next_or_err().unwrap();
assert_eq!(v.annotations().len(), 1);
assert_eq!(v.annotations()[0], Value::from(2).wrap());
assert_eq!(v.value(), &Value::from(1));
@ -258,9 +250,10 @@ mod decoder_tests {
#[test] fn skip_annotations_skip() {
let mut buf = &b"\x0521"[..];
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(&mut buf, None);
let r = BinaryReader::new(&mut buf);
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(r, None);
d.set_read_annotations(false);
let v = d.next().unwrap();
let v = d.next_or_err().unwrap();
assert_eq!(v.annotations().len(), 0);
assert_eq!(v.value(), &Value::from(1));
}
@ -268,25 +261,32 @@ mod decoder_tests {
#[test] fn two_values_at_once() {
let mut buf = &b"\x81tPing\x81tPong"[..];
assert_eq!(buf.len(), 12);
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(&mut buf, None);
assert_eq!(d.next().unwrap().value(), &Value::simple_record("Ping", vec![]));
assert_eq!(d.next().unwrap().value(), &Value::simple_record("Pong", vec![]));
let r = BinaryReader::new(&mut buf);
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(r, None);
assert_eq!(d.next_or_err().unwrap().value(), &Value::simple_record("Ping", vec![]));
assert_eq!(d.next_or_err().unwrap().value(), &Value::simple_record("Pong", vec![]));
assert_eq!(buf.len(), 0);
}
#[test] fn buf_advanced() {
let mut buf = &b"\x81tPing\x81tPong"[..];
assert_eq!(buf.len(), 12);
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(&mut buf, None);
assert_eq!(d.next().unwrap().value(), &Value::simple_record("Ping", vec![]));
assert_eq!(buf.len(), 6);
let mut r = BinaryReader::new(&mut buf);
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(&mut r, None);
assert_eq!(d.next_or_err().unwrap().value(), &Value::simple_record("Ping", vec![]));
assert!(r.buffered_len().unwrap() > 0);
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(&mut r, None);
assert_eq!(d.next_or_err().unwrap().value(), &Value::simple_record("Pong", vec![]));
assert!(r.buffered_len().unwrap() == 0);
assert_eq!(buf.len(), 0);
}
}
#[cfg(test)]
mod samples_tests {
use crate::symbol::Symbol;
use crate::value::{Codec, Decoder, decoder::Error};
use crate::value::{Codec, Decoder, BinaryReader};
use crate::value::reader::is_syntax_error;
use crate::value::{Value, PlainValue, Map};
use crate::value::DecodePlaceholderMap;
use crate::value::to_value;
@ -316,8 +316,9 @@ mod samples_tests {
#[test] fn run() -> std::io::Result<()> {
let mut fh = std::fs::File::open("../../tests/samples.bin").unwrap();
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(&mut fh, None);
let tests: TestCases = from_value(&d.next().unwrap()).unwrap();
let r = BinaryReader::new(&mut fh);
let mut d = Decoder::<_, PlainValue<Dom>, Dom>::new(r, None);
let tests: TestCases = from_value(&d.next_or_err().unwrap()).unwrap();
// println!("{:#?}", tests);
let codec = Codec::new(tests.decode_placeholders.0);
@ -325,8 +326,8 @@ mod samples_tests {
println!("{:?} ==> {:?}", name, case);
match case {
TestCase::Test(ref bin, ref val) => {
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
assert_eq!(&codec.decode(&mut &bin[..])?, val);
assert_eq!(&codec.decode_all(&mut &codec.encode_bytes(val)?[..])?, &[val.clone()]);
assert_eq!(&codec.decode_all(&mut &bin[..])?, &[val.clone()]);
assert_eq!(&codec.encode_bytes(val)?, bin);
}
TestCase::NondeterministicTest(ref bin, ref val) => {
@ -334,33 +335,32 @@ mod samples_tests {
// written so that while strictly
// "nondeterministic", the order of keys in
// dictionaries follows Preserves order.
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
assert_eq!(&codec.decode(&mut &bin[..])?, val);
assert_eq!(&codec.decode_all(&mut &codec.encode_bytes(val)?[..])?, &[val.clone()]);
assert_eq!(&codec.decode_all(&mut &bin[..])?, &[val.clone()]);
assert_eq!(&codec.encode_bytes(val)?, bin);
}
TestCase::StreamingTest(ref bin, ref val) => {
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
assert_eq!(&codec.decode(&mut &bin[..])?, val);
assert_eq!(&codec.decode_all(&mut &codec.encode_bytes(val)?[..])?, &[val.clone()]);
assert_eq!(&codec.decode_all(&mut &bin[..])?, &[val.clone()]);
}
TestCase::DecodeTest(ref bin, ref val) => {
assert_eq!(&codec.decode(&mut &codec.encode_bytes(val)?[..])?, val);
assert_eq!(&codec.decode(&mut &bin[..])?, val);
assert_eq!(&codec.decode_all(&mut &codec.encode_bytes(val)?[..])?, &[val.clone()]);
assert_eq!(&codec.decode_all(&mut &bin[..])?, &[val.clone()]);
}
TestCase::ParseError(_) => (),
TestCase::ParseShort(_) => (),
TestCase::DecodeError(ref bin) => {
match codec.decode(&mut &bin[..]) {
match codec.decode_all(&mut &bin[..]) {
Ok(_) => panic!("Unexpected success"),
Err(Error::Syntax(_)) => (),
Err(e) => panic!("Unexpected error {:?}", e),
Err(e) => if is_syntax_error(&e) {
()
} else {
panic!("Unexpected error {:?}", e)
}
}
}
TestCase::DecodeShort(ref bin) => {
match codec.decode(&mut &bin[..]) {
Ok(_) => panic!("Unexpected success"),
Err(Error::Eof) => (),
Err(e) => panic!("Unexpected error {:?}", e),
}
assert_eq!(codec.decode_all(&mut &bin[..])?.len(), 0);
}
}
}

View File

@ -1,7 +1,13 @@
use crate::value::{decoder, encoder, invert_map, NestedValue, Domain};
use decoder::{Decoder, DecodePlaceholderMap};
use encoder::{Encoder, EncodePlaceholderMap};
use std::io::{Read, Write};
use super::{
decoder::{self, Decoder, DecodePlaceholderMap},
encoder::{Encoder, EncodePlaceholderMap},
invert_map,
reader::{Reader, BinaryReader, Error},
value::{
NestedValue, Domain,
},
};
pub struct Codec<N: NestedValue<D>, D: Domain> {
pub decode_placeholders: Option<DecodePlaceholderMap<N, D>>,
@ -18,16 +24,21 @@ impl<N: NestedValue<D>, D: Domain> Codec<N, D> {
Codec { decode_placeholders: None, encode_placeholders: None }
}
pub fn decoder<'a, 'r, R: Read>(&'a self, read: &'r mut R) -> Decoder<'r, 'a, R, N, D> {
Decoder::new(read, self.decode_placeholders.as_ref())
pub fn decoder<'a, 'r, R: Read>(&'a self, read: &'r mut R) -> Decoder<'a, BinaryReader<'r, R>, N, D> {
Decoder::new(BinaryReader::new(read), self.decode_placeholders.as_ref())
}
pub fn encoder<'a, 'w, W: Write>(&'a self, write: &'w mut W) -> Encoder<'w, 'a, W, N, D> {
Encoder::new(write, self.encode_placeholders.as_ref())
}
pub fn decode<'r, R: Read>(&self, read: &'r mut R) -> decoder::Result<N> {
self.decoder(read).next()
pub fn decode_all<'r, R: Read>(&self, read: &'r mut R) -> decoder::Result<Vec<N>> {
let mut r = BinaryReader::new(read);
let vs: Vec<N> = Decoder::new(&mut r, self.decode_placeholders.as_ref()).collect::<decoder::Result<Vec<N>>>()?;
match r.buffered_len()? {
0 => Ok(vs),
count => Err(Error::new(std::io::ErrorKind::Other, format!("{} trailing bytes", count)))
}
}
pub fn encode_bytes(&self, v: &N) -> std::io::Result<Vec<u8>> {

View File

@ -12,6 +12,13 @@ pub enum Op {
/// Marker error produced when a byte cannot be converted into an `Op`.
#[derive(Debug, PartialEq, Eq)]
pub struct InvalidOp;

/// Lets `?` turn an `InvalidOp` into an I/O error of kind `InvalidData`.
impl From<InvalidOp> for std::io::Error {
    fn from(_: InvalidOp) -> Self {
        let kind = std::io::ErrorKind::InvalidData;
        Self::new(kind, "Invalid Preserves lead byte major value")
    }
}
impl TryFrom<u8> for Op {
type Error = InvalidOp;
fn try_from(v: u8) -> Result<Self, Self::Error> {
@ -36,7 +43,7 @@ impl From<Op> for u8 {
}
}
#[derive(Debug, TryFromPrimitive, PartialEq, Eq)]
#[derive(Debug, TryFromPrimitive, PartialEq, Eq, Clone, Copy)]
#[repr(u8)]
pub enum AtomMinor {
SignedInteger = 0,
@ -45,7 +52,7 @@ pub enum AtomMinor {
Symbol = 3,
}
#[derive(Debug, TryFromPrimitive, PartialEq, Eq)]
#[derive(Debug, TryFromPrimitive, PartialEq, Eq, Clone, Copy)]
#[repr(u8)]
pub enum CompoundMinor {
Record = 0,

View File

@ -1,109 +1,24 @@
use std::io::{Read, ErrorKind};
use std::convert::TryInto;
use std::convert::TryFrom;
use crate::value::{Value, NestedValue, Set, Map, Domain};
use num::bigint::BigInt;
use crate::value::constants::{Op, InvalidOp, AtomMinor, CompoundMinor};
use bytes::BytesMut;
use super::reader::{Reader, Token, err, is_eof_error, decodebinary};
use super::value::{Value, NestedValue, Set, Map, Domain};
use super::constants::{AtomMinor, CompoundMinor};
pub type Result<V> = std::result::Result<V, Error>;
#[derive(Debug)]
pub enum Error {
Io(std::io::Error),
Syntax(&'static str),
Eof,
}
impl From<Error> for std::io::Error {
fn from(v: Error) -> Self {
match v {
Error::Io(e) => e,
Error::Syntax(msg) => Self::new(ErrorKind::InvalidData, msg),
Error::Eof => Self::new(ErrorKind::UnexpectedEof, "Unexpected EOF"),
}
}
}
impl From<std::io::Error> for Error {
fn from(v: std::io::Error) -> Self {
Error::Io(v)
}
}
impl From<std::str::Utf8Error> for Error {
fn from(_v: std::str::Utf8Error) -> Self {
Error::Syntax("Invalid UTF-8")
}
}
impl From<InvalidOp> for Error {
fn from(_v: InvalidOp) -> Self {
Error::Syntax("Invalid lead byte major value")
}
}
impl Error {
pub fn is_io(&self) -> bool { if let Error::Io(_) = *self { true } else { false } }
pub fn is_syntax(&self) -> bool { if let Error::Syntax(_) = *self { true } else { false } }
pub fn is_eof(&self) -> bool { if let Error::Eof = *self { true } else { false } }
}
pub use super::reader::{Error, Result};
pub type DecodePlaceholderMap<N, D> = Map<usize, Value<N, D>>;
#[derive(PartialEq, Eq)]
enum PeekState {
Empty,
Eof,
Full(u8),
}
pub struct Decoder<'a, 'b, R: Read, N: NestedValue<D>, D: Domain> {
read: &'a mut R,
index: usize,
buf: Box<PeekState>,
placeholders: Option<&'b DecodePlaceholderMap<N, D>>,
pub struct Decoder<'a, R: Reader, N: NestedValue<D>, D: Domain> {
pub read: R,
placeholders: Option<&'a DecodePlaceholderMap<N, D>>,
read_annotations: bool,
}
struct CountedStream<'de, 'a, 'b, R: Read, N: NestedValue<D>, D: Domain> {
count: usize,
decoder: &'de mut Decoder<'a, 'b, R, N, D>,
}
impl<'de, 'a, 'b, R: Read, N: NestedValue<D>, D: Domain> Iterator for CountedStream<'de, 'a, 'b, R, N, D> {
type Item = Result<N>;
fn next(&mut self) -> Option<Self::Item> {
if self.count == 0 { return None }
self.count -= 1;
Some(self.decoder.next())
}
}
struct DelimitedStream<'de, 'a, 'b, R: Read, N: NestedValue<D>, D: Domain> {
decoder: &'de mut Decoder<'a, 'b, R, N, D>,
}
impl<'de, 'a, 'b, R: Read, N: NestedValue<D>, D: Domain> Iterator for DelimitedStream<'de, 'a, 'b, R, N, D> {
type Item = Result<N>;
fn next(&mut self) -> Option<Self::Item> {
match self.decoder.peekend() {
Err(e) => Some(Err(e)),
Ok(true) => None,
Ok(false) => Some(self.decoder.next()),
}
}
}
fn decodeint(bs: &[u8]) -> BigInt {
BigInt::from_signed_bytes_be(bs)
}
impl<'a, 'b, R: Read, N: NestedValue<D>, D: Domain> Decoder<'a, 'b, R, N, D> {
pub fn new(read: &'a mut R, placeholders: Option<&'b DecodePlaceholderMap<N, D>>) -> Self {
impl<'a, R: Reader, N: NestedValue<D>, D: Domain> Decoder<'a, R, N, D> {
pub fn new(read: R, placeholders: Option<&'a DecodePlaceholderMap<N, D>>) ->
Self
{
Decoder{
read,
index: 0,
buf: Box::new(PeekState::Empty),
placeholders,
read_annotations: true,
}
@ -113,218 +28,142 @@ impl<'a, 'b, R: Read, N: NestedValue<D>, D: Domain> Decoder<'a, 'b, R, N, D> {
self.read_annotations = read_annotations
}
fn prime(&mut self) -> Result<()> {
match *self.buf {
PeekState::Empty => {
let bs = &mut [0];
*self.buf = match self.read.read(bs)? {
0 => PeekState::Eof,
1 => {
self.index += 1;
PeekState::Full(bs[0])
},
_ => panic!("buffer overrun")
pub fn next_or_err(&mut self) -> Result<N> {
let t = self.read.next_token()?;
self.next_inner(t)
}
pub fn next_inner(&mut self, mut token: Token) -> Result<N> {
loop {
return match token {
Token::Annotation =>
if self.read_annotations {
let mut annotations = vec![self.next_or_err()?];
loop {
match self.read.next_token()? {
Token::Annotation =>
annotations.push(self.next_or_err()?),
other => {
token = other;
break;
}
}
}
let v = self.next_inner(token)?;
assert!(v.annotations().is_empty());
Ok(N::wrap_ann(annotations, v.value_owned()))
} else {
self.next_or_err()?;
token = self.read.next_token()?;
continue;
}
Token::PlaceholderRef(n) =>
match self.placeholders.and_then(|m| m.get(&n)) {
Some(v) => Ok(v.clone().wrap()),
None => Err(err("Invalid Preserves placeholder")),
}
Token::Noop => {
token = self.read.next_token()?;
continue;
}
Token::Boolean(b) => Ok(Value::from(b).wrap()),
Token::Float(f) => Ok(Value::from(f).wrap()),
Token::Double(d) => Ok(Value::from(d).wrap()),
Token::SignedInteger(i) => Ok(Value::from(i).wrap()),
Token::String(s) => Ok(Value::from(s).wrap()),
Token::ByteString(bs) => Ok(Value::ByteString(bs.to_vec()).wrap()),
Token::Symbol(s) => Ok(Value::symbol(&s).wrap()),
Token::OpenAtom(minor) => self.binarystream(minor),
Token::CloseAtom(minor) => Err(err(&format!("Unexpected {:?} close", minor))),
Token::OpenCompound(paren, mut limit) => self.decodecompound(paren, &mut limit),
Token::CloseCompound(paren) => Err(err(&format!("Unexpected {:?} close", paren))),
}
PeekState::Eof => (),
PeekState::Full(_) => (),
}
Ok(())
}
pub fn skip(&mut self) -> Result<()> {
self.prime()?;
*self.buf = PeekState::Empty;
Ok(())
}
pub fn peek(&mut self) -> Result<u8> {
self.prime()?;
match *self.buf {
PeekState::Full(v) => Ok(v),
PeekState::Eof => Err(Error::Eof),
PeekState::Empty => unreachable!()
}
}
pub fn read(&mut self) -> Result<u8> {
let v = self.peek()?;
self.skip()?;
Ok(v)
}
pub fn readbytes(&mut self, bs: &mut [u8]) -> Result<()> {
if *self.buf != PeekState::Empty {
unreachable!();
}
match self.read.read_exact(bs) {
Ok(()) => {
self.index += bs.len();
Ok(())
pub fn binarystream(&mut self, minor: AtomMinor) -> Result<N> {
let mut bs = BytesMut::with_capacity(256);
while !self.read.at_atom_end(minor)? {
match self.next_or_err()?.value().as_bytestring() {
Some(chunk) => bs.extend_from_slice(chunk),
None => return Err(err("Unexpected non-binary chunk")),
}
Err(e) =>
if e.kind() == std::io::ErrorKind::UnexpectedEof {
Err(Error::Eof)
} else {
Err(Error::from(e))
}
}
// We know it'll be a SignedInteger, String, ByteString, or
// Symbol, so the recursion is safe.
self.next_inner(decodebinary(minor, bs)?)
}
pub fn decodeop(b: u8) -> Result<(Op, u8)> {
Ok((Op::try_from(b >> 4)?, b & 15))
}
pub fn nextop(&mut self) -> Result<(Op, u8)> {
Self::decodeop(self.read()?)
}
pub fn varint(&mut self) -> Result<usize> {
let v = self.read()?;
if v < 128 {
Ok(usize::from(v))
} else {
Ok(self.varint()? * 128 + usize::from(v - 128))
}
}
pub fn wirelength(&mut self, arg: u8) -> Result<usize> {
if arg < 15 {
Ok(usize::from(arg))
} else {
self.varint()
}
}
pub fn peekend(&mut self) -> Result<bool> {
if self.peek()? == 4 {
self.skip()?;
Ok(true)
} else {
Ok(false)
}
}
pub fn decodebinary(minor: AtomMinor, bs: Vec<u8>) -> Result<N> {
Ok(match minor {
AtomMinor::SignedInteger => Value::from(decodeint(&bs)).wrap(),
AtomMinor::String => Value::from(std::str::from_utf8(&bs)?).wrap(),
AtomMinor::ByteString => Value::from(&bs as &[u8]).wrap(),
AtomMinor::Symbol => Value::symbol(std::str::from_utf8(&bs)?).wrap(),
})
}
pub fn decodecompound<I: Iterator<Item = Result<N>>>(minor: CompoundMinor, mut pieces: I) ->
pub fn decodecompound(&mut self, paren: CompoundMinor, limit: &mut Option<usize>) ->
Result<N>
{
match minor {
match paren {
CompoundMinor::Record =>
match pieces.next() {
None => Err(Error::Syntax("Too few elements in encoded record")),
match I(self, paren, limit).next() {
None => Err(err("Too few elements in encoded record")),
Some(labelres) => {
let label = labelres?;
Ok(Value::record(label, pieces.collect::<Result<Vec<N>>>()?).wrap())
Ok(Value::record(label, I(self, paren, limit).collect::<Result<Vec<N>>>()?).wrap())
}
},
}
CompoundMinor::Sequence => {
Ok(Value::Sequence(pieces.collect::<Result<Vec<N>>>()?).wrap())
Ok(Value::Sequence(I(self, paren, limit).collect::<Result<Vec<N>>>()?).wrap())
}
CompoundMinor::Set => {
let mut s = Set::new();
for res in pieces { s.insert(res?); }
for res in I(self, paren, limit) { s.insert(res?); }
Ok(Value::Set(s).wrap())
}
CompoundMinor::Dictionary => {
let mut d = Map::new();
while let Some(kres) = pieces.next() {
while let Some(kres) = I(self, paren, limit).next() {
let k = kres?;
match pieces.next() {
match I(self, paren, limit).next() {
Some(vres) => { d.insert(k, vres?); }
None => return Err(Error::Syntax("Missing dictionary value")),
None => return Err(err("Missing dictionary value")),
}
}
Ok(Value::Dictionary(d).wrap())
}
}
}
}
pub fn binarystream(&mut self, minor: AtomMinor) -> Result<N> {
let mut bs: Vec<u8> = Vec::new();
while !self.peekend()? {
match self.next()?.value().as_bytestring() {
Some(chunk) => bs.extend_from_slice(chunk),
None => return Err(Error::Syntax("Unexpected non-binary chunk")),
}
impl<'a, R: Reader, N: NestedValue<D>, D: Domain> std::iter::Iterator for Decoder<'a, R, N, D> {
type Item = Result<N>;
fn next(&mut self) -> Option<Self::Item> {
match self.next_or_err() {
Err(e) if is_eof_error(&e) => None,
other => Some(other)
}
Self::decodebinary(minor, bs)
}
}
pub fn valuestream(&mut self, minor: CompoundMinor) -> Result<N> {
Self::decodecompound(minor, DelimitedStream { decoder: self })
}
/// Short-lived iterator over the members of one compound value.
///
/// Tuple fields, in order:
/// 0. the decoder whose reader supplies the members;
/// 1. the kind of compound being read (passed to `at_compound_end`);
/// 2. the remaining member count, shared with the caller: `Some(n)` means
///    exactly `n` more members are expected, `None` means members continue
///    until the reader reports the end of the compound.
struct I<'f, 'a, R: Reader, N: NestedValue<D>, D: Domain>(
&'f mut Decoder<'a, R, N, D>,
CompoundMinor,
&'f mut Option<usize>,
);
pub fn next(&mut self) -> Result<N> {
loop {
return match self.nextop()? {
(Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
(Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
(Op::Misc(0), 2) => {
let mut bs: [u8; 4] = Default::default();
self.readbytes(&mut bs)?;
Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
impl<'f, 'a, R: Reader, N: NestedValue<D>, D: Domain> Iterator for I<'f, 'a, R, N, D> {
type Item = Result<N>;
fn next(&mut self) -> Option<Self::Item> {
let I(d, paren, count) = self;
match count {
Some(0) => None,
Some(n) => {
*self.2 = Some(*n - 1);
Some(d.next_or_err())
},
None => {
match d.read.at_compound_end(*paren) {
Ok(true) => None,
Ok(false) => Some(d.next_or_err()),
Err(e) => Some(Err(e)),
}
(Op::Misc(0), 3) => {
let mut bs: [u8; 8] = Default::default();
self.readbytes(&mut bs)?;
Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
}
(Op::Misc(0), 5) => {
if self.read_annotations {
let mut annotations = vec![self.next()?];
while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
self.skip()?;
annotations.push(self.next()?);
}
let v = self.next()?;
assert!(v.annotations().is_empty());
Ok(N::wrap_ann(annotations, v.value_owned()))
} else {
self.next()?;
self.next()
}
}
(Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
(Op::Misc(1), arg) => {
let n = self.wirelength(arg)?;
match self.placeholders.and_then(|m| m.get(&n)) {
Some(v) => Ok(v.clone().wrap()),
None => Err(Error::Syntax("Invalid Preserves placeholder")),
}
}
(Op::Misc(2), arg) => {
match Op::try_from(arg)? {
Op::Atom(minor) => self.binarystream(minor),
Op::Compound(minor) => self.valuestream(minor),
_ => Err(Error::Syntax("Invalid format C start byte")),
}
}
(Op::Misc(3), arg) => {
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
Ok(Value::from(n).wrap())
}
(Op::Misc(_), _) => unreachable!(),
(Op::Atom(minor), arg) => {
let count = self.wirelength(arg)?;
let mut bs = vec![0; count];
self.readbytes(&mut bs)?;
Self::decodebinary(minor, bs)
}
(Op::Compound(minor), arg) => {
let count = self.wirelength(arg)?;
Self::decodecompound(minor, CountedStream { count, decoder: self })
}
(Op::Reserved(3), 15) => continue,
(Op::Reserved(_), _) => Err(InvalidOp.into()),
}
}
}

View File

@ -4,6 +4,7 @@ pub mod de;
pub mod decoder;
pub mod encoder;
pub mod error;
pub mod reader;
pub mod ser;
pub mod value;
pub mod writer;
@ -15,6 +16,10 @@ pub use decoder::DecodePlaceholderMap;
pub use decoder::Decoder;
pub use encoder::EncodePlaceholderMap;
pub use encoder::Encoder;
pub use reader::BinaryReader;
pub use reader::Reader;
pub use reader::is_eof_error;
pub use reader::is_syntax_error;
pub use ser::Serializer;
pub use ser::to_value;
pub use value::Domain;
@ -26,6 +31,7 @@ pub use value::RcValue;
pub use value::ArcValue;
pub use value::Set;
pub use value::Map;
pub use writer::Writer;
pub fn invert_map<A, B>(m: &Map<A, B>) -> Map<B, A>
where A: Clone, B: Clone + Ord

View File

@ -0,0 +1,268 @@
use bytes::{Buf, BufMut, BytesMut};
use num::bigint::BigInt;
use std::convert::TryFrom;
use std::convert::TryInto;
use std::io::Read;
use super::constants::{Op, InvalidOp, AtomMinor, CompoundMinor};
pub type Error = std::io::Error;
pub type Result<T> = std::result::Result<T, Error>;
/// Look-ahead state kept by `BinaryReader`.
#[derive(Debug)]
enum PeekState {
// Set by `BinaryReader::prime` when a read from the source returns zero bytes.
Eof,
// Bytes already fetched from the source but not yet consumed.
Buffer(BytesMut),
}
/// One lexical unit handed from a `Reader` to the decoder.
#[derive(Debug, Clone)]
pub enum Token {
// Marks that the next value is an annotation on the value that follows it.
Annotation,
// Reference to an entry in the decoder's placeholder map, by index.
PlaceholderRef(usize),
// Padding; the decoder skips it and fetches another token.
Noop,
Boolean(bool),
Float(f32),
Double(f64),
SignedInteger(BigInt),
String(String),
ByteString(BytesMut),
Symbol(String),
// Start of an atom delivered as a sequence of chunks; the end is
// detected via `Reader::at_atom_end`.
OpenAtom(AtomMinor),
// Not produced by `BinaryReader` in the code shown here; the decoder
// reports it as an unexpected close.
CloseAtom(AtomMinor),
// Start of a compound. `Some(n)` carries a known member count; `None`
// means the end is detected via `Reader::at_compound_end`.
OpenCompound(CompoundMinor, Option<usize>),
// Not produced by `BinaryReader` in the code shown here; the decoder
// reports it as an unexpected close.
CloseCompound(CompoundMinor),
}
/// Source of `Token`s for a decoder.
///
/// The notes below describe how `BinaryReader` implements each method;
/// other implementations are presumably expected to behave analogously.
pub trait Reader {
/// Reads and returns the next token, or an error (including EOF).
fn next_token(&mut self) -> Result<Token>;
/// Number of bytes currently buffered. `BinaryReader` first tries to
/// refill an empty buffer, so this may read from the source; it yields 0
/// at end of input.
fn buffered_len(&mut self) -> Result<usize>;
/// Returns `true` (consuming the end marker) when the chunked atom
/// currently being read has ended.
fn at_atom_end(&mut self, minor: AtomMinor) -> Result<bool>;
/// Returns `true` (consuming the end marker) when the open-ended compound
/// currently being read has ended.
fn at_compound_end(&mut self, paren: CompoundMinor) -> Result<bool>;
}
/// `Reader` implementation that tokenises binary input pulled from any
/// `std::io::Read` source.
pub struct BinaryReader<'a, R: Read> {
// Borrowed byte source.
read: &'a mut R,
// Look-ahead buffer, or the EOF marker once the source is exhausted.
buf: PeekState,
// Number of bytes requested from the source whenever the buffer is
// refilled by `prime` (initialised to 1 by `new`).
chunksize: usize,
}
/// Splits a lead byte into its operation and 4-bit argument.
///
/// The high nibble selects the `Op`; the low nibble is returned unchanged.
/// Fails (via the `InvalidOp` conversion) when the high nibble is not a
/// valid `Op`.
pub fn decodeop(b: u8) -> Result<(Op, u8)> {
    let major = b >> 4;
    let arg = b & 15;
    let op = Op::try_from(major)?;
    Ok((op, arg))
}
/// Interprets `bs` as a big-endian, two's-complement signed integer of
/// arbitrary length.
pub fn decodeint(bs: &[u8]) -> BigInt {
BigInt::from_signed_bytes_be(bs)
}
/// Borrows `bs` as a `&str`, or reports a syntax error if it is not valid
/// UTF-8.
pub fn decodestr(bs: &[u8]) -> Result<&str> {
    match std::str::from_utf8(bs) {
        Ok(text) => Ok(text),
        Err(_) => Err(err("Invalid UTF-8")),
    }
}
/// Converts the raw payload of an atom into the matching `Token`.
///
/// Integers are decoded with `decodeint`; strings and symbols must be valid
/// UTF-8 (otherwise a syntax error is returned) and are copied into an owned
/// `String`; byte strings take ownership of `bs` without copying.
pub fn decodebinary(minor: AtomMinor, bs: BytesMut) -> Result<Token> {
    let token = match minor {
        AtomMinor::SignedInteger => Token::SignedInteger(decodeint(&bs)),
        AtomMinor::String => Token::String(String::from(decodestr(&bs)?)),
        AtomMinor::ByteString => Token::ByteString(bs),
        AtomMinor::Symbol => Token::Symbol(String::from(decodestr(&bs)?)),
    };
    Ok(token)
}
/// Builds the error used to signal that input ended unexpectedly
/// (`ErrorKind::UnexpectedEof`).
pub fn eof() -> Error {
    let kind = std::io::ErrorKind::UnexpectedEof;
    Error::new(kind, "EOF")
}
/// Builds a syntax error (`ErrorKind::InvalidData`) carrying the message `s`.
pub fn err(s: &str) -> Error {
    let kind = std::io::ErrorKind::InvalidData;
    Error::new(kind, s)
}
/// Returns `true` when `e` is a syntax error, i.e. its kind is
/// `ErrorKind::InvalidData`.
pub fn is_syntax_error(e: &Error) -> bool {
    e.kind() == std::io::ErrorKind::InvalidData
}
/// Returns `true` when `e` signals premature end of input, i.e. its kind is
/// `ErrorKind::UnexpectedEof`.
pub fn is_eof_error(e: &Error) -> bool {
    e.kind() == std::io::ErrorKind::UnexpectedEof
}
/// Exposes `count` zero-filled bytes of spare capacity at the end of `buf`
/// as an ordinary `&mut [u8]`, suitable for passing to `Read::read` or
/// `Read::read_exact`.
///
/// The length of `buf` is NOT changed here: after filling the slice, the
/// caller must call `buf.advance_mut(n)` for the `n` bytes actually written
/// (see `BinaryReader::prime` and `BinaryReader::readbytes`).
fn read_buffer(buf: &mut BytesMut, count: usize) -> &mut [u8] {
// Make sure at least `count` bytes of spare capacity exist so that the
// slice below cannot go out of bounds.
buf.reserve(count);
unsafe {
// The spare-capacity region is handed out as `MaybeUninit<u8>`.
let m = &mut buf.bytes_mut()[..count];
// SAFETY: `m` is a valid, writable region of exactly `count` bytes.
// Zeroing it initialises every byte, which is what makes the cast to
// `[u8]` on the next line sound.
core::ptr::write_bytes(m.as_mut_ptr(), 0, count);
// SAFETY: all `count` bytes were just initialised, and
// `MaybeUninit<u8>` has the same layout as `u8`.
&mut *(m as *mut [core::mem::MaybeUninit<u8>] as *mut [u8])
}
}
impl<'a, R: Read> BinaryReader<'a, R> {
pub fn new(read: &'a mut R) -> Self {
BinaryReader {
read,
buf: PeekState::Buffer(BytesMut::new()),
chunksize: 1,
}
}
fn prime(&mut self) -> Result<()> {
if let PeekState::Buffer(ref mut buf) = self.buf {
if buf.remaining() == 0 {
let nbytes = self.read.read(read_buffer(buf, self.chunksize))?;
if nbytes == 0 {
self.buf = PeekState::Eof;
} else {
unsafe { buf.advance_mut(nbytes); }
}
}
}
Ok(())
}
pub fn skip(&mut self) -> Result<()> {
self.prime()?;
if let PeekState::Buffer(ref mut buf) = self.buf {
buf.advance(1);
}
Ok(())
}
pub fn peek(&mut self) -> Result<u8> {
self.prime()?;
match self.buf {
PeekState::Eof => Err(eof()),
PeekState::Buffer(ref mut buf) => Ok(buf[0]),
}
}
pub fn read(&mut self) -> Result<u8> {
let v = self.peek()?;
if let PeekState::Buffer(ref mut buf) = self.buf {
buf.advance(1);
}
Ok(v)
}
pub fn readbytes(&mut self, req: usize) -> Result<BytesMut> {
let buf = match self.buf {
PeekState::Eof => unreachable!(),
PeekState::Buffer(ref mut buf) => buf,
};
let avail = buf.remaining();
if avail < req {
let count = req - avail;
self.read.read_exact(read_buffer(buf, count))?;
unsafe { buf.advance_mut(count); }
}
Ok(buf.split_to(req))
}
pub fn nextop(&mut self) -> Result<(Op, u8)> {
decodeop(self.read()?)
}
pub fn varint(&mut self) -> Result<usize> {
let v = self.read()?;
if v < 128 {
Ok(usize::from(v))
} else {
Ok(self.varint()? * 128 + usize::from(v - 128))
}
}
pub fn wirelength(&mut self, arg: u8) -> Result<usize> {
if arg < 15 {
Ok(usize::from(arg))
} else {
self.varint()
}
}
pub fn peekend(&mut self) -> Result<bool> {
if self.peek()? == 4 {
self.skip()?;
Ok(true)
} else {
Ok(false)
}
}
}
/// Lets a mutable borrow of a `BinaryReader` be used wherever a `Reader` is
/// required, so a decoder can be built on `&mut r` and `r` inspected again
/// afterwards. Every method forwards to the implementation on
/// `BinaryReader` itself.
impl<'re, 'a, R: Read> Reader for &'re mut BinaryReader<'a, R> {
    fn next_token(&mut self) -> Result<Token> {
        <BinaryReader<'a, R> as Reader>::next_token(&mut **self)
    }

    fn buffered_len(&mut self) -> Result<usize> {
        <BinaryReader<'a, R> as Reader>::buffered_len(&mut **self)
    }

    fn at_atom_end(&mut self, minor: AtomMinor) -> Result<bool> {
        <BinaryReader<'a, R> as Reader>::at_atom_end(&mut **self, minor)
    }

    fn at_compound_end(&mut self, paren: CompoundMinor) -> Result<bool> {
        <BinaryReader<'a, R> as Reader>::at_compound_end(&mut **self, paren)
    }
}
/// Tokeniser for the binary syntax: each call to `next_token` consumes one
/// lead byte plus whatever payload that lead byte calls for.
impl<'a, R: Read> Reader for BinaryReader<'a, R> {
    /// Reads the next token.
    ///
    /// # Errors
    /// Returns an EOF error when the input ends, a syntax error for an
    /// invalid lead byte or malformed payload, or any I/O error from the
    /// source.
    fn next_token(&mut self) -> Result<Token> {
        match self.nextop()? {
            (Op::Misc(0), 0) => Ok(Token::Boolean(false)),
            (Op::Misc(0), 1) => Ok(Token::Boolean(true)),
            (Op::Misc(0), 2) => {
                // Big-endian IEEE 754 single; `readbytes(4)` yields exactly
                // four bytes, so the slice-to-array conversion cannot fail.
                let raw = self.readbytes(4)?;
                let bs: [u8; 4] = raw[..]
                    .try_into()
                    .expect("readbytes(4) returns exactly 4 bytes");
                Ok(Token::Float(f32::from_bits(u32::from_be_bytes(bs))))
            }
            (Op::Misc(0), 3) => {
                // Big-endian IEEE 754 double; same reasoning as above.
                let raw = self.readbytes(8)?;
                let bs: [u8; 8] = raw[..]
                    .try_into()
                    .expect("readbytes(8) returns exactly 8 bytes");
                Ok(Token::Double(f64::from_bits(u64::from_be_bytes(bs))))
            }
            (Op::Misc(0), 5) => Ok(Token::Annotation),
            (Op::Misc(0), _) => Err(err("Invalid format A encoding")),
            (Op::Misc(1), arg) => Ok(Token::PlaceholderRef(self.wirelength(arg)?)),
            // Open-ended ("format C") atom or compound: the argument nibble
            // is itself decoded as an `Op` naming what is being opened.
            (Op::Misc(2), arg) => match Op::try_from(arg)? {
                Op::Atom(minor) => Ok(Token::OpenAtom(minor)),
                Op::Compound(minor) => Ok(Token::OpenCompound(minor, None)),
                _ => Err(err("Invalid format C start byte")),
            }
            (Op::Misc(3), arg) => {
                // Small integer stored in the nibble: 0..=12 map to
                // themselves, 13..=15 map to -3..=-1.
                let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
                Ok(Token::SignedInteger(BigInt::from(n)))
            }
            (Op::Misc(_), _) => unreachable!(),
            (Op::Atom(minor), arg) => {
                // Length-prefixed atom: read the payload and decode it.
                let count = self.wirelength(arg)?;
                let bs = self.readbytes(count)?;
                decodebinary(minor, bs)
            }
            (Op::Compound(minor), arg) =>
                Ok(Token::OpenCompound(minor, Some(self.wirelength(arg)?))),
            (Op::Reserved(3), 15) => Ok(Token::Noop),
            (Op::Reserved(_), _) => Err(InvalidOp.into()),
        }
    }

    /// Number of unread bytes in the look-ahead buffer. An empty buffer is
    /// refilled first, so this may read from the source; returns 0 at EOF.
    fn buffered_len(&mut self) -> Result<usize> {
        self.prime()?;
        match self.buf {
            PeekState::Eof => Ok(0),
            PeekState::Buffer(ref b) => Ok(b.remaining()),
        }
    }

    /// True (and the end marker is consumed) when the next byte ends the
    /// current chunked atom. The atom kind is not needed by this syntax.
    fn at_atom_end(&mut self, _minor: AtomMinor) -> Result<bool> {
        self.peekend()
    }

    /// True (and the end marker is consumed) when the next byte ends the
    /// current open-ended compound. The compound kind is not needed by this
    /// syntax.
    fn at_compound_end(&mut self, _paren: CompoundMinor) -> Result<bool> {
        self.peekend()
    }
}