preserves/implementations/rust/src/value/reader.rs

269 lines
7.5 KiB
Rust

use bytes::{Buf, BufMut, BytesMut};
use num::bigint::BigInt;
use std::convert::TryFrom;
use std::convert::TryInto;
use std::io::Read;
use super::constants::{Op, InvalidOp, AtomMinor, CompoundMinor};
/// Error type used by this module; an alias for [`std::io::Error`].
///
/// The helpers `eof` and `err` in this file build values of this type with
/// `UnexpectedEof` and `InvalidData` kinds respectively.
pub type Error = std::io::Error;
/// Result type used by this module, with [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;
/// Look-ahead state held by a `BinaryReader`.
#[derive(Debug)]
enum PeekState {
/// A refill of the buffer read zero bytes from the underlying source.
/// Once set, the reader's `prime` no longer touches the source.
Eof,
/// Bytes already read from the source that have not been consumed yet.
Buffer(BytesMut),
}
/// A single lexical item produced by [`Reader::next_token`].
#[derive(Debug, Clone)]
pub enum Token {
/// Annotation marker (lead byte with op `Misc(0)` and argument 5).
Annotation,
/// Placeholder reference carrying the number decoded from the wire
/// (op `Misc(1)`).
PlaceholderRef(usize),
/// No-op byte (op `Reserved(3)` with argument 15).
Noop,
/// Boolean literal (op `Misc(0)`, argument 0 for `false`, 1 for `true`).
Boolean(bool),
/// Single-precision float, decoded from 4 big-endian bytes.
Float(f32),
/// Double-precision float, decoded from 8 big-endian bytes.
Double(f64),
/// Arbitrary-size signed integer, either taken from an atom payload or
/// from the small inline form (op `Misc(3)`).
SignedInteger(BigInt),
/// Text atom whose payload was validated as UTF-8.
String(String),
/// Raw byte-string atom payload.
ByteString(BytesMut),
/// Symbol atom whose payload was validated as UTF-8.
Symbol(String),
/// Start marker for an atom announced via op `Misc(2)`, i.e. without a
/// length prefix.
OpenAtom(AtomMinor),
/// NOTE(review): not produced by any code visible in this file —
/// presumably the counterpart of `OpenAtom`; confirm against consumers.
CloseAtom(AtomMinor),
/// Start of a compound. The second field is `Some(n)` when the lead byte
/// carried a wire length, and `None` when announced via op `Misc(2)`.
OpenCompound(CompoundMinor, Option<usize>),
/// NOTE(review): not produced by any code visible in this file —
/// presumably the counterpart of `OpenCompound`; confirm against consumers.
CloseCompound(CompoundMinor),
}
/// A source of [`Token`]s.
pub trait Reader {
/// Reads and returns the next token from the input.
fn next_token(&mut self) -> Result<Token>;
/// Returns the number of bytes currently buffered and not yet consumed.
/// `BinaryReader` reports `0` once it has reached end of input.
fn buffered_len(&mut self) -> Result<usize>;
/// Reports whether the input is positioned at the end of an open atom of
/// kind `minor`. `BinaryReader` also consumes the end marker when it
/// answers `true`.
fn at_atom_end(&mut self, minor: AtomMinor) -> Result<bool>;
/// Reports whether the input is positioned at the end of an open compound
/// of kind `paren`. `BinaryReader` also consumes the end marker when it
/// answers `true`.
fn at_compound_end(&mut self, paren: CompoundMinor) -> Result<bool>;
}
/// Token reader for the binary encoding, layered over any [`Read`] source.
pub struct BinaryReader<'a, R: Read> {
// Underlying byte source, borrowed for the lifetime of the reader.
read: &'a mut R,
// Look-ahead buffer, or the end-of-input marker.
buf: PeekState,
// Number of bytes requested from `read` on each buffer refill
// (`BinaryReader::new` sets this to 1).
chunksize: usize,
}
/// Splits a lead byte into its operation and 4-bit argument.
///
/// The high nibble selects the [`Op`]; the low nibble is returned unchanged
/// as the argument.
///
/// # Errors
///
/// Fails when the high nibble does not convert to an [`Op`].
pub fn decodeop(b: u8) -> Result<(Op, u8)> {
    let high_nibble = b >> 4;
    let low_nibble = b & 15;
    let op = Op::try_from(high_nibble)?;
    Ok((op, low_nibble))
}
/// Decodes `bs` as a signed, big-endian integer of arbitrary size by
/// delegating to `BigInt::from_signed_bytes_be`.
pub fn decodeint(bs: &[u8]) -> BigInt {
BigInt::from_signed_bytes_be(bs)
}
/// Borrows `bs` as a string slice after checking that it is valid UTF-8.
///
/// # Errors
///
/// Returns an `InvalidData` error with the message `"Invalid UTF-8"` when the
/// bytes are not well-formed UTF-8.
pub fn decodestr(bs: &[u8]) -> Result<&str> {
    match std::str::from_utf8(bs) {
        Ok(text) => Ok(text),
        Err(_) => Err(err("Invalid UTF-8")),
    }
}
/// Turns the payload `bs` of an atom of kind `minor` into the matching
/// [`Token`].
///
/// Byte strings keep the buffer as-is; integers are decoded with
/// [`decodeint`]; strings and symbols are validated with [`decodestr`] and
/// copied into an owned `String`.
///
/// # Errors
///
/// Fails when a string or symbol payload is not valid UTF-8.
pub fn decodebinary(minor: AtomMinor, bs: BytesMut) -> Result<Token> {
    let token = match minor {
        AtomMinor::SignedInteger => Token::SignedInteger(decodeint(&bs)),
        AtomMinor::String => {
            let text = decodestr(&bs)?;
            Token::String(text.to_owned())
        }
        AtomMinor::ByteString => Token::ByteString(bs),
        AtomMinor::Symbol => {
            let name = decodestr(&bs)?;
            Token::Symbol(name.to_owned())
        }
    };
    Ok(token)
}
/// Builds the error reported when the input ends unexpectedly: kind
/// `UnexpectedEof`, message `"EOF"`.
pub fn eof() -> Error {
    use std::io::ErrorKind::UnexpectedEof;
    Error::new(UnexpectedEof, "EOF")
}
/// Builds a syntax error: kind `InvalidData`, carrying `s` as its message.
pub fn err(s: &str) -> Error {
    use std::io::ErrorKind::InvalidData;
    Error::new(InvalidData, s)
}
/// Returns `true` exactly when `e` has kind `InvalidData`, the kind used by
/// [`err`] for malformed input.
pub fn is_syntax_error(e: &Error) -> bool {
    e.kind() == std::io::ErrorKind::InvalidData
}
/// Returns `true` exactly when `e` has kind `UnexpectedEof`, the kind used by
/// [`eof`] for truncated input.
pub fn is_eof_error(e: &Error) -> bool {
    e.kind() == std::io::ErrorKind::UnexpectedEof
}
/// Reserves room for `count` more bytes at the end of `buf`, zero-fills that
/// region, and returns it as a plain `&mut [u8]` suitable for handing to
/// `Read::read` / `Read::read_exact`.
///
/// The length of `buf` is not changed here; callers in this file follow a
/// successful read with `advance_mut` to commit the bytes actually written.
fn read_buffer(buf: &mut BytesMut, count: usize) -> &mut [u8] {
buf.reserve(count);
// SAFETY: the first `count` elements of the `MaybeUninit<u8>` slice are
// fully overwritten with zeroes by `write_bytes` before the slice is
// reinterpreted as `[u8]`, so no uninitialised byte is ever exposed.
// NOTE(review): slicing `[..count]` relies on `reserve(count)` making at
// least `count` elements available through `bytes_mut()`; it panics rather
// than misbehaves otherwise — confirm against the pinned `bytes` version.
unsafe {
let m = &mut buf.bytes_mut()[..count];
core::ptr::write_bytes(m.as_mut_ptr(), 0, count);
&mut *(m as *mut [core::mem::MaybeUninit<u8>] as *mut [u8])
}
}
impl<'a, R: Read> BinaryReader<'a, R> {
pub fn new(read: &'a mut R) -> Self {
BinaryReader {
read,
buf: PeekState::Buffer(BytesMut::new()),
chunksize: 1,
}
}
fn prime(&mut self) -> Result<()> {
if let PeekState::Buffer(ref mut buf) = self.buf {
if buf.remaining() == 0 {
let nbytes = self.read.read(read_buffer(buf, self.chunksize))?;
if nbytes == 0 {
self.buf = PeekState::Eof;
} else {
unsafe { buf.advance_mut(nbytes); }
}
}
}
Ok(())
}
pub fn skip(&mut self) -> Result<()> {
self.prime()?;
if let PeekState::Buffer(ref mut buf) = self.buf {
buf.advance(1);
}
Ok(())
}
pub fn peek(&mut self) -> Result<u8> {
self.prime()?;
match self.buf {
PeekState::Eof => Err(eof()),
PeekState::Buffer(ref mut buf) => Ok(buf[0]),
}
}
pub fn read(&mut self) -> Result<u8> {
let v = self.peek()?;
if let PeekState::Buffer(ref mut buf) = self.buf {
buf.advance(1);
}
Ok(v)
}
pub fn readbytes(&mut self, req: usize) -> Result<BytesMut> {
let buf = match self.buf {
PeekState::Eof => unreachable!(),
PeekState::Buffer(ref mut buf) => buf,
};
let avail = buf.remaining();
if avail < req {
let count = req - avail;
self.read.read_exact(read_buffer(buf, count))?;
unsafe { buf.advance_mut(count); }
}
Ok(buf.split_to(req))
}
pub fn nextop(&mut self) -> Result<(Op, u8)> {
decodeop(self.read()?)
}
pub fn varint(&mut self) -> Result<usize> {
let v = self.read()?;
if v < 128 {
Ok(usize::from(v))
} else {
Ok(self.varint()? * 128 + usize::from(v - 128))
}
}
pub fn wirelength(&mut self, arg: u8) -> Result<usize> {
if arg < 15 {
Ok(usize::from(arg))
} else {
self.varint()
}
}
pub fn peekend(&mut self) -> Result<bool> {
if self.peek()? == 4 {
self.skip()?;
Ok(true)
} else {
Ok(false)
}
}
}
/// Lets a mutable borrow of a [`BinaryReader`] be used wherever a [`Reader`]
/// is expected. Every method forwards to the borrowed reader's own
/// implementation.
impl<'re, 'a, R: Read> Reader for &'re mut BinaryReader<'a, R> {
    fn next_token(&mut self) -> Result<Token> {
        // `**self` names the `BinaryReader` itself, so the call resolves to
        // its own `Reader` impl rather than back to this one.
        (**self).next_token()
    }

    fn buffered_len(&mut self) -> Result<usize> {
        (**self).buffered_len()
    }

    fn at_atom_end(&mut self, minor: AtomMinor) -> Result<bool> {
        (**self).at_atom_end(minor)
    }

    fn at_compound_end(&mut self, paren: CompoundMinor) -> Result<bool> {
        (**self).at_compound_end(paren)
    }
}
/// Tokenizer for the binary encoding.
impl<'a, R: Read> Reader for BinaryReader<'a, R> {
    /// Reads one lead byte and decodes the token it introduces, consuming any
    /// length prefix and payload that belong to it.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error for malformed lead bytes or invalid
    /// UTF-8, an `UnexpectedEof` error when the input ends early, and
    /// propagates I/O errors from the underlying source.
    fn next_token(&mut self) -> Result<Token> {
        match self.nextop()? {
            (Op::Misc(0), 0) => Ok(Token::Boolean(false)),
            (Op::Misc(0), 1) => Ok(Token::Boolean(true)),
            (Op::Misc(0), 2) => {
                let raw = self.readbytes(4)?;
                let bs: [u8; 4] = raw[..]
                    .try_into()
                    .expect("readbytes(4) returns exactly 4 bytes");
                Ok(Token::Float(f32::from_bits(u32::from_be_bytes(bs))))
            }
            (Op::Misc(0), 3) => {
                let raw = self.readbytes(8)?;
                let bs: [u8; 8] = raw[..]
                    .try_into()
                    .expect("readbytes(8) returns exactly 8 bytes");
                Ok(Token::Double(f64::from_bits(u64::from_be_bytes(bs))))
            }
            (Op::Misc(0), 5) => Ok(Token::Annotation),
            (Op::Misc(0), _) => Err(err("Invalid format A encoding")),
            (Op::Misc(1), arg) => Ok(Token::PlaceholderRef(self.wirelength(arg)?)),
            // The argument is itself an op nibble naming what is being opened.
            (Op::Misc(2), arg) => match Op::try_from(arg)? {
                Op::Atom(minor) => Ok(Token::OpenAtom(minor)),
                Op::Compound(minor) => Ok(Token::OpenCompound(minor, None)),
                _ => Err(err("Invalid format C start byte")),
            },
            (Op::Misc(3), arg) => {
                // Small inline integer: arguments 13..=15 encode -3..=-1,
                // arguments 0..=12 encode themselves.
                let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
                Ok(Token::SignedInteger(BigInt::from(n)))
            }
            // NOTE(review): assumes `Op::try_from` only ever yields
            // `Misc(0..=3)` — confirm against `constants.rs`.
            (Op::Misc(_), _) => unreachable!(),
            (Op::Atom(minor), arg) => {
                let count = self.wirelength(arg)?;
                let bs = self.readbytes(count)?;
                decodebinary(minor, bs)
            }
            (Op::Compound(minor), arg) => {
                Ok(Token::OpenCompound(minor, Some(self.wirelength(arg)?)))
            }
            (Op::Reserved(3), 15) => Ok(Token::Noop),
            (Op::Reserved(_), _) => Err(InvalidOp.into()),
        }
    }

    /// Returns how many bytes are buffered and unconsumed, refilling the
    /// buffer first if it is empty. Reports `0` at end of input.
    fn buffered_len(&mut self) -> Result<usize> {
        self.prime()?;
        match self.buf {
            PeekState::Eof => Ok(0),
            PeekState::Buffer(ref b) => Ok(b.remaining()),
        }
    }

    /// Consumes an end marker if one is next. The atom kind is not consulted.
    fn at_atom_end(&mut self, _minor: AtomMinor) -> Result<bool> {
        self.peekend()
    }

    /// Consumes an end marker if one is next. The compound kind is not
    /// consulted.
    fn at_compound_end(&mut self, _paren: CompoundMinor) -> Result<bool> {
        self.peekend()
    }
}