// Source path: preserves/implementations/rust/oo/src/packed/reader.rs
// (Repository viewer metadata: 585 lines, 20 KiB, Rust.)

use crate::error::{self, ExpectedKind, io_eof};
use crate::value::Domain;
use num::bigint::BigInt;
use num::traits::cast::{FromPrimitive, ToPrimitive};
use std::borrow::Cow;
use std::convert::TryFrom;
use std::convert::TryInto;
use std::io;
use std::marker::PhantomData;
use super::constants::Tag;
use super::super::{
CompoundClass,
DomainDecode,
Map,
NestedValue,
Record,
Set,
Value,
boundary as B,
reader::{
Token,
Reader,
ReaderResult,
},
signed_integer::SignedInteger,
source::BinarySource,
};
/// Reader that decodes values from a borrowed [`BinarySource`].
///
/// `'de` is the lifetime of byte data handed out by the source (it appears in
/// the `Cow<'de, _>` values returned by this reader's methods); `'src` is the
/// lifetime of the mutable borrow of the source itself.
pub struct PackedReader<'de, 'src, S: BinarySource<'de>> {
/// The underlying byte source that this reader pulls its input from.
pub source: &'src mut S,
// Zero-sized marker so that the `'de` lifetime parameter is used by a field.
phantom: PhantomData<&'de ()>,
}
/// Lets a `PackedReader` itself be used as a [`BinarySource`].
///
/// Every method forwards unchanged to the wrapped `self.source`; no state is
/// kept or modified in the reader itself.
impl<'de, 'src, S: BinarySource<'de>> BinarySource<'de> for PackedReader<'de, 'src, S> {
// Marks are exactly the marks of the wrapped source.
type Mark = S::Mark;
// Forwards to `S::mark`.
#[inline(always)]
fn mark(&mut self) -> io::Result<Self::Mark> {
self.source.mark()
}
// Forwards to `S::restore`.
#[inline(always)]
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
self.source.restore(mark)
}
// Forwards to `S::skip`.
#[inline(always)]
fn skip(&mut self) -> io::Result<()> {
self.source.skip()
}
// Forwards to `S::peek`; `None` is treated as end of input by `peek_noeof`.
#[inline(always)]
fn peek(&mut self) -> io::Result<Option<u8>> {
self.source.peek()
}
// Forwards to `S::readbytes`.
#[inline(always)]
fn readbytes(&mut self, count: u64) -> io::Result<Cow<'de, [u8]>> {
self.source.readbytes(count)
}
// Forwards to `S::readbytes_into`.
#[inline(always)]
fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> {
self.source.readbytes_into(bs)
}
// Forwards to `S::input_position`.
#[inline(always)]
fn input_position(&mut self) -> io::Result<Option<usize>> {
self.source.input_position()
}
// Forwards to `S::discard`.
#[inline(always)]
fn discard(&mut self, count: u64) -> io::Result<()> {
self.source.discard(count)
}
// Forwards to `S::read_to_end`.
#[inline(always)]
fn read_to_end(&mut self) -> io::Result<Cow<'de, [u8]>> {
self.source.read_to_end()
}
}
fn out_of_range<I: Into<BigInt>>(i: I) -> error::Error {
error::Error::NumberOutOfRange(i.into())
}
impl<'de, 'src, S: BinarySource<'de>> PackedReader<'de, 'src, S> {
#[inline(always)]
pub fn new(source: &'src mut S) -> Self {
PackedReader { source, phantom: PhantomData }
}
#[inline(always)]
fn peek_noeof(&mut self) -> io::Result<u8> {
self.peek()?.ok_or_else(io_eof)
}
/// Consumes and returns the next byte; end of input is an error.
#[inline(always)]
fn read(&mut self) -> io::Result<u8> {
    let byte = self.peek_noeof()?;
    // The byte was only peeked so far; advance past it.
    self.skip()?;
    Ok(byte)
}
#[inline(always)]
fn varint(&mut self) -> io::Result<u64> {
let mut shift = 0;
let mut acc: u64 = 0;
loop {
let v = self.read()?;
if shift == 63 && v > 1 { Err(error::Error::Message("PackedReader length too long".to_string()))? }
acc |= ((v & 0x7f) as u64) << shift;
shift += 7;
if v & 0x80 == 0 { return Ok(acc) }
if shift >= 70 { Err(error::Error::Message("PackedReader length too long".to_string()))? }
}
}
/// Checks whether the next byte is the `End` tag, consuming it if so.
///
/// Returns `Ok(false)` (consuming nothing) for any other byte and at end of
/// input.
#[inline(always)]
fn peekend(&mut self) -> io::Result<bool> {
    let at_end = self.peek()? == Some(Tag::End.into());
    if at_end {
        self.skip()?;
    }
    Ok(at_end)
}
/// Skips over any annotations at the current position and returns the tag of
/// the first non-annotation item, leaving that tag byte unconsumed.
#[inline(always)]
fn peek_next_nonannotation_tag(&mut self) -> ReaderResult<Tag> {
    let mut tag = Tag::try_from(self.peek_noeof()?)?;
    while tag == Tag::Annotation {
        // Drop the annotation tag byte, then the annotation value itself.
        self.skip()?;
        self.skip_value()?;
        tag = Tag::try_from(self.peek_noeof()?)?;
    }
    Ok(tag)
}
fn next_atomic(&mut self, expected_tag: Tag, k: ExpectedKind) -> ReaderResult<Cow<'de, [u8]>> {
let actual_tag = self.peek_next_nonannotation_tag()?;
if actual_tag == expected_tag {
self.skip()?;
let count = self.varint()?;
Ok(self.readbytes(count)?)
} else {
Err(error::Error::Expected(k))
}
}
fn next_compound(&mut self, expected_tag: Tag, k: ExpectedKind) -> ReaderResult<()>
{
let actual_tag = self.peek_next_nonannotation_tag()?;
if actual_tag == expected_tag {
self.skip()?;
Ok(())
} else {
Err(error::Error::Expected(k))
}
}
/// Reads a `count`-byte big-endian two's-complement integer from the source.
///
/// * `count == 0` yields zero without reading anything.
/// * `count <= 16` is sign-extended into an `i128`.
/// * `count > 16` is narrowed to `u128` / `i128` when the value fits, and is
///   otherwise returned as a `BigInt`.
///
/// Fails with a "Signed integer too long" message if `count` does not fit in
/// `usize`, and propagates any error from the underlying source.
#[inline(always)]
fn read_signed_integer(&mut self, count: u64) -> io::Result<SignedInteger> {
    let count_u64 = count;
    let count: usize = count.try_into().map_err(
        |_| error::Error::Message("Signed integer too long".to_string()))?;
    if count == 0 {
        return Ok(SignedInteger::from(0_i128));
    }
    if count > 16 {
        let bs = self.readbytes(count_u64)?;
        if (bs[0] & 0x80) == 0 {
            // Positive or zero: it fits in a u128 exactly when every byte
            // ahead of the final 16 is a zero pad byte.
            let mut i = 0;
            while i < count && bs[i] == 0 { i += 1; }
            if count - i <= 16 {
                Ok(SignedInteger::from(u128::from_be_bytes(bs[bs.len() - 16..].try_into().unwrap())))
            } else {
                Ok(SignedInteger::from(Cow::Owned(BigInt::from_bytes_be(num::bigint::Sign::Plus, &bs[i..]))))
            }
        } else {
            // Negative: `i` counts the leading 0xff sign-extension bytes.
            let mut i = 0;
            while i < count && bs[i] == 0xff { i += 1; }
            let significant = count - i;
            // The final 16 bytes only represent the same value as an i128 if
            // they themselves start with a set sign bit. With fewer than 16
            // significant bytes the window begins with a 0xff byte, so that
            // holds automatically; with exactly 16 it must be checked, since
            // e.g. `ff 7f 00..00` needs 129 bits and would otherwise be
            // misread as a large positive number.
            let fits_i128 =
                significant < 16 || (significant == 16 && (bs[i] & 0x80) != 0);
            if fits_i128 {
                Ok(SignedInteger::from(i128::from_be_bytes(bs[bs.len() - 16..].try_into().unwrap())))
            } else {
                Ok(SignedInteger::from(Cow::Owned(BigInt::from_signed_bytes_be(&bs))))
            }
        }
    } else {
        // Short form: pre-fill a 16-byte buffer with the sign-extension byte
        // and place the `count` payload bytes at its tail.
        let first_byte = self.read()?;
        let prefix_byte = if (first_byte & 0x80) == 0 { 0x00 } else { 0xff };
        let mut bs = [prefix_byte; 16];
        bs[16 - count] = first_byte;
        self.readbytes_into(&mut bs[16 - (count - 1)..])?;
        Ok(SignedInteger::from(i128::from_be_bytes(bs)))
    }
}
#[inline(always)]
fn next_unsigned<T: FromPrimitive, F>(&mut self, f: F) -> ReaderResult<T>
where
F: FnOnce(u128) -> Option<T>
{
let tag = self.peek_next_nonannotation_tag()?;
match tag {
Tag::SmallInteger(v) => {
self.skip()?;
if v < 0 {
Err(out_of_range(v))
} else {
f(v as u128).ok_or_else(|| out_of_range(v))
}
}
Tag::MediumInteger(count) => {
self.skip()?;
let n = &self.read_signed_integer(count.into())?;
let i = n.try_into().map_err(|_| out_of_range(n))?;
f(i).ok_or_else(|| out_of_range(i))
}
Tag::SignedInteger => {
self.skip()?;
let count = self.varint()?;
let n = &self.read_signed_integer(count)?;
let i = n.try_into().map_err(|_| out_of_range(n))?;
f(i).ok_or_else(|| out_of_range(i))
}
_ => Err(error::Error::Expected(ExpectedKind::SignedInteger))
}
}
#[inline(always)]
fn next_signed<T: FromPrimitive, F>(&mut self, f: F) -> ReaderResult<T>
where
F: FnOnce(i128) -> Option<T>
{
let tag = self.peek_next_nonannotation_tag()?;
match tag {
Tag::SmallInteger(v) => {
self.skip()?;
f(v.into()).ok_or_else(|| out_of_range(v))
}
Tag::MediumInteger(count) => {
self.skip()?;
let n = &self.read_signed_integer(count.into())?;
let i = n.try_into().map_err(|_| out_of_range(n))?;
f(i).ok_or_else(|| out_of_range(i))
}
Tag::SignedInteger => {
self.skip()?;
let count = self.varint()?;
let n = &self.read_signed_integer(count)?;
let i = n.try_into().map_err(|_| out_of_range(n))?;
f(i).ok_or_else(|| out_of_range(i))
}
_ => Err(error::Error::Expected(ExpectedKind::SignedInteger))
}
}
/// Collects a run of consecutive annotations into a vector.
///
/// Expects the first annotation's tag byte to have been consumed already: it
/// reads one value, then keeps reading values for as long as the next byte is
/// another `Annotation` tag. End of input after an annotation is an error.
fn gather_annotations<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
    &mut self,
    dec: &mut Dec,
) -> io::Result<Vec<N>> {
    let mut collected = Vec::new();
    loop {
        collected.push(self.demand_next_domain(true, dec)?);
        if Tag::try_from(self.peek_noeof()?)? != Tag::Annotation {
            return Ok(collected);
        }
        // Consume the tag byte of the following annotation.
        self.skip()?;
    }
}
/// Discards a run of consecutive annotations without decoding them.
///
/// Expects the first annotation's tag byte to have been consumed already: it
/// skips one value, then keeps skipping for as long as the next byte is
/// another `Annotation` tag. End of input after an annotation is an error.
fn skip_annotations(&mut self) -> io::Result<()> {
    loop {
        self.skip_value()?;
        if Tag::try_from(self.peek_noeof()?)? != Tag::Annotation {
            return Ok(());
        }
        // Consume the tag byte of the following annotation.
        self.skip()?;
    }
}
/// Reads the next member of a compound, or `None` once the `End` tag is
/// reached (the `End` tag is consumed).
fn next_upto_end<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
    &mut self,
    read_annotations: bool,
    dec: &mut Dec,
) -> io::Result<Option<N>> {
    if self.peekend()? {
        return Ok(None);
    }
    let member = self.demand_next_domain(read_annotations, dec)?;
    Ok(Some(member))
}
/// Validates `cow` as UTF-8 and reinterprets it as text, preserving whether
/// the data is borrowed or owned.
///
/// Invalid UTF-8 produces the reader's "Invalid UTF-8" syntax error.
#[inline(always)]
fn decodestr<'a>(&mut self, cow: Cow<'a, [u8]>) -> io::Result<Cow<'a, str>> {
    match cow {
        Cow::Borrowed(bytes) => match std::str::from_utf8(bytes) {
            Ok(text) => Ok(Cow::Borrowed(text)),
            Err(_) => Err(self.syntax_error("Invalid UTF-8")),
        },
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            Ok(text) => Ok(Cow::Owned(text)),
            Err(_) => Err(self.syntax_error("Invalid UTF-8")),
        },
    }
}
}
impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, 'src, S> {
/// Decodes the next complete value from the input.
///
/// Returns `Ok(None)` when the source is already at end of input. When
/// `read_annotations` is true, annotations are collected and attached to the
/// value they precede; otherwise they are skipped. Embedded values are
/// decoded through `dec`.
///
/// Syntax errors are raised for an empty record, a dictionary key without a
/// value, invalid UTF-8 in a string or symbol, and a stray `End` tag.
fn next_domain<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
    &mut self,
    read_annotations: bool,
    dec: &mut Dec,
) -> io::Result<Option<N>> {
    if self.peek()?.is_none() {
        return Ok(None);
    }
    let decoded: N = match Tag::try_from(self.read()?)? {
        Tag::False => N::new(false),
        Tag::True => N::new(true),
        Tag::Float => {
            let mut raw = [0; 4];
            self.readbytes_into(&mut raw)?;
            Value::from(f32::from_bits(u32::from_be_bytes(raw))).wrap()
        }
        Tag::Double => {
            let mut raw = [0; 8];
            self.readbytes_into(&mut raw)?;
            Value::from(f64::from_bits(u64::from_be_bytes(raw))).wrap()
        }
        Tag::Annotation => {
            if read_annotations {
                let mut annotations = self.gather_annotations(dec)?;
                let (inner_annotations, inner_value) =
                    self.demand_next_domain::<N, _>(read_annotations, dec)?.pieces();
                // Annotations already attached to the inner value go after
                // the ones gathered here.
                if let Some(more) = inner_annotations {
                    annotations.extend_from_slice(&more[..]);
                }
                N::wrap(Some(Box::new(annotations)), inner_value)
            } else {
                self.skip_annotations()?;
                self.demand_next_domain(read_annotations, dec)?
            }
        }
        Tag::Embedded => {
            Value::Embedded(dec.decode_embedded(self, read_annotations)?).wrap()
        }
        Tag::SmallInteger(v) => {
            // TODO: prebuild these in value.rs
            Value::from(v).wrap()
        }
        Tag::MediumInteger(count) => {
            let number = self.read_signed_integer(count.into())?;
            Value::SignedInteger(number).wrap()
        }
        Tag::SignedInteger => {
            let length = self.varint()?;
            let number = self.read_signed_integer(length)?;
            Value::SignedInteger(number).wrap()
        }
        Tag::String => {
            let length = self.varint()?;
            let raw = self.readbytes(length)?;
            Value::String(self.decodestr(raw)?.into_owned()).wrap()
        }
        Tag::ByteString => {
            let length = self.varint()?;
            Value::ByteString(self.readbytes(length)?.into_owned()).wrap()
        }
        Tag::Symbol => {
            let length = self.varint()?;
            let raw = self.readbytes(length)?;
            Value::Symbol(self.decodestr(raw)?.into_owned()).wrap()
        }
        Tag::Record => {
            let mut fields = Vec::new();
            while let Some(field) = self.next_upto_end(read_annotations, dec)? {
                fields.push(field);
            }
            // An encoded record must contain at least one element.
            if fields.is_empty() {
                return Err(self.syntax_error("Too few elements in encoded record"))
            }
            Value::Record(Record(fields)).wrap()
        }
        Tag::Sequence => {
            let mut items = Vec::new();
            while let Some(item) = self.next_upto_end(read_annotations, dec)? {
                items.push(item);
            }
            Value::Sequence(items).wrap()
        }
        Tag::Set => {
            let mut members = Set::new();
            while let Some(member) = self.next_upto_end(read_annotations, dec)? {
                members.insert(member);
            }
            Value::Set(members).wrap()
        }
        Tag::Dictionary => {
            let mut entries = Map::new();
            while let Some(key) = self.next_upto_end(read_annotations, dec)? {
                // Every key must be followed by a value before `End`.
                if let Some(value) = self.next_upto_end(read_annotations, dec)? {
                    entries.insert(key, value);
                } else {
                    return Err(self.syntax_error("Missing dictionary value"));
                }
            }
            Value::Dictionary(entries).wrap()
        }
        tag @ Tag::End => {
            return Err(self.syntax_error(&format!("Invalid tag: {:?}", tag)));
        }
    };
    Ok(Some(decoded))
}
/// Consumes the opening `Record` tag (after skipping annotations); any other
/// tag yields `Expected(Record)`.
#[inline(always)]
fn open_record(&mut self) -> ReaderResult<()> {
self.next_compound(Tag::Record, ExpectedKind::Record)
}
/// Consumes the opening `Sequence` tag (after skipping annotations); any other
/// tag yields `Expected(Sequence)`.
#[inline(always)]
fn open_sequence(&mut self) -> ReaderResult<()> {
self.next_compound(Tag::Sequence, ExpectedKind::Sequence)
}
/// Consumes the opening `Set` tag (after skipping annotations); any other tag
/// yields `Expected(Set)`.
#[inline(always)]
fn open_set(&mut self) -> ReaderResult<()> {
self.next_compound(Tag::Set, ExpectedKind::Set)
}
/// Consumes the opening `Dictionary` tag (after skipping annotations); any
/// other tag yields `Expected(Dictionary)`.
#[inline(always)]
fn open_dictionary(&mut self) -> ReaderResult<()> {
self.next_compound(Tag::Dictionary, ExpectedKind::Dictionary)
}
/// No-op for this reader: the boundary argument is ignored and nothing is
/// read from the source.
#[inline(always)]
fn boundary(&mut self, _b: &B::Type) -> ReaderResult<()> {
Ok(())
}
/// Returns `true` and consumes the byte if the next byte is the `End` tag;
/// otherwise returns `false` and consumes nothing. Both boundary arguments
/// are ignored.
#[inline(always)]
fn close_compound(&mut self, _b: &mut B::Type, _i: &B::Item) -> ReaderResult<bool> {
Ok(self.peekend()?)
}
/// Consumes the `Embedded` tag (after skipping annotations); any other tag
/// yields `Expected(Embedded)`.
#[inline(always)]
fn open_embedded(&mut self) -> ReaderResult<()> {
self.next_compound(Tag::Embedded, ExpectedKind::Embedded)
}
/// No-op for this reader: nothing is read from the source when an embedded
/// value is closed.
#[inline(always)]
fn close_embedded(&mut self) -> ReaderResult<()> {
Ok(())
}
// Reader marks are the marks of the underlying source.
type Mark = S::Mark;
/// Obtains a mark by forwarding to the underlying source's `mark`.
#[inline(always)]
fn mark(&mut self) -> io::Result<Self::Mark> {
self.source.mark()
}
/// Forwards `mark` to the underlying source's `restore`.
#[inline(always)]
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
self.source.restore(mark)
}
/// Reads the next token from the input, skipping any annotations first.
///
/// Atoms are decoded in full; compounds yield only an opening token naming
/// their class; `End` yields `Token::End`; embedded values are decoded through
/// `decode_embedded`. End of input is an error.
fn next_token<D: Domain, Dec: DomainDecode<D>>(
    &mut self,
    read_embedded_annotations: bool,
    decode_embedded: &mut Dec,
) -> io::Result<Token<D>> {
    loop {
        let token = match Tag::try_from(self.peek_noeof()?)? {
            Tag::Annotation => {
                // Drop the whole run of annotations and look again.
                self.skip()?;
                self.skip_annotations()?;
                continue;
            }
            Tag::Embedded => {
                self.skip()?;
                Token::Embedded(decode_embedded.decode_embedded(self, read_embedded_annotations)?)
            }
            Tag::False
            | Tag::True
            | Tag::Float
            | Tag::Double
            | Tag::SmallInteger(_)
            | Tag::MediumInteger(_)
            | Tag::SignedInteger
            | Tag::String
            | Tag::ByteString
            | Tag::Symbol => {
                // The tag was only peeked, so the full value decoder can
                // consume the atom from its tag byte onwards.
                Token::Atom(self.demand_next_domain(false, decode_embedded)?)
            }
            Tag::Record => {
                self.skip()?;
                Token::Compound(CompoundClass::Record)
            }
            Tag::Sequence => {
                self.skip()?;
                Token::Compound(CompoundClass::Sequence)
            }
            Tag::Set => {
                self.skip()?;
                Token::Compound(CompoundClass::Set)
            }
            Tag::Dictionary => {
                self.skip()?;
                Token::Compound(CompoundClass::Dictionary)
            }
            Tag::End => {
                self.skip()?;
                Token::End
            }
        };
        return Ok(token);
    }
}
#[inline(always)]
fn next_boolean(&mut self) -> ReaderResult<bool> {
match self.peek_next_nonannotation_tag()? {
Tag::False => { self.skip()?; Ok(false) }
Tag::True => { self.skip()?; Ok(true) }
_ => Err(error::Error::Expected(ExpectedKind::Boolean)),
}
}
fn next_signedinteger(&mut self) -> ReaderResult<SignedInteger> {
let tag = self.peek_next_nonannotation_tag()?;
match tag {
Tag::SmallInteger(v) => {
self.skip()?;
Ok(SignedInteger::from(v as i32))
}
Tag::MediumInteger(count) => {
self.skip()?;
Ok(self.read_signed_integer(count.into())?)
}
Tag::SignedInteger => {
self.skip()?;
let count = self.varint()?;
Ok(self.read_signed_integer(count)?)
}
_ => Err(error::Error::Expected(ExpectedKind::SignedInteger))
}
}
/// Reads the next integer, failing with `NumberOutOfRange` unless it fits in an `i8`.
fn next_i8(&mut self) -> ReaderResult<i8> {
    self.next_signed(|value| value.to_i8())
}
/// Reads the next integer, failing with `NumberOutOfRange` unless it fits in an `i16`.
fn next_i16(&mut self) -> ReaderResult<i16> {
    self.next_signed(|value| value.to_i16())
}
/// Reads the next integer, failing with `NumberOutOfRange` unless it fits in an `i32`.
fn next_i32(&mut self) -> ReaderResult<i32> {
    self.next_signed(|value| value.to_i32())
}
/// Reads the next integer, failing with `NumberOutOfRange` unless it fits in an `i64`.
fn next_i64(&mut self) -> ReaderResult<i64> {
    self.next_signed(|value| value.to_i64())
}
/// Reads the next integer, failing with `NumberOutOfRange` unless it fits in an `i128`.
fn next_i128(&mut self) -> ReaderResult<i128> {
    self.next_signed(|value| value.to_i128())
}
/// Reads the next integer, failing with `NumberOutOfRange` unless it fits in a `u8`.
fn next_u8(&mut self) -> ReaderResult<u8> {
    self.next_unsigned(|value| value.to_u8())
}
/// Reads the next integer, failing with `NumberOutOfRange` unless it fits in a `u16`.
fn next_u16(&mut self) -> ReaderResult<u16> {
    self.next_unsigned(|value| value.to_u16())
}
/// Reads the next integer, failing with `NumberOutOfRange` unless it fits in a `u32`.
fn next_u32(&mut self) -> ReaderResult<u32> {
    self.next_unsigned(|value| value.to_u32())
}
/// Reads the next integer, failing with `NumberOutOfRange` unless it fits in a `u64`.
fn next_u64(&mut self) -> ReaderResult<u64> {
    self.next_unsigned(|value| value.to_u64())
}
/// Reads the next integer, failing with `NumberOutOfRange` unless it fits in a `u128`.
fn next_u128(&mut self) -> ReaderResult<u128> {
    self.next_unsigned(|value| value.to_u128())
}
/// Reads the next value as an `f32`, skipping annotations.
///
/// A `Double` is accepted too and narrowed with an `as f32` cast; any other
/// tag yields `Expected(Float)` and is left unconsumed.
fn next_f32(&mut self) -> ReaderResult<f32> {
    match self.peek_next_nonannotation_tag()? {
        Tag::Float => {
            self.skip()?;
            let mut raw = [0u8; 4];
            self.readbytes_into(&mut raw)?;
            // The payload is the big-endian IEEE 754 bit pattern.
            Ok(f32::from_be_bytes(raw))
        }
        Tag::Double => {
            self.skip()?;
            let mut raw = [0u8; 8];
            self.readbytes_into(&mut raw)?;
            let wide = f64::from_be_bytes(raw);
            Ok(wide as f32)
        }
        _ => Err(error::Error::Expected(ExpectedKind::Float)),
    }
}
/// Reads the next value as an `f64`, skipping annotations.
///
/// A `Float` is accepted too and widened with an `as f64` cast; any other tag
/// yields `Expected(Double)` and is left unconsumed.
fn next_f64(&mut self) -> ReaderResult<f64> {
    match self.peek_next_nonannotation_tag()? {
        Tag::Float => {
            self.skip()?;
            let mut raw = [0u8; 4];
            self.readbytes_into(&mut raw)?;
            let narrow = f32::from_be_bytes(raw);
            Ok(narrow as f64)
        }
        Tag::Double => {
            self.skip()?;
            let mut raw = [0u8; 8];
            self.readbytes_into(&mut raw)?;
            // The payload is the big-endian IEEE 754 bit pattern.
            Ok(f64::from_be_bytes(raw))
        }
        _ => Err(error::Error::Expected(ExpectedKind::Double)),
    }
}
/// Reads the next value as a string, skipping annotations.
///
/// Returns `Expected(String)` when the next item is not a `String` (the
/// mismatching tag is left unconsumed), and a syntax error when the payload
/// is not valid UTF-8.
fn next_str(&mut self) -> ReaderResult<Cow<'de, str>> {
    // Report the kind that was actually requested; this previously claimed a
    // Symbol was expected.
    // NOTE(review): assumes `ExpectedKind::String` exists in `error.rs` — confirm.
    let bs = self.next_atomic(Tag::String, ExpectedKind::String)?;
    Ok(self.decodestr(bs)?)
}
/// Reads the next value as a byte string, skipping annotations.
///
/// Returns `Expected(ByteString)` when the next item is not a `ByteString`
/// (the mismatching tag is left unconsumed).
fn next_bytestring(&mut self) -> ReaderResult<Cow<'de, [u8]>> {
    // Report the kind that was actually requested; this previously claimed a
    // Symbol was expected.
    // NOTE(review): assumes `ExpectedKind::ByteString` exists in `error.rs` — confirm.
    self.next_atomic(Tag::ByteString, ExpectedKind::ByteString)
}
/// Reads the next value as a symbol, skipping annotations.
///
/// Returns `Expected(Symbol)` when the next item is not a `Symbol` (the
/// mismatching tag is left unconsumed), and a syntax error when the payload
/// is not valid UTF-8.
fn next_symbol(&mut self) -> ReaderResult<Cow<'de, str>> {
    let raw = self.next_atomic(Tag::Symbol, ExpectedKind::Symbol)?;
    let text = self.decodestr(raw)?;
    Ok(text)
}
}