//! Generic [Reader] trait for parsing Preserves [Value][crate::value::repr::Value]s,
//! implemented by code that provides each specific transfer syntax.
|
|
|
|
use crate::error::{self, io_eof, ExpectedKind, Received};
|
|
|
|
use std::borrow::Cow;
|
|
use std::io;
|
|
use std::marker::PhantomData;
|
|
|
|
use super::boundary as B;
|
|
use super::signed_integer::SignedInteger;
|
|
use super::CompoundClass;
|
|
use super::DomainDecode;
|
|
use super::DomainParse;
|
|
use super::Double;
|
|
use super::IOValue;
|
|
use super::IOValueDomainCodec;
|
|
use super::NestedValue;
|
|
use super::ViaCodec;
|
|
|
|
/// Result type whose error is the crate's [error::Error], used by the typed [Reader]
/// operations in this module.
pub type ReaderResult<T> = std::result::Result<T, error::Error>;
|
|
|
|
/// Tokens produced when performing
|
|
/// [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style reading of terms.
|
|
pub enum Token<N: NestedValue> {
|
|
/// An embedded value was seen and completely decoded.
|
|
Embedded(N::Embedded),
|
|
/// An atomic value was seen and completely decoded.
|
|
Atom(N),
|
|
/// A compound value has been opened; its contents follow, and it will be terminated by
|
|
/// [Token::End].
|
|
Compound(CompoundClass),
|
|
/// Closes a previously-opened compound value.
|
|
End,
|
|
}
|
|
|
|
/// Generic parser for Preserves.
|
|
pub trait Reader<'de, N: NestedValue> {
|
|
/// Retrieve the next parseable value or an indication of end-of-input.
|
|
///
|
|
/// Yields `Ok(Some(...))` if a complete value is available, `Ok(None)` if the end of
|
|
/// stream has been reached, or `Err(...)` for parse or IO errors, including
|
|
/// incomplete/partial input. See also [Reader::demand_next].
|
|
fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>>;
|
|
|
|
// Hiding these from the documentation for the moment because I don't want to have to
|
|
// document the whole Boundary thing.
|
|
#[doc(hidden)]
|
|
fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type>;
|
|
#[doc(hidden)]
|
|
fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item>;
|
|
#[doc(hidden)]
|
|
fn open_sequence(&mut self) -> ReaderResult<()>;
|
|
#[doc(hidden)]
|
|
fn open_set(&mut self) -> ReaderResult<()>;
|
|
#[doc(hidden)]
|
|
fn open_dictionary(&mut self) -> ReaderResult<()>;
|
|
#[doc(hidden)]
|
|
fn boundary(&mut self, b: &B::Type) -> ReaderResult<()>;
|
|
|
|
#[doc(hidden)]
|
|
// close_compound implies a b.shift(...) and a self.boundary(b).
|
|
fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool>;
|
|
|
|
#[doc(hidden)]
|
|
fn open_embedded(&mut self) -> ReaderResult<()>;
|
|
#[doc(hidden)]
|
|
fn close_embedded(&mut self) -> ReaderResult<()>;
|
|
|
|
/// Allows structured backtracking to an earlier stage in a parse. Useful for layering
|
|
/// parser combinators atop a Reader.
|
|
type Mark;
|
|
/// Retrieve a marker for the current position in the input.
|
|
fn mark(&mut self) -> io::Result<Self::Mark>;
|
|
/// Seek the input to a previously-saved position.
|
|
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>;
|
|
|
|
/// Get the next [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style event,
|
|
/// discarding annotations.
|
|
///
|
|
/// The `read_embedded_annotations` controls whether annotations are also skipped on
|
|
/// *embedded* values or not.
|
|
fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>>;
|
|
/// Get the next [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style event, plus
|
|
/// a vector containing any annotations that preceded it.
|
|
fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)>;
|
|
|
|
//---------------------------------------------------------------------------
|
|
|
|
/// Skips the next available complete value. Yields an error if no such value exists.
|
|
fn skip_value(&mut self) -> io::Result<()> {
|
|
// TODO efficient skipping in specific impls of this trait
|
|
let _ = self.demand_next(false)?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Retrieve the next parseable value, treating end-of-input as an error.
|
|
///
|
|
/// Yields `Ok(...)` if a complete value is available or `Err(...)` for parse or IO errors,
|
|
/// including incomplete/partial input or end of stream. See also [Reader::next].
|
|
fn demand_next(&mut self, read_annotations: bool) -> io::Result<N> {
|
|
self.next(read_annotations)?.ok_or_else(io_eof)
|
|
}
|
|
|
|
/// Yields the next value, if it is a `Boolean`, or an error otherwise.
|
|
fn next_boolean(&mut self) -> ReaderResult<bool> {
|
|
self.demand_next(false)?.value().to_boolean()
|
|
}
|
|
|
|
/// Yields the next value, if it is a `Double`, or an error otherwise.
|
|
fn next_double(&mut self) -> ReaderResult<Double> {
|
|
Ok(self.demand_next(false)?.value().to_double()?.to_owned())
|
|
}
|
|
|
|
/// Yields the next value, if it is a `SignedInteger`, or an error otherwise.
|
|
fn next_signedinteger(&mut self) -> ReaderResult<SignedInteger> {
|
|
Ok(self
|
|
.demand_next(false)?
|
|
.value()
|
|
.to_signedinteger()?
|
|
.to_owned())
|
|
}
|
|
|
|
/// Yields the next value, if it is a `SignedInteger` that fits in [i8], or an error
|
|
/// otherwise.
|
|
fn next_i8(&mut self) -> ReaderResult<i8> {
|
|
self.demand_next(false)?.value().to_i8()
|
|
}
|
|
/// Yields the next value, if it is a `SignedInteger` that fits in [u8], or an error
|
|
/// otherwise.
|
|
fn next_u8(&mut self) -> ReaderResult<u8> {
|
|
self.demand_next(false)?.value().to_u8()
|
|
}
|
|
/// Yields the next value, if it is a `SignedInteger` that fits in [i16], or an error
|
|
/// otherwise.
|
|
fn next_i16(&mut self) -> ReaderResult<i16> {
|
|
self.demand_next(false)?.value().to_i16()
|
|
}
|
|
/// Yields the next value, if it is a `SignedInteger` that fits in [u16], or an error
|
|
/// otherwise.
|
|
fn next_u16(&mut self) -> ReaderResult<u16> {
|
|
self.demand_next(false)?.value().to_u16()
|
|
}
|
|
/// Yields the next value, if it is a `SignedInteger` that fits in [i32], or an error
|
|
/// otherwise.
|
|
fn next_i32(&mut self) -> ReaderResult<i32> {
|
|
self.demand_next(false)?.value().to_i32()
|
|
}
|
|
/// Yields the next value, if it is a `SignedInteger` that fits in [u32], or an error
|
|
/// otherwise.
|
|
fn next_u32(&mut self) -> ReaderResult<u32> {
|
|
self.demand_next(false)?.value().to_u32()
|
|
}
|
|
/// Yields the next value, if it is a `SignedInteger` that fits in [i64], or an error
|
|
/// otherwise.
|
|
fn next_i64(&mut self) -> ReaderResult<i64> {
|
|
self.demand_next(false)?.value().to_i64()
|
|
}
|
|
/// Yields the next value, if it is a `SignedInteger` that fits in [u64], or an error
|
|
/// otherwise.
|
|
fn next_u64(&mut self) -> ReaderResult<u64> {
|
|
self.demand_next(false)?.value().to_u64()
|
|
}
|
|
/// Yields the next value, if it is a `SignedInteger` that fits in [i128], or an error
|
|
/// otherwise.
|
|
fn next_i128(&mut self) -> ReaderResult<i128> {
|
|
self.demand_next(false)?.value().to_i128()
|
|
}
|
|
/// Yields the next value, if it is a `SignedInteger` that fits in [u128], or an error
|
|
/// otherwise.
|
|
fn next_u128(&mut self) -> ReaderResult<u128> {
|
|
self.demand_next(false)?.value().to_u128()
|
|
}
|
|
/// Yields the next value as an [f64], if it is a `Double`, or an error otherwise.
|
|
fn next_f64(&mut self) -> ReaderResult<f64> {
|
|
self.demand_next(false)?.value().to_f64()
|
|
}
|
|
/// Yields the next value as a [char], if it is parseable by
|
|
/// [Value::to_char][crate::value::Value::to_char], or an error otherwise.
|
|
fn next_char(&mut self) -> ReaderResult<char> {
|
|
self.demand_next(false)?.value().to_char()
|
|
}
|
|
|
|
/// Yields the next value, if it is a `String`, or an error otherwise.
|
|
fn next_str(&mut self) -> ReaderResult<Cow<'de, str>> {
|
|
Ok(Cow::Owned(
|
|
self.demand_next(false)?.value().to_string()?.to_owned(),
|
|
))
|
|
}
|
|
|
|
/// Yields the next value, if it is a `ByteString`, or an error otherwise.
|
|
fn next_bytestring(&mut self) -> ReaderResult<Cow<'de, [u8]>> {
|
|
Ok(Cow::Owned(
|
|
self.demand_next(false)?.value().to_bytestring()?.to_owned(),
|
|
))
|
|
}
|
|
|
|
/// Yields the next value, if it is a `Symbol`, or an error otherwise.
|
|
fn next_symbol(&mut self) -> ReaderResult<Cow<'de, str>> {
|
|
Ok(Cow::Owned(
|
|
self.demand_next(false)?.value().to_symbol()?.to_owned(),
|
|
))
|
|
}
|
|
|
|
#[doc(hidden)]
|
|
fn open_option(&mut self) -> ReaderResult<Option<B::Type>> {
|
|
let b = self.open_record(None)?;
|
|
let label: &str = &self.next_symbol()?;
|
|
match label {
|
|
"None" => {
|
|
self.ensure_complete(b, &B::Item::RecordField)?;
|
|
Ok(None)
|
|
}
|
|
"Some" => Ok(Some(b)),
|
|
_ => Err(error::Error::Expected(
|
|
ExpectedKind::Option,
|
|
Received::ReceivedRecordWithLabel(label.to_owned()),
|
|
)),
|
|
}
|
|
}
|
|
|
|
#[doc(hidden)]
|
|
fn open_simple_record(&mut self, name: &str, arity: Option<usize>) -> ReaderResult<B::Type> {
|
|
let b = self.open_record(arity)?;
|
|
let label: &str = &self.next_symbol()?;
|
|
if label == name {
|
|
Ok(b)
|
|
} else {
|
|
Err(error::Error::Expected(
|
|
ExpectedKind::SimpleRecord(name.to_owned(), arity),
|
|
Received::ReceivedRecordWithLabel(label.to_owned()),
|
|
))
|
|
}
|
|
}
|
|
|
|
/// Constructs a [ConfiguredReader] set with the given value for `read_annotations`.
|
|
fn configured(self, read_annotations: bool) -> ConfiguredReader<'de, N, Self>
|
|
where
|
|
Self: std::marker::Sized,
|
|
{
|
|
ConfiguredReader {
|
|
reader: self,
|
|
read_annotations,
|
|
phantom: PhantomData,
|
|
}
|
|
}
|
|
|
|
#[doc(hidden)]
|
|
fn ensure_more_expected(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<()> {
|
|
if !self.close_compound(b, i)? {
|
|
Ok(())
|
|
} else {
|
|
Err(error::Error::MissingItem)
|
|
}
|
|
}
|
|
|
|
#[doc(hidden)]
|
|
fn ensure_complete(&mut self, mut b: B::Type, i: &B::Item) -> ReaderResult<()> {
|
|
if !self.close_compound(&mut b, i)? {
|
|
Err(error::Error::MissingCloseDelimiter)
|
|
} else {
|
|
Ok(())
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'r, 'de, N: NestedValue, R: Reader<'de, N>> Reader<'de, N> for &'r mut R {
|
|
fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>> {
|
|
(*self).next(read_annotations)
|
|
}
|
|
|
|
fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type> {
|
|
(*self).open_record(arity)
|
|
}
|
|
|
|
fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item> {
|
|
(*self).open_sequence_or_set()
|
|
}
|
|
|
|
fn open_sequence(&mut self) -> ReaderResult<()> {
|
|
(*self).open_sequence()
|
|
}
|
|
|
|
fn open_set(&mut self) -> ReaderResult<()> {
|
|
(*self).open_set()
|
|
}
|
|
|
|
fn open_dictionary(&mut self) -> ReaderResult<()> {
|
|
(*self).open_dictionary()
|
|
}
|
|
|
|
fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> {
|
|
(*self).boundary(b)
|
|
}
|
|
|
|
fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool> {
|
|
(*self).close_compound(b, i)
|
|
}
|
|
|
|
fn open_embedded(&mut self) -> ReaderResult<()> {
|
|
(*self).open_embedded()
|
|
}
|
|
|
|
fn close_embedded(&mut self) -> ReaderResult<()> {
|
|
(*self).close_embedded()
|
|
}
|
|
|
|
type Mark = R::Mark;
|
|
|
|
fn mark(&mut self) -> io::Result<Self::Mark> {
|
|
(*self).mark()
|
|
}
|
|
|
|
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
|
|
(*self).restore(mark)
|
|
}
|
|
|
|
fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>> {
|
|
(*self).next_token(read_embedded_annotations)
|
|
}
|
|
|
|
fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)> {
|
|
(*self).next_annotations_and_token()
|
|
}
|
|
}
|
|
|
|
/// Generic seekable stream of input bytes.
|
|
pub trait BinarySource<'de>: Sized {
|
|
/// Allows structured backtracking to an earlier position in an input.
|
|
type Mark;
|
|
/// Retrieve a marker for the current position in the input.
|
|
fn mark(&mut self) -> io::Result<Self::Mark>;
|
|
/// Seek the input to a previously-saved position.
|
|
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>;
|
|
|
|
/// Skip the next byte.
|
|
fn skip(&mut self) -> io::Result<()>;
|
|
/// Returns the next byte without advancing over it.
|
|
fn peek(&mut self) -> io::Result<u8>;
|
|
/// Returns and consumes the next `count` bytes, which must all be available. Always yields
|
|
/// exactly `count` bytes or an error.
|
|
fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>>;
|
|
/// As [BinarySource::readbytes], but uses `bs` as destination for the read bytes as well
|
|
/// as taking the size of `bs` as the count of bytes to read.
|
|
fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()>;
|
|
|
|
/// Constructs a [PackedReader][super::PackedReader] that will read from `self`.
|
|
fn packed<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
|
|
&mut self,
|
|
decode_embedded: Dec,
|
|
) -> super::PackedReader<'de, '_, N, Dec, Self> {
|
|
super::PackedReader::new(self, decode_embedded)
|
|
}
|
|
|
|
/// Constructs a [PackedReader][super::PackedReader] that will read [IOValue]s from `self`.
|
|
fn packed_iovalues(
|
|
&mut self,
|
|
) -> super::PackedReader<'de, '_, IOValue, IOValueDomainCodec, Self> {
|
|
self.packed(IOValueDomainCodec)
|
|
}
|
|
|
|
/// Constructs a [TextReader][super::TextReader] that will read from `self`.
|
|
fn text<N: NestedValue, Dec: DomainParse<N::Embedded>>(
|
|
&mut self,
|
|
decode_embedded: Dec,
|
|
) -> super::TextReader<'de, '_, N, Dec, Self> {
|
|
super::TextReader::new(self, decode_embedded)
|
|
}
|
|
|
|
/// Constructs a [TextReader][super::TextReader] that will read [IOValue]s from `self`.
|
|
fn text_iovalues(
|
|
&mut self,
|
|
) -> super::TextReader<'de, '_, IOValue, ViaCodec<IOValueDomainCodec>, Self> {
|
|
self.text::<IOValue, _>(ViaCodec::new(IOValueDomainCodec))
|
|
}
|
|
}
|
|
|
|
/// A [BinarySource] that draws its bytes from any [`io::Read`]` + `[`io::Seek`]
/// implementation.
pub struct IOBinarySource<R>
where
    R: io::Read + io::Seek,
{
    /// The wrapped reader that supplies the bytes.
    pub read: R,
    /// Single-byte buffer holding a byte that has been peeked but not yet consumed.
    #[doc(hidden)]
    pub buf: Option<u8>,
}
|
|
|
|
impl<R: io::Read + io::Seek> IOBinarySource<R> {
|
|
/// Constructs an [IOBinarySource] from the given [`io::Read`]` + `[`io::Seek`]
|
|
/// implementation.
|
|
#[inline(always)]
|
|
pub fn new(read: R) -> Self {
|
|
IOBinarySource { read, buf: None }
|
|
}
|
|
}
|
|
|
|
impl<'de, R: io::Read + io::Seek> BinarySource<'de> for IOBinarySource<R> {
|
|
type Mark = u64;
|
|
|
|
#[inline(always)]
|
|
fn mark(&mut self) -> io::Result<Self::Mark> {
|
|
Ok(self.read.stream_position()? - (if self.buf.is_some() { 1 } else { 0 }))
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
|
|
self.read.seek(io::SeekFrom::Start(*mark))?;
|
|
self.buf = None;
|
|
Ok(())
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn skip(&mut self) -> io::Result<()> {
|
|
if self.buf.is_none() {
|
|
unreachable!();
|
|
}
|
|
self.buf = None;
|
|
Ok(())
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn peek(&mut self) -> io::Result<u8> {
|
|
match self.buf {
|
|
Some(b) => Ok(b),
|
|
None => {
|
|
let b = &mut [0];
|
|
match self.read.read(b)? {
|
|
0 => Err(io_eof()),
|
|
1 => {
|
|
self.buf = Some(b[0]);
|
|
Ok(b[0])
|
|
}
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>> {
|
|
if self.buf.is_some() {
|
|
unreachable!();
|
|
}
|
|
let mut bs = vec![0; count];
|
|
self.read.read_exact(&mut bs)?;
|
|
Ok(Cow::Owned(bs))
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> {
|
|
if self.buf.is_some() {
|
|
unreachable!();
|
|
}
|
|
self.read.read_exact(bs)
|
|
}
|
|
}
|
|
|
|
/// A [BinarySource] that draws its bytes from a borrowed slice of [u8].
pub struct BytesBinarySource<'de> {
    /// The slice that supplies the bytes.
    pub bytes: &'de [u8],
    /// Offset of the current position within `bytes`.
    #[doc(hidden)]
    pub index: usize,
}
|
|
|
|
impl<'de> BytesBinarySource<'de> {
|
|
/// Constructs a [BytesBinarySource] from the given `u8` slice.
|
|
#[inline(always)]
|
|
pub fn new(bytes: &'de [u8]) -> Self {
|
|
BytesBinarySource { bytes, index: 0 }
|
|
}
|
|
}
|
|
|
|
impl<'de> BinarySource<'de> for BytesBinarySource<'de> {
|
|
type Mark = usize;
|
|
|
|
#[inline(always)]
|
|
fn mark(&mut self) -> io::Result<Self::Mark> {
|
|
Ok(self.index)
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
|
|
self.index = *mark;
|
|
Ok(())
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn skip(&mut self) -> io::Result<()> {
|
|
if self.index >= self.bytes.len() {
|
|
unreachable!();
|
|
}
|
|
self.index += 1;
|
|
Ok(())
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn peek(&mut self) -> io::Result<u8> {
|
|
if self.index >= self.bytes.len() {
|
|
Err(io_eof())
|
|
} else {
|
|
Ok(self.bytes[self.index])
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>> {
|
|
if self.index + count > self.bytes.len() {
|
|
Err(io_eof())
|
|
} else {
|
|
let bs = &self.bytes[self.index..self.index + count];
|
|
self.index += count;
|
|
Ok(Cow::Borrowed(bs))
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> {
|
|
let count = bs.len();
|
|
if self.index + count > self.bytes.len() {
|
|
Err(io_eof())
|
|
} else {
|
|
bs.copy_from_slice(&self.bytes[self.index..self.index + count]);
|
|
self.index += count;
|
|
Ok(())
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A combination of a [Reader] with presets governing its operation.
|
|
pub struct ConfiguredReader<'de, N: NestedValue, R: Reader<'de, N>> {
|
|
/// The underlying [Reader].
|
|
pub reader: R,
|
|
/// Configuration as to whether to include or discard annotations while reading.
|
|
pub read_annotations: bool,
|
|
phantom: PhantomData<&'de N>,
|
|
}
|
|
|
|
impl<'de, N: NestedValue, R: Reader<'de, N>> ConfiguredReader<'de, N, R> {
|
|
/// Constructs a [ConfiguredReader] based on the given `reader`.
|
|
pub fn new(reader: R) -> Self {
|
|
reader.configured(true)
|
|
}
|
|
|
|
/// Updates the `read_annotations` field of `self`.
|
|
pub fn set_read_annotations(&mut self, read_annotations: bool) {
|
|
self.read_annotations = read_annotations
|
|
}
|
|
|
|
/// Retrieve the next parseable value, treating end-of-input as an error.
|
|
///
|
|
/// Delegates directly to [Reader::demand_next].
|
|
pub fn demand_next(&mut self) -> io::Result<N> {
|
|
self.reader.demand_next(self.read_annotations)
|
|
}
|
|
}
|
|
|
|
impl<'de, N: NestedValue, R: Reader<'de, N>> std::iter::Iterator for ConfiguredReader<'de, N, R> {
|
|
type Item = io::Result<N>;
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
match self.reader.next(self.read_annotations) {
|
|
Err(e) => Some(Err(e)),
|
|
Ok(None) => None,
|
|
Ok(Some(v)) => Some(Ok(v)),
|
|
}
|
|
}
|
|
}
|