preserves/implementations/rust/preserves/src/value/reader.rs

568 lines
19 KiB
Rust

//! Generic [Reader] trait for parsing Preserves [Value][crate::value::repr::Value]s,
//! implemented by code that provides each specific transfer syntax.
use crate::error::{self, io_eof, ExpectedKind, Received};
use std::borrow::Cow;
use std::io;
use std::marker::PhantomData;
use super::boundary as B;
use super::signed_integer::SignedInteger;
use super::CompoundClass;
use super::DomainDecode;
use super::DomainParse;
use super::Double;
use super::IOValue;
use super::IOValueDomainCodec;
use super::NestedValue;
use super::ViaCodec;
/// Result type used by the [Reader] operations in this module that report failures as
/// [error::Error] rather than as a plain [io::Error].
pub type ReaderResult<T> = std::result::Result<T, error::Error>;
/// Tokens produced when performing
/// [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style reading of terms.
///
/// These are the events returned by [Reader::next_token] and
/// [Reader::next_annotations_and_token].
pub enum Token<N: NestedValue> {
/// An embedded value was seen and completely decoded.
Embedded(N::Embedded),
/// An atomic value was seen and completely decoded.
Atom(N),
/// A compound value of the given [CompoundClass] has been opened; its contents follow as
/// further tokens, and it will be terminated by [Token::End].
Compound(CompoundClass),
/// Closes a previously-opened compound value (see [Token::Compound]).
End,
}
/// Generic parser for Preserves.
///
/// Implementors supply the required methods for one specific transfer syntax; the provided
/// methods (`next_boolean`, `next_i64`, `next_str`, ...) are conveniences layered on top of
/// [Reader::next] / [Reader::demand_next].
pub trait Reader<'de, N: NestedValue> {
    /// Reads the next value from the input, if there is one.
    ///
    /// The result is `Ok(Some(...))` when a complete value was parsed, `Ok(None)` when the
    /// end of the stream has been reached, and `Err(...)` for parse or IO errors, which
    /// includes incomplete/partial input. [Reader::demand_next] is the variant that treats
    /// end-of-input as an error.
    fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>>;

    // The following methods are kept out of the generated documentation for the moment, so
    // that the whole Boundary thing does not have to be documented yet.
    #[doc(hidden)]
    fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type>;
    #[doc(hidden)]
    fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item>;
    #[doc(hidden)]
    fn open_sequence(&mut self) -> ReaderResult<()>;
    #[doc(hidden)]
    fn open_set(&mut self) -> ReaderResult<()>;
    #[doc(hidden)]
    fn open_dictionary(&mut self) -> ReaderResult<()>;
    #[doc(hidden)]
    fn boundary(&mut self, b: &B::Type) -> ReaderResult<()>;
    #[doc(hidden)]
    // close_compound implies a b.shift(...) and a self.boundary(b).
    fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool>;
    #[doc(hidden)]
    fn open_embedded(&mut self) -> ReaderResult<()>;
    #[doc(hidden)]
    fn close_embedded(&mut self) -> ReaderResult<()>;

    /// Position marker enabling structured backtracking to an earlier stage in a parse.
    /// Useful for layering parser combinators atop a Reader.
    type Mark;

    /// Captures a marker for the current position in the input.
    fn mark(&mut self) -> io::Result<Self::Mark>;

    /// Moves the input back (or forward) to a position previously captured with
    /// [Reader::mark].
    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>;

    /// Produces the next [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style
    /// event, discarding annotations.
    ///
    /// The `read_embedded_annotations` flag controls whether annotations are also skipped on
    /// *embedded* values or not.
    fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>>;

    /// Produces the next [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style
    /// event, together with a vector of any annotations that preceded it.
    fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)>;

    //---------------------------------------------------------------------------

    /// Skips over the next available complete value; fails if there is no such value.
    fn skip_value(&mut self) -> io::Result<()> {
        // TODO efficient skipping in specific impls of this trait
        self.demand_next(false).map(|_| ())
    }

    /// Reads the next value, treating end-of-input as an error.
    ///
    /// The result is `Ok(...)` when a complete value was parsed, and `Err(...)` for parse or
    /// IO errors, including incomplete/partial input or end of stream. See also
    /// [Reader::next].
    fn demand_next(&mut self, read_annotations: bool) -> io::Result<N> {
        match self.next(read_annotations)? {
            Some(v) => Ok(v),
            None => Err(io_eof()),
        }
    }

    /// Reads the next value and yields it if it is a `Boolean`; otherwise yields an error.
    fn next_boolean(&mut self) -> ReaderResult<bool> {
        let v = self.demand_next(false)?;
        v.value().to_boolean()
    }

    /// Reads the next value and yields it if it is a `Double`; otherwise yields an error.
    fn next_double(&mut self) -> ReaderResult<Double> {
        let v = self.demand_next(false)?;
        let d = v.value().to_double()?;
        Ok(d.to_owned())
    }

    /// Reads the next value and yields it if it is a `SignedInteger`; otherwise yields an
    /// error.
    fn next_signedinteger(&mut self) -> ReaderResult<SignedInteger> {
        let v = self.demand_next(false)?;
        let i = v.value().to_signedinteger()?;
        Ok(i.to_owned())
    }

    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [i8];
    /// otherwise yields an error.
    fn next_i8(&mut self) -> ReaderResult<i8> {
        let v = self.demand_next(false)?;
        v.value().to_i8()
    }

    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [u8];
    /// otherwise yields an error.
    fn next_u8(&mut self) -> ReaderResult<u8> {
        let v = self.demand_next(false)?;
        v.value().to_u8()
    }

    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [i16];
    /// otherwise yields an error.
    fn next_i16(&mut self) -> ReaderResult<i16> {
        let v = self.demand_next(false)?;
        v.value().to_i16()
    }

    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [u16];
    /// otherwise yields an error.
    fn next_u16(&mut self) -> ReaderResult<u16> {
        let v = self.demand_next(false)?;
        v.value().to_u16()
    }

    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [i32];
    /// otherwise yields an error.
    fn next_i32(&mut self) -> ReaderResult<i32> {
        let v = self.demand_next(false)?;
        v.value().to_i32()
    }

    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [u32];
    /// otherwise yields an error.
    fn next_u32(&mut self) -> ReaderResult<u32> {
        let v = self.demand_next(false)?;
        v.value().to_u32()
    }

    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [i64];
    /// otherwise yields an error.
    fn next_i64(&mut self) -> ReaderResult<i64> {
        let v = self.demand_next(false)?;
        v.value().to_i64()
    }

    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [u64];
    /// otherwise yields an error.
    fn next_u64(&mut self) -> ReaderResult<u64> {
        let v = self.demand_next(false)?;
        v.value().to_u64()
    }

    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [i128];
    /// otherwise yields an error.
    fn next_i128(&mut self) -> ReaderResult<i128> {
        let v = self.demand_next(false)?;
        v.value().to_i128()
    }

    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [u128];
    /// otherwise yields an error.
    fn next_u128(&mut self) -> ReaderResult<u128> {
        let v = self.demand_next(false)?;
        v.value().to_u128()
    }

    /// Reads the next value and yields it as an [f64] if it is a `Double`; otherwise yields
    /// an error.
    fn next_f64(&mut self) -> ReaderResult<f64> {
        let v = self.demand_next(false)?;
        v.value().to_f64()
    }

    /// Reads the next value and yields it as a [char] if it is parseable by
    /// [Value::to_char][crate::value::Value::to_char]; otherwise yields an error.
    fn next_char(&mut self) -> ReaderResult<char> {
        let v = self.demand_next(false)?;
        v.value().to_char()
    }

    /// Reads the next value and yields its text if it is a `String`; otherwise yields an
    /// error. This default implementation always returns an owned copy.
    fn next_str(&mut self) -> ReaderResult<Cow<'de, str>> {
        let v = self.demand_next(false)?;
        let text = v.value().to_string()?.to_owned();
        Ok(Cow::Owned(text))
    }

    /// Reads the next value and yields its bytes if it is a `ByteString`; otherwise yields
    /// an error. This default implementation always returns an owned copy.
    fn next_bytestring(&mut self) -> ReaderResult<Cow<'de, [u8]>> {
        let v = self.demand_next(false)?;
        let bytes = v.value().to_bytestring()?.to_owned();
        Ok(Cow::Owned(bytes))
    }

    /// Reads the next value and yields its name if it is a `Symbol`; otherwise yields an
    /// error. This default implementation always returns an owned copy.
    fn next_symbol(&mut self) -> ReaderResult<Cow<'de, str>> {
        let v = self.demand_next(false)?;
        let name = v.value().to_symbol()?.to_owned();
        Ok(Cow::Owned(name))
    }

    #[doc(hidden)]
    fn open_option(&mut self) -> ReaderResult<Option<B::Type>> {
        // An option is read as a record labelled with the symbol `None` or `Some`.
        let b = self.open_record(None)?;
        let label = self.next_symbol()?;
        match &*label {
            "None" => {
                // A `None` record must be closed immediately after its label.
                self.ensure_complete(b, &B::Item::RecordField)?;
                Ok(None)
            }
            // For `Some`, the boundary is handed back to the caller.
            "Some" => Ok(Some(b)),
            other => Err(error::Error::Expected(
                ExpectedKind::Option,
                Received::ReceivedRecordWithLabel(other.to_owned()),
            )),
        }
    }

    #[doc(hidden)]
    fn open_simple_record(&mut self, name: &str, arity: Option<usize>) -> ReaderResult<B::Type> {
        let b = self.open_record(arity)?;
        let label = self.next_symbol()?;
        // Reject records whose label symbol is not the expected `name`.
        if &*label != name {
            return Err(error::Error::Expected(
                ExpectedKind::SimpleRecord(name.to_owned(), arity),
                Received::ReceivedRecordWithLabel(label.to_owned()),
            ));
        }
        Ok(b)
    }

    /// Wraps `self` in a [ConfiguredReader] whose `read_annotations` setting is the given
    /// value.
    fn configured(self, read_annotations: bool) -> ConfiguredReader<'de, N, Self>
    where
        Self: std::marker::Sized,
    {
        ConfiguredReader {
            read_annotations,
            phantom: PhantomData,
            reader: self,
        }
    }

    #[doc(hidden)]
    fn ensure_more_expected(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<()> {
        // `close_compound` yielding `true` here means an item the caller needed is absent.
        if self.close_compound(b, i)? {
            Err(error::Error::MissingItem)
        } else {
            Ok(())
        }
    }

    #[doc(hidden)]
    fn ensure_complete(&mut self, mut b: B::Type, i: &B::Item) -> ReaderResult<()> {
        // `close_compound` yielding `false` here means the compound was not closed where the
        // caller required it to be.
        if self.close_compound(&mut b, i)? {
            Ok(())
        } else {
            Err(error::Error::MissingCloseDelimiter)
        }
    }
}
/// Forwarding implementation: a mutable reference to a [Reader] is itself a [Reader], with
/// every operation delegated to the referenced reader.
impl<'r, 'de, N: NestedValue, R: Reader<'de, N>> Reader<'de, N> for &'r mut R {
    fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>> {
        (**self).next(read_annotations)
    }

    fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type> {
        (**self).open_record(arity)
    }

    fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item> {
        (**self).open_sequence_or_set()
    }

    fn open_sequence(&mut self) -> ReaderResult<()> {
        (**self).open_sequence()
    }

    fn open_set(&mut self) -> ReaderResult<()> {
        (**self).open_set()
    }

    fn open_dictionary(&mut self) -> ReaderResult<()> {
        (**self).open_dictionary()
    }

    fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> {
        (**self).boundary(b)
    }

    fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool> {
        (**self).close_compound(b, i)
    }

    fn open_embedded(&mut self) -> ReaderResult<()> {
        (**self).open_embedded()
    }

    fn close_embedded(&mut self) -> ReaderResult<()> {
        (**self).close_embedded()
    }

    /// Marks are those of the underlying reader.
    type Mark = R::Mark;

    fn mark(&mut self) -> io::Result<Self::Mark> {
        (**self).mark()
    }

    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
        (**self).restore(mark)
    }

    fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>> {
        (**self).next_token(read_embedded_annotations)
    }

    fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)> {
        (**self).next_annotations_and_token()
    }
}
/// Generic seekable stream of input bytes.
///
/// Besides the byte-level primitives, the trait offers constructors for
/// [PackedReader][super::PackedReader] and [TextReader][super::TextReader] instances that
/// borrow `self` as their input.
pub trait BinarySource<'de>: Sized {
    /// Position marker enabling structured backtracking to an earlier position in an input.
    type Mark;

    /// Captures a marker for the current position in the input.
    fn mark(&mut self) -> io::Result<Self::Mark>;

    /// Moves the input to a position previously captured with [BinarySource::mark].
    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>;

    /// Skips the next byte.
    ///
    /// The implementations in this file treat a `skip` with no byte available to skip as an
    /// internal invariant violation (`unreachable!`) rather than as an IO error.
    fn skip(&mut self) -> io::Result<()>;

    /// Yields the next byte without advancing over it.
    fn peek(&mut self) -> io::Result<u8>;

    /// Yields and consumes the next `count` bytes, all of which must be available. The
    /// result is always either exactly `count` bytes or an error.
    fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>>;

    /// Like [BinarySource::readbytes], except that the bytes are written into `bs`, and the
    /// length of `bs` is taken as the count of bytes to read.
    fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()>;

    /// Builds a [PackedReader][super::PackedReader] that reads from `self`, using
    /// `decode_embedded` for embedded values.
    fn packed<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
        &mut self,
        decode_embedded: Dec,
    ) -> super::PackedReader<'de, '_, N, Dec, Self> {
        super::PackedReader::new(self, decode_embedded)
    }

    /// Builds a [PackedReader][super::PackedReader] that reads [IOValue]s from `self`.
    fn packed_iovalues(
        &mut self,
    ) -> super::PackedReader<'de, '_, IOValue, IOValueDomainCodec, Self> {
        self.packed::<IOValue, IOValueDomainCodec>(IOValueDomainCodec)
    }

    /// Builds a [TextReader][super::TextReader] that reads from `self`, using
    /// `decode_embedded` for embedded values.
    fn text<N: NestedValue, Dec: DomainParse<N::Embedded>>(
        &mut self,
        decode_embedded: Dec,
    ) -> super::TextReader<'de, '_, N, Dec, Self> {
        super::TextReader::new(self, decode_embedded)
    }

    /// Builds a [TextReader][super::TextReader] that reads [IOValue]s from `self`.
    fn text_iovalues(
        &mut self,
    ) -> super::TextReader<'de, '_, IOValue, ViaCodec<IOValueDomainCodec>, Self> {
        let codec = ViaCodec::new(IOValueDomainCodec);
        self.text::<IOValue, _>(codec)
    }
}
/// Implementation of [BinarySource] backed by an [`io::Read`]` + `[`io::Seek`] implementation.
///
/// A single byte of lookahead is kept in `buf` so that [BinarySource::peek] can be offered
/// on top of a plain reader.
pub struct IOBinarySource<R: io::Read + io::Seek> {
/// The underlying byte source.
pub read: R,
#[doc(hidden)]
/// One-place buffer for peeked bytes: `Some(b)` after a `peek` has read `b` from `read`
/// and until `skip` or `restore` clears it; `None` otherwise.
pub buf: Option<u8>,
}
impl<R: io::Read + io::Seek> IOBinarySource<R> {
/// Constructs an [IOBinarySource] from the given [`io::Read`]` + `[`io::Seek`]
/// implementation.
#[inline(always)]
pub fn new(read: R) -> Self {
IOBinarySource { read, buf: None }
}
}
impl<'de, R: io::Read + io::Seek> BinarySource<'de> for IOBinarySource<R> {
    /// Marks are absolute byte offsets as reported by the underlying stream.
    type Mark = u64;

    /// Reports the logical read position: the stream position of `read`, less one when a
    /// peeked byte is sitting in `buf` (that byte has been read from the stream but not yet
    /// consumed by the caller).
    #[inline(always)]
    fn mark(&mut self) -> io::Result<Self::Mark> {
        let pending = if self.buf.is_some() { 1 } else { 0 };
        Ok(self.read.stream_position()? - pending)
    }

    /// Seeks the underlying stream to the absolute offset `mark` and discards any peeked
    /// byte, since it no longer corresponds to the new position.
    #[inline(always)]
    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
        self.read.seek(io::SeekFrom::Start(*mark))?;
        self.buf = None;
        Ok(())
    }

    /// Consumes the byte previously obtained with `peek`.
    ///
    /// # Panics
    ///
    /// Panics (`unreachable!`) if no byte is currently buffered, i.e. if `skip` is called
    /// without a preceding successful `peek`.
    #[inline(always)]
    fn skip(&mut self) -> io::Result<()> {
        if self.buf.is_none() {
            unreachable!();
        }
        self.buf = None;
        Ok(())
    }

    /// Returns the next byte without consuming it, reading it from the underlying stream
    /// into `buf` if it is not already buffered.
    ///
    /// Yields the `io_eof()` error when the stream has no more bytes. Reads that fail with
    /// [`io::ErrorKind::Interrupted`] are retried, matching the behaviour of the
    /// `read_exact` calls used by `readbytes` and `readbytes_into`.
    #[inline(always)]
    fn peek(&mut self) -> io::Result<u8> {
        if let Some(b) = self.buf {
            return Ok(b);
        }
        let mut byte = [0u8; 1];
        loop {
            match self.read.read(&mut byte) {
                Ok(0) => return Err(io_eof()),
                Ok(1) => {
                    self.buf = Some(byte[0]);
                    return Ok(byte[0]);
                }
                // A one-byte buffer cannot receive more than one byte.
                Ok(_) => unreachable!(),
                // Interrupted reads are non-fatal and should simply be retried.
                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
    }

    /// Reads exactly `count` bytes from the underlying stream into a freshly allocated
    /// buffer, always returning `Cow::Owned`.
    ///
    /// # Panics
    ///
    /// Panics (`unreachable!`) if a peeked byte is still buffered; callers must `skip` it
    /// first.
    #[inline(always)]
    fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>> {
        if self.buf.is_some() {
            unreachable!();
        }
        // NOTE(review): the full `count` bytes are allocated before any are read; callers
        // passing lengths taken from untrusted input may want to bound them.
        let mut bs = vec![0; count];
        self.read.read_exact(&mut bs)?;
        Ok(Cow::Owned(bs))
    }

    /// Fills `bs` completely from the underlying stream.
    ///
    /// # Panics
    ///
    /// Panics (`unreachable!`) if a peeked byte is still buffered; callers must `skip` it
    /// first.
    #[inline(always)]
    fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> {
        if self.buf.is_some() {
            unreachable!();
        }
        self.read.read_exact(bs)
    }
}
/// Implementation of [BinarySource] backed by a slice of [u8].
///
/// Because the input slice lives for `'de`, byte ranges read from it can be handed out as
/// borrows of the original data.
pub struct BytesBinarySource<'de> {
/// The underlying byte source.
pub bytes: &'de [u8],
#[doc(hidden)]
/// Current position within `bytes`: the offset of the next byte to be read. Also serves as
/// this source's `Mark` value.
pub index: usize,
}
impl<'de> BytesBinarySource<'de> {
/// Constructs a [BytesBinarySource] from the given `u8` slice.
#[inline(always)]
pub fn new(bytes: &'de [u8]) -> Self {
BytesBinarySource { bytes, index: 0 }
}
}
impl<'de> BinarySource<'de> for BytesBinarySource<'de> {
    /// Marks are offsets into `bytes`.
    type Mark = usize;

    /// Returns the current offset into `bytes`.
    #[inline(always)]
    fn mark(&mut self) -> io::Result<Self::Mark> {
        Ok(self.index)
    }

    /// Resets the current offset to `mark`. The offset is not validated here; reads at an
    /// out-of-range position report end-of-input.
    #[inline(always)]
    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
        self.index = *mark;
        Ok(())
    }

    /// Advances past the next byte.
    ///
    /// # Panics
    ///
    /// Panics (`unreachable!`) if the position is already at or past the end of `bytes`.
    #[inline(always)]
    fn skip(&mut self) -> io::Result<()> {
        if self.index >= self.bytes.len() {
            unreachable!();
        }
        self.index += 1;
        Ok(())
    }

    /// Returns the byte at the current position without advancing, or the `io_eof()` error
    /// if the position is at or past the end of `bytes`.
    #[inline(always)]
    fn peek(&mut self) -> io::Result<u8> {
        if self.index < self.bytes.len() {
            Ok(self.bytes[self.index])
        } else {
            Err(io_eof())
        }
    }

    /// Returns the next `count` bytes as a borrow of the underlying slice and advances past
    /// them, or yields the `io_eof()` error (leaving the position unchanged) if fewer than
    /// `count` bytes remain.
    ///
    /// The end offset is computed with overflow checking, so an absurdly large `count`
    /// (e.g. from a malformed length prefix) is reported as end-of-input instead of
    /// overflowing `usize`.
    #[inline(always)]
    fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>> {
        match self.index.checked_add(count) {
            Some(end) if end <= self.bytes.len() => {
                let bs = &self.bytes[self.index..end];
                self.index = end;
                Ok(Cow::Borrowed(bs))
            }
            // Either the range runs past the end of the input or `index + count` overflowed.
            _ => Err(io_eof()),
        }
    }

    /// Copies the next `bs.len()` bytes into `bs` and advances past them, or yields the
    /// `io_eof()` error (leaving both `bs` and the position unchanged) if too few bytes
    /// remain.
    ///
    /// As with `readbytes`, the end offset is computed with overflow checking.
    #[inline(always)]
    fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> {
        match self.index.checked_add(bs.len()) {
            Some(end) if end <= self.bytes.len() => {
                bs.copy_from_slice(&self.bytes[self.index..end]);
                self.index = end;
                Ok(())
            }
            // Either the range runs past the end of the input or `index + len` overflowed.
            _ => Err(io_eof()),
        }
    }
}
/// A combination of a [Reader] with presets governing its operation.
///
/// Also usable as an [Iterator] over the values produced by the wrapped reader.
pub struct ConfiguredReader<'de, N: NestedValue, R: Reader<'de, N>> {
/// The underlying [Reader].
pub reader: R,
/// Configuration as to whether to include or discard annotations while reading.
pub read_annotations: bool,
// Zero-sized marker that makes the struct use its `'de` and `N` parameters, which no other
// field mentions.
phantom: PhantomData<&'de N>,
}
impl<'de, N: NestedValue, R: Reader<'de, N>> ConfiguredReader<'de, N, R> {
    /// Builds a [ConfiguredReader] around the given `reader`, with `read_annotations`
    /// initially set to `true`.
    pub fn new(reader: R) -> Self {
        <R as Reader<'de, N>>::configured(reader, true)
    }

    /// Replaces the value of the `read_annotations` field of `self`.
    pub fn set_read_annotations(&mut self, read_annotations: bool) {
        self.read_annotations = read_annotations;
    }

    /// Reads the next value, treating end-of-input as an error.
    ///
    /// Delegates directly to [Reader::demand_next], passing the configured
    /// `read_annotations` setting.
    pub fn demand_next(&mut self) -> io::Result<N> {
        let read_annotations = self.read_annotations;
        self.reader.demand_next(read_annotations)
    }
}
/// Iterates over the values produced by the wrapped reader, using the configured
/// `read_annotations` setting. Iteration ends (`None`) when the reader reports
/// end-of-input; errors are passed through as `Some(Err(...))`.
impl<'de, N: NestedValue, R: Reader<'de, N>> std::iter::Iterator for ConfiguredReader<'de, N, R> {
    type Item = io::Result<N>;

    fn next(&mut self) -> Option<Self::Item> {
        // `io::Result<Option<N>>` -> `Option<io::Result<N>>`: `Ok(None)` becomes `None`,
        // `Ok(Some(v))` becomes `Some(Ok(v))`, and `Err(e)` becomes `Some(Err(e))`.
        self.reader.next(self.read_annotations).transpose()
    }
}