More documentation

This commit is contained in:
Tony Garnock-Jones 2023-10-27 11:40:55 +02:00
parent 4b40bf174d
commit 130e58a3e1
16 changed files with 283 additions and 36 deletions

View File

@ -1,17 +1,18 @@
Value = Atom ```text
| Compound Value = Atom
| Embedded | Compound
| Embedded
Atom = Boolean Atom = Boolean
| Float | Float
| Double | Double
| SignedInteger | SignedInteger
| String | String
| ByteString | ByteString
| Symbol | Symbol
Compound = Record
| Sequence
| Set
| Dictionary
Compound = Record
| Sequence
| Set
| Dictionary
```

View File

@ -24,12 +24,17 @@ fi
# samples.pr and samples.bin are in fact identical. # samples.pr and samples.bin are in fact identical.
${COMMAND} path/path.bin implementations/python/preserves/path.prb ${COMMAND} path/path.bin implementations/python/preserves/path.prb
${COMMAND} path/path.bin implementations/rust/preserves-path/path.bin ${COMMAND} path/path.bin implementations/rust/preserves-path/path.bin
${COMMAND} schema/schema.bin implementations/python/preserves/schema.prb ${COMMAND} schema/schema.bin implementations/python/preserves/schema.prb
${COMMAND} schema/schema.prs implementations/racket/preserves/preserves-schema/schema.prs ${COMMAND} schema/schema.prs implementations/racket/preserves/preserves-schema/schema.prs
${COMMAND} tests/samples.bin implementations/python/tests/samples.bin ${COMMAND} tests/samples.bin implementations/python/tests/samples.bin
${COMMAND} tests/samples.pr implementations/python/tests/samples.pr ${COMMAND} tests/samples.pr implementations/python/tests/samples.pr
${COMMAND} tests/samples.pr implementations/racket/preserves/preserves/tests/samples.pr ${COMMAND} tests/samples.pr implementations/racket/preserves/preserves/tests/samples.pr
${COMMAND} _includes/what-is-preserves.md implementations/rust/preserves/doc/what-is-preserves.md ${COMMAND} _includes/what-is-preserves.md implementations/rust/preserves/doc/what-is-preserves.md
${COMMAND} _includes/what-is-preserves-schema.md implementations/rust/preserves-schema/doc/what-is-preserves-schema.md
${COMMAND} _includes/cheatsheet-binary-plaintext.md implementations/rust/preserves/doc/cheatsheet-binary-plaintext.md ${COMMAND} _includes/cheatsheet-binary-plaintext.md implementations/rust/preserves/doc/cheatsheet-binary-plaintext.md
${COMMAND} _includes/cheatsheet-text-plaintext.md implementations/rust/preserves/doc/cheatsheet-text-plaintext.md ${COMMAND} _includes/cheatsheet-text-plaintext.md implementations/rust/preserves/doc/cheatsheet-text-plaintext.md
${COMMAND} _includes/value-grammar.md implementations/rust/preserves/doc/value-grammar.md
${COMMAND} _includes/what-is-preserves-schema.md implementations/rust/preserves-schema/doc/what-is-preserves-schema.md

View File

@ -1,8 +1,23 @@
# Preserves core: value representation, codecs, and Serde support ```shell
cargo add preserves
```
This crate implements [Preserves](https://preserves.dev/) for Rust, including This crate ([`preserves` on crates.io](https://crates.io/crates/preserves)) implements
[Preserves](https://preserves.dev/) for Rust. It provides the core
[semantics](https://preserves.dev/preserves.html#semantics) as well as both the [human-readable
text syntax][crate::value::text] (a superset of JSON) and [machine-oriented binary
format][crate::value::packed] (including
[canonicalization](https://preserves.dev/canonical-binary.html)) for Preserves.
- serde support (modules [de], [ser], [symbol], [set]) This crate is the foundation for others such as
- plain Preserves value support, plus [text][crate::value::text] and - [`preserves-schema`](https://docs.rs/preserves-schema/), which implements [Preserves
[binary][crate::value::packed] codecs (the [value] module) Schema](https://preserves.dev/preserves-schema.html);
- [`preserves-path`](https://docs.rs/preserves-path/), which implements [Preserves
Path](https://preserves.dev/preserves-path.html); and
- [`preserves-tools`](https://crates.io/crates/preserves-tools), which provides command-line
utilities for working with Preserves, in particular
[`preserves-tool`](https://preserves.dev/doc/preserves-tool.html), a kind of Preserves
Swiss-army knife.
It also includes [Serde](https://serde.rs/) support (modules [de], [ser], [symbol], [set]).

View File

@ -1,6 +1,7 @@
#![doc = concat!( #![doc = concat!(
include_str!("../doc/what-is-preserves.md"),
include_str!("../README.md"), include_str!("../README.md"),
"# What is Preserves?\n\n",
include_str!("../doc/what-is-preserves.md"),
)] )]
pub mod de; pub mod de;

View File

@ -9,6 +9,7 @@
//! `#[serde(with = "preserves::set")]`: //! `#[serde(with = "preserves::set")]`:
//! //!
//! ```rust //! ```rust
//! #[derive(serde::Serialize, serde::Deserialize)]
//! struct Example { //! struct Example {
//! #[serde(with = "preserves::set")] //! #[serde(with = "preserves::set")]
//! items: preserves::value::Set<String>, //! items: preserves::value::Set<String>,

View File

@ -9,6 +9,7 @@
//! you want to (en|de)code as Preserves `Symbol`s with `#[serde(with = "preserves::symbol")]`: //! you want to (en|de)code as Preserves `Symbol`s with `#[serde(with = "preserves::symbol")]`:
//! //!
//! ```rust //! ```rust
//! #[derive(serde::Serialize, serde::Deserialize)]
//! struct Example { //! struct Example {
//! sym1: preserves::symbol::Symbol, //! sym1: preserves::symbol::Symbol,
//! #[serde(with = "preserves::symbol")] //! #[serde(with = "preserves::symbol")]

View File

@ -1,10 +1,52 @@
//! Implementation of Preserves itself, separate from Serde. //! # Representing, reading, and writing Preserves `Value`s as Rust data
//! //!
//! Preserves terms are [represented][repr] in memory as instances of [NestedValue]. //! ```
//! use preserves::value::{IOValue, text, packed};
//! let v: IOValue = text::iovalue_from_str("<hi>")?;
//! let w: IOValue = packed::iovalue_from_bytes(b"\xb4\xb3\x02hi\x84")?;
//! assert_eq!(v, w);
//! assert_eq!(text::TextWriter::encode_iovalue(&v)?, "<hi>");
//! assert_eq!(packed::PackedWriter::encode_iovalue(&v)?, b"\xb4\xb3\x02hi\x84");
//! # Ok::<(), std::io::Error>(())
//! ```
//! //!
//! - in-memory storage, representation, and APIs in module [repr] //! Preserves `Value`s are categorized in the following way. The core representation type,
//! - machine-oriented binary syntax codec in module [packed] //! [crate::value::repr::Value], reflects this structure. However, most of the time you will
//! - human-oriented text syntax codec in module [text] //! work with [IOValue] or some other implementation of trait [NestedValue], which augments an
//! underlying [Value] with [*annotations*][crate::value::repr::Annotations] (e.g. comments) and fixes a strategy
//! for memory management.
//!
#![doc = include_str!("../../doc/value-grammar.md")]
//!
//! ## Memory management
//!
//! Each implementation of [NestedValue] chooses a different point in the space of possible
//! approaches to memory management for `Value`s.
//!
//! ##### `IOValue`
//!
//! The most commonly-used and versatile implementation, [IOValue], uses [std::sync::Arc] for
//! internal links in compound `Value`s. Unlike many of the other implementations of
//! [NestedValue], [IOValue] doesn't offer flexibility in the Rust data type to be used for
//! Preserves [embedded values](https://preserves.dev/preserves.html#embeddeds): instead,
//! embedded values in an [IOValue] are themselves [IOValue]s.
//!
//! ##### `ArcValue<D>`, `RcValue<D>`, and `PlainValue<D>`
//!
//! For control over the Rust type to use for embedded values, choose [ArcValue], [RcValue], or
//! [PlainValue]. Use [ArcValue] when you wish to transfer values among threads. [RcValue] is
//! more niche; it may be useful for complex terms that do not need to cross thread boundaries.
//! [PlainValue] is even more niche: it does not use a reference-counted pointer type, meaning
//! it does not offer any kind of aliasing or sharing among subterms at all.
//!
//! # Parsing, pretty-printing, encoding and decoding `Value`s
//!
//! Modules [reader] and [writer] supply generic [Reader] and [Writer] traits for parsing and
//! unparsing Preserves data. Implementations of [Reader] and [Writer] connect Preserves data
//! to specific transfer syntaxes:
//!
//! - module [packed] supplies tools for working with the machine-oriented binary syntax
//! - module [text] supplies tools for working with human-readable text syntax
pub mod boundary; pub mod boundary;
pub mod de; pub mod de;

View File

@ -1,6 +1,12 @@
//! Implements the Preserves [machine-oriented binary //! Implements the Preserves [machine-oriented binary
//! syntax](https://preserves.dev/preserves-binary.html). //! syntax](https://preserves.dev/preserves-binary.html).
//! //!
//! The main entry points for reading are functions [iovalue_from_bytes],
//! [annotated_iovalue_from_bytes], [from_bytes], and [annotated_from_bytes].
//!
//! The main entry points for writing are [PackedWriter::encode_iovalue] and
//! [PackedWriter::encode].
//!
//! # Summary of Binary Syntax //! # Summary of Binary Syntax
#![doc = include_str!("../../../doc/cheatsheet-binary-plaintext.md")] #![doc = include_str!("../../../doc/cheatsheet-binary-plaintext.md")]

View File

@ -1,4 +1,4 @@
//! Implementation of [Reader][crate::value::reader::Reader] for the binary encoding. //! Implementation of [Reader] for the binary encoding.
use crate::error::{self, io_syntax_error, is_eof_io_error, ExpectedKind, Received}; use crate::error::{self, io_syntax_error, is_eof_io_error, ExpectedKind, Received};

View File

@ -1,4 +1,4 @@
//! Implementation of [Writer][crate::value::writer::Writer] for the binary encoding. //! Implementation of [Writer] for the binary encoding.
use super::super::boundary as B; use super::super::boundary as B;
use super::super::suspendable::Suspendable; use super::super::suspendable::Suspendable;

View File

@ -1,3 +1,6 @@
//! Generic [Reader] trait for parsing Preserves [Value][crate::value::repr::Value]s,
//! implemented by code that provides each specific transfer syntax.
use crate::error::{self, io_eof, ExpectedKind, Received}; use crate::error::{self, io_eof, ExpectedKind, Received};
use std::borrow::Cow; use std::borrow::Cow;
@ -18,59 +21,104 @@ use super::ViaCodec;
pub type ReaderResult<T> = std::result::Result<T, error::Error>; pub type ReaderResult<T> = std::result::Result<T, error::Error>;
/// Tokens produced when performing
/// [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style reading of terms.
pub enum Token<N: NestedValue> { pub enum Token<N: NestedValue> {
/// An embedded value was seen and completely decoded.
Embedded(N::Embedded), Embedded(N::Embedded),
/// An atomic value was seen and completely decoded.
Atom(N), Atom(N),
/// A compound value has been opened; its contents follow, and it will be terminated by
/// [Token::End].
Compound(CompoundClass), Compound(CompoundClass),
/// Closes a previously-opened compound value.
End, End,
} }
/// Generic parser for Preserves.
pub trait Reader<'de, N: NestedValue> { pub trait Reader<'de, N: NestedValue> {
/// Retrieve the next parseable value or an indication of end-of-input.
///
/// Yields `Ok(Some(...))` if a complete value is available, `Ok(None)` if the end of
/// stream has been reached, or `Err(...)` for parse or IO errors, including
/// incomplete/partial input. See also [Reader::demand_next].
fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>>; fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>>;
// Hiding these from the documentation for the moment because I don't want to have to
// document the whole Boundary thing.
#[doc(hidden)]
fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type>; fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type>;
#[doc(hidden)]
fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item>; fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item>;
#[doc(hidden)]
fn open_sequence(&mut self) -> ReaderResult<()>; fn open_sequence(&mut self) -> ReaderResult<()>;
#[doc(hidden)]
fn open_set(&mut self) -> ReaderResult<()>; fn open_set(&mut self) -> ReaderResult<()>;
#[doc(hidden)]
fn open_dictionary(&mut self) -> ReaderResult<()>; fn open_dictionary(&mut self) -> ReaderResult<()>;
#[doc(hidden)]
fn boundary(&mut self, b: &B::Type) -> ReaderResult<()>; fn boundary(&mut self, b: &B::Type) -> ReaderResult<()>;
#[doc(hidden)]
// close_compound implies a b.shift(...) and a self.boundary(b). // close_compound implies a b.shift(...) and a self.boundary(b).
fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool>; fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool>;
#[doc(hidden)]
fn open_embedded(&mut self) -> ReaderResult<()>; fn open_embedded(&mut self) -> ReaderResult<()>;
#[doc(hidden)]
fn close_embedded(&mut self) -> ReaderResult<()>; fn close_embedded(&mut self) -> ReaderResult<()>;
/// Allows structured backtracking to an earlier stage in a parse. Useful for layering
/// parser combinators atop a Reader.
type Mark; type Mark;
/// Retrieve a marker for the current position in the input.
fn mark(&mut self) -> io::Result<Self::Mark>; fn mark(&mut self) -> io::Result<Self::Mark>;
/// Seek the input to a previously-saved position.
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>; fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>;
/// Get the next [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style event,
/// discarding annotations.
///
/// The `read_embedded_annotations` flag controls whether annotations are also skipped on
/// *embedded* values or not.
fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>>; fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>>;
/// Get the next [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style event, plus
/// a vector containing any annotations that preceded it.
fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)>; fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)>;
//--------------------------------------------------------------------------- //---------------------------------------------------------------------------
/// Skips the next available complete value. Yields an error if no such value exists.
fn skip_value(&mut self) -> io::Result<()> { fn skip_value(&mut self) -> io::Result<()> {
// TODO efficient skipping in specific impls of this trait // TODO efficient skipping in specific impls of this trait
let _ = self.demand_next(false)?; let _ = self.demand_next(false)?;
Ok(()) Ok(())
} }
/// Retrieve the next parseable value, treating end-of-input as an error.
///
/// Yields `Ok(...)` if a complete value is available or `Err(...)` for parse or IO errors,
/// including incomplete/partial input or end of stream. See also [Reader::next].
fn demand_next(&mut self, read_annotations: bool) -> io::Result<N> { fn demand_next(&mut self, read_annotations: bool) -> io::Result<N> {
self.next(read_annotations)?.ok_or_else(io_eof) self.next(read_annotations)?.ok_or_else(io_eof)
} }
/// Yields the next value, if it is a `Boolean`, or an error otherwise.
fn next_boolean(&mut self) -> ReaderResult<bool> { fn next_boolean(&mut self) -> ReaderResult<bool> {
self.demand_next(false)?.value().to_boolean() self.demand_next(false)?.value().to_boolean()
} }
/// Yields the next value, if it is a `Float`, or an error otherwise.
fn next_float(&mut self) -> ReaderResult<Float> { fn next_float(&mut self) -> ReaderResult<Float> {
Ok(self.demand_next(false)?.value().to_float()?.to_owned()) Ok(self.demand_next(false)?.value().to_float()?.to_owned())
} }
/// Yields the next value, if it is a `Double`, or an error otherwise.
fn next_double(&mut self) -> ReaderResult<Double> { fn next_double(&mut self) -> ReaderResult<Double> {
Ok(self.demand_next(false)?.value().to_double()?.to_owned()) Ok(self.demand_next(false)?.value().to_double()?.to_owned())
} }
/// Yields the next value, if it is a `SignedInteger`, or an error otherwise.
fn next_signedinteger(&mut self) -> ReaderResult<SignedInteger> { fn next_signedinteger(&mut self) -> ReaderResult<SignedInteger> {
Ok(self Ok(self
.demand_next(false)? .demand_next(false)?
@ -79,64 +127,92 @@ pub trait Reader<'de, N: NestedValue> {
.to_owned()) .to_owned())
} }
/// Yields the next value, if it is a `SignedInteger` that fits in [i8], or an error
/// otherwise.
fn next_i8(&mut self) -> ReaderResult<i8> { fn next_i8(&mut self) -> ReaderResult<i8> {
self.demand_next(false)?.value().to_i8() self.demand_next(false)?.value().to_i8()
} }
/// Yields the next value, if it is a `SignedInteger` that fits in [u8], or an error
/// otherwise.
fn next_u8(&mut self) -> ReaderResult<u8> { fn next_u8(&mut self) -> ReaderResult<u8> {
self.demand_next(false)?.value().to_u8() self.demand_next(false)?.value().to_u8()
} }
/// Yields the next value, if it is a `SignedInteger` that fits in [i16], or an error
/// otherwise.
fn next_i16(&mut self) -> ReaderResult<i16> { fn next_i16(&mut self) -> ReaderResult<i16> {
self.demand_next(false)?.value().to_i16() self.demand_next(false)?.value().to_i16()
} }
/// Yields the next value, if it is a `SignedInteger` that fits in [u16], or an error
/// otherwise.
fn next_u16(&mut self) -> ReaderResult<u16> { fn next_u16(&mut self) -> ReaderResult<u16> {
self.demand_next(false)?.value().to_u16() self.demand_next(false)?.value().to_u16()
} }
/// Yields the next value, if it is a `SignedInteger` that fits in [i32], or an error
/// otherwise.
fn next_i32(&mut self) -> ReaderResult<i32> { fn next_i32(&mut self) -> ReaderResult<i32> {
self.demand_next(false)?.value().to_i32() self.demand_next(false)?.value().to_i32()
} }
/// Yields the next value, if it is a `SignedInteger` that fits in [u32], or an error
/// otherwise.
fn next_u32(&mut self) -> ReaderResult<u32> { fn next_u32(&mut self) -> ReaderResult<u32> {
self.demand_next(false)?.value().to_u32() self.demand_next(false)?.value().to_u32()
} }
/// Yields the next value, if it is a `SignedInteger` that fits in [i64], or an error
/// otherwise.
fn next_i64(&mut self) -> ReaderResult<i64> { fn next_i64(&mut self) -> ReaderResult<i64> {
self.demand_next(false)?.value().to_i64() self.demand_next(false)?.value().to_i64()
} }
/// Yields the next value, if it is a `SignedInteger` that fits in [u64], or an error
/// otherwise.
fn next_u64(&mut self) -> ReaderResult<u64> { fn next_u64(&mut self) -> ReaderResult<u64> {
self.demand_next(false)?.value().to_u64() self.demand_next(false)?.value().to_u64()
} }
/// Yields the next value, if it is a `SignedInteger` that fits in [i128], or an error
/// otherwise.
fn next_i128(&mut self) -> ReaderResult<i128> { fn next_i128(&mut self) -> ReaderResult<i128> {
self.demand_next(false)?.value().to_i128() self.demand_next(false)?.value().to_i128()
} }
/// Yields the next value, if it is a `SignedInteger` that fits in [u128], or an error
/// otherwise.
fn next_u128(&mut self) -> ReaderResult<u128> { fn next_u128(&mut self) -> ReaderResult<u128> {
self.demand_next(false)?.value().to_u128() self.demand_next(false)?.value().to_u128()
} }
/// Yields the next value as an [f32], if it is a `Float`, or an error otherwise.
fn next_f32(&mut self) -> ReaderResult<f32> { fn next_f32(&mut self) -> ReaderResult<f32> {
self.demand_next(false)?.value().to_f32() self.demand_next(false)?.value().to_f32()
} }
/// Yields the next value as an [f64], if it is a `Double`, or an error otherwise.
fn next_f64(&mut self) -> ReaderResult<f64> { fn next_f64(&mut self) -> ReaderResult<f64> {
self.demand_next(false)?.value().to_f64() self.demand_next(false)?.value().to_f64()
} }
/// Yields the next value as a [char], if it is parseable by
/// [Value::to_char][crate::value::Value::to_char], or an error otherwise.
fn next_char(&mut self) -> ReaderResult<char> { fn next_char(&mut self) -> ReaderResult<char> {
self.demand_next(false)?.value().to_char() self.demand_next(false)?.value().to_char()
} }
/// Yields the next value, if it is a `String`, or an error otherwise.
fn next_str(&mut self) -> ReaderResult<Cow<'de, str>> { fn next_str(&mut self) -> ReaderResult<Cow<'de, str>> {
Ok(Cow::Owned( Ok(Cow::Owned(
self.demand_next(false)?.value().to_string()?.to_owned(), self.demand_next(false)?.value().to_string()?.to_owned(),
)) ))
} }
/// Yields the next value, if it is a `ByteString`, or an error otherwise.
fn next_bytestring(&mut self) -> ReaderResult<Cow<'de, [u8]>> { fn next_bytestring(&mut self) -> ReaderResult<Cow<'de, [u8]>> {
Ok(Cow::Owned( Ok(Cow::Owned(
self.demand_next(false)?.value().to_bytestring()?.to_owned(), self.demand_next(false)?.value().to_bytestring()?.to_owned(),
)) ))
} }
/// Yields the next value, if it is a `Symbol`, or an error otherwise.
fn next_symbol(&mut self) -> ReaderResult<Cow<'de, str>> { fn next_symbol(&mut self) -> ReaderResult<Cow<'de, str>> {
Ok(Cow::Owned( Ok(Cow::Owned(
self.demand_next(false)?.value().to_symbol()?.to_owned(), self.demand_next(false)?.value().to_symbol()?.to_owned(),
)) ))
} }
#[doc(hidden)]
fn open_option(&mut self) -> ReaderResult<Option<B::Type>> { fn open_option(&mut self) -> ReaderResult<Option<B::Type>> {
let b = self.open_record(None)?; let b = self.open_record(None)?;
let label: &str = &self.next_symbol()?; let label: &str = &self.next_symbol()?;
@ -153,6 +229,7 @@ pub trait Reader<'de, N: NestedValue> {
} }
} }
#[doc(hidden)]
fn open_simple_record(&mut self, name: &str, arity: Option<usize>) -> ReaderResult<B::Type> { fn open_simple_record(&mut self, name: &str, arity: Option<usize>) -> ReaderResult<B::Type> {
let b = self.open_record(arity)?; let b = self.open_record(arity)?;
let label: &str = &self.next_symbol()?; let label: &str = &self.next_symbol()?;
@ -166,6 +243,7 @@ pub trait Reader<'de, N: NestedValue> {
} }
} }
/// Constructs a [ConfiguredReader] set with the given value for `read_annotations`.
fn configured(self, read_annotations: bool) -> ConfiguredReader<'de, N, Self> fn configured(self, read_annotations: bool) -> ConfiguredReader<'de, N, Self>
where where
Self: std::marker::Sized, Self: std::marker::Sized,
@ -177,6 +255,7 @@ pub trait Reader<'de, N: NestedValue> {
} }
} }
#[doc(hidden)]
fn ensure_more_expected(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<()> { fn ensure_more_expected(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<()> {
if !self.close_compound(b, i)? { if !self.close_compound(b, i)? {
Ok(()) Ok(())
@ -185,6 +264,7 @@ pub trait Reader<'de, N: NestedValue> {
} }
} }
#[doc(hidden)]
fn ensure_complete(&mut self, mut b: B::Type, i: &B::Item) -> ReaderResult<()> { fn ensure_complete(&mut self, mut b: B::Type, i: &B::Item) -> ReaderResult<()> {
if !self.close_compound(&mut b, i)? { if !self.close_compound(&mut b, i)? {
Err(error::Error::MissingCloseDelimiter) Err(error::Error::MissingCloseDelimiter)
@ -254,16 +334,27 @@ impl<'r, 'de, N: NestedValue, R: Reader<'de, N>> Reader<'de, N> for &'r mut R {
} }
} }
/// Generic seekable stream of input bytes.
pub trait BinarySource<'de>: Sized { pub trait BinarySource<'de>: Sized {
/// Allows structured backtracking to an earlier position in an input.
type Mark; type Mark;
/// Retrieve a marker for the current position in the input.
fn mark(&mut self) -> io::Result<Self::Mark>; fn mark(&mut self) -> io::Result<Self::Mark>;
/// Seek the input to a previously-saved position.
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>; fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>;
/// Skip the next byte.
fn skip(&mut self) -> io::Result<()>; fn skip(&mut self) -> io::Result<()>;
/// Returns the next byte without advancing over it.
fn peek(&mut self) -> io::Result<u8>; fn peek(&mut self) -> io::Result<u8>;
/// Returns and consumes the next `count` bytes, which must all be available. Always yields
/// exactly `count` bytes or an error.
fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>>; fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>>;
/// As [BinarySource::readbytes], but uses `bs` as destination for the read bytes as well
/// as taking the size of `bs` as the count of bytes to read.
fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()>; fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()>;
/// Constructs a [PackedReader][super::PackedReader] that will read from `self`.
fn packed<N: NestedValue, Dec: DomainDecode<N::Embedded>>( fn packed<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
&mut self, &mut self,
decode_embedded: Dec, decode_embedded: Dec,
@ -271,12 +362,14 @@ pub trait BinarySource<'de>: Sized {
super::PackedReader::new(self, decode_embedded) super::PackedReader::new(self, decode_embedded)
} }
/// Constructs a [PackedReader][super::PackedReader] that will read [IOValue]s from `self`.
fn packed_iovalues( fn packed_iovalues(
&mut self, &mut self,
) -> super::PackedReader<'de, '_, IOValue, IOValueDomainCodec, Self> { ) -> super::PackedReader<'de, '_, IOValue, IOValueDomainCodec, Self> {
self.packed(IOValueDomainCodec) self.packed(IOValueDomainCodec)
} }
/// Constructs a [TextReader][super::TextReader] that will read from `self`.
fn text<N: NestedValue, Dec: DomainParse<N::Embedded>>( fn text<N: NestedValue, Dec: DomainParse<N::Embedded>>(
&mut self, &mut self,
decode_embedded: Dec, decode_embedded: Dec,
@ -284,6 +377,7 @@ pub trait BinarySource<'de>: Sized {
super::TextReader::new(self, decode_embedded) super::TextReader::new(self, decode_embedded)
} }
/// Constructs a [TextReader][super::TextReader] that will read [IOValue]s from `self`.
fn text_iovalues( fn text_iovalues(
&mut self, &mut self,
) -> super::TextReader<'de, '_, IOValue, ViaCodec<IOValueDomainCodec>, Self> { ) -> super::TextReader<'de, '_, IOValue, ViaCodec<IOValueDomainCodec>, Self> {
@ -291,12 +385,18 @@ pub trait BinarySource<'de>: Sized {
} }
} }
/// Implementation of [BinarySource] backed by an [`io::Read`]` + `[`io::Seek`] implementation.
pub struct IOBinarySource<R: io::Read + io::Seek> { pub struct IOBinarySource<R: io::Read + io::Seek> {
/// The underlying byte source.
pub read: R, pub read: R,
#[doc(hidden)]
/// One-place buffer for peeked bytes.
pub buf: Option<u8>, pub buf: Option<u8>,
} }
impl<R: io::Read + io::Seek> IOBinarySource<R> { impl<R: io::Read + io::Seek> IOBinarySource<R> {
/// Constructs an [IOBinarySource] from the given [`io::Read`]` + `[`io::Seek`]
/// implementation.
#[inline(always)] #[inline(always)]
pub fn new(read: R) -> Self { pub fn new(read: R) -> Self {
IOBinarySource { read, buf: None } IOBinarySource { read, buf: None }
@ -364,12 +464,17 @@ impl<'de, R: io::Read + io::Seek> BinarySource<'de> for IOBinarySource<R> {
} }
} }
/// Implementation of [BinarySource] backed by a slice of [u8].
pub struct BytesBinarySource<'de> { pub struct BytesBinarySource<'de> {
/// The underlying byte source.
pub bytes: &'de [u8], pub bytes: &'de [u8],
#[doc(hidden)]
/// Current position within `bytes`.
pub index: usize, pub index: usize,
} }
impl<'de> BytesBinarySource<'de> { impl<'de> BytesBinarySource<'de> {
/// Constructs a [BytesBinarySource] from the given `u8` slice.
#[inline(always)] #[inline(always)]
pub fn new(bytes: &'de [u8]) -> Self { pub fn new(bytes: &'de [u8]) -> Self {
BytesBinarySource { bytes, index: 0 } BytesBinarySource { bytes, index: 0 }
@ -432,21 +537,29 @@ impl<'de> BinarySource<'de> for BytesBinarySource<'de> {
} }
} }
/// A combination of a [Reader] with presets governing its operation.
pub struct ConfiguredReader<'de, N: NestedValue, R: Reader<'de, N>> { pub struct ConfiguredReader<'de, N: NestedValue, R: Reader<'de, N>> {
/// The underlying [Reader].
pub reader: R, pub reader: R,
/// Configuration as to whether to include or discard annotations while reading.
pub read_annotations: bool, pub read_annotations: bool,
phantom: PhantomData<&'de N>, phantom: PhantomData<&'de N>,
} }
impl<'de, N: NestedValue, R: Reader<'de, N>> ConfiguredReader<'de, N, R> { impl<'de, N: NestedValue, R: Reader<'de, N>> ConfiguredReader<'de, N, R> {
/// Constructs a [ConfiguredReader] based on the given `reader`, with `read_annotations`
/// initially set to `true`.
pub fn new(reader: R) -> Self { pub fn new(reader: R) -> Self {
reader.configured(true) reader.configured(true)
} }
/// Updates the `read_annotations` field of `self`.
pub fn set_read_annotations(&mut self, read_annotations: bool) { pub fn set_read_annotations(&mut self, read_annotations: bool) {
self.read_annotations = read_annotations self.read_annotations = read_annotations
} }
/// Retrieve the next parseable value, treating end-of-input as an error.
///
/// Delegates directly to [Reader::demand_next].
pub fn demand_next(&mut self) -> io::Result<N> { pub fn demand_next(&mut self) -> io::Result<N> {
self.reader.demand_next(self.read_annotations) self.reader.demand_next(self.read_annotations)
} }

View File

@ -1572,12 +1572,15 @@ impl<'de> serde::Deserialize<'de> for UnwrappedIOValue {
//--------------------------------------------------------------------------- //---------------------------------------------------------------------------
/// Representation of a collection of annotations to be attached to a [Value]. /// Representation of a collection of annotations to be attached to a [Value] by way of an
/// /// implementation of trait [NestedValue].
/// The complex-seeming `Option<Box<Vec<N>>>` is used to save memory, since a `Box` is smaller
/// than a `Vec`.
#[derive(Clone)] #[derive(Clone)]
pub struct Annotations<N: NestedValue>(Option<Box<Vec<N>>>); pub struct Annotations<N: NestedValue>(
/// The complex-seeming `Option<Box<Vec<N>>>` is used to save memory, since a `Box` is
/// smaller than a `Vec`.
Option<Box<Vec<N>>>,
);
impl<N: NestedValue> Annotations<N> { impl<N: NestedValue> Annotations<N> {
/// Yield the empty [Annotations] sequence. /// Yield the empty [Annotations] sequence.

View File

@ -1,6 +1,12 @@
//! Implements the Preserves [human-oriented text //! Implements the Preserves [human-oriented text
//! syntax](https://preserves.dev/preserves-text.html). //! syntax](https://preserves.dev/preserves-text.html).
//! //!
//! The main entry points for reading are functions [iovalue_from_str],
//! [annotated_iovalue_from_str], [from_str], and [annotated_from_str].
//!
//! The main entry points for writing are [TextWriter::encode_iovalue] and
//! [TextWriter::encode].
//!
//! # Summary of Text Syntax //! # Summary of Text Syntax
#![doc = include_str!("../../../doc/cheatsheet-text-plaintext.md")] #![doc = include_str!("../../../doc/cheatsheet-text-plaintext.md")]

View File

@ -1,4 +1,4 @@
//! Implementation of [Reader][crate::value::reader::Reader] for the text syntax. //! Implementation of [Reader] for the text syntax.
use crate::error::io_syntax_error; use crate::error::io_syntax_error;
use crate::error::is_eof_io_error; use crate::error::is_eof_io_error;

View File

@ -1,4 +1,4 @@
//! Implementation of [Writer][crate::value::writer::Writer] for the text syntax. //! Implementation of [Writer] for the text syntax.
use crate::hex::HexFormatter; use crate::hex::HexFormatter;
use crate::value::suspendable::Suspendable; use crate::value::suspendable::Suspendable;

View File

@ -1,3 +1,6 @@
//! Generic [Writer] trait for unparsing Preserves [Value]s, implemented by code that provides
//! each specific transfer syntax.
use super::boundary as B; use super::boundary as B;
use super::repr::{Double, Float, NestedValue, Value}; use super::repr::{Double, Float, NestedValue, Value};
use super::signed_integer::SignedIntegerRepr; use super::signed_integer::SignedIntegerRepr;
@ -5,61 +8,103 @@ use super::DomainEncode;
use num::bigint::BigInt; use num::bigint::BigInt;
use std::io; use std::io;
#[doc(hidden)]
/// Utility trait for tracking unparser state during production of compound `Value`s.
pub trait CompoundWriter: Writer { pub trait CompoundWriter: Writer {
fn boundary(&mut self, b: &B::Type) -> io::Result<()>; fn boundary(&mut self, b: &B::Type) -> io::Result<()>;
} }
/// Generic unparser for Preserves.
pub trait Writer: Sized { pub trait Writer: Sized {
// Hiding these from the documentation for the moment because I don't want to have to
// document the whole Boundary thing.
#[doc(hidden)]
type AnnWriter: CompoundWriter; type AnnWriter: CompoundWriter;
#[doc(hidden)]
type RecWriter: CompoundWriter; type RecWriter: CompoundWriter;
#[doc(hidden)]
type SeqWriter: CompoundWriter; type SeqWriter: CompoundWriter;
#[doc(hidden)]
type SetWriter: CompoundWriter; type SetWriter: CompoundWriter;
#[doc(hidden)]
type DictWriter: CompoundWriter; type DictWriter: CompoundWriter;
#[doc(hidden)]
type EmbeddedWriter: Writer; type EmbeddedWriter: Writer;
#[doc(hidden)]
fn start_annotations(&mut self) -> io::Result<Self::AnnWriter>; fn start_annotations(&mut self) -> io::Result<Self::AnnWriter>;
#[doc(hidden)]
fn end_annotations(&mut self, ann: Self::AnnWriter) -> io::Result<()>; fn end_annotations(&mut self, ann: Self::AnnWriter) -> io::Result<()>;
#[doc(hidden)]
fn write_bool(&mut self, v: bool) -> io::Result<()>; fn write_bool(&mut self, v: bool) -> io::Result<()>;
#[doc(hidden)]
fn write_f32(&mut self, v: f32) -> io::Result<()>; fn write_f32(&mut self, v: f32) -> io::Result<()>;
#[doc(hidden)]
fn write_f64(&mut self, v: f64) -> io::Result<()>; fn write_f64(&mut self, v: f64) -> io::Result<()>;
#[doc(hidden)]
fn write_i8(&mut self, v: i8) -> io::Result<()>; fn write_i8(&mut self, v: i8) -> io::Result<()>;
#[doc(hidden)]
fn write_u8(&mut self, v: u8) -> io::Result<()>; fn write_u8(&mut self, v: u8) -> io::Result<()>;
#[doc(hidden)]
fn write_i16(&mut self, v: i16) -> io::Result<()>; fn write_i16(&mut self, v: i16) -> io::Result<()>;
#[doc(hidden)]
fn write_u16(&mut self, v: u16) -> io::Result<()>; fn write_u16(&mut self, v: u16) -> io::Result<()>;
#[doc(hidden)]
fn write_i32(&mut self, v: i32) -> io::Result<()>; fn write_i32(&mut self, v: i32) -> io::Result<()>;
#[doc(hidden)]
fn write_u32(&mut self, v: u32) -> io::Result<()>; fn write_u32(&mut self, v: u32) -> io::Result<()>;
#[doc(hidden)]
fn write_i64(&mut self, v: i64) -> io::Result<()>; fn write_i64(&mut self, v: i64) -> io::Result<()>;
#[doc(hidden)]
fn write_u64(&mut self, v: u64) -> io::Result<()>; fn write_u64(&mut self, v: u64) -> io::Result<()>;
#[doc(hidden)]
fn write_i128(&mut self, v: i128) -> io::Result<()>; fn write_i128(&mut self, v: i128) -> io::Result<()>;
#[doc(hidden)]
fn write_u128(&mut self, v: u128) -> io::Result<()>; fn write_u128(&mut self, v: u128) -> io::Result<()>;
#[doc(hidden)]
fn write_int(&mut self, v: &BigInt) -> io::Result<()>; fn write_int(&mut self, v: &BigInt) -> io::Result<()>;
#[doc(hidden)]
fn write_string(&mut self, v: &str) -> io::Result<()>; fn write_string(&mut self, v: &str) -> io::Result<()>;
#[doc(hidden)]
fn write_bytes(&mut self, v: &[u8]) -> io::Result<()>; fn write_bytes(&mut self, v: &[u8]) -> io::Result<()>;
#[doc(hidden)]
fn write_symbol(&mut self, v: &str) -> io::Result<()>; fn write_symbol(&mut self, v: &str) -> io::Result<()>;
#[doc(hidden)]
fn start_record(&mut self, field_count: Option<usize>) -> io::Result<Self::RecWriter>; fn start_record(&mut self, field_count: Option<usize>) -> io::Result<Self::RecWriter>;
#[doc(hidden)]
fn end_record(&mut self, rec: Self::RecWriter) -> io::Result<()>; fn end_record(&mut self, rec: Self::RecWriter) -> io::Result<()>;
#[doc(hidden)]
fn start_sequence(&mut self, item_count: Option<usize>) -> io::Result<Self::SeqWriter>; fn start_sequence(&mut self, item_count: Option<usize>) -> io::Result<Self::SeqWriter>;
#[doc(hidden)]
fn end_sequence(&mut self, seq: Self::SeqWriter) -> io::Result<()>; fn end_sequence(&mut self, seq: Self::SeqWriter) -> io::Result<()>;
#[doc(hidden)]
fn start_set(&mut self, item_count: Option<usize>) -> io::Result<Self::SetWriter>; fn start_set(&mut self, item_count: Option<usize>) -> io::Result<Self::SetWriter>;
#[doc(hidden)]
fn end_set(&mut self, set: Self::SetWriter) -> io::Result<()>; fn end_set(&mut self, set: Self::SetWriter) -> io::Result<()>;
#[doc(hidden)]
fn start_dictionary(&mut self, entry_count: Option<usize>) -> io::Result<Self::DictWriter>; fn start_dictionary(&mut self, entry_count: Option<usize>) -> io::Result<Self::DictWriter>;
#[doc(hidden)]
fn end_dictionary(&mut self, dict: Self::DictWriter) -> io::Result<()>; fn end_dictionary(&mut self, dict: Self::DictWriter) -> io::Result<()>;
#[doc(hidden)]
fn start_embedded(&mut self) -> io::Result<Self::EmbeddedWriter>; fn start_embedded(&mut self) -> io::Result<Self::EmbeddedWriter>;
#[doc(hidden)]
fn end_embedded(&mut self, ptr: Self::EmbeddedWriter) -> io::Result<()>; fn end_embedded(&mut self, ptr: Self::EmbeddedWriter) -> io::Result<()>;
/// Flushes any buffered output.
fn flush(&mut self) -> io::Result<()>; fn flush(&mut self) -> io::Result<()>;
//--------------------------------------------------------------------------- //---------------------------------------------------------------------------
/// Writes [NestedValue] `v` to the output of this [Writer].
fn write<N: NestedValue, Enc: DomainEncode<N::Embedded>>( fn write<N: NestedValue, Enc: DomainEncode<N::Embedded>>(
&mut self, &mut self,
enc: &mut Enc, enc: &mut Enc,
@ -88,6 +133,7 @@ pub trait Writer: Sized {
Ok(()) Ok(())
} }
/// Writes [Value] `v` to the output of this [Writer].
fn write_value<N: NestedValue, Enc: DomainEncode<N::Embedded>>( fn write_value<N: NestedValue, Enc: DomainEncode<N::Embedded>>(
&mut self, &mut self,
enc: &mut Enc, enc: &mut Enc,
@ -167,6 +213,13 @@ pub trait Writer: Sized {
} }
} }
/// Writes a [varint](https://protobuf.dev/programming-guides/encoding/#varints) to `w`.
/// Returns the number of bytes written.
///
/// ```text
/// varint(n) = [n] if n < 128
/// [(n & 127) | 128] ++ varint(n >> 7) if n ≥ 128
/// ```
pub fn varint<W: io::Write>(w: &mut W, mut v: u64) -> io::Result<usize> { pub fn varint<W: io::Write>(w: &mut W, mut v: u64) -> io::Result<usize> {
let mut byte_count = 0; let mut byte_count = 0;
loop { loop {