Document Preserves crate
This commit is contained in:
parent
01d3659216
commit
4b40bf174d
|
@ -0,0 +1,33 @@
|
|||
For a value `V`, we write `«V»` for the binary encoding of `V`.
|
||||
|
||||
```text
|
||||
«#f» = [0x80]
|
||||
«#t» = [0x81]
|
||||
|
||||
«@W V» = [0x85] ++ «W» ++ «V»
|
||||
«#!V» = [0x86] ++ «V»
|
||||
|
||||
«V» if V ∈ Float = [0x87, 0x04] ++ binary32(V)
|
||||
«V» if V ∈ Double = [0x87, 0x08] ++ binary64(V)
|
||||
|
||||
«V» if V ∈ SignedInteger = [0xB0] ++ varint(|intbytes(V)|) ++ intbytes(V)
|
||||
«V» if V ∈ String = [0xB1] ++ varint(|utf8(V)|) ++ utf8(V)
|
||||
«V» if V ∈ ByteString = [0xB2] ++ varint(|V|) ++ V
|
||||
«V» if V ∈ Symbol = [0xB3] ++ varint(|utf8(V)|) ++ utf8(V)
|
||||
|
||||
«<L F_1...F_m>» = [0xB4] ++ «L» ++ «F_1» ++...++ «F_m» ++ [0x84]
|
||||
«[X_1...X_m]» = [0xB5] ++ «X_1» ++...++ «X_m» ++ [0x84]
|
||||
«#{E_1...E_m}» = [0xB6] ++ «E_1» ++...++ «E_m» ++ [0x84]
|
||||
«{K_1:V_1...K_m:V_m}» = [0xB7] ++ «K_1» ++ «V_1» ++...++ «K_m» ++ «V_m» ++ [0x84]
|
||||
|
||||
varint(n) = [n] if n < 128
|
||||
[(n & 127) | 128] ++ varint(n >> 7) if n ≥ 128
|
||||
|
||||
intbytes(n) = the empty sequence if n = 0, otherwise signedBigEndian(n)
|
||||
|
||||
signedBigEndian(n) = [n & 255] if -128 ≤ n ≤ 127
|
||||
signedBigEndian(n >> 8) ++ [n & 255] otherwise
|
||||
```
|
||||
|
||||
The functions `binary32(F)` and `binary64(D)` yield big-endian 4- and
|
||||
8-byte IEEE 754 binary representations of `F` and `D`, respectively.
|
|
@ -51,36 +51,3 @@ division](https://en.wikipedia.org/wiki/Euclidean_division); that is, if
|
|||
<span class="postcard-grammar binarysyntax">*n* = *dq* + *r*</span> and
|
||||
<span class="postcard-grammar binarysyntax">0 ≤ *r* < |d|</span>.
|
||||
-->
|
||||
|
||||
<!--
|
||||
For a value `V`, we write `«V»` for the binary encoding of `V`.
|
||||
|
||||
«#f» = [0x80]
|
||||
«#t» = [0x81]
|
||||
|
||||
«@W V» = [0x85] ++ «W» ++ «V»
|
||||
«#!V» = [0x86] ++ «V»
|
||||
|
||||
«V» if V ∈ Float = [0x87, 0x04] ++ binary32(V)
|
||||
«V» if V ∈ Double = [0x87, 0x08] ++ binary64(V)
|
||||
|
||||
«V» if V ∈ SignedInteger = [0xB0] ++ varint(|intbytes(V)|) ++ intbytes(V)
|
||||
«V» if V ∈ String = [0xB1] ++ varint(|utf8(V)|) ++ utf8(V)
|
||||
«V» if V ∈ ByteString = [0xB2] ++ varint(|V|) ++ V
|
||||
«V» if V ∈ Symbol = [0xB3] ++ varint(|utf8(V)|) ++ utf8(V)
|
||||
|
||||
«<L F_1...F_m>» = [0xB4] ++ «L» ++ «F_1» ++...++ «F_m» ++ [0x84]
|
||||
«[X_1...X_m]» = [0xB5] ++ «X_1» ++...++ «X_m» ++ [0x84]
|
||||
«#{E_1...E_m}» = [0xB6] ++ «E_1» ++...++ «E_m» ++ [0x84]
|
||||
«{K_1:V_1...K_m:V_m}» = [0xB7] ++ «K_1» ++ «V_1» ++...++ «K_m» ++ «V_m» ++ [0x84]
|
||||
|
||||
varint(v) = [v] if v < 128
|
||||
[(v & 0x7F) + 128] ++ varint(v >> 7) if v ≥ 128
|
||||
|
||||
The functions `binary32(F)` and `binary64(D)` yield big-endian 4- and
|
||||
8-byte IEEE 754 binary representations of `F` and `D`, respectively.
|
||||
|
||||
The function `intbytes(x)` is a big-endian two's-complement signed binary representation of
|
||||
`x`, taking exactly as many whole bytes as needed to unambiguously identify the value and its
|
||||
sign. In particular, `intbytes(0)` is the empty byte sequence.
|
||||
-->
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
```text
|
||||
Document := Value ws
|
||||
Value := ws (Record | Collection | Atom | Embedded | Annotated)
|
||||
Collection := Sequence | Dictionary | Set
|
||||
Atom := Boolean | ByteString | String | QuotedSymbol | Symbol | Number
|
||||
ws := (space | tab | cr | lf | `,`)*
|
||||
|
||||
Record := `<` Value+ ws `>`
|
||||
Sequence := `[` Value* ws `]`
|
||||
Dictionary := `{` (Value ws `:` Value)* ws `}`
|
||||
Set := `#{` Value* ws `}`
|
||||
|
||||
Boolean := `#t` | `#f`
|
||||
ByteString := `#"` binchar* `"`
|
||||
| `#x"` (ws hex hex)* ws `"`
|
||||
| `#[` (ws base64char)* ws `]`
|
||||
String := `"` («any unicode scalar except `\` or `"`» | escaped | `\"`)* `"`
|
||||
QuotedSymbol := `|` («any unicode scalar except `\` or `|`» | escaped | `\|`)* `|`
|
||||
Symbol := (`A`..`Z` | `a`..`z` | `0`..`9` | sympunct | symuchar)+
|
||||
Number := Float | Double | SignedInteger
|
||||
Float := flt (`f`|`F`) | `#xf"` (ws hex hex)4 ws `"`
|
||||
Double := flt | `#xd"` (ws hex hex)8 ws `"`
|
||||
SignedInteger := int
|
||||
|
||||
Embedded := `#!` Value
|
||||
Annotated := Annotation Value
|
||||
Annotation := `@` Value | `;` «any unicode scalar except cr or lf»* (cr | lf)
|
||||
|
||||
escaped := `\\` | `\/` | `\b` | `\f` | `\n` | `\r` | `\t` | `\u` hex hex hex hex
|
||||
binescaped := `\\` | `\/` | `\b` | `\f` | `\n` | `\r` | `\t` | `\x` hex hex
|
||||
binchar := «any scalar ≥32 and ≤126, except `\` or `"`» | binescaped | `\"`
|
||||
base64char := `A`..`Z` | `a`..`z` | `0`..`9` | `+` | `/` | `-` | `_` | `=`
|
||||
sympunct := `~` | `!` | `$` | `%` | `^` | `&` | `*` | `?`
|
||||
| `_` | `=` | `+` | `-` | `/` | `.`
|
||||
symuchar := «any scalar value ≥128 whose Unicode category is
|
||||
Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc,
|
||||
Pd, Po, Sc, Sm, Sk, So, or Co»
|
||||
|
||||
flt := int ( frac exp | frac | exp )
|
||||
int := (`-`|`+`) (`0`..`9`)+
|
||||
frac := `.` (`0`..`9`)+
|
||||
exp := (`e`|`E`) (`-`|`+`) (`0`..`9`)+
|
||||
hex := `A`..`F` | `a`..`f` | `0`..`9`
|
||||
```
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
no_site_title: true
|
||||
title: "Preserves Quick Reference (Plaintext)"
|
||||
---
|
||||
|
||||
Tony Garnock-Jones <tonyg@leastfixedpoint.com>
|
||||
{{ site.version_date }}. Version {{ site.version }}.
|
||||
|
||||
{% include cheatsheet-binary-plaintext.md %}
|
||||
|
||||
{% include cheatsheet-text-plaintext.md %}
|
|
@ -3,6 +3,12 @@
|
|||
set -e
|
||||
exec 1>&2
|
||||
|
||||
COMMAND=cmp
|
||||
if [ "$1" = "--fix" ];
|
||||
then
|
||||
COMMAND=cp
|
||||
fi
|
||||
|
||||
# https://gitlab.com/preserves/preserves/-/issues/30
|
||||
#
|
||||
# So it turns out that Racket's git-checkout mechanism pays attention
|
||||
|
@ -16,10 +22,14 @@ exec 1>&2
|
|||
|
||||
# Ensure that various copies of schema.prs, schema.bin, path.bin,
|
||||
# samples.pr and samples.bin are in fact identical.
|
||||
cmp path/path.bin implementations/python/preserves/path.prb
|
||||
cmp path/path.bin implementations/rust/preserves-path/path.bin
|
||||
cmp schema/schema.bin implementations/python/preserves/schema.prb
|
||||
cmp schema/schema.prs implementations/racket/preserves/preserves-schema/schema.prs
|
||||
cmp tests/samples.bin implementations/python/tests/samples.bin
|
||||
cmp tests/samples.pr implementations/python/tests/samples.pr
|
||||
cmp tests/samples.pr implementations/racket/preserves/preserves/tests/samples.pr
|
||||
${COMMAND} path/path.bin implementations/python/preserves/path.prb
|
||||
${COMMAND} path/path.bin implementations/rust/preserves-path/path.bin
|
||||
${COMMAND} schema/schema.bin implementations/python/preserves/schema.prb
|
||||
${COMMAND} schema/schema.prs implementations/racket/preserves/preserves-schema/schema.prs
|
||||
${COMMAND} tests/samples.bin implementations/python/tests/samples.bin
|
||||
${COMMAND} tests/samples.pr implementations/python/tests/samples.pr
|
||||
${COMMAND} tests/samples.pr implementations/racket/preserves/preserves/tests/samples.pr
|
||||
${COMMAND} _includes/what-is-preserves.md implementations/rust/preserves/doc/what-is-preserves.md
|
||||
${COMMAND} _includes/what-is-preserves-schema.md implementations/rust/preserves-schema/doc/what-is-preserves-schema.md
|
||||
${COMMAND} _includes/cheatsheet-binary-plaintext.md implementations/rust/preserves/doc/cheatsheet-binary-plaintext.md
|
||||
${COMMAND} _includes/cheatsheet-text-plaintext.md implementations/rust/preserves/doc/cheatsheet-text-plaintext.md
|
||||
|
|
|
@ -5,6 +5,9 @@
|
|||
all:
|
||||
cargo build --all-targets
|
||||
|
||||
doc:
|
||||
cargo doc --workspace
|
||||
|
||||
x86_64-binary: x86_64-binary-release
|
||||
|
||||
x86_64-binary-release:
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
A Preserves schema connects Preserves `Value`s to host-language data
|
||||
structures. Each definition within a schema can be processed by a
|
||||
compiler to produce
|
||||
|
||||
- a simple host-language *type definition*;
|
||||
|
||||
- a partial *parsing* function from `Value`s to instances of the
|
||||
produced type; and
|
||||
|
||||
- a total *serialization* function from instances of the type to
|
||||
`Value`s.
|
||||
|
||||
Every parsed `Value` retains enough information to always be able to
|
||||
be serialized again, and every instance of a host-language data
|
||||
structure contains, by construction, enough information to be
|
||||
successfully serialized.
|
|
@ -0,0 +1,8 @@
|
|||
# Preserves core: value representation, codecs, and Serde support
|
||||
|
||||
This crate implements [Preserves](https://preserves.dev/) for Rust, including
|
||||
|
||||
- serde support (modules [de], [ser], [symbol], [set])
|
||||
|
||||
- plain Preserves value support, plus [text][crate::value::text] and
|
||||
[binary][crate::value::packed] codecs (the [value] module)
|
|
@ -0,0 +1,33 @@
|
|||
For a value `V`, we write `«V»` for the binary encoding of `V`.
|
||||
|
||||
```text
|
||||
«#f» = [0x80]
|
||||
«#t» = [0x81]
|
||||
|
||||
«@W V» = [0x85] ++ «W» ++ «V»
|
||||
«#!V» = [0x86] ++ «V»
|
||||
|
||||
«V» if V ∈ Float = [0x87, 0x04] ++ binary32(V)
|
||||
«V» if V ∈ Double = [0x87, 0x08] ++ binary64(V)
|
||||
|
||||
«V» if V ∈ SignedInteger = [0xB0] ++ varint(|intbytes(V)|) ++ intbytes(V)
|
||||
«V» if V ∈ String = [0xB1] ++ varint(|utf8(V)|) ++ utf8(V)
|
||||
«V» if V ∈ ByteString = [0xB2] ++ varint(|V|) ++ V
|
||||
«V» if V ∈ Symbol = [0xB3] ++ varint(|utf8(V)|) ++ utf8(V)
|
||||
|
||||
«<L F_1...F_m>» = [0xB4] ++ «L» ++ «F_1» ++...++ «F_m» ++ [0x84]
|
||||
«[X_1...X_m]» = [0xB5] ++ «X_1» ++...++ «X_m» ++ [0x84]
|
||||
«#{E_1...E_m}» = [0xB6] ++ «E_1» ++...++ «E_m» ++ [0x84]
|
||||
«{K_1:V_1...K_m:V_m}» = [0xB7] ++ «K_1» ++ «V_1» ++...++ «K_m» ++ «V_m» ++ [0x84]
|
||||
|
||||
varint(n) = [n] if n < 128
|
||||
[(n & 127) | 128] ++ varint(n >> 7) if n ≥ 128
|
||||
|
||||
intbytes(n) = the empty sequence if n = 0, otherwise signedBigEndian(n)
|
||||
|
||||
signedBigEndian(n) = [n & 255] if -128 ≤ n ≤ 127
|
||||
signedBigEndian(n >> 8) ++ [n & 255] otherwise
|
||||
```
|
||||
|
||||
The functions `binary32(F)` and `binary64(D)` yield big-endian 4- and
|
||||
8-byte IEEE 754 binary representations of `F` and `D`, respectively.
|
|
@ -0,0 +1,44 @@
|
|||
```text
|
||||
Document := Value ws
|
||||
Value := ws (Record | Collection | Atom | Embedded | Annotated)
|
||||
Collection := Sequence | Dictionary | Set
|
||||
Atom := Boolean | ByteString | String | QuotedSymbol | Symbol | Number
|
||||
ws := (space | tab | cr | lf | `,`)*
|
||||
|
||||
Record := `<` Value+ ws `>`
|
||||
Sequence := `[` Value* ws `]`
|
||||
Dictionary := `{` (Value ws `:` Value)* ws `}`
|
||||
Set := `#{` Value* ws `}`
|
||||
|
||||
Boolean := `#t` | `#f`
|
||||
ByteString := `#"` binchar* `"`
|
||||
| `#x"` (ws hex hex)* ws `"`
|
||||
| `#[` (ws base64char)* ws `]`
|
||||
String := `"` («any unicode scalar except `\` or `"`» | escaped | `\"`)* `"`
|
||||
QuotedSymbol := `|` («any unicode scalar except `\` or `|`» | escaped | `\|`)* `|`
|
||||
Symbol := (`A`..`Z` | `a`..`z` | `0`..`9` | sympunct | symuchar)+
|
||||
Number := Float | Double | SignedInteger
|
||||
Float := flt (`f`|`F`) | `#xf"` (ws hex hex)4 ws `"`
|
||||
Double := flt | `#xd"` (ws hex hex)8 ws `"`
|
||||
SignedInteger := int
|
||||
|
||||
Embedded := `#!` Value
|
||||
Annotated := Annotation Value
|
||||
Annotation := `@` Value | `;` «any unicode scalar except cr or lf»* (cr | lf)
|
||||
|
||||
escaped := `\\` | `\/` | `\b` | `\f` | `\n` | `\r` | `\t` | `\u` hex hex hex hex
|
||||
binescaped := `\\` | `\/` | `\b` | `\f` | `\n` | `\r` | `\t` | `\x` hex hex
|
||||
binchar := «any scalar ≥32 and ≤126, except `\` or `"`» | binescaped | `\"`
|
||||
base64char := `A`..`Z` | `a`..`z` | `0`..`9` | `+` | `/` | `-` | `_` | `=`
|
||||
sympunct := `~` | `!` | `$` | `%` | `^` | `&` | `*` | `?`
|
||||
| `_` | `=` | `+` | `-` | `/` | `.`
|
||||
symuchar := «any scalar value ≥128 whose Unicode category is
|
||||
Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc,
|
||||
Pd, Po, Sc, Sm, Sk, So, or Co»
|
||||
|
||||
flt := int ( frac exp | frac | exp )
|
||||
int := (`-`|`+`) (`0`..`9`)+
|
||||
frac := `.` (`0`..`9`)+
|
||||
exp := (`e`|`E`) (`-`|`+`) (`0`..`9`)+
|
||||
hex := `A`..`F` | `a`..`f` | `0`..`9`
|
||||
```
|
|
@ -0,0 +1,12 @@
|
|||
*Preserves* is a data model, with associated serialization formats.
|
||||
|
||||
It supports *records* with user-defined *labels*, embedded
|
||||
*references*, and the usual suite of atomic and compound data types,
|
||||
including *binary* data as a distinct type from text strings. Its
|
||||
*annotations* allow separation of data from metadata such as comments,
|
||||
trace information, and provenance information.
|
||||
|
||||
Preserves departs from many other data languages in defining how to
|
||||
*compare* two values. Comparison is based on the data model, not on
|
||||
syntax or on data structures of any particular implementation
|
||||
language.
|
|
@ -1,3 +1,5 @@
|
|||
//! Support for Serde deserialization of Preserves terms described by Rust data types.
|
||||
|
||||
use serde::de::{DeserializeSeed, EnumAccess, MapAccess, SeqAccess, VariantAccess, Visitor};
|
||||
use serde::Deserialize;
|
||||
|
||||
|
@ -11,13 +13,21 @@ use super::value::{IOValue, IOValueDomainCodec, PackedReader, TextReader, ViaCod
|
|||
|
||||
pub use super::error::Error;
|
||||
|
||||
/// A [std::result::Result] type including [Error], the Preserves Serde deserialization error
|
||||
/// type, as its error.
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
/// Serde deserializer for Preserves-encoded Rust data. Use [Deserializer::from_reader] to
|
||||
/// construct instances, or [from_bytes]/[from_text]/[from_read]/[from_reader] etc. to
|
||||
/// deserialize single terms directly.
|
||||
pub struct Deserializer<'de, 'r, R: Reader<'de, IOValue>> {
|
||||
/// The underlying Preserves [reader][crate::value::reader::Reader].
|
||||
pub read: &'r mut R,
|
||||
phantom: PhantomData<&'de ()>,
|
||||
}
|
||||
|
||||
/// Deserialize a `T` from `bytes`, which must contain a Preserves [machine-oriented binary
|
||||
/// syntax][crate::value::packed] term corresponding to the Serde serialization of a `T`.
|
||||
pub fn from_bytes<'de, T>(bytes: &'de [u8]) -> Result<T>
|
||||
where
|
||||
T: Deserialize<'de>,
|
||||
|
@ -28,6 +38,8 @@ where
|
|||
))
|
||||
}
|
||||
|
||||
/// Deserialize a `T` from `text`, which must contain a Preserves [text
|
||||
/// syntax][crate::value::text] term corresponding to the Serde serialization of a `T`.
|
||||
pub fn from_text<'de, T>(text: &'de str) -> Result<T>
|
||||
where
|
||||
T: Deserialize<'de>,
|
||||
|
@ -38,6 +50,8 @@ where
|
|||
))
|
||||
}
|
||||
|
||||
/// Deserialize a `T` from `read`, which must yield a Preserves [machine-oriented binary
|
||||
/// syntax][crate::value::packed] term corresponding to the Serde serialization of a `T`.
|
||||
pub fn from_read<'de, 'r, IOR: io::Read + io::Seek, T>(read: &'r mut IOR) -> Result<T>
|
||||
where
|
||||
T: Deserialize<'de>,
|
||||
|
@ -48,6 +62,8 @@ where
|
|||
))
|
||||
}
|
||||
|
||||
/// Deserialize a `T` from `read`, which must yield a Preserves term corresponding to the Serde
|
||||
/// serialization of a `T`.
|
||||
pub fn from_reader<'r, 'de, R: Reader<'de, IOValue>, T>(read: &'r mut R) -> Result<T>
|
||||
where
|
||||
T: Deserialize<'de>,
|
||||
|
@ -58,6 +74,7 @@ where
|
|||
}
|
||||
|
||||
impl<'r, 'de, R: Reader<'de, IOValue>> Deserializer<'de, 'r, R> {
|
||||
/// Construct a Deserializer from `read`, a Preserves [reader][crate::value::Reader].
|
||||
pub fn from_reader(read: &'r mut R) -> Self {
|
||||
Deserializer {
|
||||
read,
|
||||
|
@ -344,6 +361,7 @@ impl<'r, 'de, 'a, R: Reader<'de, IOValue>> serde::de::Deserializer<'de>
|
|||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub struct Seq<'de, 'r, 'a, R: Reader<'de, IOValue>> {
|
||||
b: B::Type,
|
||||
i: B::Item,
|
||||
|
|
|
@ -1,27 +1,47 @@
|
|||
//! Serde and plain-Preserves codec errors.
|
||||
|
||||
use num::bigint::BigInt;
|
||||
use std::convert::From;
|
||||
use std::io;
|
||||
|
||||
/// Representation of parse, deserialization, and other conversion errors.
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// Generic IO error.
|
||||
Io(io::Error),
|
||||
/// Generic message for the user.
|
||||
Message(String),
|
||||
/// Invalid unicode scalar `n` found during interpretation of a `<UnicodeScalar n>` record
|
||||
/// as a Rust `char`.
|
||||
InvalidUnicodeScalar(u32),
|
||||
/// Preserves supports arbitrary integers; when these are converted to specific Rust
|
||||
/// machine word types, sometimes they exceed the available range.
|
||||
NumberOutOfRange(BigInt),
|
||||
/// Serde has limited support for deserializing free-form data; this error is signalled
|
||||
/// when one of the limits is hit.
|
||||
CannotDeserializeAny,
|
||||
/// Syntax error: missing closing delimiter (`)`, `]`, `}`, `>` in text syntax; `0x84` in binary syntax; etc.)
|
||||
MissingCloseDelimiter,
|
||||
/// Signalled when an expected term is not present.
|
||||
MissingItem,
|
||||
/// Signalled when what was received did not match expectations.
|
||||
Expected(ExpectedKind, Received),
|
||||
#[doc(hidden)] // TODO remove this enum variant? It isn't used
|
||||
StreamingSerializationUnsupported,
|
||||
}
|
||||
|
||||
/// Used in [Error::Expected] to indicate what was received.
|
||||
#[derive(Debug)]
|
||||
pub enum Received {
|
||||
#[doc(hidden)] // TODO remove this enum variant? It isn't used
|
||||
ReceivedSomethingElse,
|
||||
/// Received a record with the given label symbol text.
|
||||
ReceivedRecordWithLabel(String),
|
||||
/// Received some other value, described in the `String`
|
||||
ReceivedOtherValue(String),
|
||||
}
|
||||
|
||||
/// Used in [Error::Expected] to indicate what was expected.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum ExpectedKind {
|
||||
Boolean,
|
||||
|
@ -35,7 +55,9 @@ pub enum ExpectedKind {
|
|||
ByteString,
|
||||
Symbol,
|
||||
|
||||
/// Expected a record, either of a specific arity (length) or of no specific arity
|
||||
Record(Option<usize>),
|
||||
/// Expected a record with a symbol label with text `String`, perhaps of some specific arity
|
||||
SimpleRecord(String, Option<usize>),
|
||||
Sequence,
|
||||
Set,
|
||||
|
@ -87,14 +109,17 @@ impl std::fmt::Display for Error {
|
|||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
/// True iff `e` is `Error::Io`
|
||||
pub fn is_io_error(e: &Error) -> bool {
|
||||
matches!(e, Error::Io(_))
|
||||
}
|
||||
|
||||
/// Produce the generic "end of file" error, `Error::Io(`[io_eof]`())`
|
||||
pub fn eof() -> Error {
|
||||
Error::Io(io_eof())
|
||||
}
|
||||
|
||||
/// True iff `e` is an "end of file" error; see [is_eof_io_error]
|
||||
pub fn is_eof_error(e: &Error) -> bool {
|
||||
if let Error::Io(ioe) = e {
|
||||
is_eof_io_error(ioe)
|
||||
|
@ -103,10 +128,12 @@ pub fn is_eof_error(e: &Error) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
/// Produce a syntax error bearing the message `s`
|
||||
pub fn syntax_error(s: &str) -> Error {
|
||||
Error::Io(io_syntax_error(s))
|
||||
}
|
||||
|
||||
/// True iff `e` is a syntax error; see [is_syntax_io_error]
|
||||
pub fn is_syntax_error(e: &Error) -> bool {
|
||||
if let Error::Io(ioe) = e {
|
||||
is_syntax_io_error(ioe)
|
||||
|
@ -117,18 +144,22 @@ pub fn is_syntax_error(e: &Error) -> bool {
|
|||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
/// Produce an [io::Error] of [io::ErrorKind::UnexpectedEof].
|
||||
pub fn io_eof() -> io::Error {
|
||||
io::Error::new(io::ErrorKind::UnexpectedEof, "EOF")
|
||||
}
|
||||
|
||||
/// True iff `e` is [io::ErrorKind::UnexpectedEof]
|
||||
pub fn is_eof_io_error(e: &io::Error) -> bool {
|
||||
matches!(e.kind(), io::ErrorKind::UnexpectedEof)
|
||||
}
|
||||
|
||||
/// Produce a syntax error ([io::ErrorKind::InvalidData]) bearing the message `s`
|
||||
pub fn io_syntax_error(s: &str) -> io::Error {
|
||||
io::Error::new(io::ErrorKind::InvalidData, s)
|
||||
}
|
||||
|
||||
/// True iff `e` is an [io::ErrorKind::InvalidData] (a syntax error)
|
||||
pub fn is_syntax_io_error(e: &io::Error) -> bool {
|
||||
matches!(e.kind(), io::ErrorKind::InvalidData)
|
||||
}
|
||||
|
|
|
@ -1,19 +1,38 @@
|
|||
//! Utilities for producing and flexibly parsing strings containing hexadecimal binary data.
|
||||
|
||||
/// Utility for parsing hex binary data from strings.
|
||||
pub enum HexParser {
|
||||
/// "Liberal" parsing simply ignores characters that are not (case-insensitive) hex digits.
|
||||
Liberal,
|
||||
/// "Whitespace allowed" parsing ignores whitespace, but fails a parse on anything other
|
||||
/// than hex or whitespace.
|
||||
WhitespaceAllowed,
|
||||
/// "Strict" parsing accepts only (case-insensitive) hex digits; no whitespace, no other
|
||||
/// characters.
|
||||
Strict,
|
||||
}
|
||||
|
||||
/// Utility for formatting binary data as hex.
|
||||
pub enum HexFormatter {
|
||||
/// Produces LF-separated lines with a maximum of `usize` hex digits in each line.
|
||||
Lines(usize),
|
||||
/// Simply packs hex digits in as tightly as possible.
|
||||
Packed,
|
||||
}
|
||||
|
||||
/// Convert a number 0..15 to a hex digit [char].
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if given `v` outside the range 0..15 inclusive.
|
||||
///
|
||||
pub fn hexdigit(v: u8) -> char {
|
||||
char::from_digit(v as u32, 16).expect("hexadecimal digit value")
|
||||
}
|
||||
|
||||
impl HexParser {
|
||||
/// Decode `s` according to the given rules for `self`; see [HexParser].
|
||||
/// If the parse fails, yield `None`.
|
||||
pub fn decode(&self, s: &str) -> Option<Vec<u8>> {
|
||||
let mut result = Vec::new();
|
||||
let mut buf: u8 = 0;
|
||||
|
@ -49,6 +68,7 @@ impl HexParser {
|
|||
}
|
||||
|
||||
impl HexFormatter {
|
||||
/// Encode `bs` according to the given rules for `self`; see [HexFormatter].
|
||||
pub fn encode(&self, bs: &[u8]) -> String {
|
||||
match self {
|
||||
HexFormatter::Lines(max_line_length) => {
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
#![doc = concat!(
|
||||
include_str!("../doc/what-is-preserves.md"),
|
||||
include_str!("../README.md"),
|
||||
)]
|
||||
|
||||
pub mod de;
|
||||
pub mod error;
|
||||
pub mod hex;
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
//! Support for Serde serialization of Rust data types into Preserves terms.
|
||||
|
||||
use super::value::boundary as B;
|
||||
use super::value::writer::{CompoundWriter, Writer};
|
||||
use super::value::IOValueDomainCodec;
|
||||
|
@ -7,11 +9,16 @@ pub use super::error::Error;
|
|||
type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
#[derive(Debug)]
|
||||
/// Serde serializer for Preserves-encoding Rust data. Construct via [Serializer::new], and use
|
||||
/// with [serde::Serialize::serialize] methods.
|
||||
pub struct Serializer<'w, W: Writer> {
|
||||
/// The underlying Preserves [writer][crate::value::writer::Writer].
|
||||
pub write: &'w mut W,
|
||||
}
|
||||
|
||||
impl<'w, W: Writer> Serializer<'w, W> {
|
||||
/// Construct a new [Serializer] targeting the given
|
||||
/// [writer][crate::value::writer::Writer].
|
||||
pub fn new(write: &'w mut W) -> Self {
|
||||
Serializer { write }
|
||||
}
|
||||
|
@ -22,6 +29,7 @@ enum SequenceVariant<W: Writer> {
|
|||
Record(W::RecWriter),
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub struct SerializeCompound<'a, 'w, W: Writer> {
|
||||
b: B::Type,
|
||||
i: B::Item,
|
||||
|
@ -29,6 +37,7 @@ pub struct SerializeCompound<'a, 'w, W: Writer> {
|
|||
c: SequenceVariant<W>,
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub struct SerializeDictionary<'a, 'w, W: Writer> {
|
||||
b: B::Type,
|
||||
ser: &'a mut Serializer<'w, W>,
|
||||
|
@ -442,6 +451,8 @@ impl<'a, 'w, W: Writer> serde::ser::SerializeSeq for SerializeCompound<'a, 'w, W
|
|||
}
|
||||
}
|
||||
|
||||
/// Convenience function for directly serializing a Serde-serializable `T` to the given
|
||||
/// `write`, a Preserves [writer][crate::value::writer::Writer].
|
||||
pub fn to_writer<W: Writer, T: Serialize + ?Sized>(write: &mut W, value: &T) -> Result<()> {
|
||||
Ok(value.serialize(&mut Serializer::new(write))?)
|
||||
}
|
||||
|
|
|
@ -1,7 +1,25 @@
|
|||
//! Serde support for serializing Rust collections as Preserves sets.
|
||||
//!
|
||||
//! Serde doesn't include sets in its data model, so we do some somewhat awful tricks to force
|
||||
//! things to come out the way we want them.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! Annotate collection-valued fields that you want to (en|de)code as Preserves `Set`s with
|
||||
//! `#[serde(with = "preserves::set")]`:
|
||||
//!
|
||||
//! ```rust
|
||||
//! struct Example {
|
||||
//! #[serde(with = "preserves::set")]
|
||||
//! items: preserves::value::Set<String>,
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
use crate::value::{self, to_value, IOValue, UnwrappedIOValue};
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use std::iter::IntoIterator;
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn serialize<S, T, Item>(s: T, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
|
@ -12,6 +30,7 @@ where
|
|||
UnwrappedIOValue::from(s).wrap().serialize(serializer)
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn deserialize<'de, D, T>(deserializer: D) -> Result<T, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
|
|
|
@ -1,5 +1,24 @@
|
|||
//! Serde support for serializing Rust data as Preserves symbols.
|
||||
//!
|
||||
//! Serde doesn't include symbols in its data model, so we do some somewhat awful tricks to
|
||||
//! force things to come out the way we want them.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! Either use [Symbol] directly in your data types, or annotate [String]-valued fields that
|
||||
//! you want to (en|de)code as Preserves `Symbol`s with `#[serde(with = "preserves::symbol")]`:
|
||||
//!
|
||||
//! ```rust
|
||||
//! struct Example {
|
||||
//! sym1: preserves::symbol::Symbol,
|
||||
//! #[serde(with = "preserves::symbol")]
|
||||
//! sym2: String,
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
use crate::value::{IOValue, NestedValue};
|
||||
|
||||
/// Wrapper for a string to coerce its Preserves-serialization to `Symbol`.
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
pub struct Symbol(pub String);
|
||||
|
||||
|
@ -26,6 +45,7 @@ impl<'de> serde::Deserialize<'de> for Symbol {
|
|||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn serialize<S>(s: &str, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
|
@ -34,6 +54,7 @@ where
|
|||
Symbol(s.to_string()).serialize(serializer)
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn deserialize<'de, D>(deserializer: D) -> Result<String, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
#![doc(hidden)]
|
||||
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct Type {
|
||||
pub closing: Option<Item>,
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
//! Support Serde deserialization of Rust data types from Preserves *values* (not syntax).
|
||||
|
||||
use crate::error::{Error, ExpectedKind, Received};
|
||||
use crate::value::repr::{Double, Float};
|
||||
use crate::value::{IOValue, Map, NestedValue, UnwrappedIOValue, Value};
|
||||
|
@ -7,10 +9,14 @@ use std::iter::Iterator;
|
|||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
/// Serde deserializer for constructing Rust data from an in-memory Preserves value. Use
|
||||
/// [Deserializer::from_value] to construct instances, or [from_value] to deserialize single
|
||||
/// values directly.
|
||||
pub struct Deserializer<'de> {
|
||||
input: &'de IOValue,
|
||||
}
|
||||
|
||||
/// Deserialize a `T` from `v`, a Preserves [IOValue].
|
||||
pub fn from_value<'a, T>(v: &'a IOValue) -> Result<T>
|
||||
where
|
||||
T: Deserialize<'a>,
|
||||
|
@ -21,6 +27,7 @@ where
|
|||
}
|
||||
|
||||
impl<'de> Deserializer<'de> {
|
||||
/// Construct a Deserializer from `v`, an [IOValue].
|
||||
pub fn from_value(v: &'de IOValue) -> Self {
|
||||
Deserializer { input: v }
|
||||
}
|
||||
|
@ -331,6 +338,7 @@ impl<'de, 'a> serde::de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub struct VecSeq<'a, 'de: 'a, I: Iterator<Item = &'de IOValue>> {
|
||||
iter: I,
|
||||
de: &'a mut Deserializer<'de>,
|
||||
|
@ -359,6 +367,7 @@ impl<'de, 'a, I: Iterator<Item = &'de IOValue>> SeqAccess<'de> for VecSeq<'a, 'd
|
|||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub struct DictMap<'a, 'de: 'a> {
|
||||
pending: Option<&'de IOValue>,
|
||||
iter: Box<dyn Iterator<Item = (&'de IOValue, &'de IOValue)> + 'a>,
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
//! Traits for working with Preserves [embedded
|
||||
//! values](https://preserves.dev/preserves.html#embeddeds).
|
||||
|
||||
use std::io;
|
||||
|
||||
use super::packed;
|
||||
|
@ -9,10 +12,12 @@ use super::NestedValue;
|
|||
use super::Reader;
|
||||
use super::Writer;
|
||||
|
||||
/// Implementations parse [IOValue]s to their own particular [Embeddable] values of type `D`.
|
||||
pub trait DomainParse<D: Embeddable> {
|
||||
fn parse_embedded(&mut self, v: &IOValue) -> io::Result<D>;
|
||||
}
|
||||
|
||||
/// Implementations read and parse from `src` to produce [Embeddable] values of type `D`.
|
||||
pub trait DomainDecode<D: Embeddable> {
|
||||
fn decode_embedded<'de, 'src, S: BinarySource<'de>>(
|
||||
&mut self,
|
||||
|
@ -21,6 +26,7 @@ pub trait DomainDecode<D: Embeddable> {
|
|||
) -> io::Result<D>;
|
||||
}
|
||||
|
||||
/// Implementations unparse and write `D`s to `w`, a [writer][crate::value::writer::Writer].
|
||||
pub trait DomainEncode<D: Embeddable> {
|
||||
fn encode_embedded<W: Writer>(&mut self, w: &mut W, d: &D) -> io::Result<()>;
|
||||
}
|
||||
|
@ -41,6 +47,9 @@ impl<'a, D: Embeddable, T: DomainDecode<D>> DomainDecode<D> for &'a mut T {
|
|||
}
|
||||
}
|
||||
|
||||
/// Convenience codec: use this as embedded codec for encoding (only) when embedded values
|
||||
/// should be serialized as Preserves `String`s holding their Rust [std::fmt::Debug]
|
||||
/// representation.
|
||||
pub struct DebugDomainEncode;
|
||||
|
||||
impl<D: Embeddable> DomainEncode<D> for DebugDomainEncode {
|
||||
|
@ -49,6 +58,8 @@ impl<D: Embeddable> DomainEncode<D> for DebugDomainEncode {
|
|||
}
|
||||
}
|
||||
|
||||
/// Convenience codec: use this as embedded codec for decoding (only) when embedded values are
|
||||
/// expected to conform to the syntax implicit in their [std::str::FromStr] implementation.
|
||||
pub struct FromStrDomainParse;
|
||||
|
||||
impl<Err: Into<io::Error>, D: Embeddable + std::str::FromStr<Err = Err>> DomainParse<D>
|
||||
|
@ -59,6 +70,8 @@ impl<Err: Into<io::Error>, D: Embeddable + std::str::FromStr<Err = Err>> DomainP
|
|||
}
|
||||
}
|
||||
|
||||
/// Use this as embedded codec when embedded data are already [IOValue]s that can be directly
|
||||
/// serialized and deserialized without further transformation.
|
||||
pub struct IOValueDomainCodec;
|
||||
|
||||
impl DomainDecode<IOValue> for IOValueDomainCodec {
|
||||
|
@ -77,6 +90,7 @@ impl DomainEncode<IOValue> for IOValueDomainCodec {
|
|||
}
|
||||
}
|
||||
|
||||
/// Use this as embedded codec to forbid use of embedded values; an [io::Error] is signalled.
|
||||
pub struct NoEmbeddedDomainCodec;
|
||||
|
||||
impl<D: Embeddable> DomainDecode<D> for NoEmbeddedDomainCodec {
|
||||
|
@ -101,9 +115,12 @@ impl<D: Embeddable> DomainEncode<D> for NoEmbeddedDomainCodec {
|
|||
}
|
||||
}
|
||||
|
||||
/// If some `C` implements [DomainDecode] but not [DomainParse], or vice versa, use `ViaCodec`
|
||||
/// to promote the one to the other. Construct instances with [ViaCodec::new].
|
||||
pub struct ViaCodec<C>(C);
|
||||
|
||||
impl<C> ViaCodec<C> {
|
||||
/// Constructs a `ViaCodec` wrapper around an underlying codec of type `C`.
|
||||
pub fn new(c: C) -> Self {
|
||||
ViaCodec(c)
|
||||
}
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
#![doc(hidden)]
|
||||
|
||||
//! A horrifying hack to Serde-serialize [IOValue] instances to Preserves *as themselves*.
|
||||
//!
|
||||
//! Frankly I think this portion of the codebase might not survive for long. I can't think of a
|
||||
//! better way of achieving this, but the drawbacks of having this functionality are *severe*.
|
||||
//!
|
||||
//! See <https://gitlab.com/preserves/preserves/-/issues/42>.
|
||||
|
||||
use super::repr::IOValue;
|
||||
|
||||
pub static MAGIC: &str = "$____Preserves_Serde_Magic";
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
//! Implements the Preserves
|
||||
//! [merge](https://preserves.dev/preserves.html#appendix-merging-values) of values.
|
||||
|
||||
use super::Map;
|
||||
use super::NestedValue;
|
||||
use super::Record;
|
||||
use super::Value;
|
||||
|
||||
/// Merge two sequences of values according to [the
|
||||
/// specification](https://preserves.dev/preserves.html#appendix-merging-values).
|
||||
pub fn merge_seqs<N: NestedValue>(mut a: Vec<N>, mut b: Vec<N>) -> Option<Vec<N>> {
|
||||
if a.len() > b.len() {
|
||||
std::mem::swap(&mut a, &mut b);
|
||||
|
@ -16,6 +21,8 @@ pub fn merge_seqs<N: NestedValue>(mut a: Vec<N>, mut b: Vec<N>) -> Option<Vec<N>
|
|||
Some(r)
|
||||
}
|
||||
|
||||
/// Merge two values according to [the
|
||||
/// specification](https://preserves.dev/preserves.html#appendix-merging-values).
|
||||
pub fn merge2<N: NestedValue>(v: N, w: N) -> Option<N> {
|
||||
let (mut v_anns, v_val) = v.pieces();
|
||||
let (w_anns, w_val) = w.pieces();
|
||||
|
@ -52,6 +59,8 @@ pub fn merge2<N: NestedValue>(v: N, w: N) -> Option<N> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Merge several values into a single value according to [the
|
||||
/// specification](https://preserves.dev/preserves.html#appendix-merging-values).
|
||||
pub fn merge<N: NestedValue, I: IntoIterator<Item = N>>(vs: I) -> Option<N> {
|
||||
let mut vs = vs.into_iter();
|
||||
let mut v = vs.next().expect("at least one value in merge()");
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
//! Implementation of Preserves itself, separate from Serde.
|
||||
//!
|
||||
//! Preserves terms are [represented][repr] in memory as instances of [NestedValue].
|
||||
//!
|
||||
//! - in-memory storage, representation, and APIs in module [repr]
|
||||
//! - machine-oriented binary syntax codec in module [packed]
|
||||
//! - human-oriented text syntax codec in module [text]
|
||||
|
||||
pub mod boundary;
|
||||
pub mod de;
|
||||
pub mod domain;
|
||||
|
@ -56,6 +64,7 @@ pub use text::TextReader;
|
|||
pub use text::TextWriter;
|
||||
pub use writer::Writer;
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn invert_map<A, B>(m: &Map<A, B>) -> Map<B, A>
|
||||
where
|
||||
A: Clone,
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
//! Definitions of the tags used in the binary encoding.
|
||||
|
||||
use std::convert::{From, TryFrom};
|
||||
use std::io;
|
||||
|
||||
/// Rust representation of tags used in the binary encoding.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum Tag {
|
||||
False,
|
||||
|
@ -19,8 +22,9 @@ pub enum Tag {
|
|||
Dictionary,
|
||||
}
|
||||
|
||||
/// Error value representing failure to decode a byte into a [Tag].
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct InvalidTag(u8);
|
||||
pub struct InvalidTag(pub u8);
|
||||
|
||||
impl From<InvalidTag> for io::Error {
|
||||
fn from(v: InvalidTag) -> Self {
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
//! Implements the Preserves [machine-oriented binary
|
||||
//! syntax](https://preserves.dev/preserves-binary.html).
|
||||
//!
|
||||
//! # Summary of Binary Syntax
|
||||
#![doc = include_str!("../../../doc/cheatsheet-binary-plaintext.md")]
|
||||
|
||||
pub mod constants;
|
||||
pub mod reader;
|
||||
pub mod writer;
|
||||
|
@ -9,6 +15,8 @@ use std::io;
|
|||
|
||||
use super::{BinarySource, DomainDecode, IOValue, IOValueDomainCodec, NestedValue, Reader};
|
||||
|
||||
/// Reads a value from the given byte slice `bs` using the binary encoding, discarding
|
||||
/// annotations.
|
||||
pub fn from_bytes<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
|
||||
bs: &[u8],
|
||||
decode_embedded: Dec,
|
||||
|
@ -18,10 +26,13 @@ pub fn from_bytes<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
|
|||
.demand_next(false)
|
||||
}
|
||||
|
||||
/// Reads an [IOValue] from the given byte vector `bs` using the binary encoding, discarding
|
||||
/// annotations.
|
||||
pub fn iovalue_from_bytes(bs: &[u8]) -> io::Result<IOValue> {
|
||||
from_bytes(bs, IOValueDomainCodec)
|
||||
}
|
||||
|
||||
/// As [from_bytes], but includes annotations.
|
||||
pub fn annotated_from_bytes<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
|
||||
bs: &[u8],
|
||||
decode_embedded: Dec,
|
||||
|
@ -31,6 +42,7 @@ pub fn annotated_from_bytes<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
|
|||
.demand_next(true)
|
||||
}
|
||||
|
||||
/// As [iovalue_from_bytes], but includes annotations.
|
||||
pub fn annotated_iovalue_from_bytes(bs: &[u8]) -> io::Result<IOValue> {
|
||||
annotated_from_bytes(bs, IOValueDomainCodec)
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
//! Implementation of [Reader][crate::value::reader::Reader] for the binary encoding.
|
||||
|
||||
use crate::error::{self, io_syntax_error, is_eof_io_error, ExpectedKind, Received};
|
||||
|
||||
use num::bigint::BigInt;
|
||||
|
@ -18,6 +20,7 @@ use super::super::{
|
|||
};
|
||||
use super::constants::Tag;
|
||||
|
||||
/// The binary encoding Preserves reader.
|
||||
pub struct PackedReader<
|
||||
'de,
|
||||
'src,
|
||||
|
@ -25,7 +28,9 @@ pub struct PackedReader<
|
|||
Dec: DomainDecode<N::Embedded>,
|
||||
S: BinarySource<'de>,
|
||||
> {
|
||||
/// Underlying source of bytes.
|
||||
pub source: &'src mut S,
|
||||
/// Decoder for producing Rust values embedded in the binary data.
|
||||
pub decode_embedded: Dec,
|
||||
phantom: PhantomData<&'de N>,
|
||||
}
|
||||
|
@ -67,6 +72,7 @@ fn out_of_range<I: Into<BigInt>>(i: I) -> error::Error {
|
|||
impl<'de, 'src, N: NestedValue, Dec: DomainDecode<N::Embedded>, S: BinarySource<'de>>
|
||||
PackedReader<'de, 'src, N, Dec, S>
|
||||
{
|
||||
/// Construct a new reader from a byte source and embedded-value decoder.
|
||||
#[inline(always)]
|
||||
pub fn new(source: &'src mut S, decode_embedded: Dec) -> Self {
|
||||
PackedReader {
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
//! Implementation of [Writer][crate::value::writer::Writer] for the binary encoding.
|
||||
|
||||
use super::super::boundary as B;
|
||||
use super::super::suspendable::Suspendable;
|
||||
use super::super::DomainEncode;
|
||||
|
@ -13,9 +15,11 @@ use std::ops::DerefMut;
|
|||
|
||||
use super::super::writer::{varint, CompoundWriter, Writer};
|
||||
|
||||
/// The binary encoding Preserves writer.
|
||||
pub struct PackedWriter<W: io::Write>(Suspendable<W>);
|
||||
|
||||
impl PackedWriter<&mut Vec<u8>> {
|
||||
/// Encodes `v` to a byte vector.
|
||||
#[inline(always)]
|
||||
pub fn encode<N: NestedValue, Enc: DomainEncode<N::Embedded>>(
|
||||
enc: &mut Enc,
|
||||
|
@ -26,6 +30,7 @@ impl PackedWriter<&mut Vec<u8>> {
|
|||
Ok(buf)
|
||||
}
|
||||
|
||||
/// Encodes `v` to a byte vector.
|
||||
#[inline(always)]
|
||||
pub fn encode_iovalue(v: &IOValue) -> io::Result<Vec<u8>> {
|
||||
Self::encode(&mut IOValueDomainCodec, v)
|
||||
|
@ -33,26 +38,31 @@ impl PackedWriter<&mut Vec<u8>> {
|
|||
}
|
||||
|
||||
impl<W: io::Write> PackedWriter<W> {
|
||||
/// Construct a writer from the given byte sink `write`.
|
||||
#[inline(always)]
|
||||
pub fn new(write: W) -> Self {
|
||||
PackedWriter(Suspendable::new(write))
|
||||
}
|
||||
|
||||
/// Retrieve a mutable reference to the underlying byte sink.
|
||||
#[inline(always)]
|
||||
pub fn w(&mut self) -> &mut W {
|
||||
self.0.deref_mut()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[inline(always)]
|
||||
pub fn write_byte(&mut self, b: u8) -> io::Result<()> {
|
||||
self.w().write_all(&[b])
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[inline(always)]
|
||||
pub fn write_integer(&mut self, bs: &[u8]) -> io::Result<()> {
|
||||
self.write_atom(Tag::SignedInteger, bs)
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[inline(always)]
|
||||
pub fn write_atom(&mut self, tag: Tag, bs: &[u8]) -> io::Result<()> {
|
||||
self.write_byte(tag.into())?;
|
||||
|
@ -60,17 +70,20 @@ impl<W: io::Write> PackedWriter<W> {
|
|||
self.w().write_all(bs)
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[inline(always)]
|
||||
pub fn suspend(&mut self) -> Self {
|
||||
PackedWriter(self.0.suspend())
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[inline(always)]
|
||||
pub fn resume(&mut self, other: Self) {
|
||||
self.0.resume(other.0)
|
||||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub struct BinaryOrderWriter(Vec<Vec<u8>>);
|
||||
|
||||
impl BinaryOrderWriter {
|
||||
|
@ -119,6 +132,7 @@ impl BinaryOrderWriter {
|
|||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub trait WriteWriter: Writer {
|
||||
fn write_raw_bytes(&mut self, v: &[u8]) -> io::Result<()>;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,6 +1,10 @@
|
|||
//! Support for Serde serialization of Rust data types into Preserves *values* (not syntax).
|
||||
|
||||
use crate::value::{repr::Record, IOValue, Map, Value};
|
||||
use serde::Serialize;
|
||||
|
||||
/// Empty/placeholder type for representing serialization errors: serialization to values
|
||||
/// cannot fail.
|
||||
#[derive(Debug)]
|
||||
pub enum Error {}
|
||||
impl serde::ser::Error for Error {
|
||||
|
@ -20,17 +24,22 @@ impl std::fmt::Display for Error {
|
|||
|
||||
type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
/// Serde serializer for converting Rust data to in-memory Preserves values, which can then be
|
||||
/// serialized using text or binary syntax, analyzed further, etc.
|
||||
pub struct Serializer;
|
||||
|
||||
#[doc(hidden)]
|
||||
pub struct SerializeDictionary {
|
||||
next_key: Option<IOValue>,
|
||||
items: Map<IOValue, IOValue>,
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub struct SerializeRecord {
|
||||
r: Record<IOValue>,
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub struct SerializeSequence {
|
||||
vec: Vec<IOValue>,
|
||||
}
|
||||
|
@ -359,6 +368,7 @@ impl serde::ser::SerializeSeq for SerializeSequence {
|
|||
}
|
||||
}
|
||||
|
||||
/// Convenience function for directly converting a Serde-serializable `T` to an [IOValue].
|
||||
pub fn to_value<T>(value: T) -> IOValue
|
||||
where
|
||||
T: Serialize,
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
//! Representation of Preserves `SignedInteger`s as [i128]/[u128] (if they fit) or [BigInt] (if
|
||||
//! they don't).
|
||||
|
||||
use num::bigint::BigInt;
|
||||
use num::traits::cast::ToPrimitive;
|
||||
use num::traits::sign::Signed;
|
||||
|
@ -7,8 +10,10 @@ use std::convert::TryFrom;
|
|||
use std::convert::TryInto;
|
||||
use std::fmt;
|
||||
|
||||
// Invariant: if I128 can be used, it will be; otherwise, if U128 can
|
||||
// be used, it will be; otherwise, Big will be used.
|
||||
/// Internal representation of Preserves `SignedInteger`s.
|
||||
///
|
||||
/// Invariant: if I128 can be used, it will be; otherwise, if U128 can be used, it will be;
|
||||
/// otherwise, Big will be used.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum SignedIntegerRepr {
|
||||
I128(i128),
|
||||
|
@ -16,6 +21,7 @@ pub enum SignedIntegerRepr {
|
|||
Big(Box<BigInt>),
|
||||
}
|
||||
|
||||
/// Main representation of Preserves `SignedInteger`s.
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct SignedInteger(SignedIntegerRepr);
|
||||
|
||||
|
@ -87,18 +93,25 @@ impl PartialOrd for SignedInteger {
|
|||
}
|
||||
|
||||
impl SignedInteger {
|
||||
/// Extract the internal representation.
|
||||
pub fn repr(&self) -> &SignedIntegerRepr {
|
||||
&self.0
|
||||
}
|
||||
|
||||
/// Does this `SignedInteger` fit in an [i128]? (See also [the TryFrom instance for
|
||||
/// i128](#impl-TryFrom<%26SignedInteger>-for-i128).)
|
||||
pub fn is_i(&self) -> bool {
|
||||
matches!(self.0, SignedIntegerRepr::I128(_))
|
||||
}
|
||||
|
||||
/// Does this `SignedInteger` fit in a [u128], but not an [i128]? (See also [the TryFrom
|
||||
/// instance for u128](#impl-TryFrom<%26SignedInteger>-for-u128).)
|
||||
pub fn is_u(&self) -> bool {
|
||||
matches!(self.0, SignedIntegerRepr::U128(_))
|
||||
}
|
||||
|
||||
/// Does this `SignedInteger` fit neither in a [u128] nor an [i128]? (See also [the From
|
||||
/// instance for BigInt](#impl-From<%26'a+SignedInteger>-for-BigInt).)
|
||||
pub fn is_big(&self) -> bool {
|
||||
matches!(self.0, SignedIntegerRepr::Big(_))
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
#![doc(hidden)]
|
||||
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
pub enum Suspendable<T> {
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
//! Implements the Preserves [human-oriented text
|
||||
//! syntax](https://preserves.dev/preserves-text.html).
|
||||
//!
|
||||
//! # Summary of Text Syntax
|
||||
#![doc = include_str!("../../../doc/cheatsheet-text-plaintext.md")]
|
||||
|
||||
pub mod reader;
|
||||
pub mod writer;
|
||||
|
||||
|
@ -10,6 +16,7 @@ use std::io;
|
|||
|
||||
use super::{DomainParse, IOValue, IOValueDomainCodec, NestedValue, Reader, ViaCodec};
|
||||
|
||||
/// Reads a value from the given string using the text syntax, discarding annotations.
|
||||
pub fn from_str<N: NestedValue, Dec: DomainParse<N::Embedded>>(
|
||||
s: &str,
|
||||
decode_embedded: Dec,
|
||||
|
@ -17,10 +24,12 @@ pub fn from_str<N: NestedValue, Dec: DomainParse<N::Embedded>>(
|
|||
TextReader::new(&mut BytesBinarySource::new(s.as_bytes()), decode_embedded).demand_next(false)
|
||||
}
|
||||
|
||||
/// Reads an [IOValue] from the given string using the text syntax, discarding annotations.
|
||||
pub fn iovalue_from_str(s: &str) -> io::Result<IOValue> {
|
||||
from_str(s, ViaCodec::new(IOValueDomainCodec))
|
||||
}
|
||||
|
||||
/// As [from_str], but includes annotations.
|
||||
pub fn annotated_from_str<N: NestedValue, Dec: DomainParse<N::Embedded>>(
|
||||
s: &str,
|
||||
decode_embedded: Dec,
|
||||
|
@ -28,6 +37,7 @@ pub fn annotated_from_str<N: NestedValue, Dec: DomainParse<N::Embedded>>(
|
|||
TextReader::new(&mut BytesBinarySource::new(s.as_bytes()), decode_embedded).demand_next(true)
|
||||
}
|
||||
|
||||
/// As [iovalue_from_str], but includes annotations.
|
||||
pub fn annotated_iovalue_from_str(s: &str) -> io::Result<IOValue> {
|
||||
annotated_from_str(s, ViaCodec::new(IOValueDomainCodec))
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
//! Implementation of [Reader][crate::value::reader::Reader] for the text syntax.
|
||||
|
||||
use crate::error::io_syntax_error;
|
||||
use crate::error::is_eof_io_error;
|
||||
use crate::error::syntax_error;
|
||||
|
@ -35,8 +37,11 @@ use std::io;
|
|||
use std::iter::FromIterator;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
/// The text syntax Preserves reader.
|
||||
pub struct TextReader<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> {
|
||||
/// Underlying source of (utf8) bytes.
|
||||
pub source: &'src mut S,
|
||||
/// Decoder for producing Rust values embedded in the text.
|
||||
pub dec: Dec,
|
||||
phantom: PhantomData<&'de D>,
|
||||
}
|
||||
|
@ -56,6 +61,7 @@ fn append_codepoint(bs: &mut Vec<u8>, n: u32) -> io::Result<()> {
|
|||
impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
|
||||
TextReader<'de, 'src, D, Dec, S>
|
||||
{
|
||||
/// Construct a new reader from a byte (utf8) source and embedded-value decoder.
|
||||
pub fn new(source: &'src mut S, dec: Dec) -> Self {
|
||||
TextReader {
|
||||
source,
|
||||
|
@ -134,6 +140,7 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
|
|||
}
|
||||
}
|
||||
|
||||
/// Retrieve the next [IOValue] in the input stream.
|
||||
pub fn next_iovalue(&mut self, read_annotations: bool) -> io::Result<IOValue> {
|
||||
let mut r = TextReader::new(self.source, ViaCodec::new(IOValueDomainCodec));
|
||||
let v = r.demand_next(read_annotations)?;
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
//! Implementation of [Writer][crate::value::writer::Writer] for the text syntax.
|
||||
|
||||
use crate::hex::HexFormatter;
|
||||
use crate::value::suspendable::Suspendable;
|
||||
use crate::value::writer::CompoundWriter;
|
||||
|
@ -15,17 +17,26 @@ use std::io;
|
|||
|
||||
use super::super::boundary as B;
|
||||
|
||||
/// Specifies a comma style for printing using [TextWriter].
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum CommaStyle {
|
||||
/// No commas will be printed. (Preserves text syntax treats commas as whitespace (!).)
|
||||
None,
|
||||
/// Commas will be used to separate subterms.
|
||||
Separating,
|
||||
/// Commas will be used to terminate subterms.
|
||||
Terminating,
|
||||
}
|
||||
|
||||
/// The (optionally pretty-printing) text syntax Preserves writer.
|
||||
pub struct TextWriter<W: io::Write> {
|
||||
w: Suspendable<W>,
|
||||
/// Selects a comma style to use when printing.
|
||||
pub comma_style: CommaStyle,
|
||||
/// Specifies indentation to use when pretty-printing; 0 disables pretty-printing.
|
||||
pub indentation: usize,
|
||||
/// An aid to use of printed terms in shell scripts: set `true` to escape spaces embedded
|
||||
/// in strings and symbols.
|
||||
pub escape_spaces: bool,
|
||||
indent: String,
|
||||
}
|
||||
|
@ -37,6 +48,8 @@ impl std::default::Default for CommaStyle {
|
|||
}
|
||||
|
||||
impl TextWriter<&mut Vec<u8>> {
|
||||
/// Writes `v` to `f` using text syntax. Selects indentation mode based on
|
||||
/// [`f.alternate()`][std::fmt::Formatter::alternate].
|
||||
pub fn fmt_value<N: NestedValue, Enc: DomainEncode<N::Embedded>>(
|
||||
f: &mut std::fmt::Formatter<'_>,
|
||||
enc: &mut Enc,
|
||||
|
@ -52,6 +65,7 @@ impl TextWriter<&mut Vec<u8>> {
|
|||
.map_err(|_| io::Error::new(io::ErrorKind::Other, "could not append to Formatter"))
|
||||
}
|
||||
|
||||
/// Encode `v` to a [String].
|
||||
pub fn encode<N: NestedValue, Enc: DomainEncode<N::Embedded>>(
|
||||
enc: &mut Enc,
|
||||
v: &N,
|
||||
|
@ -61,12 +75,14 @@ impl TextWriter<&mut Vec<u8>> {
|
|||
Ok(String::from_utf8(buf).expect("valid UTF-8 from TextWriter"))
|
||||
}
|
||||
|
||||
/// Encode `v` to a [String].
|
||||
pub fn encode_iovalue(v: &IOValue) -> io::Result<String> {
|
||||
Self::encode(&mut IOValueDomainCodec, v)
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: io::Write> TextWriter<W> {
|
||||
/// Construct a writer from the given byte sink `w`.
|
||||
pub fn new(w: W) -> Self {
|
||||
TextWriter {
|
||||
w: Suspendable::new(w),
|
||||
|
@ -77,16 +93,19 @@ impl<W: io::Write> TextWriter<W> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Update selected comma-printing style.
|
||||
pub fn set_comma_style(mut self, v: CommaStyle) -> Self {
|
||||
self.comma_style = v;
|
||||
self
|
||||
}
|
||||
|
||||
/// Update selected space-escaping style.
|
||||
pub fn set_escape_spaces(mut self, v: bool) -> Self {
|
||||
self.escape_spaces = v;
|
||||
self
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn suspend(&mut self) -> Self {
|
||||
TextWriter {
|
||||
w: self.w.suspend(),
|
||||
|
@ -95,10 +114,12 @@ impl<W: io::Write> TextWriter<W> {
|
|||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn resume(&mut self, other: Self) {
|
||||
self.w.resume(other.w)
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn write_stringlike_char_fallback<F>(&mut self, c: char, f: F) -> io::Result<()>
|
||||
where
|
||||
F: FnOnce(&mut W, char) -> io::Result<()>,
|
||||
|
@ -114,22 +135,26 @@ impl<W: io::Write> TextWriter<W> {
|
|||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn write_stringlike_char(&mut self, c: char) -> io::Result<()> {
|
||||
self.write_stringlike_char_fallback(c, |w, c| write!(w, "{}", c))
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn add_indent(&mut self) {
|
||||
for _ in 0..self.indentation {
|
||||
self.indent.push(' ')
|
||||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn del_indent(&mut self) {
|
||||
if self.indentation > 0 {
|
||||
self.indent.truncate(self.indent.len() - self.indentation)
|
||||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn indent(&mut self) -> io::Result<()> {
|
||||
if self.indentation > 0 {
|
||||
write!(self.w, "{}", &self.indent)
|
||||
|
@ -138,6 +163,7 @@ impl<W: io::Write> TextWriter<W> {
|
|||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn indent_sp(&mut self) -> io::Result<()> {
|
||||
if self.indentation > 0 {
|
||||
write!(self.w, "{}", &self.indent)
|
||||
|
@ -146,6 +172,7 @@ impl<W: io::Write> TextWriter<W> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Borrow the underlying byte sink.
|
||||
pub fn borrow_write(&mut self) -> &mut W {
|
||||
&mut self.w
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue