diff --git a/implementations/rust/preserves-schema/README.md b/implementations/rust/preserves-schema/README.md index 75f73a5..5392f8c 100644 --- a/implementations/rust/preserves-schema/README.md +++ b/implementations/rust/preserves-schema/README.md @@ -1,4 +1,6 @@ -# Preserves Schema for Rust +```shell +cargo add preserves preserves-schema +``` -This is an implementation of [Preserves Schema](https://preserves.dev/preserves-schema.html) -for Rust. +This crate ([`preserves-schema` on crates.io](https://crates.io/crates/preserves-schema)) is an +implementation of [Preserves Schema](https://preserves.dev/preserves-schema.html) for Rust. diff --git a/implementations/rust/preserves-schema/doc/example.md b/implementations/rust/preserves-schema/doc/example.md new file mode 100644 index 0000000..8520813 --- /dev/null +++ b/implementations/rust/preserves-schema/doc/example.md @@ -0,0 +1,112 @@ +# Example + +[preserves-schemac]: https://preserves.dev/doc/preserves-schemac.html +[preserves-schema-rs]: https://preserves.dev/doc/preserves-schema-rs.html + +Preserves schemas are written in a syntax that (ab)uses [Preserves text +syntax][preserves::value::text] as a kind of S-expression. Schema source code looks like this: + +```preserves-schema +version 1 . +Present = . +Says = . +UserStatus = . +Status = =here / . +TimeStamp = string . +``` + +Conventionally, schema source code is stored in `*.prs` files. In this example, the source code +above is placed in `simpleChatProtocol.prs`. + +The Rust code generator for schemas requires not source code, but instances of the [Preserves +metaschema](https://preserves.dev/preserves-schema.html#appendix-metaschema). To compile schema +source code to metaschema instances, use [preserves-schemac][]: + +```shell +yarn global add @preserves/schema +preserves-schemac .:simpleChatProtocol.prs > simpleChatProtocol.prb +``` + +Binary-syntax metaschema instances are conventionally stored in `*.prb` files. 
If you have a +whole directory tree of `*.prs` files, you can supply just "`.`" without the "`:`"-prefixed +fileglob part.[^converting-metaschema-to-text] See the [preserves-schemac documentation][preserves-schemac]. + +[^converting-metaschema-to-text]: + Converting the `simpleChatProtocol.prb` file to Preserves text syntax lets us read the + metaschema instance corresponding to the source code: + ```shell + cat simpleChatProtocol.prb | preserves-tool convert + ``` + The result: + ```preserves + > + ]>> + Says: > + > + ]>> + Status: + ] + [ + "away" + > + ]>> + ] + ]> + TimeStamp: + UserStatus: > + > + ]>> + } + embeddedType: #f + version: 1 + }> + }> + ``` + +#### Generating Rust code from a schema + +Generate Rust definitions corresponding to a metaschema instance with [preserves-schema-rs][]. +The best way to use it is to integrate it into your `build.rs` (see [the +docs][preserves-schema-rs]), but you can also use it as a standalone command-line tool. + +The following command generates a directory `./rs/chat` containing rust sources for a module +that expects to be called `chat` in Rust code: + +```shell +preserves-schema-rs --output-dir rs/chat --prefix chat simpleChatProtocol.prb +``` + +Representative excerpts from one of the generated files, `./rs/chat/simple_chat_protocol.rs`: + +```rust,noplayground +pub struct Present { + pub username: std::string::String +} +pub struct Says { + pub who: std::string::String, + pub what: std::string::String +} +pub struct UserStatus { + pub username: std::string::String, + pub status: Status +} +pub enum Status { + Here, + Away { + since: std::boxed::Box + } +} +pub struct TimeStamp(pub std::string::String); +``` diff --git a/implementations/rust/preserves-schema/src/bin/preserves-schema-rs.rs b/implementations/rust/preserves-schema/src/bin/preserves-schema-rs.rs index d269723..6987812 100644 --- a/implementations/rust/preserves-schema/src/bin/preserves-schema-rs.rs +++ 
b/implementations/rust/preserves-schema/src/bin/preserves-schema-rs.rs @@ -1,3 +1,6 @@ +//! Command-line Rust code generator for Preserves Schema. See the documentation at +//! <https://preserves.dev/doc/preserves-schema-rs.html>. + use std::io::Error; use std::io::ErrorKind; use std::path::PathBuf; diff --git a/implementations/rust/preserves-schema/src/compiler/mod.rs b/implementations/rust/preserves-schema/src/compiler/mod.rs index 67c965a..af06535 100644 --- a/implementations/rust/preserves-schema/src/compiler/mod.rs +++ b/implementations/rust/preserves-schema/src/compiler/mod.rs @@ -1,3 +1,5 @@ +//! Implementation of the Schema-to-Rust compiler. + pub mod context; pub mod cycles; pub mod names; diff --git a/implementations/rust/preserves-schema/src/lib.rs b/implementations/rust/preserves-schema/src/lib.rs index 4f045e9..289c037 100644 --- a/implementations/rust/preserves-schema/src/lib.rs +++ b/implementations/rust/preserves-schema/src/lib.rs @@ -1,4 +1,12 @@ +#![doc = concat!( + include_str!("../README.md"), + "# What is Preserves Schema?\n\n", + include_str!("../doc/what-is-preserves-schema.md"), + include_str!("../doc/example.md"), +)] + pub mod compiler; +/// Auto-generated Preserves Schema Metaschema types, parsers, and unparsers. pub mod gen; pub mod support; pub mod syntax; diff --git a/implementations/rust/preserves-schema/src/support/interpret.rs b/implementations/rust/preserves-schema/src/support/interpret.rs index ccc3f97..a746d5a 100644 --- a/implementations/rust/preserves-schema/src/support/interpret.rs +++ b/implementations/rust/preserves-schema/src/support/interpret.rs @@ -1,3 +1,6 @@ +//! Interpreter for instances of Preserves Schema Metaschema, for schema-directed dynamic +//! parsing and unparsing of terms. + use crate::gen::schema::*; use preserves::value::merge::merge2; @@ -5,8 +8,10 @@ use preserves::value::Map; use preserves::value::NestedValue; use preserves::value::Value; +/// Represents an environment mapping schema module names to [Schema] instances. 
pub type Env = Map, Schema>; +/// Context for a given interpretation of a [Schema]. #[derive(Debug)] pub struct Context<'a, V: NestedValue> { pub env: &'a Env, @@ -20,6 +25,7 @@ enum DynField { } impl<'a, V: NestedValue> Context<'a, V> { + /// Construct a new [Context] with the given [Env]. pub fn new(env: &'a Env) -> Self { Context { env, @@ -27,6 +33,8 @@ impl<'a, V: NestedValue> Context<'a, V> { } } + /// Parse `v` using the rule named `name` from the module at path `module` in `self.env`. + /// Yields `Some(...)` if the parse succeeds, and `None` otherwise. pub fn dynamic_parse(&mut self, module: &Vec, name: &str, v: &V) -> Option { let old_module = (module.len() > 0).then(|| std::mem::replace(&mut self.module, module.clone())); @@ -39,6 +47,7 @@ impl<'a, V: NestedValue> Context<'a, V> { result } + #[doc(hidden)] pub fn dynamic_unparse(&mut self, _module: &Vec, _name: &str, _w: &V) -> Option { panic!("Not yet implemented"); } diff --git a/implementations/rust/preserves-schema/src/support/mod.rs b/implementations/rust/preserves-schema/src/support/mod.rs index e53e0ac..fa457bc 100644 --- a/implementations/rust/preserves-schema/src/support/mod.rs +++ b/implementations/rust/preserves-schema/src/support/mod.rs @@ -1,3 +1,7 @@ +//! The runtime support library for compiled Schemas. + +#[doc(hidden)] +/// Reexport lazy_static for generated code to use. pub use lazy_static::lazy_static; pub use preserves; @@ -21,10 +25,16 @@ use std::sync::Arc; use thiserror::Error; +/// Every [language][crate::define_language] implements [NestedValueCodec] as a marker trait. pub trait NestedValueCodec {} // marker trait impl NestedValueCodec for () {} +/// Implementors of [Parse] can produce instances of themselves from a [Value], given a +/// supporting [language][crate::define_language]. All Schema-compiler-produced types implement +/// [Parse]. 
pub trait Parse: Sized { + /// Decode the given `value` (using auxiliary structure from the `language` instance) to + /// produce an instance of [Self]. fn parse(language: L, value: &Value) -> Result; } @@ -34,7 +44,10 @@ impl<'a, T: NestedValueCodec, Value: NestedValue> Parse<&'a T, Value> for Value } } +/// Implementors of [Unparse] can convert themselves into a [Value], given a supporting +/// [language][crate::define_language]. All Schema-compiler-produced types implement [Unparse]. pub trait Unparse { + /// Encode `self` into a [Value] (using auxiliary structure from the `language` instance). fn unparse(&self, language: L) -> Value; } @@ -44,8 +57,13 @@ impl<'a, T: NestedValueCodec, Value: NestedValue> Unparse<&'a T, Value> for Valu } } +/// Every [language][crate::define_language] implements [Codec], which supplies convenient +/// shorthand for invoking [Parse::parse] and [Unparse::unparse]. pub trait Codec { + /// Delegates to [`T::parse`][Parse::parse], using `self` as language and the given `value` + /// as input. fn parse<'a, T: Parse<&'a Self, N>>(&'a self, value: &N) -> Result; + /// Delegates to [`value.unparse`][Unparse::unparse], using `self` as language. fn unparse<'a, T: Unparse<&'a Self, N>>(&'a self, value: &T) -> N; } @@ -59,6 +77,11 @@ impl Codec for L { } } +/// Implementors of [Deserialize] can produce instances of themselves from a [Value]. All +/// Schema-compiler-produced types implement [Deserialize]. +/// +/// The difference between [Deserialize] and [Parse] is that implementors of [Deserialize] know +/// which [language][crate::define_language] to use. pub trait Deserialize where Self: Sized, @@ -66,10 +89,14 @@ where fn deserialize<'de, R: Reader<'de, N>>(r: &mut R) -> Result; } +/// Extracts a simple literal term from a byte array using +/// [PackedReader][preserves::value::packed::PackedReader]. No embedded values are permitted. 
pub fn decode_lit(bs: &[u8]) -> io::Result { preserves::value::packed::from_bytes(bs, NoEmbeddedDomainCodec) } +/// When `D` can parse itself from an [IOValue], this function parses all embedded [IOValue]s +/// into `D`s. pub fn decode_embedded(v: &IOValue) -> Result>, ParseError> where for<'a> D: TryFrom<&'a IOValue, Error = ParseError>, @@ -77,6 +104,8 @@ where v.copy_via(&mut |d| Ok(Value::Embedded(Arc::new(D::try_from(d)?)))) } +/// When `D` can unparse itself into an [IOValue], this function converts all embedded `D`s +/// into [IOValue]s. pub fn encode_embedded(v: &ArcValue>) -> IOValue where for<'a> IOValue: From<&'a D>, @@ -85,10 +114,13 @@ where .unwrap() } +/// Error value yielded when parsing of an [IOValue] into a Schema-compiler-produced type fails. #[derive(Error, Debug)] pub enum ParseError { + /// Signalled when the input does not match the Preserves Schema associated with the type. #[error("Input not conformant with Schema: {0}")] ConformanceError(&'static str), + /// Signalled when the underlying Preserves library signals an error. #[error(transparent)] Preserves(preserves::error::Error), } @@ -120,10 +152,12 @@ impl From for io::Error { } impl ParseError { + /// Constructs a [ParseError::ConformanceError]. pub fn conformance_error(context: &'static str) -> Self { ParseError::ConformanceError(context) } + /// True iff `self` is a [ParseError::ConformanceError]. pub fn is_conformance_error(&self) -> bool { return if let ParseError::ConformanceError(_) = self { true diff --git a/implementations/rust/preserves-schema/src/syntax/block.rs b/implementations/rust/preserves-schema/src/syntax/block.rs index 3d4b47a..7bf1abc 100644 --- a/implementations/rust/preserves-schema/src/syntax/block.rs +++ b/implementations/rust/preserves-schema/src/syntax/block.rs @@ -1,12 +1,21 @@ +//! A library for emitting pretty-formatted structured source code. +//! +//! The main entry points are [Formatter::to_string] and [Formatter::write], plus the utilities +//! 
in the [macros] submodule. + use std::fmt::Write; use std::str; +/// Default width for pretty-formatting, in columns. pub const DEFAULT_WIDTH: usize = 80; +/// All pretty-formattable items must implement this trait. pub trait Emittable: std::fmt::Debug { + /// Serializes `self`, as pretty-printed code, on `f`. fn write_on(&self, f: &mut Formatter); } +/// Tailoring of behaviour for [Vertical] groupings. #[derive(Clone, PartialEq, Eq)] pub enum VerticalMode { Variable, @@ -14,13 +23,16 @@ pub enum VerticalMode { ExtraNewline, } +/// Vertical formatting for [Emittable]s. pub trait Vertical { fn set_vertical_mode(&mut self, mode: VerticalMode); fn write_vertically_on(&self, f: &mut Formatter); } +/// Polymorphic [Emittable], used consistently in the API. pub type Item = std::rc::Rc; +/// A possibly-vertical sequence of items with item-separating and -terminating text. #[derive(Clone)] pub struct Sequence { pub items: Vec, @@ -29,6 +41,8 @@ pub struct Sequence { pub terminator: &'static str, } +/// A sequence of items, indented when formatted vertically, surrounded by opening and closing +/// text. #[derive(Clone)] pub struct Grouping { pub sequence: Sequence, @@ -36,14 +50,18 @@ pub struct Grouping { pub close: &'static str, } +/// State needed for pretty-formatting of [Emittable]s. pub struct Formatter { + /// Number of available columns. Used to decide between horizontal and vertical layouts. pub width: usize, indent_delta: String, current_indent: String, + /// Mutable output buffer. Accumulates emitted text during writing. pub buffer: String, } impl Formatter { + /// Construct a Formatter using [DEFAULT_WIDTH] and a four-space indent. pub fn new() -> Self { Formatter { width: DEFAULT_WIDTH, @@ -53,6 +71,7 @@ impl Formatter { } } + /// Construct a Formatter just like `self` but with an empty `buffer`. pub fn copy_empty(&self) -> Formatter { Formatter { width: self.width, @@ -62,28 +81,37 @@ impl Formatter { } } + /// Yields the indent size. 
pub fn indent_size(self) -> usize { self.indent_delta.len() } + /// Updates the indent size. pub fn set_indent_size(&mut self, n: usize) { self.indent_delta = str::repeat(" ", n) } + /// Accumulates a text serialization of `e` in `buffer`. pub fn write(&mut self, e: E) { e.write_on(self) } + /// Emits a newline followed by indentation into `buffer`. pub fn newline(&mut self) { self.buffer.push_str(&self.current_indent) } + /// Creates a default Formatter, uses it to [write][Formatter::write] `e`, and yields the + /// contents of its `buffer`. pub fn to_string(e: E) -> String { let mut f = Formatter::new(); f.write(e); f.buffer } + /// Calls `f` in a context where the indentation has been increased by + /// [Formatter::indent_size] spaces. Restores the indentation level after `f` returns. + /// Yields the result of the call to `f`. pub fn with_indent R>(&mut self, f: F) -> R { let old_indent = self.current_indent.clone(); self.current_indent += &self.indent_delta; @@ -93,6 +121,12 @@ impl Formatter { } } +impl Default for Formatter { + fn default() -> Self { + Self::new() + } +} + impl Default for VerticalMode { fn default() -> Self { Self::Variable @@ -238,6 +272,12 @@ impl std::fmt::Debug for Grouping { //--------------------------------------------------------------------------- +/// Escapes `s` by substituting `\\` for `\`, `\"` for `"`, and `\u{...}` for characters +/// outside the range 32..126, inclusive. +/// +/// This process is intended to generate literals compatible with `rustc`; see [the language +/// reference on "Character and string +/// literals"](https://doc.rust-lang.org/reference/tokens.html#character-and-string-literals). 
pub fn escape_string(s: &str) -> String { let mut buf = String::new(); buf.push('"'); @@ -253,6 +293,13 @@ pub fn escape_string(s: &str) -> String { buf } +/// Escapes `bs` into a Rust byte string literal, treating each byte as its ASCII equivalent +/// except producing `\\` for 0x5c, `\"` for 0x22, and `\x..` for bytes outside the range +/// 0x20..0x7e, inclusive. +/// +/// This process is intended to generate literals compatible with `rustc`; see [the language +/// reference on "Byte string +/// literals"](https://doc.rust-lang.org/reference/tokens.html#byte-string-literals). pub fn escape_bytes(bs: &[u8]) -> String { let mut buf = String::new(); buf.push_str("b\""); @@ -262,7 +309,7 @@ pub fn escape_bytes(bs: &[u8]) -> String { '\\' => buf.push_str("\\\\"), '"' => buf.push_str("\\\""), _ if c >= ' ' && c <= '~' => buf.push(c), - _ => write!(&mut buf, "\\x{{{:02x}}}", b).expect("no IO errors building a string"), + _ => write!(&mut buf, "\\x{:02x}", b).expect("no IO errors building a string"), } } buf.push('"'); @@ -271,6 +318,7 @@ pub fn escape_bytes(bs: &[u8]) -> String { //--------------------------------------------------------------------------- +/// Utilities for constructing many useful kinds of [Sequence] and [Grouping]. pub mod constructors { use super::Emittable; use super::Grouping; @@ -279,10 +327,12 @@ pub mod constructors { use super::Vertical; use super::VerticalMode; + /// Produces a polymorphic, reference-counted [Item] from some generic [Emittable]. 
pub fn item(i: E) -> Item { std::rc::Rc::new(i) } + /// *a*`::`*b*`::`*...*`::`*z* pub fn name(pieces: Vec) -> Sequence { Sequence { items: pieces, @@ -292,6 +342,7 @@ pub mod constructors { } } + /// *ab...z* (directly adjacent, no separators or terminators) pub fn seq(items: Vec) -> Sequence { Sequence { items: items, @@ -301,6 +352,7 @@ pub mod constructors { } } + /// *a*`, `*b*`, `*...*`, `*z* pub fn commas(items: Vec) -> Sequence { Sequence { items: items, @@ -310,6 +362,7 @@ pub mod constructors { } } + /// `(`*a*`, `*b*`, `*...*`, `*z*`)` pub fn parens(items: Vec) -> Grouping { Grouping { sequence: commas(items), @@ -318,6 +371,7 @@ pub mod constructors { } } + /// `[`*a*`, `*b*`, `*...*`, `*z*`]` pub fn brackets(items: Vec) -> Grouping { Grouping { sequence: commas(items), @@ -326,6 +380,7 @@ pub mod constructors { } } + /// `<`*a*`, `*b*`, `*...*`, `*z*`>` pub fn anglebrackets(items: Vec) -> Grouping { Grouping { sequence: commas(items), @@ -334,6 +389,7 @@ pub mod constructors { } } + /// `{`*a*`, `*b*`, `*...*`, `*z*`}` pub fn braces(items: Vec) -> Grouping { Grouping { sequence: commas(items), @@ -342,6 +398,7 @@ pub mod constructors { } } + /// `{`*a*` `*b*` `*...*` `*z*`}` pub fn block(items: Vec) -> Grouping { Grouping { sequence: Sequence { @@ -355,10 +412,12 @@ pub mod constructors { } } + /// As [block], but always vertical pub fn codeblock(items: Vec) -> Grouping { vertical(false, block(items)) } + /// `{`*a*`; `*b*`; `*...*`; `*z*`}` pub fn semiblock(items: Vec) -> Grouping { Grouping { sequence: Sequence { @@ -372,6 +431,9 @@ pub mod constructors { } } + /// Overrides `v` to be always vertical. + /// + /// If `spaced` is true, inserts an extra newline between items. pub fn vertical(spaced: bool, mut v: V) -> V { v.set_vertical_mode(if spaced { VerticalMode::ExtraNewline @@ -381,6 +443,7 @@ pub mod constructors { v } + /// Adds a layer of indentation to the given [Sequence]. 
pub fn indented(sequence: Sequence) -> Grouping { Grouping { sequence, @@ -390,52 +453,84 @@ pub mod constructors { } } +/// Ergonomic syntax for using the constructors in submodule [constructors]; see the +/// documentation for the macros, which appears on the [page for the crate +/// itself][crate#macros]. pub mod macros { + /// `name!(`*a*`, `*b*`, `*...*`, `*z*`)` ⟶ *a*`::`*b*`::`*...*`::`*z* + /// + /// See [super::constructors::name]. #[macro_export] macro_rules! name { ($($item:expr),*) => {$crate::syntax::block::constructors::name(vec![$(std::rc::Rc::new($item)),*])} } + /// `seq!(`*a*`, `*b*`, `*...*`, `*z*`)` ⟶ *ab...z* + /// + /// See [super::constructors::seq]. #[macro_export] macro_rules! seq { ($($item:expr),*) => {$crate::syntax::block::constructors::seq(vec![$(std::rc::Rc::new($item)),*])} } + /// `commas!(`*a*`, `*b*`, `*...*`, `*z*`)` ⟶ *a*`, `*b*`, `*...*`, `*z* + /// + /// See [super::constructors::commas]. #[macro_export] macro_rules! commas { ($($item:expr),*) => {$crate::syntax::block::constructors::commas(vec![$(std::rc::Rc::new($item)),*])} } + /// `parens!(`*a*`, `*b*`, `*...*`, `*z*`)` ⟶ `(`*a*`, `*b*`, `*...*`, `*z*`)` + /// + /// See [super::constructors::parens]. #[macro_export] macro_rules! parens { ($($item:expr),*) => {$crate::syntax::block::constructors::parens(vec![$(std::rc::Rc::new($item)),*])} } + /// `brackets!(`*a*`, `*b*`, `*...*`, `*z*`)` ⟶ `[`*a*`, `*b*`, `*...*`, `*z*`]` + /// + /// See [super::constructors::brackets]. #[macro_export] macro_rules! brackets { ($($item:expr),*) => {$crate::syntax::block::constructors::brackets(vec![$(std::rc::Rc::new($item)),*])} } + /// `anglebrackets!(`*a*`, `*b*`, `*...*`, `*z*`)` ⟶ `<`*a*`, `*b*`, `*...*`, `*z*`>` + /// + /// See [super::constructors::anglebrackets]. #[macro_export] macro_rules! 
anglebrackets { ($($item:expr),*) => {$crate::syntax::block::constructors::anglebrackets(vec![$(std::rc::Rc::new($item)),*])} } + /// `braces!(`*a*`, `*b*`, `*...*`, `*z*`)` ⟶ `{`*a*`, `*b*`, `*...*`, `*z*`}` + /// + /// See [super::constructors::braces]. #[macro_export] macro_rules! braces { ($($item:expr),*) => {$crate::syntax::block::constructors::braces(vec![$(std::rc::Rc::new($item)),*])} } + /// `block!(`*a*`, `*b*`, `*...*`, `*z*`)` ⟶ `{`*a*` `*b*` `*...*` `*z*`}` + /// + /// See [super::constructors::block]. #[macro_export] macro_rules! block { ($($item:expr),*) => {$crate::syntax::block::constructors::block(vec![$(std::rc::Rc::new($item)),*])} } + /// As [`block`]`!`, but always vertical. See + /// [constructors::codeblock][super::constructors::codeblock]. #[macro_export] macro_rules! codeblock { ($($item:expr),*) => {$crate::syntax::block::constructors::codeblock(vec![$(std::rc::Rc::new($item)),*])} } + /// `semiblock!(`*a*`, `*b*`, `*...*`, `*z*`)` ⟶ `{`*a*`; `*b*`; `*...*`; `*z*`}` + /// + /// See [super::constructors::semiblock]. #[macro_export] macro_rules! semiblock { ($($item:expr),*) => {$crate::syntax::block::constructors::semiblock(vec![$(std::rc::Rc::new($item)),*])} diff --git a/implementations/rust/preserves-schema/src/syntax/mod.rs b/implementations/rust/preserves-schema/src/syntax/mod.rs index a863eaa..5aee0e8 100644 --- a/implementations/rust/preserves-schema/src/syntax/mod.rs +++ b/implementations/rust/preserves-schema/src/syntax/mod.rs @@ -1 +1,3 @@ +//! A library for emitting pretty-formatted structured source code. + pub mod block; diff --git a/implementations/rust/preserves/README.md b/implementations/rust/preserves/README.md index 2a1a6a3..e09a241 100644 --- a/implementations/rust/preserves/README.md +++ b/implementations/rust/preserves/README.md @@ -1,5 +1,5 @@ ```shell -cargo install preserves +cargo add preserves ``` This crate ([`preserves` on crates.io](https://crates.io/crates/preserves)) implements