preserves/implementations/rust/preserves-schema/src/compiler/mod.rs

637 lines
21 KiB
Rust

//! Implementation of the Schema-to-Rust compiler; this is the core of the
//! [preserves-schema-rs][] program.
//!
//! See the [documentation for preserves-schema-rs][preserves-schema-rs] for examples of how to
//! use the compiler programmatically from a `build.rs` script, but very briefly, use
//! [preserves-schemac](https://preserves.dev/doc/preserves-schemac.html) to generate a
//! metaschema instance `*.prb` file, and then put something like this in `build.rs`:
//!
//! ```rust,ignore
//! use preserves_schema::compiler::*;
//!
//! const PATH_TO_PRB_FILE: &'static str = "your-metaschema-instance-file.prb";
//!
//! fn main() -> Result<(), std::io::Error> {
//! let buildroot = std::path::PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
//!
//! let mut gen_dir = buildroot.clone();
//! gen_dir.push("src/schemas");
//! let mut c = CompilerConfig::new("crate::schemas".to_owned());
//!
//! let inputs = expand_inputs(&vec![PATH_TO_PRB_FILE.to_owned()])?;
//! c.load_schemas_and_bundles(&inputs, &vec![])?;
//! compile(&c, &mut CodeCollector::files(gen_dir))
//! }
//! ```
//!
//! plus something like this in your `lib.rs` or main program:
//!
//! ```rust,ignore
//! pub mod schemas {
//! include!(concat!(env!("OUT_DIR"), "/src/schemas/mod.rs"));
//! }
//! ```
//!
//! [preserves-schema-rs]: https://preserves.dev/doc/preserves-schema-rs.html
pub mod context;
pub mod cycles;
pub mod names;
pub mod parsers;
pub mod readers;
pub mod types;
pub mod unparsers;
use crate::compiler::context::*;
use crate::compiler::types::Purpose;
use crate::gen::schema;
use crate::gen::schema::*;
use crate::gen::Language;
use crate::syntax::block::constructors::*;
use crate::syntax::block::{Formatter, Item};
use crate::*;
use glob::glob;
use preserves::value::BinarySource;
use preserves::value::BytesBinarySource;
use preserves::value::Map;
use preserves::value::Reader;
use preserves::value::Set;
use std::fs::DirBuilder;
use std::fs::File;
use std::io;
use std::io::Read;
use std::io::Write;
use std::path::PathBuf;
/// Names a Schema module within a (collection of) Schema bundle(s).
///
/// This is a plain vector of path components. It is distinct from the generated
/// `schema::ModulePath` newtype, which wraps the same kind of vector; code in this module
/// converts between the two with `ModulePath(path.clone())`.
pub type ModulePath = Vec<String>;
/// Implement this trait to extend the compiler with custom code generation support. The main
/// code generators are also implemented as plugins.
///
/// Plugins are registered in [CompilerConfig::plugins] and are invoked by [compile] in the
/// order in which they appear there.
///
/// For an example of its use outside the core compiler, see [`build.rs` for the `syndicate-rs` project](https://git.syndicate-lang.org/syndicate-lang/syndicate-rs/src/commit/60e6c6badfcbcbccc902994f4f32db6048f60d1f/syndicate/build.rs).
pub trait Plugin: std::fmt::Debug {
/// Use `_module_ctxt` to emit code at a per-module level.
///
/// [compile] calls this once for each module being compiled. The default implementation
/// emits nothing.
fn generate_module(&self, _module_ctxt: &mut ModuleContext) {}
/// Use `module_ctxt` to emit code at a per-Schema-[Definition] level.
///
/// [compile] calls this twice for every definition of each module being compiled: once
/// with a context in `TargetToplevel` mode and once with a context in `TargetGeneric` mode.
///
/// * `definition_name` - name under which `definition` appears in its schema.
/// * `definition` - the definition to generate code for.
fn generate_definition(
&self,
module_ctxt: &mut ModuleContext,
definition_name: &str,
definition: &Definition,
);
}
/// Callbacks describing the language types associated with an [ExternalModule].
///
/// Each callback maps a string to a set of strings.
// NOTE(review): the meaning of the `&str` argument and of the returned set is decided by the
// callers of these callbacks, which are not in this file — confirm before relying on it.
pub struct LanguageTypes {
/// Callback installed by [ExternalModule::set_fallback_language_types], if any.
pub fallback: Option<Box<dyn Fn(&str) -> Set<String>>>,
/// Callbacks installed by [ExternalModule::set_definition_language_types], keyed by the
/// definition name they were registered under.
pub definitions: Map<String, Box<dyn Fn(&str) -> Set<String>>>,
}
/// Closures cannot be printed directly, so each callback is rendered by showing the set it
/// returns when called with the placeholder argument `"_"`.
impl std::fmt::Debug for LanguageTypes {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
        // Sample the fallback callback first, then the per-definition callbacks.
        let fallback_sample = self.fallback.as_ref().map(|probe| probe("_"));
        let definition_samples: Map<String, Set<String>> = self
            .definitions
            .iter()
            .map(|(name, probe)| (name.clone(), probe("_")))
            .collect();

        let mut out = f.debug_struct("LanguageTypes");
        out.field("fallback", &fallback_sample);
        out.field("definitions", &definition_samples);
        out.finish()
    }
}
/// Describes a Schema module whose Rust code lives outside the code being generated, so that
/// it can be cross-referenced. Register instances with [CompilerConfig::add_external_module].
#[derive(Debug)]
pub struct ExternalModule {
/// Schema module path identifying the external module.
pub path: ModulePath,
/// Rust namespace associated with the module.
// NOTE(review): presumably the Rust path used to refer to the module's items — confirm
// against the code that consumes this field.
pub rust_namespace: String,
/// Language-type callbacks for the module; empty until one of the `set_*_language_types`
/// builder methods is used.
pub rust_language_types: LanguageTypes,
}
impl ExternalModule {
    /// Creates a description of the external module at `path` with the Rust namespace
    /// `rust_namespace`. No language-type callbacks are installed initially.
    pub fn new(path: ModulePath, rust_namespace: &str) -> Self {
        let rust_language_types = LanguageTypes {
            fallback: None,
            definitions: Map::new(),
        };
        ExternalModule {
            path,
            rust_namespace: String::from(rust_namespace),
            rust_language_types,
        }
    }

    /// Builder-style setter: installs `f` as the fallback language-types callback, replacing
    /// any previously installed fallback, and returns the updated module.
    pub fn set_fallback_language_types<F: 'static + Fn(&str) -> Set<String>>(
        mut self,
        f: F,
    ) -> Self {
        let callback: Box<dyn Fn(&str) -> Set<String>> = Box::new(f);
        self.rust_language_types.fallback = Some(callback);
        self
    }

    /// Builder-style setter: installs `f` as the language-types callback for the definition
    /// named `d`, and returns the updated module.
    ///
    /// # Panics
    ///
    /// Panics if a callback was already installed for `d`.
    pub fn set_definition_language_types<F: 'static + Fn(&str) -> Set<String>>(
        mut self,
        d: &str,
        f: F,
    ) -> Self {
        let callback: Box<dyn Fn(&str) -> Set<String>> = Box::new(f);
        let displaced = self
            .rust_language_types
            .definitions
            .insert(d.to_owned(), callback);
        if let Some(_) = displaced {
            panic!(
                "Duplicate language types definition installed: {:?} {:?}",
                &self.path, d
            );
        }
        self
    }
}
/// Used to collect output from the compiler.
///
/// Selects where [CodeCollector::collect_output] sends each piece of generated source text.
pub enum CodeModuleCollector<'a> {
/// Default file-based code emitter.
Files {
/// Where output Rust code files will be placed.
output_dir: PathBuf,
},
/// Callback-based code emitter.
Custom {
/// Used to collect the various produced source files.
/// Useful for when compiling in e.g. proc_macro context.
///
/// Called with the path of the module the text belongs to (`None` for the top-level
/// output) and the generated source text. An error returned by the callback is
/// propagated to the caller of [CodeCollector::collect_output].
collect_output: &'a mut dyn FnMut(Option<&ModulePath>, &str) -> io::Result<()>,
},
}
/// Used to configure and collect output from the compiler.
pub struct CodeCollector<'a> {
/// If true, [compile] writes a `pub mod …;` declaration for every code-generated module
/// into the top-level output.
pub emit_mod_declarations: bool,
/// Destination for the generated source text.
pub collect_module: CodeModuleCollector<'a>,
}
/// Main entry point to the compiler.
///
/// Build one with [CompilerConfig::new], load schemas into it, and pass it to [compile].
#[derive(Debug)]
pub struct CompilerConfig {
/// All known Schema modules, indexed by [ModulePath] and annotated with a [Purpose].
///
/// [compile] generates code only for modules whose purpose is `Purpose::Codegen`.
pub bundle: Map<ModulePath, (Schema, Purpose)>,
/// Fully-qualified Rust module prefix to use for each generated module.
pub fully_qualified_module_prefix: String,
/// Rust module path to the [preserves_schema::support][crate::support] module.
///
/// [CompilerConfig::new] sets this to `"preserves_schema"`; generated code imports
/// `<support_crate>::support`.
pub support_crate: String,
/// External modules for cross-referencing.
pub external_modules: Map<ModulePath, ExternalModule>,
/// Plugins active in this compiler instance.
pub plugins: Vec<Box<dyn Plugin>>,
/// If true, a directive is emitted in each module instructing
/// [rustfmt](https://github.com/rust-lang/rustfmt) to ignore it.
///
/// [CompilerConfig::new] sets this to `false`.
pub rustfmt_skip: bool,
}
/// Reads the [Schema] or [Bundle] stored in file `i` and records every module it contains in
/// `bundle`, tagged with `purpose`.
///
/// A lone [Schema] is registered under the file stem of `i`. Modules already present in
/// `bundle` under the same path are replaced.
///
/// # Errors
///
/// Returns any error reported by [load_schema_or_bundle].
pub fn load_schema_or_bundle_with_purpose(
    bundle: &mut Map<ModulePath, (Schema, Purpose)>,
    i: &PathBuf,
    purpose: Purpose,
) -> io::Result<()> {
    // Load into a scratch map first so that only the freshly loaded modules get tagged.
    let mut loaded: Map<ModulePath, Schema> = Map::new();
    load_schema_or_bundle(&mut loaded, i)?;
    bundle.extend(
        loaded
            .into_iter()
            .map(|(path, schema)| (path, (schema, purpose))),
    );
    Ok(())
}
/// Decodes the binary-encoded [Schema] or [Bundle] in `input` and records every module it
/// contains in `bundle`, tagged with `purpose`.
///
/// A lone [Schema] is registered under the module name `prefix`; for a [Bundle], `prefix` is
/// ignored. Modules already present in `bundle` under the same path are replaced.
///
/// # Errors
///
/// Returns any error reported by [load_schema_or_bundle_bin].
pub fn load_schema_or_bundle_bin_with_purpose(
    bundle: &mut Map<ModulePath, (Schema, Purpose)>,
    prefix: &str,
    input: &[u8],
    purpose: Purpose,
) -> io::Result<()> {
    // Load into a scratch map first so that only the freshly loaded modules get tagged.
    let mut loaded: Map<ModulePath, Schema> = Map::new();
    load_schema_or_bundle_bin(&mut loaded, prefix, input)?;
    bundle.extend(
        loaded
            .into_iter()
            .map(|(path, schema)| (path, (schema, purpose))),
    );
    Ok(())
}
/// Returns the file stem of `i` as UTF-8 text, for use as a schema module name.
///
/// # Errors
///
/// Returns an [io::ErrorKind::InvalidData] error if `i` has no file stem or if the stem is
/// not valid UTF-8.
fn bundle_prefix(i: &PathBuf) -> io::Result<&str> {
    // Both failure modes report the offending path in the same way.
    let invalid = |what: &str| {
        io::Error::new(
            io::ErrorKind::InvalidData,
            format!("{}: {:?}", what, i),
        )
    };
    match i.file_stem() {
        None => Err(invalid("Bad schema file stem")),
        Some(stem) => match stem.to_str() {
            Some(text) => Ok(text),
            None => Err(invalid("Invalid UTF-8 in schema file name")),
        },
    }
}
/// Reads the file at `i` and loads the [Schema] or [Bundle] it contains into `bundle`.
///
/// A lone [Schema] is registered under the file stem of `i`.
///
/// Returns `true` if the file held a schema, `false` if it held a bundle.
///
/// # Errors
///
/// Fails if the file cannot be opened or read, if `i` has no usable UTF-8 file stem, or if
/// [load_schema_or_bundle_bin] rejects the file contents.
pub fn load_schema_or_bundle(bundle: &mut Map<ModulePath, Schema>, i: &PathBuf) -> io::Result<bool> {
    // The file is read in full before the stem is inspected, so I/O errors take precedence
    // over file-name errors.
    let mut encoded = Vec::new();
    File::open(i)?.read_to_end(&mut encoded)?;
    let prefix = bundle_prefix(i)?;
    load_schema_or_bundle_bin(bundle, prefix, encoded.as_slice())
}
/// Decodes the binary-encoded value `input` and loads the [Schema] or [Bundle] it represents
/// into `bundle`.
///
/// The value is first tried as a [Schema], which is registered under the single-component
/// module path `[prefix]`. Otherwise it is tried as a [Bundle], whose modules are registered
/// under their own paths, ignoring `prefix`.
///
/// Returns `true` if the value was a schema, `false` if it was a bundle.
///
/// # Errors
///
/// Fails if no value can be read from `input`, or with [io::ErrorKind::InvalidData] if the
/// value parses neither as a schema nor as a bundle.
pub fn load_schema_or_bundle_bin(
    bundle: &mut Map<ModulePath, Schema>,
    prefix: &str,
    input: &[u8],
) -> io::Result<bool> {
    let mut source = BytesBinarySource::new(input);
    let mut values = source.packed_iovalues();
    let blob = values.demand_next(false)?;
    let language = Language::default();

    // Interpretation 1: a single schema, named after `prefix`.
    if let Ok(schema) = language.parse(&blob) {
        bundle.insert(vec![prefix.to_owned()], schema);
        return Ok(true);
    }

    // Interpretation 2: a bundle carrying its own module paths.
    if let Ok(Bundle { modules }) = language.parse(&blob) {
        for (ModulePath(path), schema) in modules.0 {
            bundle.insert(path, schema);
        }
        return Ok(false);
    }

    Err(io::Error::new(
        io::ErrorKind::InvalidData,
        format!("Invalid schema binary blob {:?}", prefix),
    ))
}
impl CompilerConfig {
/// Construct a [CompilerConfig] configured to use `fully_qualified_module_prefix` as the
/// Rust module prefix for generated code.
pub fn new(fully_qualified_module_prefix: String) -> Self {
CompilerConfig {
bundle: Map::new(),
fully_qualified_module_prefix,
support_crate: "preserves_schema".to_owned(),
external_modules: Map::new(),
plugins: vec![
Box::new(types::TypePlugin),
Box::new(readers::ReaderPlugin),
Box::new(parsers::ParserPlugin),
Box::new(unparsers::UnparserPlugin),
],
rustfmt_skip: false,
}
}
pub fn add_external_module(&mut self, m: ExternalModule) {
let path = m.path.clone();
if self.external_modules.insert(path.clone(), m).is_some() {
panic!("Duplicate external module installed: {:?}", path)
}
}
pub fn load_schemas_and_bundles(
&mut self,
inputs: &Vec<PathBuf>,
xrefs: &Vec<PathBuf>,
) -> io::Result<()> {
for i in inputs {
load_schema_or_bundle_with_purpose(&mut self.bundle, i, Purpose::Codegen)?;
}
for i in xrefs {
load_schema_or_bundle_with_purpose(&mut self.bundle, i, Purpose::Xref)?;
}
Ok(())
}
pub fn load_xref_bin(&mut self, prefix: &str, bundle_or_schema: &[u8]) -> io::Result<()> {
load_schema_or_bundle_bin_with_purpose(
&mut self.bundle,
prefix,
bundle_or_schema,
Purpose::Xref,
)
}
fn build_type_cache(&self) -> Map<Ref, types::TDefinition> {
self.bundle
.iter()
.flat_map(|(modpath, s)| {
let modpath = ModulePath(modpath.clone());
s.0.definitions.0.iter().map(move |(name, def)| {
let ty = types::definition_type(&modpath, s.1, name, def);
(ty.self_ref.clone(), ty)
})
})
.collect()
}
fn generate_definition(
&self,
b: &mut BundleContext,
k: &ModulePath,
v: &Schema,
n: &str,
d: &Definition,
mode: ModuleContextMode,
generated: &mut Map<ModuleContextMode, Vec<Item>>,
) {
b.generate_module(k, v, mode, generated, |m| {
for plugin in self.plugins.iter() {
plugin.generate_definition(m, n, d);
}
});
}
}
/// Expands a vector of [mod@glob]s to a vector of actual paths.
pub fn expand_inputs(globs: &Vec<String>) -> io::Result<Vec<PathBuf>> {
let mut result = Vec::new();
for g in globs.iter() {
for p in
glob(g).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("{}", e)))?
{
result.push(p.map_err(glob::GlobError::into_error)?)
}
}
Ok(result)
}
impl<'a> CodeCollector<'a> {
    /// Construct a [CodeCollector] that collects output Rust modules directly into the file
    /// system tree rooted at `output_dir`.
    ///
    /// `pub mod` declarations are enabled for the resulting collector.
    pub fn files(output_dir: PathBuf) -> Self {
        CodeCollector {
            emit_mod_declarations: true,
            collect_module: CodeModuleCollector::Files { output_dir },
        }
    }

    /// Delivers the generated source text `contents` for `module` (`None` means the
    /// top-level output) to the configured destination.
    ///
    /// With a file-based collector, the components of the module path become nested
    /// directories under the output directory, the final component is turned into a
    /// `<modname>.rs` file name via `names::render_modname`, and the top-level output goes
    /// to `mod.rs`. Missing parent directories are created. A file whose current contents
    /// already equal `contents` is left untouched.
    ///
    /// With a custom collector, the callback is invoked with `module` and `contents`.
    ///
    /// # Errors
    ///
    /// Returns any error from creating directories or writing the file, or the error
    /// returned by the custom callback.
    ///
    /// # Panics
    ///
    /// Panics if the computed path has no file stem or the stem is not valid UTF-8.
    #[doc(hidden)]
    pub fn collect_output(&mut self, module: Option<&ModulePath>, contents: &str) -> io::Result<()> {
        match &mut self.collect_module {
            CodeModuleCollector::Files { output_dir } => {
                let mut output_path = output_dir.clone();
                if let Some(k) = module {
                    output_path.extend(k);
                    let module_name = output_path
                        .file_stem()
                        .unwrap()
                        .to_str()
                        .unwrap()
                        .to_owned();
                    let module_name = names::render_modname(&module_name);
                    output_path.set_file_name(format!("{}.rs", module_name));
                } else {
                    output_path.push("mod.rs");
                }
                DirBuilder::new().recursive(true).create(output_path.parent().unwrap())?;

                // Best-effort "already up to date" check. The comparison is done on raw
                // bytes, and any failure to read the existing file (missing, unreadable,
                // not valid UTF-8, ...) simply falls through to rewriting it rather than
                // aborting the whole compilation.
                if let Ok(existing) = std::fs::read(&output_path) {
                    if existing.as_slice() == contents.as_bytes() {
                        return Ok(());
                    }
                }

                let mut f = File::create(output_path)?;
                f.write_all(contents.as_bytes())
            }
            CodeModuleCollector::Custom { collect_output } => {
                collect_output(module, contents)
            }
        }
    }
}
impl Ref {
    /// Returns a copy of this reference that is guaranteed to carry a module path.
    ///
    /// A reference that already names a module is returned unchanged; one with an empty
    /// module path is completed with `default_module_path`.
    pub fn qualify(&self, default_module_path: &schema::ModulePath) -> Ref {
        let already_qualified = !self.module.0.is_empty();
        if already_qualified {
            return self.clone();
        }
        Ref {
            module: default_module_path.clone(),
            name: self.name.clone(),
        }
    }
}
impl Schema {
    /// Returns `true` unless the schema's embedded type is `EmbeddedTypeName::False`.
    pub fn has_embedded_type(&self) -> bool {
        match self.embedded_type {
            EmbeddedTypeName::False => false,
            _ => true,
        }
    }
}
/// Main entry point: runs the compilation process.
///
/// For every module in `config.bundle` whose purpose is `Purpose::Codegen`, runs the
/// configured plugins and hands the resulting Rust source text to `emitter`, tagged with the
/// module's path. Modules loaded for any other purpose are skipped.
///
/// Afterwards, one further top-level source text (tagged with `None`) is handed to
/// `emitter`. It contains, in order:
///
/// * the rustfmt-skip directive, if `config.rustfmt_skip` is set;
/// * a `pub mod …;` declaration per code-generated module, if
///   `emitter.emit_mod_declarations` is set;
/// * the language struct, with one field per literal collected during generation, and its
///   `Default` implementation, which decodes each literal from its packed binary encoding;
/// * a `_bundle()` function returning the packed binary encoding of a [Bundle] holding all
///   code-generated schemas.
///
/// # Errors
///
/// Returns the first error reported by `emitter`.
///
/// # Panics
///
/// Panics if a code-generated module has an empty module path while `pub mod` declarations
/// are being emitted, or if a literal or the bundle cannot be encoded.
pub fn compile<'a>(config: &CompilerConfig, emitter: &mut CodeCollector<'a>) -> io::Result<()> {
    let mut b = BundleContext::new(config);

    for (k, (v, module_purpose)) in config.bundle.iter() {
        if *module_purpose != Purpose::Codegen {
            continue;
        }

        //---------------------------------------------------------------------------
        // Run the plugins: once at module level, then twice per definition (toplevel and
        // generic modes). Output is grouped by mode in `generated`.

        let mut generated = Map::new();

        b.generate_module(k, v, ModuleContextMode::TargetModule, &mut generated, |m| {
            for plugin in config.plugins.iter() {
                plugin.generate_module(m);
            }
        });

        for (n, d) in &v.definitions.0 {
            use ModuleContextMode::*;
            config.generate_definition(&mut b, k, v, n, d, TargetToplevel, &mut generated);
            config.generate_definition(&mut b, k, v, n, d, TargetGeneric, &mut generated);
        }

        //---------------------------------------------------------------------------
        // Assemble the module's source text: crate-level attributes, the standard import
        // prelude, then the generated items.

        let mut lines: Vec<String> = Vec::new();

        lines.push(Formatter::to_string(vertical(
            false,
            seq!["#![allow(unused_parens)]", "#![allow(unused_imports)]"],
        )));
        if config.rustfmt_skip {
            lines.push("#![cfg_attr(rustfmt, rustfmt_skip)]".to_owned());
        }
        lines.push(Formatter::to_string(vertical(
            false,
            seq![
                "",
                "use std::convert::TryFrom;",
                format!("use {}::support as _support;", &config.support_crate),
                "use _support::Deserialize;",
                "use _support::Parse;",
                "use _support::Unparse;",
                "use _support::preserves;",
                "use preserves::value::Domain;",
                "use preserves::value::NestedValue;",
                ""
            ],
        )));

        let mut emit_items = |items: Vec<Item>| {
            if !items.is_empty() {
                lines.push(Formatter::to_string(vertical(true, seq(items))));
                lines.push("".to_owned());
            }
        };
        // A mode for which nothing was recorded (for instance, the per-definition modes of a
        // schema that has no definitions) is treated as having produced no items rather
        // than being unwrapped, so that such a module cannot abort the compilation.
        emit_items(
            generated
                .remove(&ModuleContextMode::TargetModule)
                .unwrap_or_default(),
        );
        emit_items(
            generated
                .remove(&ModuleContextMode::TargetToplevel)
                .unwrap_or_default(),
        );
        emit_items(
            generated
                .remove(&ModuleContextMode::TargetGeneric)
                .unwrap_or_default(),
        );

        {
            let contents = lines.join("\n");
            emitter.collect_output(Some(k), &contents)?;
        }
    }

    // Top-level output, shared by all generated modules.
    {
        let mut lines = Vec::new();

        if config.rustfmt_skip {
            lines.push("#![cfg_attr(rustfmt, rustfmt_skip)]".to_owned());
            lines.push("".to_owned());
        }

        if emitter.emit_mod_declarations {
            for (modpath, (_, module_purpose)) in config.bundle.iter() {
                if *module_purpose != Purpose::Codegen {
                    continue;
                }
                // Only the final path component names the module being declared.
                lines.push(format!(
                    "pub mod {};",
                    names::render_modname(modpath.last().unwrap())
                ));
            }
            lines.push("".to_owned());
        }

        lines.push(format!(
            "use {}::support as _support;",
            &config.support_crate
        ));
        lines.push("use _support::preserves;".to_owned());
        lines.push("".to_owned());

        // The language struct: one field per literal value collected during generation.
        lines.push("#[allow(non_snake_case)]".to_owned());
        lines.push(Formatter::to_string(item(seq![
            "pub struct ",
            b.language_struct_name(),
            anglebrackets!["N: preserves::value::NestedValue"],
            " ",
            vertical(
                false,
                braces(
                    b.literals
                        .iter()
                        .map(|(value, name)| item(format!("pub {}: N /* {:?} */", name, value)))
                        .collect()
                )
            )
        ])));
        lines.push("".to_owned());

        // Its `Default` impl decodes every literal from its packed binary encoding.
        lines.push(Formatter::to_string(item(seq![
            "impl",
            anglebrackets!["N: preserves::value::NestedValue"],
            " Default for ",
            b.language_struct_name(),
            "<N> ",
            codeblock![seq![
                "fn default() -> Self ",
                codeblock![seq![
                    b.language_struct_name(),
                    " ",
                    vertical(
                        false,
                        braces(
                            b.literals
                                .iter()
                                .map(|(value, name)| {
                                    let bs = preserves::value::PackedWriter::encode_iovalue(&value)
                                        .unwrap();
                                    item(format!(
                                        "{}: /* {:?} */ _support::decode_lit(&{:?}).unwrap()",
                                        name, value, bs
                                    ))
                                })
                                .collect()
                        )
                    )
                ]]
            ]]
        ])));
        lines.push("".to_owned());

        // Embed the code-generated schemas themselves as a byte-string literal.
        {
            let mut b = Bundle {
                modules: Modules(Map::new()),
            };
            for (modpath, (schema, purpose)) in config.bundle.iter() {
                if *purpose == Purpose::Codegen {
                    b.modules
                        .0
                        .insert(ModulePath(modpath.clone()), schema.clone());
                }
            }
            let b_value = Language::default().unparse(&b);
            let b_bin = preserves::value::PackedWriter::encode_iovalue(&b_value).unwrap();
            let mut hex_encoded_bundle = String::new();
            let mut count = 0;
            for b in b_bin {
                // Start a new line of the literal every 16 bytes, using a backslash-newline
                // continuation.
                if count % 16 == 0 {
                    hex_encoded_bundle.push_str("\\\n ");
                }
                count += 1;
                hex_encoded_bundle.push_str(&format!("\\x{:02x}", b));
            }
            lines.push(Formatter::to_string(item(seq![
                "pub fn _bundle() -> &'static [u8] ",
                codeblock![seq!["b\"", hex_encoded_bundle, "\""]]
            ])));
        }
        lines.push("".to_owned());

        let contents = lines.join("\n");
        emitter.collect_output(None, &contents)?;
    }

    Ok(())
}