//! Implementation of the Schema-to-Rust compiler; this is the core of the
//! [preserves-schema-rs][] program.
//!
//! See the [documentation for preserves-schema-rs][preserves-schema-rs] for examples of how to
//! use the compiler programmatically from a `build.rs` script, but very briefly, use
//! [preserves-schemac](https://preserves.dev/doc/preserves-schemac.html) to generate a
//! metaschema instance `*.prb` file, and then put something like this in `build.rs`:
//!
//! ```rust,ignore
//! use preserves_schema::compiler::*;
//!
//! const PATH_TO_PRB_FILE: &'static str = "your-metaschema-instance-file.prb";
//!
//! fn main() -> Result<(), std::io::Error> {
//!     let buildroot = std::path::PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
//!
//!     let mut gen_dir = buildroot.clone();
//!     gen_dir.push("src/schemas");
//!     let mut c = CompilerConfig::new("crate::schemas".to_owned());
//!
//!     let inputs = expand_inputs(&vec![PATH_TO_PRB_FILE.to_owned()])?;
//!     c.load_schemas_and_bundles(&inputs, &vec![])?;
//!     compile(&c, &mut CodeCollector::files(gen_dir))
//! }
//! ```
//!
//! plus something like this in your `lib.rs` or main program:
//!
//! ```rust,ignore
//! pub mod schemas {
//!     include!(concat!(env!("OUT_DIR"), "/src/schemas/mod.rs"));
//! }
//! ```
//!
[preserves-schema-rs]: https://preserves.dev/doc/preserves-schema-rs.html pub mod context; pub mod cycles; pub mod names; pub mod parsers; pub mod readers; pub mod types; pub mod unparsers; use crate::compiler::context::*; use crate::compiler::types::Purpose; use crate::gen::schema; use crate::gen::schema::*; use crate::gen::Language; use crate::syntax::block::constructors::*; use crate::syntax::block::{Formatter, Item}; use crate::*; use glob::glob; use preserves::value::BinarySource; use preserves::value::BytesBinarySource; use preserves::value::Map; use preserves::value::Reader; use preserves::value::Set; use std::fs::DirBuilder; use std::fs::File; use std::io; use std::io::Read; use std::io::Write; use std::path::PathBuf; /// Names a Schema module within a (collection of) Schema bundle(s). pub type ModulePath = Vec; /// Implement this trait to extend the compiler with custom code generation support. The main /// code generators are also implemented as plugins. /// /// For an example of its use outside the core compiler, see [`build.rs` for the `syndicate-rs` project](https://git.syndicate-lang.org/syndicate-lang/syndicate-rs/src/commit/60e6c6badfcbcbccc902994f4f32db6048f60d1f/syndicate/build.rs). pub trait Plugin: std::fmt::Debug { /// Use `_module_ctxt` to emit code at a per-module level. fn generate_module(&self, _module_ctxt: &mut ModuleContext) {} /// Use `module_ctxt` to emit code at a per-Schema-[Definition] level. 
fn generate_definition( &self, module_ctxt: &mut ModuleContext, definition_name: &str, definition: &Definition, ); } pub struct LanguageTypes { pub fallback: Option Set>>, pub definitions: Map Set>>, } impl std::fmt::Debug for LanguageTypes { fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { f.debug_struct("LanguageTypes") .field("fallback", &self.fallback.as_ref().map(|f| f("_"))) .field( "definitions", &self .definitions .iter() .map(|(k, f)| (k.clone(), f("_"))) .collect::>>(), ) .finish() } } #[derive(Debug)] pub struct ExternalModule { pub path: ModulePath, pub rust_namespace: String, pub rust_language_types: LanguageTypes, } impl ExternalModule { pub fn new(path: ModulePath, rust_namespace: &str) -> Self { ExternalModule { path, rust_namespace: rust_namespace.to_owned(), rust_language_types: LanguageTypes { fallback: None, definitions: Map::new(), }, } } pub fn set_fallback_language_types Set>( mut self, f: F, ) -> Self { self.rust_language_types.fallback = Some(Box::new(f)); self } pub fn set_definition_language_types Set>( mut self, d: &str, f: F, ) -> Self { if self .rust_language_types .definitions .insert(d.to_owned(), Box::new(f)) .is_some() { panic!( "Duplicate language types definition installed: {:?} {:?}", &self.path, d ); } self } } /// Used to collect output from the compiler. pub enum CodeModuleCollector<'a> { /// Default file-based code emitter. Files { /// Where output Rust code files will be placed. output_dir: PathBuf, }, Custom { /// Used to collect the various produced source files. /// Useful for when compiling in e.g. proc_macro context. collect_output: &'a mut dyn FnMut(Option<&ModulePath>, &str) -> io::Result<()>, }, } /// Used to configure and collect output from the compiler. pub struct CodeCollector<'a> { pub emit_mod_declarations: bool, pub collect_module: CodeModuleCollector<'a>, } /// Main entry point to the compiler. 
#[derive(Debug)] pub struct CompilerConfig { /// All known Schema modules, indexed by [ModulePath] and annotated with a [Purpose]. pub bundle: Map, /// Fully-qualified Rust module prefix to use for each generated module. pub fully_qualified_module_prefix: String, /// Rust module path to the [preserves_schema::support][crate::support] module. pub support_crate: String, /// External modules for cross-referencing. pub external_modules: Map, /// Plugins active in this compiler instance. pub plugins: Vec>, /// If true, a directive is emitted in each module instructing /// [rustfmt](https://github.com/rust-lang/rustfmt) to ignore it. pub rustfmt_skip: bool, } /// Loads a [Schema] or [Bundle] from path `i` into `bundle` for the given `purpose`. /// /// If `i` holds a [Schema], then the file stem of `i` is used as the module name when placing /// the schema in `bundle`. pub fn load_schema_or_bundle_with_purpose( bundle: &mut Map, i: &PathBuf, purpose: Purpose, ) -> io::Result<()> { let mut inserted = Map::::new(); load_schema_or_bundle(&mut inserted, i)?; for (k, v) in inserted.into_iter() { bundle.insert(k, (v, purpose)); } Ok(()) } /// Loads a [Schema] or [Bundle] from raw binary encoded value `input` into `bundle` for the /// given `purpose`. /// /// If `input` corresponds to a [Schema], then `prefix` is used as its module name; otherwise, /// it's a [Bundle], and `prefix` is ignored. pub fn load_schema_or_bundle_bin_with_purpose( bundle: &mut Map, prefix: &str, input: &[u8], purpose: Purpose, ) -> io::Result<()> { let mut inserted = Map::::new(); load_schema_or_bundle_bin(&mut inserted, prefix, input)?; for (k, v) in inserted.into_iter() { bundle.insert(k, (v, purpose)); } Ok(()) } fn bundle_prefix(i: &PathBuf) -> io::Result<&str> { i.file_stem() .ok_or_else(|| { io::Error::new( io::ErrorKind::InvalidData, format!("Bad schema file stem: {:?}", i), ) })? 
.to_str() .ok_or_else(|| { io::Error::new( io::ErrorKind::InvalidData, format!("Invalid UTF-8 in schema file name: {:?}", i), ) }) } /// Loads a [Schema] or [Bundle] from path `i` into `bundle`. /// /// If `i` holds a [Schema], then the file stem of `i` is used as the module name when placing /// the schema in `bundle`. /// /// Returns true if it was a schema, false if it was a bundle. pub fn load_schema_or_bundle(bundle: &mut Map, i: &PathBuf) -> io::Result { let mut f = File::open(&i)?; let mut bs = vec![]; f.read_to_end(&mut bs)?; load_schema_or_bundle_bin(bundle, bundle_prefix(i)?, &bs[..]) } /// Loads a [Schema] or [Bundle] from raw binary encoded value `input` into `bundle`. /// /// If `input` corresponds to a [Schema], then `prefix` is used as its module name; otherwise, /// it's a [Bundle], and `prefix` is ignored. /// /// Returns true if it was a schema, false if it was a bundle. pub fn load_schema_or_bundle_bin( bundle: &mut Map, prefix: &str, input: &[u8], ) -> io::Result { let mut src = BytesBinarySource::new(input); let mut reader = src.packed_iovalues(); let blob = reader.demand_next(false)?; let language = Language::default(); if let Ok(s) = language.parse(&blob) { bundle.insert(vec![prefix.to_owned()], s); Ok(true) } else if let Ok(Bundle { modules }) = language.parse(&blob) { for (ModulePath(k), v) in modules.0 { bundle.insert(k, v); } Ok(false) } else { Err(io::Error::new( io::ErrorKind::InvalidData, format!("Invalid schema binary blob {:?}", prefix), )) } } impl CompilerConfig { /// Construct a [CompilerConfig] configured to use `fully_qualified_module_prefix` as the /// Rust module prefix for generated code. 
pub fn new(fully_qualified_module_prefix: String) -> Self { CompilerConfig { bundle: Map::new(), fully_qualified_module_prefix, support_crate: "preserves_schema".to_owned(), external_modules: Map::new(), plugins: vec![ Box::new(types::TypePlugin), Box::new(readers::ReaderPlugin), Box::new(parsers::ParserPlugin), Box::new(unparsers::UnparserPlugin), ], rustfmt_skip: false, } } pub fn add_external_module(&mut self, m: ExternalModule) { let path = m.path.clone(); if self.external_modules.insert(path.clone(), m).is_some() { panic!("Duplicate external module installed: {:?}", path) } } pub fn load_schemas_and_bundles( &mut self, inputs: &Vec, xrefs: &Vec, ) -> io::Result<()> { for i in inputs { load_schema_or_bundle_with_purpose(&mut self.bundle, i, Purpose::Codegen)?; } for i in xrefs { load_schema_or_bundle_with_purpose(&mut self.bundle, i, Purpose::Xref)?; } Ok(()) } pub fn load_xref_bin(&mut self, prefix: &str, bundle_or_schema: &[u8]) -> io::Result<()> { load_schema_or_bundle_bin_with_purpose( &mut self.bundle, prefix, bundle_or_schema, Purpose::Xref, ) } fn build_type_cache(&self) -> Map { self.bundle .iter() .flat_map(|(modpath, s)| { let modpath = ModulePath(modpath.clone()); s.0.definitions.0.iter().map(move |(name, def)| { let ty = types::definition_type(&modpath, s.1, name, def); (ty.self_ref.clone(), ty) }) }) .collect() } fn generate_definition( &self, b: &mut BundleContext, k: &ModulePath, v: &Schema, n: &str, d: &Definition, mode: ModuleContextMode, generated: &mut Map>, ) { b.generate_module(k, v, mode, generated, |m| { for plugin in self.plugins.iter() { plugin.generate_definition(m, n, d); } }); } } /// Expands a vector of [mod@glob]s to a vector of actual paths. pub fn expand_inputs(globs: &Vec) -> io::Result> { let mut result = Vec::new(); for g in globs.iter() { for p in glob(g).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("{}", e)))? { result.push(p.map_err(glob::GlobError::into_error)?) 
} } Ok(result) } impl<'a> CodeCollector<'a> { /// Construct a [CodeCollector] that collects output Rust modules directly into the file /// system tree rooted at `output_dir`. pub fn files(output_dir: PathBuf) -> Self { CodeCollector { emit_mod_declarations: true, collect_module: CodeModuleCollector::Files { output_dir }, } } #[doc(hidden)] pub fn collect_output(&mut self, module: Option<&ModulePath>, contents: &str) -> io::Result<()> { match &mut self.collect_module { CodeModuleCollector::Files { output_dir } => { let mut output_path = output_dir.clone(); if let Some(k) = module { output_path.extend(k); let module_name = output_path .file_stem() .unwrap() .to_str() .unwrap() .to_owned(); let module_name = names::render_modname(&module_name); output_path.set_file_name(format!("{}.rs", module_name)); } else { output_path.push("mod.rs"); } DirBuilder::new().recursive(true).create(output_path.parent().unwrap())?; if output_path.exists() { if let Ok(mut f) = File::open(&output_path) { let mut existing_contents = String::new(); f.read_to_string(&mut existing_contents)?; if existing_contents == contents { return Ok(()); } } } let mut f = File::create(output_path)?; f.write_all(contents.as_bytes()) } CodeModuleCollector::Custom { collect_output } => { collect_output(module, contents) } } } } impl Ref { pub fn qualify(&self, default_module_path: &schema::ModulePath) -> Ref { if self.module.0.is_empty() { Ref { module: default_module_path.clone(), name: self.name.clone(), } } else { self.clone() } } } impl Schema { pub fn has_embedded_type(&self) -> bool { self.embedded_type != EmbeddedTypeName::False } } /// Main entry point: runs the compilation process. 
pub fn compile<'a>(config: &CompilerConfig, emitter: &mut CodeCollector<'a>) -> io::Result<()> { let mut b = BundleContext::new(config); for (k, (v, module_purpose)) in config.bundle.iter() { if *module_purpose != Purpose::Codegen { continue; } //--------------------------------------------------------------------------- let mut generated = Map::new(); b.generate_module(k, v, ModuleContextMode::TargetModule, &mut generated, |m| { for plugin in config.plugins.iter() { plugin.generate_module(m); } }); for (n, d) in &v.definitions.0 { use ModuleContextMode::*; config.generate_definition(&mut b, k, v, n, d, TargetToplevel, &mut generated); config.generate_definition(&mut b, k, v, n, d, TargetGeneric, &mut generated); } //--------------------------------------------------------------------------- let mut lines: Vec = Vec::new(); lines.push(Formatter::to_string(vertical( false, seq!["#![allow(unused_parens)]", "#![allow(unused_imports)]"], ))); if config.rustfmt_skip { lines.push("#![cfg_attr(rustfmt, rustfmt_skip)]".to_owned()); } lines.push(Formatter::to_string(vertical( false, seq![ "", "use std::convert::TryFrom;", format!("use {}::support as _support;", &config.support_crate), "use _support::Deserialize;", "use _support::Parse;", "use _support::Unparse;", "use _support::preserves;", "use preserves::value::Domain;", "use preserves::value::NestedValue;", "" ], ))); let mut emit_items = |items: Vec| { if !items.is_empty() { lines.push(Formatter::to_string(vertical(true, seq(items)))); lines.push("".to_owned()); } }; emit_items(generated.remove(&ModuleContextMode::TargetModule).unwrap()); emit_items( generated .remove(&ModuleContextMode::TargetToplevel) .unwrap(), ); emit_items(generated.remove(&ModuleContextMode::TargetGeneric).unwrap()); { let contents = lines.join("\n"); emitter.collect_output(Some(k), &contents)?; } } { let mut lines = Vec::new(); if config.rustfmt_skip { lines.push("#![cfg_attr(rustfmt, rustfmt_skip)]".to_owned()); lines.push("".to_owned()); } if 
emitter.emit_mod_declarations { for (modpath, (_, module_purpose)) in config.bundle.iter() { if *module_purpose != Purpose::Codegen { continue; } lines.push(format!( "pub mod {};", names::render_modname(modpath.last().unwrap()) )); } lines.push("".to_owned()); } lines.push(format!( "use {}::support as _support;", &config.support_crate )); lines.push("use _support::preserves;".to_owned()); lines.push("".to_owned()); lines.push("#[allow(non_snake_case)]".to_owned()); lines.push(Formatter::to_string(item(seq![ "pub struct ", b.language_struct_name(), anglebrackets!["N: preserves::value::NestedValue"], " ", vertical( false, braces( b.literals .iter() .map(|(value, name)| item(format!("pub {}: N /* {:?} */", name, value))) .collect() ) ) ]))); lines.push("".to_owned()); lines.push(Formatter::to_string(item(seq![ "impl", anglebrackets!["N: preserves::value::NestedValue"], " Default for ", b.language_struct_name(), " ", codeblock![seq![ "fn default() -> Self ", codeblock![seq![ b.language_struct_name(), " ", vertical( false, braces( b.literals .iter() .map(|(value, name)| { let bs = preserves::value::PackedWriter::encode_iovalue(&value) .unwrap(); item(format!( "{}: /* {:?} */ _support::decode_lit(&{:?}).unwrap()", name, value, bs )) }) .collect() ) ) ]] ]] ]))); lines.push("".to_owned()); { let mut b = Bundle { modules: Modules(Map::new()), }; for (modpath, (schema, purpose)) in config.bundle.iter() { if *purpose == Purpose::Codegen { b.modules .0 .insert(ModulePath(modpath.clone()), schema.clone()); } } let b_value = Language::default().unparse(&b); let b_bin = preserves::value::PackedWriter::encode_iovalue(&b_value).unwrap(); let mut hex_encoded_bundle = String::new(); let mut count = 0; for b in b_bin { if count % 16 == 0 { hex_encoded_bundle.push_str("\\\n "); } count += 1; hex_encoded_bundle.push_str(&format!("\\x{:02x}", b)); } lines.push(Formatter::to_string(item(seq![ "pub fn _bundle() -> &'static [u8] ", codeblock![seq!["b\"", hex_encoded_bundle, "\""]] ]))); } 
lines.push("".to_owned()); let contents = lines.join("\n"); emitter.collect_output(None, &contents)?; } Ok(()) }