pub mod schemas; // Paths operate on IOValues because the AST includes keys of IOValue type. // If we could make Schemas produce generics... pub use crate::schemas::path; use preserves::value::AtomClass; use preserves::value::BinarySource; use preserves::value::BytesBinarySource; use preserves::value::CompoundClass; use preserves::value::IOValue; use preserves::value::NestedValue; use preserves::value::Reader; use preserves::value::Value; use preserves::value::ValueClass; use std::cell::RefCell; use std::collections::VecDeque; use std::iter::Iterator; use std::io; use std::rc::Rc; use thiserror::Error; #[derive(Debug)] enum Binop { Union, Intersection, } #[derive(Error, Debug)] pub enum CompilationError { #[error(transparent)] IoError(#[from] io::Error), #[error("Cannot mix binary operators")] MixedOperators, #[error("Invalid step")] InvalidStep, #[error(transparent)] RegexError(#[from] regex::Error), } pub enum Path { Root, Step(IOValue, Rc), } pub trait Predicate: std::fmt::Debug { fn test(&mut self, path: Rc, value: &IOValue) -> bool; } pub trait Step: std::fmt::Debug { fn accept(&mut self, path: Rc, value: &IOValue); fn finish(&mut self); fn reset(&mut self) -> Vec; } macro_rules! delegate_finish_and_reset { ($self:ident, $target:expr) => { fn finish(&mut $self) { $target.finish() } fn reset(&mut $self) -> Vec { $target.reset() } } } #[derive(Clone, Debug)] pub struct Node(pub Rc>); impl Node { fn new(s: S) -> Self { Node(Rc::new(RefCell::new(s))) } pub fn test(&self, path: Rc, value: &IOValue) -> bool { self.accept(path, value); self.finish(); !self.reset().is_empty() } pub fn accept(&self, path: Rc, value: &IOValue) { self.0.borrow_mut().accept(path, value) } pub fn finish(&self) { self.0.borrow_mut().finish() } pub fn reset(&self) -> Vec { self.0.borrow_mut().reset() } pub fn exec(&self, value: &IOValue) -> Vec { self.accept(Path::root(), value); self.finish(); self.reset() } } pub trait StepMaker { fn connect(&self, step: Node) -> Result; } impl Path { fn root() -> Rc { Rc::new(Path::Root) } fn step(self: &Rc, v: &IOValue) -> Rc { Rc::new(Path::Step(v.clone(), Rc::clone(self))) } } impl StepMaker for path::Selector { fn connect(&self, step: Node) -> Result { self.0.connect(step) } } impl StepMaker for Vec { fn connect(&self, mut step: Node) -> Result { for s in self.iter().rev() { step = s.connect(step)?; } Ok(step) } } #[derive(Debug)] enum CompiledPredicate { Selector(Node), Not(Box), Or(Vec), And(Vec), } fn compile_predicate(p: &path::Predicate) -> Result { match p { path::Predicate::Selector(b) => Ok(CompiledPredicate::Selector((&**b).connect(BoolCollector::new())?)), path::Predicate::Not { pred } => Ok(CompiledPredicate::Not(Box::new(compile_predicate(&**pred)?))), path::Predicate::Or { preds } => Ok(CompiledPredicate::Or(preds.iter().map(compile_predicate).collect::>()?)), path::Predicate::And { preds } => Ok(CompiledPredicate::And(preds.iter().map(compile_predicate).collect::>()?)), } } impl Predicate for CompiledPredicate { fn test(&mut self, path: Rc, value: &IOValue) -> bool { match self { CompiledPredicate::Selector(n) => n.test(path, value), CompiledPredicate::Not(p) => !p.test(path, value), CompiledPredicate::Or(ps) => { for p in ps.iter_mut() { if p.test(Rc::clone(&path), value) { return true; } } return false; }, CompiledPredicate::And(ps) => { for p in ps.iter_mut() { if !p.test(Rc::clone(&path), value) { return false; } } return true; }, } } } impl StepMaker for path::Step { fn connect(&self, step: Node) -> Result { match self { path::Step::Axis(b) => (&**b).connect(step), path::Step::Filter(b) => (&**b).connect(step), } } } #[derive(Debug)] struct AxisStep { step: Node, axis: path::Axis, } impl StepMaker for path::Axis { fn connect(&self, step: Node) -> Result { Ok(Node::new(AxisStep { step, axis: self.clone() })) } } impl Step for AxisStep { fn accept(&mut self, path: Rc, value: &IOValue) { match &self.axis { path::Axis::Values => { let path = path.step(value); for c in value.value().children() { self.step.accept(Rc::clone(&path), &c) } } path::Axis::Descendants => { let mut q = VecDeque::new(); q.push_back((path, value.clone())); while let Some((p, c)) = q.pop_front() { let p = p.step(&c); for cc in c.value().children() { q.push_back((Rc::clone(&p), cc.clone())); } self.step.accept(p, &c) } } path::Axis::At { key } => match value.value() { Value::String(s) => step_index(path.step(value), s.chars(), &key, |c| IOValue::new(String::from(c)), &mut self.step), Value::Record(r) => step_index(path.step(value), r.fields().iter(), &key, |v| v.clone(), &mut self.step), Value::Sequence(vs) => step_index(path.step(value), vs.iter(), &key, |v| v.clone(), &mut self.step), Value::Dictionary(d) => if let Some(v) = d.get(&key) { self.step.accept(path.step(value), v) }, _ => (), }, path::Axis::Label => if let Some(r) = value.value().as_record(None) { self.step.accept(path.step(value), r.label()) }, path::Axis::Keys => match value.value() { Value::String(s) => step_keys(path.step(value), s.len(), &mut self.step), Value::ByteString(bs) => step_keys(path.step(value), bs.len(), &mut self.step), Value::Symbol(s) => step_keys(path.step(value), s.len(), &mut self.step), Value::Record(r) => step_keys(path.step(value), r.arity(), &mut self.step), Value::Sequence(vs) => step_keys(path.step(value), vs.len(), &mut self.step), Value::Dictionary(d) => { let path = path.step(value); for k in d.keys() { self.step.accept(Rc::clone(&path), k) } }, _ => (), }, path::Axis::Length => match value.value() { Value::String(s) => self.step.accept(path.step(value), &IOValue::new(s.len())), Value::ByteString(bs) => self.step.accept(path.step(value), &IOValue::new(bs.len())), Value::Symbol(s) => self.step.accept(path.step(value), &IOValue::new(s.len())), Value::Record(r) => self.step.accept(path.step(value), &IOValue::new(r.arity())), Value::Sequence(vs) => self.step.accept(path.step(value), &IOValue::new(vs.len())), Value::Dictionary(d) => self.step.accept(path.step(value), &IOValue::new(d.len())), _ => self.step.accept(path.step(value), &IOValue::new(0)), }, path::Axis::Annotations => { let path = path.step(value); for c in value.annotations().slice() { self.step.accept(Rc::clone(&path), &c) } } path::Axis::Embedded => if let Some(d) = value.value().as_embedded() { self.step.accept(path.step(value), d) }, } } delegate_finish_and_reset!(self, self.step); } fn step_index, F: FnOnce(T) -> IOValue>( p: Rc, mut vs: Ts, key: &IOValue, f: F, step: &mut Node, ) { if let Some(i) = key.value().as_usize() { match vs.nth(i) { None => (), Some(v) => step.accept(p, &f(v)), } } } fn step_keys(p: Rc, count: usize, step: &mut Node) { for i in 0 .. count { step.accept(Rc::clone(&p), &IOValue::new(i)) } } impl StepMaker for path::Filter { fn connect(&self, step: Node) -> Result { match self { path::Filter::Nop => Ok(step), path::Filter::Compare { op, literal } => Ok(Node::new(CompareStep { op: (**op).clone(), literal: literal.clone(), step, })), path::Filter::Regex { regex } => Ok(Node::new(RegexStep { regex: regex::Regex::new(regex)?, step })), path::Filter::Test { pred } => Ok(Node::new(TestStep { pred: compile_predicate(&**pred)?, step })), path::Filter::Kind { kind } => Ok(Node::new(KindStep { kind: match &**kind { path::ValueKind::Boolean => ValueClass::Atomic(AtomClass::Boolean), path::ValueKind::Float => ValueClass::Atomic(AtomClass::Float), path::ValueKind::Double => ValueClass::Atomic(AtomClass::Double), path::ValueKind::SignedInteger => ValueClass::Atomic(AtomClass::SignedInteger), path::ValueKind::String => ValueClass::Atomic(AtomClass::String), path::ValueKind::ByteString => ValueClass::Atomic(AtomClass::ByteString), path::ValueKind::Symbol => ValueClass::Atomic(AtomClass::Symbol), path::ValueKind::Record => ValueClass::Compound(CompoundClass::Record), path::ValueKind::Sequence => ValueClass::Compound(CompoundClass::Sequence), path::ValueKind::Set => ValueClass::Compound(CompoundClass::Set), path::ValueKind::Dictionary => ValueClass::Compound(CompoundClass::Dictionary), path::ValueKind::Embedded => ValueClass::Embedded, }, step, })), } } } #[derive(Debug)] struct InertStep; impl Step for InertStep { fn accept(&mut self, _path: Rc, _value: &IOValue) {} fn finish(&mut self) {} fn reset(&mut self) -> Vec { vec![] } } #[derive(Debug)] struct CompareStep { op: path::Comparison, literal: IOValue, step: Node, } impl Step for CompareStep { fn accept(&mut self, path: Rc, value: &IOValue) { if match self.op { path::Comparison::Eq => value == &self.literal, path::Comparison::Ne => value != &self.literal, path::Comparison::Lt => value < &self.literal, path::Comparison::Ge => value >= &self.literal, path::Comparison::Gt => value > &self.literal, path::Comparison::Le => value <= &self.literal, } { self.step.accept(path, value) } } delegate_finish_and_reset!(self, self.step); } #[derive(Debug)] struct RegexStep { regex: regex::Regex, step: Node, } impl Step for RegexStep { fn accept(&mut self, path: Rc, value: &IOValue) { match value.value() { Value::String(s) => if self.regex.is_match(s) { self.step.accept(path, value) }, Value::Symbol(s) => if self.regex.is_match(s) { self.step.accept(path, value) }, _ => (), } } delegate_finish_and_reset!(self, self.step); } #[derive(Debug)] struct TestStep { pred: CompiledPredicate, step: Node, } impl Step for TestStep { fn accept(&mut self, path: Rc, value: &IOValue) { if self.pred.test(Rc::clone(&path), value) { self.step.accept(path, value) } } delegate_finish_and_reset!(self, self.step); } #[derive(Debug)] struct VecCollector { accumulator: Vec, } impl VecCollector { fn new() -> Node { Node::new(VecCollector { accumulator: Vec::new() }) } } impl Step for VecCollector { fn accept(&mut self, _path: Rc, value: &IOValue) { self.accumulator.push(value.clone()) } fn finish(&mut self) { } fn reset(&mut self) -> Vec { std::mem::take(&mut self.accumulator) } } #[derive(Debug)] struct BoolCollector { seen_value: bool, } impl BoolCollector { fn new() -> Node { Node::new(BoolCollector { seen_value: false }) } } impl Step for BoolCollector { fn accept(&mut self, _path: Rc, _value: &IOValue) { self.seen_value = true } fn finish(&mut self) { } fn reset(&mut self) -> Vec { let result = if self.seen_value { vec![IOValue::new(true)] } else { vec![] }; self.seen_value = false; result } } #[derive(Debug)] struct KindStep { kind: ValueClass, step: Node, } impl Step for KindStep { fn accept(&mut self, path: Rc, value: &IOValue) { if value.value_class() == self.kind { self.step.accept(path, value) } } delegate_finish_and_reset!(self, self.step); } fn split_values_by_symbol<'a>(tokens: &'a [IOValue], separator: &str) -> Vec<&'a [IOValue]> { tokens .split(|t| matches!(t.value().as_symbol(), Some(s) if s == separator)) .collect() } fn split_binop(tokens: &[IOValue]) -> Result<(Vec<&[IOValue]>, Option), CompilationError> { let union_pieces = split_values_by_symbol(&tokens, "+"); let intersection_pieces = split_values_by_symbol(&tokens, "&"); match (union_pieces.len(), intersection_pieces.len()) { (1, 1) => Ok((union_pieces, None)), (_, 1) => Ok((union_pieces, Some(Binop::Union))), (1, _) => Ok((intersection_pieces, Some(Binop::Intersection))), _ => Err(CompilationError::MixedOperators), } } pub fn parse_selector(tokens: &[IOValue]) -> Result { let mut steps = Vec::new(); let mut tokens = tokens; while let Some((s, remaining)) = parse_step(tokens)? { steps.push(s); tokens = remaining; } Ok(path::Selector(steps)) } pub fn parse_predicate(tokens: &[IOValue]) -> Result { let (pieces, binop) = split_binop(tokens)?; match binop { None => parse_non_binop(&pieces[0]), Some(o) => { let preds = pieces.into_iter().map(|ts| parse_non_binop(&ts)).collect::>()?; Ok(match o { Binop::Union => path::Predicate::Or { preds }, Binop::Intersection => path::Predicate::And { preds }, }) } } } fn parse_non_binop(tokens: &[IOValue]) -> Result { if !tokens.is_empty() { let t = tokens[0].value(); if let Some("!") = t.as_symbol().map(|s| s.as_str()) { return Ok(path::Predicate::Not { pred: Box::new(parse_non_binop(&tokens[1..])?) }); } } Ok(path::Predicate::Selector(Box::new(parse_selector(tokens)?))) } fn parse_step(tokens: &[IOValue]) -> Result, CompilationError> { if tokens.is_empty() { return Ok(None); } let remainder = &tokens[1..]; if tokens[0].value().is_sequence() { return Ok(Some((path::Step::Filter(Box::new(path::Filter::Test { pred: Box::new(parse_predicate(tokens[0].value().as_sequence().unwrap())?), })), remainder))); } match tokens[0].value().as_symbol() { None => return Err(CompilationError::InvalidStep), Some(t) => match t.as_str() { "/" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Values)), remainder))), "//" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Descendants)), remainder))), "." => { let (key, remainder) = pop_step_arg(remainder)?; Ok(Some((path::Step::Axis(Box::new(path::Axis::At { key })), remainder))) } ".^" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Label)), remainder))), ".keys" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Keys)), remainder))), ".length" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Length)), remainder))), ".annotations" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Annotations)), remainder))), ".embedded" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Embedded)), remainder))), "*" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Nop)), remainder))), "eq" => parse_comparison(remainder, path::Comparison::Eq), "=" => parse_comparison(remainder, path::Comparison::Eq), "ne" => parse_comparison(remainder, path::Comparison::Ne), "lt" => parse_comparison(remainder, path::Comparison::Lt), "gt" => parse_comparison(remainder, path::Comparison::Gt), "le" => parse_comparison(remainder, path::Comparison::Le), "ge" => parse_comparison(remainder, path::Comparison::Ge), "re" | "=r" => { let (regex_val, remainder) = pop_step_arg(remainder)?; let regex = regex_val.value().to_string().map_err(|_| CompilationError::InvalidStep)?.clone(); let _ = regex::Regex::new(®ex)?; Ok(Some((path::Step::Filter(Box::new(path::Filter::Regex { regex })), remainder))) } "^" => { let (literal, remainder) = pop_step_arg(remainder)?; Ok(Some((path::Step::Filter(Box::new(path::Filter::Test { pred: Box::new(path::Predicate::Selector(Box::new(path::Selector(vec![ path::Step::Axis(Box::new(path::Axis::Label)), path::Step::Filter(Box::new(path::Filter::Compare { op: Box::new(path::Comparison::Eq), literal, })), ])))), })), remainder))) } "bool" => Ok(Some((path::Step::from(path::ValueKind::Boolean), remainder))), "float" => Ok(Some((path::Step::from(path::ValueKind::Float), remainder))), "double" => Ok(Some((path::Step::from(path::ValueKind::Double), remainder))), "int" => Ok(Some((path::Step::from(path::ValueKind::SignedInteger), remainder))), "string" => Ok(Some((path::Step::from(path::ValueKind::String), remainder))), "bytes" => Ok(Some((path::Step::from(path::ValueKind::ByteString), remainder))), "symbol" => Ok(Some((path::Step::from(path::ValueKind::Symbol), remainder))), "rec" => Ok(Some((path::Step::from(path::ValueKind::Record), remainder))), "seq" => Ok(Some((path::Step::from(path::ValueKind::Sequence), remainder))), "set" => Ok(Some((path::Step::from(path::ValueKind::Set), remainder))), "dict" => Ok(Some((path::Step::from(path::ValueKind::Dictionary), remainder))), "embedded" => Ok(Some((path::Step::from(path::ValueKind::Embedded), remainder))), _ => Err(CompilationError::InvalidStep), } } } impl From for path::Step { fn from(k: path::ValueKind) -> Self { path::Step::Filter(Box::new(path::Filter::Kind { kind: Box::new(k), })) } } fn pop_step_arg(tokens: &[IOValue]) -> Result<(IOValue, &[IOValue]), CompilationError> { if tokens.is_empty() { return Err(CompilationError::InvalidStep); } Ok((tokens[0].clone(), &tokens[1..])) } fn parse_comparison( tokens: &[IOValue], op: path::Comparison, ) -> Result, CompilationError> { let (literal, remainder) = pop_step_arg(tokens)?; Ok(Some((path::Step::Filter(Box::new(path::Filter::Compare { op: Box::new(op), literal, })), remainder))) } impl path::Selector { pub fn compile(&self) -> Result { self.connect(VecCollector::new()) } pub fn exec(&self, value: &IOValue) -> Result, CompilationError> { Ok(self.compile()?.exec(value)) } } impl std::str::FromStr for path::Selector { type Err = CompilationError; fn from_str(s: &str) -> Result { parse_selector(&(BytesBinarySource::new(s.as_bytes()) .text_iovalues() .configured(false) .collect::, _>>()?)) } } impl std::str::FromStr for Node { type Err = CompilationError; fn from_str(s: &str) -> Result { let expr = path::Selector::from_str(s)?; expr.compile() } }