diff --git a/implementations/rust/preserves-path/src/error.rs b/implementations/rust/preserves-path/src/error.rs new file mode 100644 index 0000000..1780594 --- /dev/null +++ b/implementations/rust/preserves-path/src/error.rs @@ -0,0 +1,15 @@ +use std::io; + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum CompilationError { + #[error(transparent)] + IoError(#[from] io::Error), + #[error("Cannot mix binary operators")] + MixedOperators, + #[error("Invalid step")] + InvalidStep, + #[error(transparent)] + RegexError(#[from] regex::Error), +} diff --git a/implementations/rust/preserves-path/src/lib.rs b/implementations/rust/preserves-path/src/lib.rs index 958603f..c36c68b 100644 --- a/implementations/rust/preserves-path/src/lib.rs +++ b/implementations/rust/preserves-path/src/lib.rs @@ -1,684 +1,17 @@ +pub mod error; +pub mod parse; +pub mod path; +pub mod predicate; pub mod schemas; +pub mod step; -// Paths operate on IOValues because the AST includes keys of IOValue type. -// If we could make Schemas produce generics... 
+pub use error::CompilationError; -pub use crate::schemas::path; +pub use parse::parse_selector; +pub use parse::parse_predicate; -use num::bigint::BigInt; -use num::traits::cast::ToPrimitive; -use num::traits::cast::FromPrimitive; +pub use schemas::path::Predicate; +pub use schemas::path::Selector; +pub use schemas::path::Step; -use preserves::value::AtomClass; -use preserves::value::BinarySource; -use preserves::value::BytesBinarySource; -use preserves::value::CompoundClass; -use preserves::value::IOValue; -use preserves::value::NestedValue; -use preserves::value::Reader; -use preserves::value::Value; -use preserves::value::ValueClass; - -use std::cell::RefCell; -use std::collections::VecDeque; -use std::iter::Iterator; -use std::io; -use std::rc::Rc; - -use thiserror::Error; - -#[derive(Debug)] -enum Binop { - Union, - Intersection, -} - -#[derive(Error, Debug)] -pub enum CompilationError { - #[error(transparent)] - IoError(#[from] io::Error), - #[error("Cannot mix binary operators")] - MixedOperators, - #[error("Invalid step")] - InvalidStep, - #[error(transparent)] - RegexError(#[from] regex::Error), -} - -pub enum Path { - Root, - Step(IOValue, Rc), -} - -pub trait Predicate: std::fmt::Debug { - fn test(&mut self, path: Rc, value: &IOValue) -> bool; -} - -pub trait Step: std::fmt::Debug { - fn accept(&mut self, path: Rc, value: &IOValue); - fn finish(&mut self); - fn reset(&mut self) -> Vec; -} - -macro_rules! 
delegate_finish_and_reset { - ($self:ident, $target:expr) => { - fn finish(&mut $self) { $target.finish() } - fn reset(&mut $self) -> Vec { $target.reset() } - } -} - -#[derive(Clone, Debug)] -pub struct Node(pub Rc>); - -impl Node { - fn new(s: S) -> Self { - Node(Rc::new(RefCell::new(s))) - } - - pub fn test(&self, path: Rc, value: &IOValue) -> bool { - self.accept(path, value); - self.finish(); - !self.reset().is_empty() - } - - pub fn accept(&self, path: Rc, value: &IOValue) { - self.0.borrow_mut().accept(path, value) - } - - pub fn finish(&self) { - self.0.borrow_mut().finish() - } - - pub fn reset(&self) -> Vec { - self.0.borrow_mut().reset() - } - - pub fn exec(&self, value: &IOValue) -> Vec { - self.accept(Path::root(), value); - self.finish(); - self.reset() - } -} - -pub trait StepMaker { - fn connect(&self, step: Node) -> Result; -} - -impl Path { - fn root() -> Rc { - Rc::new(Path::Root) - } - - fn step(self: &Rc, v: &IOValue) -> Rc { - Rc::new(Path::Step(v.clone(), Rc::clone(self))) - } -} - -impl StepMaker for path::Selector { - fn connect(&self, step: Node) -> Result { - self.0.connect(step) - } -} - -impl StepMaker for Vec { - fn connect(&self, mut step: Node) -> Result { - for s in self.iter().rev() { - step = s.connect(step)?; - } - Ok(step) - } -} - -#[derive(Debug)] -enum CompiledPredicate { - Selector(Node), - Not(Box), - Or(Vec), - And(Vec), -} - -fn compile_predicate(p: &path::Predicate) -> Result { - match p { - path::Predicate::Selector(b) => - Ok(CompiledPredicate::Selector((&**b).connect(BoolCollector::new())?)), - path::Predicate::Not { pred } => - Ok(CompiledPredicate::Not(Box::new(compile_predicate(&**pred)?))), - path::Predicate::Or { preds } => - Ok(CompiledPredicate::Or(preds.iter().map(compile_predicate).collect::>()?)), - path::Predicate::And { preds } => - Ok(CompiledPredicate::And(preds.iter().map(compile_predicate).collect::>()?)), - } -} - -impl Predicate for CompiledPredicate { - fn test(&mut self, path: Rc, value: &IOValue) 
-> bool { - match self { - CompiledPredicate::Selector(n) => n.test(path, value), - CompiledPredicate::Not(p) => !p.test(path, value), - CompiledPredicate::Or(ps) => { - for p in ps.iter_mut() { - if p.test(Rc::clone(&path), value) { - return true; - } - } - return false; - }, - CompiledPredicate::And(ps) => { - for p in ps.iter_mut() { - if !p.test(Rc::clone(&path), value) { - return false; - } - } - return true; - }, - } - } -} - -impl StepMaker for path::Step { - fn connect(&self, step: Node) -> Result { - match self { - path::Step::Axis(b) => (&**b).connect(step), - path::Step::Filter(b) => (&**b).connect(step), - } - } -} - -#[derive(Debug)] -struct AxisStep { - step: Node, - axis: path::Axis, -} - -impl StepMaker for path::Axis { - fn connect(&self, step: Node) -> Result { - Ok(Node::new(AxisStep { step, axis: self.clone() })) - } -} - -impl Step for AxisStep { - fn accept(&mut self, path: Rc, value: &IOValue) { - match &self.axis { - path::Axis::Values => { - let path = path.step(value); - for c in value.value().children() { - self.step.accept(Rc::clone(&path), &c) - } - } - path::Axis::Descendants => { - let mut q = VecDeque::new(); - q.push_back((path, value.clone())); - while let Some((p, c)) = q.pop_front() { - let p = p.step(&c); - for cc in c.value().children() { - q.push_back((Rc::clone(&p), cc.clone())); - } - self.step.accept(p, &c) - } - } - path::Axis::At { key } => match value.value() { - Value::String(s) => - step_index(path.step(value), s.chars(), &key, |c| IOValue::new(String::from(c)), &mut self.step), - Value::Record(r) => - step_index(path.step(value), r.fields().iter(), &key, |v| v.clone(), &mut self.step), - Value::Sequence(vs) => - step_index(path.step(value), vs.iter(), &key, |v| v.clone(), &mut self.step), - Value::Dictionary(d) => - if let Some(v) = d.get(&key) { - self.step.accept(path.step(value), v) - }, - _ => - (), - }, - path::Axis::Label => if let Some(r) = value.value().as_record(None) { - self.step.accept(path.step(value), 
r.label()) - }, - path::Axis::Keys => match value.value() { - Value::String(s) => step_keys(path.step(value), s.len(), &mut self.step), - Value::ByteString(bs) => step_keys(path.step(value), bs.len(), &mut self.step), - Value::Symbol(s) => step_keys(path.step(value), s.len(), &mut self.step), - Value::Record(r) => step_keys(path.step(value), r.arity(), &mut self.step), - Value::Sequence(vs) => step_keys(path.step(value), vs.len(), &mut self.step), - Value::Dictionary(d) => { - let path = path.step(value); - for k in d.keys() { - self.step.accept(Rc::clone(&path), k) - } - }, - _ => (), - }, - path::Axis::Length => match value.value() { - Value::String(s) => self.step.accept(path.step(value), &IOValue::new(s.len())), - Value::ByteString(bs) => self.step.accept(path.step(value), &IOValue::new(bs.len())), - Value::Symbol(s) => self.step.accept(path.step(value), &IOValue::new(s.len())), - Value::Record(r) => self.step.accept(path.step(value), &IOValue::new(r.arity())), - Value::Sequence(vs) => self.step.accept(path.step(value), &IOValue::new(vs.len())), - Value::Dictionary(d) => self.step.accept(path.step(value), &IOValue::new(d.len())), - _ => self.step.accept(path.step(value), &IOValue::new(0)), - }, - path::Axis::Annotations => { - let path = path.step(value); - for c in value.annotations().slice() { - self.step.accept(Rc::clone(&path), &c) - } - } - path::Axis::Embedded => if let Some(d) = value.value().as_embedded() { - self.step.accept(path.step(value), d) - }, - } - } - - delegate_finish_and_reset!(self, self.step); -} - -fn step_index, F: FnOnce(T) -> IOValue>( - p: Rc, - mut vs: Ts, - key: &IOValue, - f: F, - step: &mut Node, -) { - if let Some(i) = key.value().as_usize() { - match vs.nth(i) { - None => (), - Some(v) => step.accept(p, &f(v)), - } - } -} - -fn step_keys(p: Rc, count: usize, step: &mut Node) { - for i in 0 .. 
count { - step.accept(Rc::clone(&p), &IOValue::new(i)) - } -} - -impl StepMaker for path::Filter { - fn connect(&self, step: Node) -> Result { - match self { - path::Filter::Nop => Ok(step), - path::Filter::Compare { op, literal } => Ok(Node::new(CompareStep { - op: (**op).clone(), - literal: literal.clone(), - step, - })), - path::Filter::Regex { regex } => Ok(Node::new(RegexStep { regex: regex::Regex::new(regex)?, step })), - path::Filter::Test { pred } => Ok(Node::new(TestStep { pred: compile_predicate(&**pred)?, step })), - path::Filter::Real => Ok(Node::new(RealStep { step })), - path::Filter::Int => Ok(Node::new(IntStep { step })), - path::Filter::Kind { kind } => Ok(Node::new(KindStep { - kind: match &**kind { - path::ValueKind::Boolean => ValueClass::Atomic(AtomClass::Boolean), - path::ValueKind::Float => ValueClass::Atomic(AtomClass::Float), - path::ValueKind::Double => ValueClass::Atomic(AtomClass::Double), - path::ValueKind::SignedInteger => ValueClass::Atomic(AtomClass::SignedInteger), - path::ValueKind::String => ValueClass::Atomic(AtomClass::String), - path::ValueKind::ByteString => ValueClass::Atomic(AtomClass::ByteString), - path::ValueKind::Symbol => ValueClass::Atomic(AtomClass::Symbol), - path::ValueKind::Record => ValueClass::Compound(CompoundClass::Record), - path::ValueKind::Sequence => ValueClass::Compound(CompoundClass::Sequence), - path::ValueKind::Set => ValueClass::Compound(CompoundClass::Set), - path::ValueKind::Dictionary => ValueClass::Compound(CompoundClass::Dictionary), - path::ValueKind::Embedded => ValueClass::Embedded, - }, - step, - })), - } - } -} - -#[derive(Debug)] -struct InertStep; - -impl Step for InertStep { - fn accept(&mut self, _path: Rc, _value: &IOValue) {} - fn finish(&mut self) {} - fn reset(&mut self) -> Vec { vec![] } -} - -#[derive(Debug)] -struct CompareStep { - op: path::Comparison, - literal: IOValue, - step: Node, -} - -impl Step for CompareStep { - fn accept(&mut self, path: Rc, value: &IOValue) { - if match 
self.op { - path::Comparison::Eq => value == &self.literal, - path::Comparison::Ne => value != &self.literal, - path::Comparison::Lt => value < &self.literal, - path::Comparison::Ge => value >= &self.literal, - path::Comparison::Gt => value > &self.literal, - path::Comparison::Le => value <= &self.literal, - } { - self.step.accept(path, value) - } - } - - delegate_finish_and_reset!(self, self.step); -} - -#[derive(Debug)] -struct RegexStep { - regex: regex::Regex, - step: Node, -} - -impl Step for RegexStep { - fn accept(&mut self, path: Rc, value: &IOValue) { - match value.value() { - Value::String(s) => if self.regex.is_match(s) { self.step.accept(path, value) }, - Value::Symbol(s) => if self.regex.is_match(s) { self.step.accept(path, value) }, - _ => (), - } - } - - delegate_finish_and_reset!(self, self.step); -} - -#[derive(Debug)] -struct TestStep { - pred: CompiledPredicate, - step: Node, -} - -impl Step for TestStep { - fn accept(&mut self, path: Rc, value: &IOValue) { - if self.pred.test(Rc::clone(&path), value) { - self.step.accept(path, value) - } - } - - delegate_finish_and_reset!(self, self.step); -} - -#[derive(Debug)] -struct RealStep { - step: Node, -} - -impl Step for RealStep { - fn accept(&mut self, path: Rc, value: &IOValue) { - match value.value() { - Value::SignedInteger(i) => if let Some(r) = BigInt::from(i).to_f64() { - self.step.accept(path, &IOValue::new(r)) - }, - Value::Float(f) => self.step.accept(path, &IOValue::new(f32::from(*f) as f64)), - Value::Double(_) => self.step.accept(path, value), - _ => (), - } - } - - delegate_finish_and_reset!(self, self.step); -} - -#[derive(Debug)] -struct IntStep { - step: Node, -} - -impl Step for IntStep { - fn accept(&mut self, path: Rc, value: &IOValue) { - match value.value() { - Value::SignedInteger(_) => self.step.accept(path, value), - Value::Float(f) => if let Some(i) = BigInt::from_f32(f32::from(*f)) { - self.step.accept(path, &IOValue::new(i)) - }, - Value::Double(d) => if let Some(i) = 
BigInt::from_f64(f64::from(*d)) { - self.step.accept(path, &IOValue::new(i)) - }, - _ => (), - } - } - - delegate_finish_and_reset!(self, self.step); -} - -#[derive(Debug)] -struct VecCollector { - accumulator: Vec, -} - -impl VecCollector { - fn new() -> Node { - Node::new(VecCollector { accumulator: Vec::new() }) - } -} - -impl Step for VecCollector { - fn accept(&mut self, _path: Rc, value: &IOValue) { - self.accumulator.push(value.clone()) - } - - fn finish(&mut self) { - } - - fn reset(&mut self) -> Vec { - std::mem::take(&mut self.accumulator) - } -} - -#[derive(Debug)] -struct BoolCollector { - seen_value: bool, -} - -impl BoolCollector { - fn new() -> Node { - Node::new(BoolCollector { seen_value: false }) - } -} - -impl Step for BoolCollector { - fn accept(&mut self, _path: Rc, _value: &IOValue) { - self.seen_value = true - } - - fn finish(&mut self) { - } - - fn reset(&mut self) -> Vec { - let result = if self.seen_value { vec![IOValue::new(true)] } else { vec![] }; - self.seen_value = false; - result - } -} - -#[derive(Debug)] -struct KindStep { - kind: ValueClass, - step: Node, -} - -impl Step for KindStep { - fn accept(&mut self, path: Rc, value: &IOValue) { - if value.value_class() == self.kind { - self.step.accept(path, value) - } - } - - delegate_finish_and_reset!(self, self.step); -} - -fn split_values_by_symbol<'a>(tokens: &'a [IOValue], separator: &str) -> Vec<&'a [IOValue]> { - tokens - .split(|t| matches!(t.value().as_symbol(), Some(s) if s == separator)) - .collect() -} - -fn split_binop(tokens: &[IOValue]) -> Result<(Vec<&[IOValue]>, Option), CompilationError> { - let union_pieces = split_values_by_symbol(&tokens, "+"); - let intersection_pieces = split_values_by_symbol(&tokens, "&"); - match (union_pieces.len(), intersection_pieces.len()) { - (1, 1) => Ok((union_pieces, None)), - (_, 1) => Ok((union_pieces, Some(Binop::Union))), - (1, _) => Ok((intersection_pieces, Some(Binop::Intersection))), - _ => Err(CompilationError::MixedOperators), - 
} -} - -pub fn parse_selector(tokens: &[IOValue]) -> Result { - let mut steps = Vec::new(); - let mut tokens = tokens; - while let Some((s, remaining)) = parse_step(tokens)? { - steps.push(s); - tokens = remaining; - } - Ok(path::Selector(steps)) -} - -pub fn parse_predicate(tokens: &[IOValue]) -> Result { - let (pieces, binop) = split_binop(tokens)?; - match binop { - None => parse_non_binop(&pieces[0]), - Some(o) => { - let preds = pieces.into_iter().map(|ts| parse_non_binop(&ts)).collect::>()?; - Ok(match o { - Binop::Union => path::Predicate::Or { preds }, - Binop::Intersection => path::Predicate::And { preds }, - }) - } - } -} - -fn parse_non_binop(tokens: &[IOValue]) -> Result { - if !tokens.is_empty() { - let t = tokens[0].value(); - - if let Some("!") = t.as_symbol().map(|s| s.as_str()) { - return Ok(path::Predicate::Not { pred: Box::new(parse_non_binop(&tokens[1..])?) }); - } - } - - Ok(path::Predicate::Selector(Box::new(parse_selector(tokens)?))) -} - -fn parse_step(tokens: &[IOValue]) -> Result, CompilationError> { - if tokens.is_empty() { - return Ok(None); - } - - let remainder = &tokens[1..]; - - if tokens[0].value().is_sequence() { - return Ok(Some((path::Step::Filter(Box::new(path::Filter::Test { - pred: Box::new(parse_predicate(tokens[0].value().as_sequence().unwrap())?), - })), remainder))); - } - - match tokens[0].value().as_symbol() { - None => return Err(CompilationError::InvalidStep), - Some(t) => match t.as_str() { - "/" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Values)), remainder))), - "//" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Descendants)), remainder))), - "." 
=> { - let (key, remainder) = pop_step_arg(remainder)?; - Ok(Some((path::Step::Axis(Box::new(path::Axis::At { key })), remainder))) - } - ".^" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Label)), remainder))), - ".keys" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Keys)), remainder))), - ".length" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Length)), remainder))), - ".annotations" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Annotations)), remainder))), - ".embedded" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Embedded)), remainder))), - - "*" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Nop)), remainder))), - "eq" | "=" => parse_comparison(remainder, path::Comparison::Eq), - "ne" | "!=" => parse_comparison(remainder, path::Comparison::Ne), - "lt" => parse_comparison(remainder, path::Comparison::Lt), - "gt" => parse_comparison(remainder, path::Comparison::Gt), - "le" => parse_comparison(remainder, path::Comparison::Le), - "ge" => parse_comparison(remainder, path::Comparison::Ge), - "re" | "=r" => { - let (regex_val, remainder) = pop_step_arg(remainder)?; - let regex = regex_val.value().to_string().map_err(|_| CompilationError::InvalidStep)?.clone(); - let _ = regex::Regex::new(&regex)?; - Ok(Some((path::Step::Filter(Box::new(path::Filter::Regex { regex })), remainder))) - } - "^" => { - let (literal, remainder) = pop_step_arg(remainder)?; - Ok(Some((path::Step::Filter(Box::new(path::Filter::Test { - pred: Box::new(path::Predicate::Selector(Box::new(path::Selector(vec![ - path::Step::Axis(Box::new(path::Axis::Label)), - path::Step::Filter(Box::new(path::Filter::Compare { - op: Box::new(path::Comparison::Eq), - literal, - })), - ])))), - })), remainder))) - } - - "~real" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Real)), remainder))), - "~int" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Int)), remainder))), - - "bool" => Ok(Some((path::Step::from(path::ValueKind::Boolean), remainder))), - "float" => 
Ok(Some((path::Step::from(path::ValueKind::Float), remainder))), - "double" => Ok(Some((path::Step::from(path::ValueKind::Double), remainder))), - "int" => Ok(Some((path::Step::from(path::ValueKind::SignedInteger), remainder))), - "string" => Ok(Some((path::Step::from(path::ValueKind::String), remainder))), - "bytes" => Ok(Some((path::Step::from(path::ValueKind::ByteString), remainder))), - "symbol" => Ok(Some((path::Step::from(path::ValueKind::Symbol), remainder))), - "rec" => Ok(Some((path::Step::from(path::ValueKind::Record), remainder))), - "seq" => Ok(Some((path::Step::from(path::ValueKind::Sequence), remainder))), - "set" => Ok(Some((path::Step::from(path::ValueKind::Set), remainder))), - "dict" => Ok(Some((path::Step::from(path::ValueKind::Dictionary), remainder))), - "embedded" => Ok(Some((path::Step::from(path::ValueKind::Embedded), remainder))), - - _ => Err(CompilationError::InvalidStep), - } - } -} - -impl From for path::Step { - fn from(k: path::ValueKind) -> Self { - path::Step::Filter(Box::new(path::Filter::Kind { - kind: Box::new(k), - })) - } -} - -fn pop_step_arg(tokens: &[IOValue]) -> Result<(IOValue, &[IOValue]), CompilationError> { - if tokens.is_empty() { - return Err(CompilationError::InvalidStep); - } - Ok((tokens[0].clone(), &tokens[1..])) -} - -fn parse_comparison( - tokens: &[IOValue], - op: path::Comparison, -) -> Result, CompilationError> { - let (literal, remainder) = pop_step_arg(tokens)?; - Ok(Some((path::Step::Filter(Box::new(path::Filter::Compare { - op: Box::new(op), - literal, - })), remainder))) -} - -impl path::Selector { - pub fn compile(&self) -> Result { - self.connect(VecCollector::new()) - } - - pub fn exec(&self, value: &IOValue) -> Result, CompilationError> { - Ok(self.compile()?.exec(value)) - } -} - -impl std::str::FromStr for path::Selector { - type Err = CompilationError; - fn from_str(s: &str) -> Result { - parse_selector(&(BytesBinarySource::new(s.as_bytes()) - .text_iovalues() - .configured(false) - .collect::, 
_>>()?)) - } -} - -impl std::str::FromStr for Node { - type Err = CompilationError; - fn from_str(s: &str) -> Result { - let expr = path::Selector::from_str(s)?; - expr.compile() - } -} +pub use step::Node; diff --git a/implementations/rust/preserves-path/src/parse.rs b/implementations/rust/preserves-path/src/parse.rs new file mode 100644 index 0000000..6466006 --- /dev/null +++ b/implementations/rust/preserves-path/src/parse.rs @@ -0,0 +1,189 @@ +use crate::CompilationError; +use crate::schemas::path; +use crate::step::Node; + +use preserves::value::BinarySource; +use preserves::value::BytesBinarySource; +use preserves::value::IOValue; +use preserves::value::NestedValue; +use preserves::value::Reader; + +use std::iter::Iterator; + +#[derive(Debug)] +enum Binop { + Union, + Intersection, +} + +fn split_values_by_symbol<'a>(tokens: &'a [IOValue], separator: &str) -> Vec<&'a [IOValue]> { + tokens + .split(|t| matches!(t.value().as_symbol(), Some(s) if s == separator)) + .collect() +} + +fn split_binop(tokens: &[IOValue]) -> Result<(Vec<&[IOValue]>, Option), CompilationError> { + let union_pieces = split_values_by_symbol(&tokens, "+"); + let intersection_pieces = split_values_by_symbol(&tokens, "&"); + match (union_pieces.len(), intersection_pieces.len()) { + (1, 1) => Ok((union_pieces, None)), + (_, 1) => Ok((union_pieces, Some(Binop::Union))), + (1, _) => Ok((intersection_pieces, Some(Binop::Intersection))), + _ => Err(CompilationError::MixedOperators), + } +} + +pub fn parse_selector(tokens: &[IOValue]) -> Result { + let mut steps = Vec::new(); + let mut tokens = tokens; + while let Some((s, remaining)) = parse_step(tokens)? 
{ + steps.push(s); + tokens = remaining; + } + Ok(path::Selector(steps)) +} + +pub fn parse_predicate(tokens: &[IOValue]) -> Result { + let (pieces, binop) = split_binop(tokens)?; + match binop { + None => parse_non_binop(&pieces[0]), + Some(o) => { + let preds = pieces.into_iter().map(|ts| parse_non_binop(&ts)).collect::>()?; + Ok(match o { + Binop::Union => path::Predicate::Or { preds }, + Binop::Intersection => path::Predicate::And { preds }, + }) + } + } +} + +fn parse_non_binop(tokens: &[IOValue]) -> Result { + if !tokens.is_empty() { + let t = tokens[0].value(); + + if let Some("!") = t.as_symbol().map(|s| s.as_str()) { + return Ok(path::Predicate::Not { pred: Box::new(parse_non_binop(&tokens[1..])?) }); + } + } + + Ok(path::Predicate::Selector(Box::new(parse_selector(tokens)?))) +} + +fn parse_step(tokens: &[IOValue]) -> Result, CompilationError> { + if tokens.is_empty() { + return Ok(None); + } + + let remainder = &tokens[1..]; + + if tokens[0].value().is_sequence() { + return Ok(Some((path::Step::Filter(Box::new(path::Filter::Test { + pred: Box::new(parse_predicate(tokens[0].value().as_sequence().unwrap())?), + })), remainder))); + } + + match tokens[0].value().as_symbol() { + None => return Err(CompilationError::InvalidStep), + Some(t) => match t.as_str() { + "/" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Values)), remainder))), + "//" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Descendants)), remainder))), + "." 
=> { + let (key, remainder) = pop_step_arg(remainder)?; + Ok(Some((path::Step::Axis(Box::new(path::Axis::At { key })), remainder))) + } + ".^" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Label)), remainder))), + ".keys" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Keys)), remainder))), + ".length" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Length)), remainder))), + ".annotations" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Annotations)), remainder))), + ".embedded" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Embedded)), remainder))), + + "*" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Nop)), remainder))), + "eq" | "=" => parse_comparison(remainder, path::Comparison::Eq), + "ne" | "!=" => parse_comparison(remainder, path::Comparison::Ne), + "lt" => parse_comparison(remainder, path::Comparison::Lt), + "gt" => parse_comparison(remainder, path::Comparison::Gt), + "le" => parse_comparison(remainder, path::Comparison::Le), + "ge" => parse_comparison(remainder, path::Comparison::Ge), + "re" | "=r" => { + let (regex_val, remainder) = pop_step_arg(remainder)?; + let regex = regex_val.value().to_string().map_err(|_| CompilationError::InvalidStep)?.clone(); + let _ = regex::Regex::new(&regex)?; + Ok(Some((path::Step::Filter(Box::new(path::Filter::Regex { regex })), remainder))) + } + "^" => { + let (literal, remainder) = pop_step_arg(remainder)?; + Ok(Some((path::Step::Filter(Box::new(path::Filter::Test { + pred: Box::new(path::Predicate::Selector(Box::new(path::Selector(vec![ + path::Step::Axis(Box::new(path::Axis::Label)), + path::Step::Filter(Box::new(path::Filter::Compare { + op: Box::new(path::Comparison::Eq), + literal, + })), + ])))), + })), remainder))) + } + + "~real" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Real)), remainder))), + "~int" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Int)), remainder))), + + "bool" => Ok(Some((path::Step::from(path::ValueKind::Boolean), remainder))), + "float" => 
Ok(Some((path::Step::from(path::ValueKind::Float), remainder))), + "double" => Ok(Some((path::Step::from(path::ValueKind::Double), remainder))), + "int" => Ok(Some((path::Step::from(path::ValueKind::SignedInteger), remainder))), + "string" => Ok(Some((path::Step::from(path::ValueKind::String), remainder))), + "bytes" => Ok(Some((path::Step::from(path::ValueKind::ByteString), remainder))), + "symbol" => Ok(Some((path::Step::from(path::ValueKind::Symbol), remainder))), + "rec" => Ok(Some((path::Step::from(path::ValueKind::Record), remainder))), + "seq" => Ok(Some((path::Step::from(path::ValueKind::Sequence), remainder))), + "set" => Ok(Some((path::Step::from(path::ValueKind::Set), remainder))), + "dict" => Ok(Some((path::Step::from(path::ValueKind::Dictionary), remainder))), + "embedded" => Ok(Some((path::Step::from(path::ValueKind::Embedded), remainder))), + + _ => Err(CompilationError::InvalidStep), + } + } +} + +impl From for path::Step { + fn from(k: path::ValueKind) -> Self { + path::Step::Filter(Box::new(path::Filter::Kind { + kind: Box::new(k), + })) + } +} + +fn pop_step_arg(tokens: &[IOValue]) -> Result<(IOValue, &[IOValue]), CompilationError> { + if tokens.is_empty() { + return Err(CompilationError::InvalidStep); + } + Ok((tokens[0].clone(), &tokens[1..])) +} + +fn parse_comparison( + tokens: &[IOValue], + op: path::Comparison, +) -> Result, CompilationError> { + let (literal, remainder) = pop_step_arg(tokens)?; + Ok(Some((path::Step::Filter(Box::new(path::Filter::Compare { + op: Box::new(op), + literal, + })), remainder))) +} + +impl std::str::FromStr for path::Selector { + type Err = CompilationError; + fn from_str(s: &str) -> Result { + parse_selector(&(BytesBinarySource::new(s.as_bytes()) + .text_iovalues() + .configured(false) + .collect::, _>>()?)) + } +} + +impl std::str::FromStr for Node { + type Err = CompilationError; + fn from_str(s: &str) -> Result { + let expr = path::Selector::from_str(s)?; + expr.compile() + } +} diff --git 
a/implementations/rust/preserves-path/src/path.rs b/implementations/rust/preserves-path/src/path.rs new file mode 100644 index 0000000..adf6fed --- /dev/null +++ b/implementations/rust/preserves-path/src/path.rs @@ -0,0 +1,18 @@ +use preserves::value::IOValue; + +use std::rc::Rc; + +pub enum Path { + Root, + Step(IOValue, Rc), +} + +impl Path { + pub fn root() -> Rc { + Rc::new(Path::Root) + } + + pub fn step(self: &Rc, v: &IOValue) -> Rc { + Rc::new(Path::Step(v.clone(), Rc::clone(self))) + } +} diff --git a/implementations/rust/preserves-path/src/predicate.rs b/implementations/rust/preserves-path/src/predicate.rs new file mode 100644 index 0000000..d2fd026 --- /dev/null +++ b/implementations/rust/preserves-path/src/predicate.rs @@ -0,0 +1,67 @@ +use crate::CompilationError; +use crate::path::Path; +use crate::schemas::path; +use crate::step::BoolCollector; +use crate::step::Node; +use crate::step::StepMaker; + +use preserves::value::IOValue; + +use std::rc::Rc; + +pub trait Predicate: std::fmt::Debug { + fn test(&mut self, path: Rc, value: &IOValue) -> bool; +} + +#[derive(Debug)] +pub enum CompiledPredicate { + Selector(Node), + Not(Box), + Or(Vec), + And(Vec), +} + +impl path::Predicate { + pub fn compile(&self) -> Result { + match self { + path::Predicate::Selector(b) => + Ok(CompiledPredicate::Selector((&**b).connect(BoolCollector::new())?)), + path::Predicate::Not { pred } => + Ok(CompiledPredicate::Not(Box::new((&**pred).compile()?))), + path::Predicate::Or { preds } => + Ok(CompiledPredicate::Or(preds.iter().map(Self::compile).collect::>()?)), + path::Predicate::And { preds } => + Ok(CompiledPredicate::And(preds.iter().map(Self::compile).collect::>()?)), + } + } + + pub fn exec(&self, value: &IOValue) -> Result { + Ok(self.compile()?.test(Path::root(), value)) + } +} + +impl Predicate for CompiledPredicate { + fn test(&mut self, path: Rc, value: &IOValue) -> bool { + match self { + CompiledPredicate::Selector(n) => n.test(path, value), + 
CompiledPredicate::Not(p) => !p.test(path, value), + CompiledPredicate::Or(ps) => { + for p in ps.iter_mut() { + if p.test(Rc::clone(&path), value) { + return true; + } + } + return false; + }, + CompiledPredicate::And(ps) => { + for p in ps.iter_mut() { + if !p.test(Rc::clone(&path), value) { + return false; + } + } + return true; + }, + } + } +} + diff --git a/implementations/rust/preserves-path/src/step.rs b/implementations/rust/preserves-path/src/step.rs new file mode 100644 index 0000000..3dddb62 --- /dev/null +++ b/implementations/rust/preserves-path/src/step.rs @@ -0,0 +1,416 @@ +// Selectors operate on IOValues because the AST includes keys of IOValue type. +// If we could make Schemas produce generics... + +use crate::CompilationError; +use crate::path::Path; +use crate::predicate::CompiledPredicate; +use crate::predicate::Predicate; +use crate::schemas::path; + +use num::bigint::BigInt; +use num::traits::cast::ToPrimitive; +use num::traits::cast::FromPrimitive; + +use preserves::value::AtomClass; +use preserves::value::CompoundClass; +use preserves::value::IOValue; +use preserves::value::NestedValue; +use preserves::value::Value; +use preserves::value::ValueClass; + +use std::cell::RefCell; +use std::collections::VecDeque; +use std::iter::Iterator; +use std::rc::Rc; + +pub trait StepMaker { + fn connect(&self, step: Node) -> Result; +} + +pub trait Step: std::fmt::Debug { + fn accept(&mut self, path: Rc, value: &IOValue); + fn finish(&mut self); + fn reset(&mut self) -> Vec; +} + +macro_rules! 
delegate_finish_and_reset { + ($self:ident, $target:expr) => { + fn finish(&mut $self) { $target.finish() } + fn reset(&mut $self) -> Vec { $target.reset() } + } +} + +#[derive(Clone, Debug)] +pub struct Node(pub Rc>); + +#[derive(Debug)] +struct AxisStep { + step: Node, + axis: path::Axis, +} + +#[derive(Debug)] +struct CompareStep { + op: path::Comparison, + literal: IOValue, + step: Node, +} + +#[derive(Debug)] +struct RegexStep { + regex: regex::Regex, + step: Node, +} + +#[derive(Debug)] +struct TestStep { + pred: CompiledPredicate, + step: Node, +} + +#[derive(Debug)] +struct RealStep { + step: Node, +} + +#[derive(Debug)] +struct IntStep { + step: Node, +} + +#[derive(Debug)] +struct VecCollector { + accumulator: Vec, +} + +#[derive(Debug)] +pub struct BoolCollector { + seen_value: bool, +} + +#[derive(Debug)] +struct KindStep { + kind: ValueClass, + step: Node, +} + +impl Node { + fn new(s: S) -> Self { + Node(Rc::new(RefCell::new(s))) + } + + pub fn test(&self, path: Rc, value: &IOValue) -> bool { + self.accept(path, value); + self.finish(); + !self.reset().is_empty() + } + + pub fn accept(&self, path: Rc, value: &IOValue) { + self.0.borrow_mut().accept(path, value) + } + + pub fn finish(&self) { + self.0.borrow_mut().finish() + } + + pub fn reset(&self) -> Vec { + self.0.borrow_mut().reset() + } + + pub fn exec(&self, value: &IOValue) -> Vec { + self.accept(Path::root(), value); + self.finish(); + self.reset() + } +} + +impl StepMaker for path::Selector { + fn connect(&self, step: Node) -> Result { + self.0.connect(step) + } +} + +impl StepMaker for Vec { + fn connect(&self, mut step: Node) -> Result { + for s in self.iter().rev() { + step = s.connect(step)?; + } + Ok(step) + } +} + +impl StepMaker for path::Step { + fn connect(&self, step: Node) -> Result { + match self { + path::Step::Axis(b) => (&**b).connect(step), + path::Step::Filter(b) => (&**b).connect(step), + } + } +} + +impl StepMaker for path::Axis { + fn connect(&self, step: Node) -> Result { 
+ Ok(Node::new(AxisStep { step, axis: self.clone() })) + } +} + +impl Step for AxisStep { + fn accept(&mut self, path: Rc, value: &IOValue) { + match &self.axis { + path::Axis::Values => { + let path = path.step(value); + for c in value.value().children() { + self.step.accept(Rc::clone(&path), &c) + } + } + path::Axis::Descendants => { + let mut q = VecDeque::new(); + q.push_back((path, value.clone())); + while let Some((p, c)) = q.pop_front() { + let p = p.step(&c); + for cc in c.value().children() { + q.push_back((Rc::clone(&p), cc.clone())); + } + self.step.accept(p, &c) + } + } + path::Axis::At { key } => match value.value() { + Value::String(s) => + step_index(path.step(value), s.chars(), &key, |c| IOValue::new(String::from(c)), &mut self.step), + Value::Record(r) => + step_index(path.step(value), r.fields().iter(), &key, |v| v.clone(), &mut self.step), + Value::Sequence(vs) => + step_index(path.step(value), vs.iter(), &key, |v| v.clone(), &mut self.step), + Value::Dictionary(d) => + if let Some(v) = d.get(&key) { + self.step.accept(path.step(value), v) + }, + _ => + (), + }, + path::Axis::Label => if let Some(r) = value.value().as_record(None) { + self.step.accept(path.step(value), r.label()) + }, + path::Axis::Keys => match value.value() { + Value::String(s) => step_keys(path.step(value), s.len(), &mut self.step), + Value::ByteString(bs) => step_keys(path.step(value), bs.len(), &mut self.step), + Value::Symbol(s) => step_keys(path.step(value), s.len(), &mut self.step), + Value::Record(r) => step_keys(path.step(value), r.arity(), &mut self.step), + Value::Sequence(vs) => step_keys(path.step(value), vs.len(), &mut self.step), + Value::Dictionary(d) => { + let path = path.step(value); + for k in d.keys() { + self.step.accept(Rc::clone(&path), k) + } + }, + _ => (), + }, + path::Axis::Length => match value.value() { + Value::String(s) => self.step.accept(path.step(value), &IOValue::new(s.len())), + Value::ByteString(bs) => self.step.accept(path.step(value), 
&IOValue::new(bs.len())), + Value::Symbol(s) => self.step.accept(path.step(value), &IOValue::new(s.len())), + Value::Record(r) => self.step.accept(path.step(value), &IOValue::new(r.arity())), + Value::Sequence(vs) => self.step.accept(path.step(value), &IOValue::new(vs.len())), + Value::Dictionary(d) => self.step.accept(path.step(value), &IOValue::new(d.len())), + _ => self.step.accept(path.step(value), &IOValue::new(0)), + }, + path::Axis::Annotations => { + let path = path.step(value); + for c in value.annotations().slice() { + self.step.accept(Rc::clone(&path), &c) + } + } + path::Axis::Embedded => if let Some(d) = value.value().as_embedded() { + self.step.accept(path.step(value), d) + }, + } + } + + delegate_finish_and_reset!(self, self.step); +} + +fn step_index, F: FnOnce(T) -> IOValue>( + p: Rc, + mut vs: Ts, + key: &IOValue, + f: F, + step: &mut Node, +) { + if let Some(i) = key.value().as_usize() { + match vs.nth(i) { + None => (), + Some(v) => step.accept(p, &f(v)), + } + } +} + +fn step_keys(p: Rc, count: usize, step: &mut Node) { + for i in 0 .. 
count { + step.accept(Rc::clone(&p), &IOValue::new(i)) + } +} + +impl StepMaker for path::Filter { + fn connect(&self, step: Node) -> Result { + match self { + path::Filter::Nop => Ok(step), + path::Filter::Compare { op, literal } => Ok(Node::new(CompareStep { + op: (**op).clone(), + literal: literal.clone(), + step, + })), + path::Filter::Regex { regex } => Ok(Node::new(RegexStep { regex: regex::Regex::new(regex)?, step })), + path::Filter::Test { pred } => Ok(Node::new(TestStep { pred: (&**pred).compile()?, step })), + path::Filter::Real => Ok(Node::new(RealStep { step })), + path::Filter::Int => Ok(Node::new(IntStep { step })), + path::Filter::Kind { kind } => Ok(Node::new(KindStep { + kind: match &**kind { + path::ValueKind::Boolean => ValueClass::Atomic(AtomClass::Boolean), + path::ValueKind::Float => ValueClass::Atomic(AtomClass::Float), + path::ValueKind::Double => ValueClass::Atomic(AtomClass::Double), + path::ValueKind::SignedInteger => ValueClass::Atomic(AtomClass::SignedInteger), + path::ValueKind::String => ValueClass::Atomic(AtomClass::String), + path::ValueKind::ByteString => ValueClass::Atomic(AtomClass::ByteString), + path::ValueKind::Symbol => ValueClass::Atomic(AtomClass::Symbol), + path::ValueKind::Record => ValueClass::Compound(CompoundClass::Record), + path::ValueKind::Sequence => ValueClass::Compound(CompoundClass::Sequence), + path::ValueKind::Set => ValueClass::Compound(CompoundClass::Set), + path::ValueKind::Dictionary => ValueClass::Compound(CompoundClass::Dictionary), + path::ValueKind::Embedded => ValueClass::Embedded, + }, + step, + })), + } + } +} + +impl Step for CompareStep { + fn accept(&mut self, path: Rc, value: &IOValue) { + if match self.op { + path::Comparison::Eq => value == &self.literal, + path::Comparison::Ne => value != &self.literal, + path::Comparison::Lt => value < &self.literal, + path::Comparison::Ge => value >= &self.literal, + path::Comparison::Gt => value > &self.literal, + path::Comparison::Le => value <= 
&self.literal, + } { + self.step.accept(path, value) + } + } + + delegate_finish_and_reset!(self, self.step); +} + +impl Step for RegexStep { + fn accept(&mut self, path: Rc, value: &IOValue) { + match value.value() { + Value::String(s) => if self.regex.is_match(s) { self.step.accept(path, value) }, + Value::Symbol(s) => if self.regex.is_match(s) { self.step.accept(path, value) }, + _ => (), + } + } + + delegate_finish_and_reset!(self, self.step); +} + +impl Step for TestStep { + fn accept(&mut self, path: Rc, value: &IOValue) { + if self.pred.test(Rc::clone(&path), value) { + self.step.accept(path, value) + } + } + + delegate_finish_and_reset!(self, self.step); +} + +impl Step for RealStep { + fn accept(&mut self, path: Rc, value: &IOValue) { + match value.value() { + Value::SignedInteger(i) => if let Some(r) = BigInt::from(i).to_f64() { + self.step.accept(path, &IOValue::new(r)) + }, + Value::Float(f) => self.step.accept(path, &IOValue::new(f32::from(*f) as f64)), + Value::Double(_) => self.step.accept(path, value), + _ => (), + } + } + + delegate_finish_and_reset!(self, self.step); +} + +impl Step for IntStep { + fn accept(&mut self, path: Rc, value: &IOValue) { + match value.value() { + Value::SignedInteger(_) => self.step.accept(path, value), + Value::Float(f) => if let Some(i) = BigInt::from_f32(f32::from(*f)) { + self.step.accept(path, &IOValue::new(i)) + }, + Value::Double(d) => if let Some(i) = BigInt::from_f64(f64::from(*d)) { + self.step.accept(path, &IOValue::new(i)) + }, + _ => (), + } + } + + delegate_finish_and_reset!(self, self.step); +} + +impl VecCollector { + fn new() -> Node { + Node::new(VecCollector { accumulator: Vec::new() }) + } +} + +impl Step for VecCollector { + fn accept(&mut self, _path: Rc, value: &IOValue) { + self.accumulator.push(value.clone()) + } + + fn finish(&mut self) { + } + + fn reset(&mut self) -> Vec { + std::mem::take(&mut self.accumulator) + } +} + +impl BoolCollector { + pub fn new() -> Node { + Node::new(BoolCollector 
{ seen_value: false }) + } +} + +impl Step for BoolCollector { + fn accept(&mut self, _path: Rc, _value: &IOValue) { + self.seen_value = true + } + + fn finish(&mut self) { + } + + fn reset(&mut self) -> Vec { + let result = if self.seen_value { vec![IOValue::new(true)] } else { vec![] }; + self.seen_value = false; + result + } +} + +impl Step for KindStep { + fn accept(&mut self, path: Rc, value: &IOValue) { + if value.value_class() == self.kind { + self.step.accept(path, value) + } + } + + delegate_finish_and_reset!(self, self.step); +} + +impl path::Selector { + pub fn compile(&self) -> Result { + self.connect(VecCollector::new()) + } + + pub fn exec(&self, value: &IOValue) -> Result, CompilationError> { + Ok(self.compile()?.exec(value)) + } +} diff --git a/implementations/rust/preserves-tools/src/bin/preserves-tool.rs b/implementations/rust/preserves-tools/src/bin/preserves-tool.rs index 1e84a3d..8e2aeb4 100644 --- a/implementations/rust/preserves-tools/src/bin/preserves-tool.rs +++ b/implementations/rust/preserves-tools/src/bin/preserves-tool.rs @@ -89,7 +89,7 @@ struct Convert { #[clap(long, arg_enum, value_name = "on/off", default_value = "on")] indent: Boolish, - #[clap(long, default_value="=*")] + #[clap(long, default_value="*")] select: preserves_path::Node, #[clap(long, arg_enum, value_name = "on/off", default_value = "on")]