preserves/implementations/rust/preserves-path/src/lib.rs

665 lines
21 KiB
Rust

pub mod schemas;
// Paths operate on IOValues because the AST includes keys of IOValue type.
// If we could make Schemas produce generics...
pub use crate::schemas::path;
use preserves::value::AtomClass;
use preserves::value::BinarySource;
use preserves::value::BytesBinarySource;
use preserves::value::CompoundClass;
use preserves::value::IOValue;
use preserves::value::Map;
use preserves::value::NestedValue;
use preserves::value::Reader;
use preserves::value::Value;
use preserves::value::ValueClass;
use std::cell::RefCell;
use std::collections::VecDeque;
use std::iter::Iterator;
use std::io;
use std::rc::Rc;
use thiserror::Error;
/// Binary operator that may separate sub-expressions in the textual path syntax.
///
/// Only one kind of operator may be used at a given nesting level; see `split_binop`.
#[derive(Debug)]
enum Binop {
/// Written as the symbol `~` between sub-expressions.
Interleave,
/// Written as the symbol `+` between sub-expressions.
Union,
/// Written as the symbol `&` between sub-expressions.
Intersection,
}
/// Errors that can arise while parsing or compiling a path expression.
#[derive(Error, Debug)]
pub enum CompilationError {
/// An underlying I/O error, reported transparently.
#[error(transparent)]
IoError(#[from] io::Error),
/// More than one kind of binary operator (`~`, `+`, `&`) was used at the same level.
#[error("Cannot mix binary operators")]
MixedOperators,
/// A step token was not recognised, or a step was missing its required argument.
#[error("Invalid step")]
InvalidStep,
/// A regular expression failed to compile; reported transparently.
#[error(transparent)]
RegexError(#[from] regex::Error),
}
/// Singly-linked record of the values traversed on the way to the value
/// currently being offered to a step.
pub enum Path {
/// The empty path: nothing has been traversed yet.
Root,
/// A traversed value, followed by the path that led to it.
Step(IOValue, Rc<Path>),
}
/// One stage of a compiled path expression.
///
/// A stage is driven in three phases (see `Node::exec`): zero or more calls to
/// `accept`, then `finish`, then `reset`.
pub trait Step: std::fmt::Debug {
/// Offers `value`, reached via `path`, to this stage.
fn accept(&mut self, path: Rc<Path>, value: &IOValue);
/// Called once the values for the current run have been offered.
fn finish(&mut self);
/// Clears per-run state and returns the results gathered during the run.
fn reset(&mut self) -> Vec<IOValue>;
}
// Expands, inside an `impl Step for ...` block, to `finish` and `reset`
// methods that simply forward to `$target` (typically the downstream node).
// `$self` must be the identifier `self`, passed in so that the generated
// method bodies can refer to it.
macro_rules! delegate_finish_and_reset {
($self:ident, $target:expr) => {
fn finish(&mut $self) { $target.finish() }
fn reset(&mut $self) -> Vec<IOValue> { $target.reset() }
}
}
/// Shared, interiorly-mutable handle to a compiled [`Step`].
///
/// Cloning a `Node` clones the `Rc`, so every clone refers to the same
/// underlying step.
#[derive(Clone, Debug)]
pub struct Node(pub Rc<RefCell<dyn Step>>);

impl Node {
    /// Wraps the step `s` in a fresh reference-counted cell.
    fn new<S>(s: S) -> Self
    where
        S: Step + 'static,
    {
        Self(Rc::new(RefCell::new(s)))
    }

    /// Mutably borrows the wrapped step and offers it `value`, reached via `path`.
    pub fn accept(&self, path: Rc<Path>, value: &IOValue) {
        let mut inner = self.0.borrow_mut();
        inner.accept(path, value)
    }

    /// Mutably borrows the wrapped step and calls its `finish`.
    pub fn finish(&self) {
        let mut inner = self.0.borrow_mut();
        inner.finish()
    }

    /// Mutably borrows the wrapped step and calls its `reset`, returning the
    /// results it hands back.
    pub fn reset(&self) -> Vec<IOValue> {
        let mut inner = self.0.borrow_mut();
        inner.reset()
    }

    /// Runs the whole pipeline against a single input: offers `value` at the
    /// root path, finishes, then resets and returns the collected results.
    pub fn exec(&self, value: &IOValue) -> Vec<IOValue> {
        let start = Path::root();
        self.accept(start, value);
        self.finish();
        self.reset()
    }
}
/// Something that can be compiled into a pipeline stage.
pub trait StepMaker {
/// Builds the node for `self`, wired so that its output feeds `step`, and
/// returns the node that values should be offered to.
fn connect(&self, step: Node) -> Result<Node, CompilationError>;
}
impl Path {
fn root() -> Rc<Self> {
Rc::new(Path::Root)
}
fn step(self: &Rc<Self>, v: &IOValue) -> Rc<Self> {
Rc::new(Path::Step(v.clone(), Rc::clone(self)))
}
}
impl<S: StepMaker> StepMaker for Vec<S> {
    /// Chains the elements so that values flow through them in order and the
    /// last element feeds `step`.
    ///
    /// The chain is built back to front: the last element is connected to
    /// `step` first, and each earlier element is connected to the node built
    /// so far. The first error aborts construction.
    fn connect(&self, step: Node) -> Result<Node, CompilationError> {
        self.iter()
            .rev()
            .try_fold(step, |downstream, maker| maker.connect(downstream))
    }
}
impl StepMaker for path::Expr {
    /// Compiles an expression so that its results are offered to `step`.
    ///
    /// * `Steps` chains the individual steps in order.
    /// * `Not` places a `NotStep` between the inner expression and `step`.
    /// * `Interleave` fans each input out to every branch; all branches feed
    ///   `step` directly.
    /// * `Union` is like `Interleave`, but the branches share a threshold-1
    ///   `ThresholdStep`, which forwards each distinct value only the first
    ///   time it is seen.
    /// * `Intersection` gives every branch its own threshold-1 `ThresholdStep`
    ///   and joins them in a shared `ThresholdStep` whose threshold is the
    ///   number of branches.
    fn connect(&self, step: Node) -> Result<Node, CompilationError> {
        match self {
            path::Expr::Steps(steps) => steps.connect(step),
            path::Expr::Not { expr } => {
                let negation = Node::new(NotStep {
                    seen_value: false,
                    step,
                });
                expr.connect(negation)
            }
            path::Expr::Interleave { exprs } => {
                ForkJoinStep::new(exprs, |branch, sink| branch.connect(sink), step)
            }
            path::Expr::Union { exprs } => {
                let dedup = ThresholdStep::new(1, step)?;
                ForkJoinStep::new(exprs, |branch, sink| branch.connect(sink), dedup)
            }
            path::Expr::Intersection { exprs } => {
                let join = ThresholdStep::new(exprs.len(), step)?;
                ForkJoinStep::new(
                    exprs,
                    |branch, sink| branch.connect(ThresholdStep::new(1, sink)?),
                    join,
                )
            }
        }
    }
}
impl StepMaker for path::Step {
    /// Compiles a single step by delegating to the boxed axis, filter or
    /// nested expression it contains.
    fn connect(&self, step: Node) -> Result<Node, CompilationError> {
        match self {
            path::Step::Axis(axis) => axis.connect(step),
            path::Step::Filter(filter) => filter.connect(step),
            path::Step::Expr(expr) => expr.connect(step),
        }
    }
}
/// Pipeline stage that moves from each incoming value to related values
/// (children, keys, label, ...) as selected by `axis`.
#[derive(Debug)]
struct AxisStep {
// Downstream node that receives the values selected by the axis.
step: Node,
// Which movement to perform for each incoming value.
axis: path::Axis,
}
impl StepMaker for path::Axis {
    /// Compiles an axis into an `AxisStep` feeding `step`.
    ///
    /// The `Nop` axis needs no stage of its own, so `step` is returned as-is.
    fn connect(&self, step: Node) -> Result<Node, CompilationError> {
        match self {
            path::Axis::Nop => Ok(step),
            other => Ok(Node::new(AxisStep {
                step,
                axis: other.clone(),
            })),
        }
    }
}
impl Step for AxisStep {
    /// Applies the configured axis to `value` and offers every selected value
    /// to the downstream step.
    ///
    /// * `Nop` forwards `value` unchanged, with the path unchanged.
    /// * `Values` forwards each child of `value`.
    /// * `Descendants` walks `value` and everything beneath it breadth-first,
    ///   forwarding each visited value (including `value` itself).
    /// * `At { key }` indexes records and sequences by a numeric key and
    ///   dictionaries by lookup; a missing entry forwards nothing.
    /// * `Label` forwards the label of a record.
    /// * `Keys` forwards the indices of a record or sequence, or the keys of a
    ///   dictionary.
    /// * `Length` forwards the arity/length of a record, sequence or
    ///   dictionary, and `0` for any other value.
    /// * `Annotations` forwards each annotation attached to `value`.
    /// * `Embedded` forwards the embedded value, if `value` is an embedded.
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        match &self.axis {
            path::Axis::Nop => self.step.accept(path, value),

            path::Axis::Values => {
                let here = path.step(value);
                for child in value.value().children() {
                    self.step.accept(Rc::clone(&here), &child)
                }
            }

            path::Axis::Descendants => {
                // FIFO work list of (path leading to the node, node).
                let mut pending = VecDeque::new();
                pending.push_back((path, value.clone()));
                while let Some((parent_path, node)) = pending.pop_front() {
                    let node_path = parent_path.step(&node);
                    for child in node.value().children() {
                        pending.push_back((Rc::clone(&node_path), child.clone()));
                    }
                    self.step.accept(node_path, &node)
                }
            }

            path::Axis::At { key } => match value.value() {
                Value::Record(r) => {
                    step_index(path.step(value), r.fields(), &key, &mut self.step)
                }
                Value::Sequence(vs) => step_index(path.step(value), vs, &key, &mut self.step),
                Value::Dictionary(d) => {
                    if let Some(v) = d.get(&key) {
                        self.step.accept(path.step(value), v)
                    }
                }
                _ => (),
            },

            path::Axis::Label => {
                if let Some(r) = value.value().as_record(None) {
                    self.step.accept(path.step(value), r.label())
                }
            }

            path::Axis::Keys => match value.value() {
                Value::Record(r) => step_keys(path.step(value), r.arity(), &mut self.step),
                Value::Sequence(vs) => step_keys(path.step(value), vs.len(), &mut self.step),
                Value::Dictionary(d) => {
                    let here = path.step(value);
                    for k in d.keys() {
                        self.step.accept(Rc::clone(&here), k)
                    }
                }
                _ => (),
            },

            path::Axis::Length => match value.value() {
                Value::Record(r) => {
                    self.step.accept(path.step(value), &IOValue::new(r.arity()))
                }
                Value::Sequence(vs) => {
                    self.step.accept(path.step(value), &IOValue::new(vs.len()))
                }
                Value::Dictionary(d) => {
                    self.step.accept(path.step(value), &IOValue::new(d.len()))
                }
                _ => self.step.accept(path.step(value), &IOValue::new(0)),
            },

            path::Axis::Annotations => {
                let here = path.step(value);
                for annotation in value.annotations().slice() {
                    self.step.accept(Rc::clone(&here), &annotation)
                }
            }

            path::Axis::Embedded => {
                if let Some(d) = value.value().as_embedded() {
                    self.step.accept(path.step(value), d)
                }
            }
        }
    }

    delegate_finish_and_reset!(self, self.step);
}
/// Offers `vs[key]` to `step` when `key` is a non-negative integer that is a
/// valid index into `vs`; otherwise does nothing.
fn step_index(p: Rc<Path>, vs: &[IOValue], key: &IOValue, step: &mut Node) {
    let selected = key.value().as_usize().and_then(|i| vs.get(i));
    if let Some(item) = selected {
        step.accept(p, item)
    }
}
/// Offers each index `0, 1, ..., count - 1` to `step` as an integer value,
/// all sharing the path `p`.
fn step_keys(p: Rc<Path>, count: usize, step: &mut Node) {
    (0..count).for_each(|index| step.accept(Rc::clone(&p), &IOValue::new(index)));
}
impl StepMaker for path::Filter {
    /// Compiles a filter into a stage that forwards only matching values to
    /// `step`.
    ///
    /// * `Nop` accepts everything, so `step` itself is returned.
    /// * `Fail` rejects everything, but still forwards `finish`/`reset` to
    ///   `step` so that downstream stages complete their run.
    /// * `Eq` forwards values equal to the literal.
    /// * `Regex` forwards strings and symbols matching the pattern.
    /// * `Test` forwards values for which the nested expression yields at
    ///   least one result.
    /// * `Kind` forwards values of the given value class.
    ///
    /// # Errors
    ///
    /// Returns `CompilationError::RegexError` if a `Regex` pattern does not
    /// compile, and propagates any error from compiling a `Test` expression.
    fn connect(&self, step: Node) -> Result<Node, CompilationError> {
        match self {
            path::Filter::Nop => Ok(step),
            path::Filter::Fail => Ok(Node::new(InertStep { step })),
            path::Filter::Eq { literal } => Ok(Node::new(EqStep {
                literal: literal.clone(),
                step,
            })),
            path::Filter::Regex { regex } => Ok(Node::new(RegexStep {
                regex: regex::Regex::new(regex)?,
                step,
            })),
            path::Filter::Test { expr } => Ok(Node::new(TestStep {
                expr: expr.connect(BoolCollector::new())?,
                step,
            })),
            path::Filter::Kind { kind } => {
                let class = match &**kind {
                    path::ValueKind::Boolean => ValueClass::Atomic(AtomClass::Boolean),
                    path::ValueKind::Float => ValueClass::Atomic(AtomClass::Float),
                    path::ValueKind::Double => ValueClass::Atomic(AtomClass::Double),
                    path::ValueKind::SignedInteger => {
                        ValueClass::Atomic(AtomClass::SignedInteger)
                    }
                    path::ValueKind::String => ValueClass::Atomic(AtomClass::String),
                    path::ValueKind::ByteString => ValueClass::Atomic(AtomClass::ByteString),
                    path::ValueKind::Symbol => ValueClass::Atomic(AtomClass::Symbol),
                    path::ValueKind::Record => ValueClass::Compound(CompoundClass::Record),
                    path::ValueKind::Sequence => ValueClass::Compound(CompoundClass::Sequence),
                    path::ValueKind::Set => ValueClass::Compound(CompoundClass::Set),
                    path::ValueKind::Dictionary => {
                        ValueClass::Compound(CompoundClass::Dictionary)
                    }
                    path::ValueKind::Embedded => ValueClass::Embedded,
                };
                Ok(Node::new(KindStep { kind: class, step }))
            }
        }
    }
}

/// Stage that never forwards a value.
///
/// It keeps a handle on the downstream node so that `finish` and `reset`
/// still propagate. Without that, a downstream stage that acts on `finish`
/// (such as `NotStep`, which emits a result when it has seen nothing) would
/// never run, and `reset` would not return the downstream results.
#[derive(Debug)]
struct InertStep {
    step: Node,
}

impl Step for InertStep {
    /// Discards every offered value.
    fn accept(&mut self, _path: Rc<Path>, _value: &IOValue) {}

    /// Forwards `finish` to the downstream node.
    fn finish(&mut self) {
        self.step.finish()
    }

    /// Forwards `reset` to the downstream node and returns its results.
    fn reset(&mut self) -> Vec<IOValue> {
        self.step.reset()
    }
}
/// Stage implementing negation: it swallows every value it is offered and,
/// at `finish`, emits a single `true` downstream only if nothing was offered.
#[derive(Debug)]
struct NotStep {
    /// Whether a value has been offered (or the `true` result already emitted)
    /// since the last reset.
    seen_value: bool,
    /// Downstream node receiving the negation result.
    step: Node,
}

impl Step for NotStep {
    /// Records that a value arrived; the value itself is not forwarded.
    fn accept(&mut self, _path: Rc<Path>, _value: &IOValue) {
        self.seen_value = true;
    }

    /// Emits `true` (at the root path) if no value was seen, then finishes the
    /// downstream node.
    fn finish(&mut self) {
        // Setting the flag here makes repeated `finish` calls emit at most once.
        let already_seen = std::mem::replace(&mut self.seen_value, true);
        if !already_seen {
            self.step.accept(Path::root(), &IOValue::new(true));
        }
        self.step.finish()
    }

    /// Clears the flag and returns whatever the downstream node collected.
    fn reset(&mut self) -> Vec<IOValue> {
        self.seen_value = false;
        self.step.reset()
    }
}
/// Filter stage that forwards only values equal to a fixed literal.
#[derive(Debug)]
struct EqStep {
    /// The value that incoming values are compared against.
    literal: IOValue,
    /// Downstream node receiving the matching values.
    step: Node,
}

impl Step for EqStep {
    /// Forwards `value` when it equals the literal; drops it otherwise.
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        let matches_literal = value == &self.literal;
        if matches_literal {
            self.step.accept(path, value)
        }
    }

    delegate_finish_and_reset!(self, self.step);
}
/// Filter stage that forwards strings and symbols whose text matches a
/// regular expression.
#[derive(Debug)]
struct RegexStep {
    /// The compiled pattern.
    regex: regex::Regex,
    /// Downstream node receiving the matching values.
    step: Node,
}

impl Step for RegexStep {
    /// Forwards `value` when it is a string or symbol matching the pattern.
    /// Values of any other kind are dropped.
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        let text = match value.value() {
            Value::String(s) => s.as_str(),
            Value::Symbol(s) => s.as_str(),
            _ => return,
        };
        if self.regex.is_match(text) {
            self.step.accept(path, value)
        }
    }

    delegate_finish_and_reset!(self, self.step);
}
/// Filter stage that forwards a value only if a nested expression, run
/// against that value, produces at least one result.
#[derive(Debug)]
struct TestStep {
    /// The compiled nested expression used as the predicate.
    expr: Node,
    /// Downstream node receiving the values that pass the test.
    step: Node,
}

impl Step for TestStep {
    /// Runs the predicate pipeline to completion on `value` (accept, finish,
    /// reset) and forwards `value` if that run returned any result.
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        self.expr.accept(Rc::clone(&path), value);
        self.expr.finish();
        let passed = !self.expr.reset().is_empty();
        if passed {
            self.step.accept(path, value)
        }
    }

    delegate_finish_and_reset!(self, self.step);
}
/// Terminal stage that records every value it is offered.
#[derive(Debug)]
struct VecCollector {
    /// Values received since the last reset, in arrival order.
    accumulator: Vec<IOValue>,
}

impl VecCollector {
    /// Creates an empty collector wrapped in a `Node`.
    fn new() -> Node {
        let collector = VecCollector {
            accumulator: Vec::new(),
        };
        Node::new(collector)
    }
}

impl Step for VecCollector {
    /// Appends a clone of `value` to the collected results.
    fn accept(&mut self, _path: Rc<Path>, value: &IOValue) {
        let owned = value.clone();
        self.accumulator.push(owned)
    }

    /// Nothing to do: there is no downstream stage.
    fn finish(&mut self) {}

    /// Hands back the collected values, leaving the collector empty.
    fn reset(&mut self) -> Vec<IOValue> {
        std::mem::take(&mut self.accumulator)
    }
}
/// Terminal stage that only remembers whether any value was offered.
#[derive(Debug)]
struct BoolCollector {
    /// `true` once at least one value has arrived since the last reset.
    seen_value: bool,
}

impl BoolCollector {
    /// Creates a collector that has seen nothing, wrapped in a `Node`.
    fn new() -> Node {
        let collector = BoolCollector { seen_value: false };
        Node::new(collector)
    }
}

impl Step for BoolCollector {
    /// Notes that a value arrived; the value itself is ignored.
    fn accept(&mut self, _path: Rc<Path>, _value: &IOValue) {
        self.seen_value = true
    }

    /// Nothing to do: there is no downstream stage.
    fn finish(&mut self) {}

    /// Returns a single `true` if any value was seen, or nothing otherwise,
    /// and clears the flag.
    fn reset(&mut self) -> Vec<IOValue> {
        let saw_any = std::mem::take(&mut self.seen_value);
        if saw_any {
            vec![IOValue::new(true)]
        } else {
            Vec::new()
        }
    }
}
/// Filter stage that forwards only values of one particular value class.
#[derive(Debug)]
struct KindStep {
    /// The class a value must have in order to pass.
    kind: ValueClass,
    /// Downstream node receiving the matching values.
    step: Node,
}

impl Step for KindStep {
    /// Forwards `value` when its class equals the configured one.
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        let same_class = value.value_class() == self.kind;
        if same_class {
            self.step.accept(path, value)
        }
    }

    delegate_finish_and_reset!(self, self.step);
}
/// Stage that offers each incoming value to several branches, all of which
/// are built on top of one shared downstream node.
#[derive(Debug)]
struct ForkJoinStep {
    /// Entry nodes of the individual branches.
    branches: Vec<Node>,
    /// The shared node that the branches were built to feed.
    step: Node,
}

impl ForkJoinStep {
    /// Builds one branch per expression by calling `f` with the expression
    /// and a clone of `step`, and wraps the result in a `Node`.
    ///
    /// The first error returned by `f` aborts construction.
    fn new<F>(exprs: &Vec<path::Expr>, f: F, step: Node) -> Result<Node, CompilationError>
    where
        F: Fn(&path::Expr, Node) -> Result<Node, CompilationError>,
    {
        let mut branches = Vec::with_capacity(exprs.len());
        for expr in exprs {
            branches.push(f(expr, step.clone())?);
        }
        Ok(Node::new(Self { branches, step }))
    }
}

impl Step for ForkJoinStep {
    /// Offers `value` to every branch, in order.
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        for branch in &self.branches {
            branch.accept(Rc::clone(&path), value)
        }
    }

    /// Finishes every branch, then the shared downstream node.
    fn finish(&mut self) {
        for branch in &self.branches {
            branch.finish()
        }
        self.step.finish()
    }

    /// Resets the shared downstream node first, keeping its results, then
    /// resets every branch (discarding what those calls return).
    fn reset(&mut self) -> Vec<IOValue> {
        let collected = self.step.reset();
        for branch in &self.branches {
            branch.reset();
        }
        collected
    }
}
/// Stage that counts how often each distinct value has been offered and
/// forwards a value at the moment its count reaches `threshold`.
#[derive(Debug)]
struct ThresholdStep {
    /// Count at which a value is forwarded.
    threshold: usize,
    /// Number of times each value has been offered since the last reset.
    accumulator: Map<IOValue, usize>,
    /// Downstream node receiving values that reach the threshold.
    step: Node,
}

impl ThresholdStep {
    /// Creates a threshold stage with no recorded counts, wrapped in a `Node`.
    fn new(threshold: usize, step: Node) -> Result<Node, CompilationError> {
        let stage = Self {
            threshold,
            accumulator: Map::new(),
            step,
        };
        Ok(Node::new(stage))
    }
}

impl Step for ThresholdStep {
    /// Increments the count for `value` and forwards it if the count now
    /// equals the threshold exactly (so each value is forwarded at most once
    /// per run).
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        let occurrences = self.accumulator.entry(value.clone()).or_insert(0);
        *occurrences += 1;
        let reached = *occurrences == self.threshold;
        if reached {
            self.step.accept(path, value)
        }
    }

    /// Finishes the downstream node.
    fn finish(&mut self) {
        self.step.finish()
    }

    /// Forgets all counts and returns whatever the downstream node collected.
    fn reset(&mut self) -> Vec<IOValue> {
        self.accumulator.clear();
        self.step.reset()
    }
}
/// Splits `tokens` at every token that is a symbol whose text equals
/// `separator`, returning the runs of tokens in between as owned vectors.
///
/// The separator tokens themselves are not included. When no separator is
/// present the result contains a single piece holding all of `tokens`.
///
/// Takes a slice rather than `&Vec` so that any contiguous run of tokens can
/// be passed; existing `&Vec<IOValue>` arguments still coerce.
fn split_values_by_symbol(tokens: &[IOValue], separator: &str) -> Vec<Vec<IOValue>> {
    tokens
        .split(|t| matches!(t.value().as_symbol(), Some(s) if s == separator))
        .map(|piece| piece.to_vec())
        .collect()
}
/// Splits `tokens` at the binary operator used at this level, if any.
///
/// Returns the pieces between operators together with the operator found:
/// `~` is `Interleave`, `+` is `Union`, `&` is `Intersection`. When no
/// operator is present the single piece is all of `tokens` and the operator
/// is `None`.
///
/// # Errors
///
/// Returns `CompilationError::MixedOperators` when more than one kind of
/// operator appears in `tokens`.
fn split_binop(tokens: &Vec<IOValue>) -> Result<(Vec<Vec<IOValue>>, Option<Binop>), CompilationError> {
    let by_interleave = split_values_by_symbol(tokens, "~");
    let by_union = split_values_by_symbol(tokens, "+");
    let by_intersection = split_values_by_symbol(tokens, "&");

    // A split always yields at least one piece, so "more than one piece"
    // means the corresponding operator occurs in `tokens`.
    let has_interleave = by_interleave.len() > 1;
    let has_union = by_union.len() > 1;
    let has_intersection = by_intersection.len() > 1;

    match (has_interleave, has_union, has_intersection) {
        (false, false, false) => Ok((by_interleave, None)),
        (true, false, false) => Ok((by_interleave, Some(Binop::Interleave))),
        (false, true, false) => Ok((by_union, Some(Binop::Union))),
        (false, false, true) => Ok((by_intersection, Some(Binop::Intersection))),
        _ => Err(CompilationError::MixedOperators),
    }
}
/// Parses a sequence of tokens into a path expression.
///
/// The tokens are first split at a binary operator (`~`, `+` or `&`). Without
/// an operator the whole input is parsed as one operand; otherwise each piece
/// is parsed as an operand and the operands are combined into the matching
/// `Interleave`, `Union` or `Intersection` expression.
///
/// # Errors
///
/// Returns `CompilationError::MixedOperators` if different operators are
/// mixed, and propagates any error from parsing an operand.
pub fn parse_expr(tokens: &Vec<IOValue>) -> Result<path::Expr, CompilationError> {
    let (pieces, binop) = split_binop(tokens)?;

    let operator = match binop {
        Some(op) => op,
        None => return parse_non_binop(&pieces[0]),
    };

    let mut exprs: Vec<path::Expr> = Vec::with_capacity(pieces.len());
    for piece in &pieces {
        exprs.push(parse_non_binop(piece)?);
    }

    let combined = match operator {
        Binop::Interleave => path::Expr::Interleave { exprs },
        Binop::Union => path::Expr::Union { exprs },
        Binop::Intersection => path::Expr::Intersection { exprs },
    };
    Ok(combined)
}
/// Parses an operand: tokens that contain no top-level binary operator.
///
/// A leading `!` symbol negates the operand parsed from the remaining
/// tokens. Otherwise the tokens are parsed as a sequence of steps until they
/// are exhausted.
///
/// # Errors
///
/// Propagates any error from parsing an individual step.
fn parse_non_binop(tokens: &[IOValue]) -> Result<path::Expr, CompilationError> {
    if let Some((first, rest)) = tokens.split_first() {
        let is_negation = first.value().as_symbol().map(|s| s.as_str()) == Some("!");
        if is_negation {
            let negated = parse_non_binop(rest)?;
            return Ok(path::Expr::Not {
                expr: Box::new(negated),
            });
        }
    }

    let mut steps = Vec::new();
    let mut cursor = tokens;
    while let Some((parsed, remaining)) = parse_step(cursor)? {
        steps.push(parsed);
        cursor = remaining;
    }
    Ok(path::Expr::Steps(steps))
}
/// Parses one step from the front of `tokens`.
///
/// Returns `Ok(None)` when `tokens` is empty, otherwise the parsed step and
/// the tokens left over after it.
///
/// A sequence token is parsed as a nested expression. Any other token must
/// be one of the recognised symbols:
///
/// * axes: `.=`, `/`, `//`, `.` (takes a key argument), `.^`, `.keys`,
///   `.length`, `.annotations`, `.embedded`
/// * filters: `=*`, `=!`, `=` (takes a literal), `=r` (takes a regular
///   expression string), `?` (takes a predicate), `^` (takes a literal and
///   tests the record label against it)
/// * kind filters: `bool`, `float`, `double`, `int`, `string`, `bytes`,
///   `symbol`, `rec`, `seq`, `set`, `dict`, `embedded`
///
/// # Errors
///
/// Returns `CompilationError::InvalidStep` for a token that is neither a
/// sequence nor a recognised symbol, for a missing argument, and for an `=r`
/// argument that is not a string. Returns `CompilationError::RegexError` if
/// the `=r` pattern does not compile. Errors from nested expressions are
/// propagated.
fn parse_step(tokens: &[IOValue]) -> Result<Option<(path::Step, &[IOValue])>, CompilationError> {
    let (head, remainder) = match tokens.split_first() {
        Some(parts) => parts,
        None => return Ok(None),
    };

    if let Some(items) = head.value().as_sequence() {
        let nested = parse_expr(items)?;
        return Ok(Some((path::Step::Expr(Box::new(nested)), remainder)));
    }

    let name = match head.value().as_symbol() {
        Some(symbol) => symbol.as_str(),
        None => return Err(CompilationError::InvalidStep),
    };

    let axis = |a: path::Axis| path::Step::Axis(Box::new(a));
    let filter = |f: path::Filter| path::Step::Filter(Box::new(f));

    let (step, rest) = match name {
        // Axes.
        ".=" => (axis(path::Axis::Nop), remainder),
        "/" => (axis(path::Axis::Values), remainder),
        "//" => (axis(path::Axis::Descendants), remainder),
        "." => {
            let (key, rest) = pop_step_arg(remainder)?;
            (axis(path::Axis::At { key }), rest)
        }
        ".^" => (axis(path::Axis::Label), remainder),
        ".keys" => (axis(path::Axis::Keys), remainder),
        ".length" => (axis(path::Axis::Length), remainder),
        ".annotations" => (axis(path::Axis::Annotations), remainder),
        ".embedded" => (axis(path::Axis::Embedded), remainder),

        // Filters.
        "=*" => (filter(path::Filter::Nop), remainder),
        "=!" => (filter(path::Filter::Fail), remainder),
        "=" => {
            let (literal, rest) = pop_step_arg(remainder)?;
            (filter(path::Filter::Eq { literal }), rest)
        }
        "=r" => {
            let (regex_val, rest) = pop_step_arg(remainder)?;
            let regex = regex_val
                .value()
                .to_string()
                .map_err(|_| CompilationError::InvalidStep)?
                .clone();
            // Compile once here so that an invalid pattern is reported at parse time.
            let _ = regex::Regex::new(&regex)?;
            (filter(path::Filter::Regex { regex }), rest)
        }
        "?" => {
            let (expr_val, rest) = pop_step_arg(remainder)?;
            let expr = Box::new(parse_expr(&vec![expr_val])?);
            (filter(path::Filter::Test { expr }), rest)
        }
        "^" => {
            // Shorthand for a test that the record label equals the literal.
            let (literal, rest) = pop_step_arg(remainder)?;
            let label_matches = path::Expr::Steps(vec![
                axis(path::Axis::Label),
                filter(path::Filter::Eq { literal }),
            ]);
            (
                filter(path::Filter::Test {
                    expr: Box::new(label_matches),
                }),
                rest,
            )
        }

        // Kind filters.
        "bool" => (path::Step::from(path::ValueKind::Boolean), remainder),
        "float" => (path::Step::from(path::ValueKind::Float), remainder),
        "double" => (path::Step::from(path::ValueKind::Double), remainder),
        "int" => (path::Step::from(path::ValueKind::SignedInteger), remainder),
        "string" => (path::Step::from(path::ValueKind::String), remainder),
        "bytes" => (path::Step::from(path::ValueKind::ByteString), remainder),
        "symbol" => (path::Step::from(path::ValueKind::Symbol), remainder),
        "rec" => (path::Step::from(path::ValueKind::Record), remainder),
        "seq" => (path::Step::from(path::ValueKind::Sequence), remainder),
        "set" => (path::Step::from(path::ValueKind::Set), remainder),
        "dict" => (path::Step::from(path::ValueKind::Dictionary), remainder),
        "embedded" => (path::Step::from(path::ValueKind::Embedded), remainder),

        _ => return Err(CompilationError::InvalidStep),
    };

    Ok(Some((step, rest)))
}
impl From<path::ValueKind> for path::Step {
    /// Wraps a value kind in a `Kind` filter step.
    fn from(k: path::ValueKind) -> Self {
        let kind_filter = path::Filter::Kind { kind: Box::new(k) };
        path::Step::Filter(Box::new(kind_filter))
    }
}
/// Takes the argument of a step from the front of `tokens`.
///
/// Returns a clone of the first token together with the remaining tokens.
///
/// # Errors
///
/// Returns `CompilationError::InvalidStep` when `tokens` is empty.
fn pop_step_arg(tokens: &[IOValue]) -> Result<(IOValue, &[IOValue]), CompilationError> {
    match tokens.split_first() {
        Some((argument, rest)) => Ok((argument.clone(), rest)),
        None => Err(CompilationError::InvalidStep),
    }
}
impl path::Expr {
    /// Compiles this expression into a pipeline whose results are gathered by
    /// a `VecCollector`.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while connecting the pipeline stages.
    pub fn compile(&self) -> Result<Node, CompilationError> {
        let sink = VecCollector::new();
        self.connect(sink)
    }

    /// Compiles this expression and runs it once against `value`, returning
    /// the collected results.
    ///
    /// # Errors
    ///
    /// Fails only if compilation fails.
    pub fn exec(&self, value: &IOValue) -> Result<Vec<IOValue>, CompilationError> {
        let compiled = self.compile()?;
        Ok(compiled.exec(value))
    }
}
impl std::str::FromStr for path::Expr {
    type Err = CompilationError;

    /// Reads `s` as a series of text-syntax values and parses those tokens
    /// as a path expression.
    ///
    /// # Errors
    ///
    /// Returns an error if the text cannot be read as values or if the
    /// tokens do not form a valid expression.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let tokens = BytesBinarySource::new(s.as_bytes())
            .text_iovalues()
            .configured(false)
            .collect::<Result<Vec<_>, _>>()?;
        parse_expr(&tokens)
    }
}
impl std::str::FromStr for Node {
    type Err = CompilationError;

    /// Parses `s` as a path expression and compiles it into a runnable node.
    ///
    /// # Errors
    ///
    /// Returns an error if `s` does not parse or the expression does not
    /// compile.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = s.parse::<path::Expr>()?;
        parsed.compile()
    }
}