Initial stab at preserves-path; repair error wrt EOF immediately following a number
This commit is contained in:
parent
be10924118
commit
137cc63a97
|
@ -31,9 +31,10 @@ automatic, perfect-fidelity conversion between syntaxes.
|
|||
- [Syrup](https://github.com/ocapn/syrup#pseudo-specification), a
|
||||
hybrid binary/human-readable syntax for the Preserves data model
|
||||
|
||||
### Preserves schema
|
||||
### Preserves schema and queries
|
||||
|
||||
- [Preserves Schema specification](preserves-schema.html)
|
||||
- [Preserves Path specification](preserves-path.html)
|
||||
|
||||
## Implementations
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
[workspace]
|
||||
members = [
|
||||
"preserves",
|
||||
"preserves-path",
|
||||
"preserves-schema",
|
||||
"preserves-tools",
|
||||
]
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
src/schemas/**/*.rs
|
|
@ -0,0 +1,19 @@
|
|||
[package]
|
||||
name = "preserves-path"
|
||||
version = "0.1.0"
|
||||
authors = ["Tony Garnock-Jones <tonyg@leastfixedpoint.com>"]
|
||||
edition = "2018"
|
||||
description = "Implementation of preserves-path, a query language for Preserves documents."
|
||||
homepage = "https://preserves.gitlab.io/"
|
||||
repository = "https://gitlab.com/preserves/preserves"
|
||||
license = "Apache-2.0"
|
||||
|
||||
[build-dependencies]
|
||||
preserves-schema = { path = "../preserves-schema", version = "0.3.0" }
|
||||
|
||||
[dependencies]
|
||||
preserves = { path = "../preserves", version = "0.16.0" }
|
||||
preserves-schema = { path = "../preserves-schema", version = "0.3.0" }
|
||||
|
||||
regex = "1.5"
|
||||
thiserror = "1.0"
|
|
@ -0,0 +1,16 @@
|
|||
use preserves_schema::compiler::*;
|
||||
|
||||
use std::io::Error;
|
||||
|
||||
fn main() -> Result<(), Error> {
|
||||
let buildroot = std::env::current_dir()?;
|
||||
|
||||
let mut gen_dir = buildroot.clone();
|
||||
gen_dir.push("src/schemas");
|
||||
|
||||
let mut c = CompilerConfig::new(gen_dir, "crate::schemas".to_owned());
|
||||
|
||||
let inputs = expand_inputs(&vec!["../../../path/path.bin".to_owned()])?;
|
||||
c.load_schemas_and_bundles(&inputs)?;
|
||||
compile(&c)
|
||||
}
|
|
@ -0,0 +1,664 @@
|
|||
pub mod schemas;
|
||||
|
||||
// Paths operate on IOValues because the AST includes keys of IOValue type.
|
||||
// If we could make Schemas produce generics...
|
||||
|
||||
pub use crate::schemas::path;
|
||||
|
||||
use preserves::value::AtomClass;
|
||||
use preserves::value::BinarySource;
|
||||
use preserves::value::BytesBinarySource;
|
||||
use preserves::value::CompoundClass;
|
||||
use preserves::value::IOValue;
|
||||
use preserves::value::Map;
|
||||
use preserves::value::NestedValue;
|
||||
use preserves::value::Reader;
|
||||
use preserves::value::Value;
|
||||
use preserves::value::ValueClass;
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::VecDeque;
|
||||
use std::iter::Iterator;
|
||||
use std::io;
|
||||
use std::rc::Rc;
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
/// Binary operators that may separate sub-expressions at one level of a path
/// expression. `split_binop` recognises them by the separator symbols `~`,
/// `+` and `&` respectively.
#[derive(Debug)]
enum Binop {
    /// Separator symbol `~`.
    Interleave,
    /// Separator symbol `+`.
    Union,
    /// Separator symbol `&`.
    Intersection,
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum CompilationError {
|
||||
#[error(transparent)]
|
||||
IoError(#[from] io::Error),
|
||||
#[error("Cannot mix binary operators")]
|
||||
MixedOperators,
|
||||
#[error("Invalid step")]
|
||||
InvalidStep,
|
||||
#[error(transparent)]
|
||||
RegexError(#[from] regex::Error),
|
||||
}
|
||||
|
||||
pub enum Path {
|
||||
Root,
|
||||
Step(IOValue, Rc<Path>),
|
||||
}
|
||||
|
||||
pub trait Step: std::fmt::Debug {
|
||||
fn accept(&mut self, path: Rc<Path>, value: &IOValue);
|
||||
fn finish(&mut self);
|
||||
fn reset(&mut self) -> Vec<IOValue>;
|
||||
}
|
||||
|
||||
/// Expands, inside an `impl Step for ...` block, to `finish` and `reset`
/// methods that simply forward to the given downstream expression.
///
/// The first argument must be the identifier `self`, so that the generated
/// method receivers and the forwarding expression refer to the same binding.
macro_rules! delegate_finish_and_reset {
    ($self:ident, $downstream:expr) => {
        fn finish(&mut $self) {
            $downstream.finish()
        }

        fn reset(&mut $self) -> Vec<IOValue> {
            $downstream.reset()
        }
    };
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Node(pub Rc<RefCell<dyn Step>>);
|
||||
|
||||
impl Node {
|
||||
fn new<S: Step + 'static>(s: S) -> Self {
|
||||
Node(Rc::new(RefCell::new(s)))
|
||||
}
|
||||
|
||||
pub fn accept(&self, path: Rc<Path>, value: &IOValue) {
|
||||
self.0.borrow_mut().accept(path, value)
|
||||
}
|
||||
|
||||
pub fn finish(&self) {
|
||||
self.0.borrow_mut().finish()
|
||||
}
|
||||
|
||||
pub fn reset(&self) -> Vec<IOValue> {
|
||||
self.0.borrow_mut().reset()
|
||||
}
|
||||
|
||||
pub fn exec(&self, value: &IOValue) -> Vec<IOValue> {
|
||||
self.accept(Path::root(), value);
|
||||
self.finish();
|
||||
self.reset()
|
||||
}
|
||||
}
|
||||
|
||||
pub trait StepMaker {
|
||||
fn connect(&self, step: Node) -> Result<Node, CompilationError>;
|
||||
}
|
||||
|
||||
impl Path {
|
||||
fn root() -> Rc<Self> {
|
||||
Rc::new(Path::Root)
|
||||
}
|
||||
|
||||
fn step(self: &Rc<Self>, v: &IOValue) -> Rc<Self> {
|
||||
Rc::new(Path::Step(v.clone(), Rc::clone(self)))
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: StepMaker> StepMaker for Vec<S> {
|
||||
fn connect(&self, mut step: Node) -> Result<Node, CompilationError> {
|
||||
for s in self.iter().rev() {
|
||||
step = s.connect(step)?;
|
||||
}
|
||||
Ok(step)
|
||||
}
|
||||
}
|
||||
|
||||
impl StepMaker for path::Expr {
|
||||
fn connect(&self, step: Node) -> Result<Node, CompilationError> {
|
||||
match self {
|
||||
path::Expr::Steps(s) =>
|
||||
s.connect(step),
|
||||
path::Expr::Not { expr } =>
|
||||
expr.connect(Node::new(NotStep { seen_value: false, step, })),
|
||||
path::Expr::Interleave { exprs } =>
|
||||
ForkJoinStep::new(exprs, |e, s| e.connect(s), step),
|
||||
path::Expr::Union { exprs } =>
|
||||
ForkJoinStep::new(exprs, |e, s| e.connect(s), ThresholdStep::new(1, step)?),
|
||||
path::Expr::Intersection { exprs } =>
|
||||
ForkJoinStep::new(exprs, |e, s| e.connect(ThresholdStep::new(1, s)?), ThresholdStep::new(exprs.len(), step)?),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl StepMaker for path::Step {
|
||||
fn connect(&self, step: Node) -> Result<Node, CompilationError> {
|
||||
match self {
|
||||
path::Step::Axis(b) => (&**b).connect(step),
|
||||
path::Step::Filter(b) => (&**b).connect(step),
|
||||
path::Step::Expr(b) => (&**b).connect(step),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct AxisStep {
|
||||
step: Node,
|
||||
axis: path::Axis,
|
||||
}
|
||||
|
||||
impl StepMaker for path::Axis {
|
||||
fn connect(&self, step: Node) -> Result<Node, CompilationError> {
|
||||
if let path::Axis::Nop = self {
|
||||
Ok(step)
|
||||
} else {
|
||||
Ok(Node::new(AxisStep { step, axis: self.clone() }))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Step for AxisStep {
|
||||
fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
|
||||
match &self.axis {
|
||||
path::Axis::Nop => self.step.accept(path, value),
|
||||
path::Axis::Values => {
|
||||
let path = path.step(value);
|
||||
for c in value.value().children() {
|
||||
self.step.accept(Rc::clone(&path), &c)
|
||||
}
|
||||
}
|
||||
path::Axis::Descendants => {
|
||||
let mut q = VecDeque::new();
|
||||
q.push_back((path, value.clone()));
|
||||
while let Some((p, c)) = q.pop_front() {
|
||||
let p = p.step(&c);
|
||||
for cc in c.value().children() {
|
||||
q.push_back((Rc::clone(&p), cc.clone()));
|
||||
}
|
||||
self.step.accept(p, &c)
|
||||
}
|
||||
}
|
||||
path::Axis::At { key } => match value.value() {
|
||||
Value::Record(r) => step_index(path.step(value), r.fields(), &key, &mut self.step),
|
||||
Value::Sequence(vs) => step_index(path.step(value), vs, &key, &mut self.step),
|
||||
Value::Dictionary(d) => if let Some(v) = d.get(&key) {
|
||||
self.step.accept(path.step(value), v)
|
||||
},
|
||||
_ => (),
|
||||
},
|
||||
path::Axis::Label => if let Some(r) = value.value().as_record(None) {
|
||||
self.step.accept(path.step(value), r.label())
|
||||
},
|
||||
path::Axis::Keys => match value.value() {
|
||||
Value::Record(r) => step_keys(path.step(value), r.arity(), &mut self.step),
|
||||
Value::Sequence(vs) => step_keys(path.step(value), vs.len(), &mut self.step),
|
||||
Value::Dictionary(d) => {
|
||||
let path = path.step(value);
|
||||
for k in d.keys() {
|
||||
self.step.accept(Rc::clone(&path), k)
|
||||
}
|
||||
},
|
||||
_ => (),
|
||||
},
|
||||
path::Axis::Length => match value.value() {
|
||||
Value::Record(r) => self.step.accept(path.step(value), &IOValue::new(r.arity())),
|
||||
Value::Sequence(vs) => self.step.accept(path.step(value), &IOValue::new(vs.len())),
|
||||
Value::Dictionary(d) => self.step.accept(path.step(value), &IOValue::new(d.len())),
|
||||
_ => self.step.accept(path.step(value), &IOValue::new(0)),
|
||||
},
|
||||
path::Axis::Annotations => {
|
||||
let path = path.step(value);
|
||||
for c in value.annotations().slice() {
|
||||
self.step.accept(Rc::clone(&path), &c)
|
||||
}
|
||||
}
|
||||
path::Axis::Embedded => if let Some(d) = value.value().as_embedded() {
|
||||
self.step.accept(path.step(value), d)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
delegate_finish_and_reset!(self, self.step);
|
||||
}
|
||||
|
||||
/// If `key` is an unsigned integer that is a valid index into `vs`, offers
/// the element at that index to `step`; otherwise does nothing.
fn step_index(p: Rc<Path>, vs: &[IOValue], key: &IOValue, step: &mut Node) {
    let selected = key.value().as_usize().and_then(|i| vs.get(i));
    if let Some(element) = selected {
        step.accept(p, element)
    }
}
|
||||
|
||||
fn step_keys(p: Rc<Path>, count: usize, step: &mut Node) {
|
||||
for i in 0 .. count {
|
||||
step.accept(Rc::clone(&p), &IOValue::new(i))
|
||||
}
|
||||
}
|
||||
|
||||
impl StepMaker for path::Filter {
|
||||
fn connect(&self, step: Node) -> Result<Node, CompilationError> {
|
||||
match self {
|
||||
path::Filter::Nop => Ok(step),
|
||||
path::Filter::Fail => Ok(Node::new(InertStep)),
|
||||
path::Filter::Eq { literal } => Ok(Node::new(EqStep { literal: literal.clone(), step })),
|
||||
path::Filter::Regex { regex } => Ok(Node::new(RegexStep { regex: regex::Regex::new(regex)?, step })),
|
||||
path::Filter::Test { expr } => Ok(Node::new(TestStep { expr: expr.connect(BoolCollector::new())?, step })),
|
||||
path::Filter::Kind { kind } => Ok(Node::new(KindStep {
|
||||
kind: match &**kind {
|
||||
path::ValueKind::Boolean => ValueClass::Atomic(AtomClass::Boolean),
|
||||
path::ValueKind::Float => ValueClass::Atomic(AtomClass::Float),
|
||||
path::ValueKind::Double => ValueClass::Atomic(AtomClass::Double),
|
||||
path::ValueKind::SignedInteger => ValueClass::Atomic(AtomClass::SignedInteger),
|
||||
path::ValueKind::String => ValueClass::Atomic(AtomClass::String),
|
||||
path::ValueKind::ByteString => ValueClass::Atomic(AtomClass::ByteString),
|
||||
path::ValueKind::Symbol => ValueClass::Atomic(AtomClass::Symbol),
|
||||
path::ValueKind::Record => ValueClass::Compound(CompoundClass::Record),
|
||||
path::ValueKind::Sequence => ValueClass::Compound(CompoundClass::Sequence),
|
||||
path::ValueKind::Set => ValueClass::Compound(CompoundClass::Set),
|
||||
path::ValueKind::Dictionary => ValueClass::Compound(CompoundClass::Dictionary),
|
||||
path::ValueKind::Embedded => ValueClass::Embedded,
|
||||
},
|
||||
step,
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct InertStep;
|
||||
|
||||
impl Step for InertStep {
|
||||
fn accept(&mut self, _path: Rc<Path>, _value: &IOValue) {}
|
||||
fn finish(&mut self) {}
|
||||
fn reset(&mut self) -> Vec<IOValue> { vec![] }
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct NotStep {
|
||||
seen_value: bool,
|
||||
step: Node,
|
||||
}
|
||||
|
||||
impl Step for NotStep {
|
||||
fn accept(&mut self, _path: Rc<Path>, _value: &IOValue) {
|
||||
self.seen_value = true;
|
||||
}
|
||||
|
||||
fn finish(&mut self) {
|
||||
if !self.seen_value {
|
||||
self.step.accept(Path::root(), &IOValue::new(true));
|
||||
self.seen_value = true; // makes finish() idempotent
|
||||
}
|
||||
self.step.finish()
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> Vec<IOValue> {
|
||||
self.seen_value = false;
|
||||
self.step.reset()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct EqStep {
|
||||
literal: IOValue,
|
||||
step: Node,
|
||||
}
|
||||
|
||||
impl Step for EqStep {
|
||||
fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
|
||||
if value == &self.literal {
|
||||
self.step.accept(path, value)
|
||||
}
|
||||
}
|
||||
|
||||
delegate_finish_and_reset!(self, self.step);
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct RegexStep {
|
||||
regex: regex::Regex,
|
||||
step: Node,
|
||||
}
|
||||
|
||||
impl Step for RegexStep {
|
||||
fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
|
||||
match value.value() {
|
||||
Value::String(s) => if self.regex.is_match(s) { self.step.accept(path, value) },
|
||||
Value::Symbol(s) => if self.regex.is_match(s) { self.step.accept(path, value) },
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
delegate_finish_and_reset!(self, self.step);
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TestStep {
|
||||
expr: Node,
|
||||
step: Node,
|
||||
}
|
||||
|
||||
impl Step for TestStep {
|
||||
fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
|
||||
self.expr.accept(Rc::clone(&path), value);
|
||||
self.expr.finish();
|
||||
match self.expr.reset().len() {
|
||||
0 => (),
|
||||
_ => self.step.accept(path, value)
|
||||
}
|
||||
}
|
||||
|
||||
delegate_finish_and_reset!(self, self.step);
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct VecCollector {
|
||||
accumulator: Vec<IOValue>,
|
||||
}
|
||||
|
||||
impl VecCollector {
|
||||
fn new() -> Node {
|
||||
Node::new(VecCollector { accumulator: Vec::new() })
|
||||
}
|
||||
}
|
||||
|
||||
impl Step for VecCollector {
|
||||
fn accept(&mut self, _path: Rc<Path>, value: &IOValue) {
|
||||
self.accumulator.push(value.clone())
|
||||
}
|
||||
|
||||
fn finish(&mut self) {
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> Vec<IOValue> {
|
||||
std::mem::take(&mut self.accumulator)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct BoolCollector {
|
||||
seen_value: bool,
|
||||
}
|
||||
|
||||
impl BoolCollector {
|
||||
fn new() -> Node {
|
||||
Node::new(BoolCollector { seen_value: false })
|
||||
}
|
||||
}
|
||||
|
||||
impl Step for BoolCollector {
|
||||
fn accept(&mut self, _path: Rc<Path>, _value: &IOValue) {
|
||||
self.seen_value = true
|
||||
}
|
||||
|
||||
fn finish(&mut self) {
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> Vec<IOValue> {
|
||||
let result = if self.seen_value { vec![IOValue::new(true)] } else { vec![] };
|
||||
self.seen_value = false;
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct KindStep {
|
||||
kind: ValueClass,
|
||||
step: Node,
|
||||
}
|
||||
|
||||
impl Step for KindStep {
|
||||
fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
|
||||
if value.value_class() == self.kind {
|
||||
self.step.accept(path, value)
|
||||
}
|
||||
}
|
||||
|
||||
delegate_finish_and_reset!(self, self.step);
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ForkJoinStep {
|
||||
branches: Vec<Node>,
|
||||
step: Node,
|
||||
}
|
||||
|
||||
impl ForkJoinStep {
|
||||
fn new<F: Fn(&path::Expr, Node) -> Result<Node, CompilationError>>(
|
||||
exprs: &Vec<path::Expr>,
|
||||
f: F,
|
||||
step: Node,
|
||||
) -> Result<Node, CompilationError> {
|
||||
Ok(Node::new(Self {
|
||||
branches: exprs.iter().map(|e| f(e, step.clone())).collect::<Result<Vec<Node>, _>>()?,
|
||||
step,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
impl Step for ForkJoinStep {
|
||||
fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
|
||||
for n in self.branches.iter_mut() {
|
||||
n.accept(Rc::clone(&path), value)
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(&mut self) {
|
||||
for n in self.branches.iter_mut() {
|
||||
n.finish()
|
||||
}
|
||||
self.step.finish()
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> Vec<IOValue> {
|
||||
let result = self.step.reset();
|
||||
for n in self.branches.iter_mut() {
|
||||
n.reset();
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ThresholdStep {
|
||||
threshold: usize,
|
||||
accumulator: Map<IOValue, usize>,
|
||||
step: Node,
|
||||
}
|
||||
|
||||
impl ThresholdStep {
|
||||
fn new(threshold: usize, step: Node) -> Result<Node, CompilationError> {
|
||||
Ok(Node::new(Self {
|
||||
threshold,
|
||||
accumulator: Map::new(),
|
||||
step,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
impl Step for ThresholdStep {
|
||||
fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
|
||||
let c = self.accumulator.entry(value.clone()).or_insert(0);
|
||||
*c += 1;
|
||||
if *c == self.threshold {
|
||||
self.step.accept(path, value)
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(&mut self) {
|
||||
self.step.finish()
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> Vec<IOValue> {
|
||||
self.accumulator.clear();
|
||||
self.step.reset()
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits `tokens` at every token that is a symbol whose text equals
/// `separator`, returning the runs of tokens between separators.
///
/// The separators themselves are not included. As with `slice::split`, the
/// result always has one more piece than there were separators, so input
/// without any separator (including empty input) yields exactly one piece.
fn split_values_by_symbol(tokens: &[IOValue], separator: &str) -> Vec<Vec<IOValue>> {
    tokens
        .split(|t| matches!(t.value().as_symbol(), Some(s) if s == separator))
        .map(|ts| ts.to_vec())
        .collect()
}
|
||||
|
||||
fn split_binop(tokens: &Vec<IOValue>) -> Result<(Vec<Vec<IOValue>>, Option<Binop>), CompilationError> {
|
||||
let interleave_pieces = split_values_by_symbol(&tokens, "~");
|
||||
let union_pieces = split_values_by_symbol(&tokens, "+");
|
||||
let intersection_pieces = split_values_by_symbol(&tokens, "&");
|
||||
match (interleave_pieces.len(), union_pieces.len(), intersection_pieces.len()) {
|
||||
(1, 1, 1) => Ok((interleave_pieces, None)),
|
||||
(m, 1, 1) if m > 1 => Ok((interleave_pieces, Some(Binop::Interleave))),
|
||||
(1, m, 1) if m > 1 => Ok((union_pieces, Some(Binop::Union))),
|
||||
(1, 1, m) if m > 1 => Ok((intersection_pieces, Some(Binop::Intersection))),
|
||||
_ => Err(CompilationError::MixedOperators),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_expr(tokens: &Vec<IOValue>) -> Result<path::Expr, CompilationError> {
|
||||
let (pieces, binop) = split_binop(tokens)?;
|
||||
match binop {
|
||||
None => parse_non_binop(&pieces[0]),
|
||||
Some(o) => {
|
||||
let exprs = pieces.into_iter().map(|ts| parse_non_binop(&ts))
|
||||
.collect::<Result<Vec<path::Expr>, _>>()?;
|
||||
Ok(match o {
|
||||
Binop::Interleave => path::Expr::Interleave { exprs },
|
||||
Binop::Union => path::Expr::Union { exprs },
|
||||
Binop::Intersection => path::Expr::Intersection { exprs },
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_non_binop(tokens: &[IOValue]) -> Result<path::Expr, CompilationError> {
|
||||
if !tokens.is_empty() {
|
||||
let t = tokens[0].value();
|
||||
|
||||
if let Some("!") = t.as_symbol().map(|s| s.as_str()) {
|
||||
return Ok(path::Expr::Not { expr: Box::new(parse_non_binop(&tokens[1..])?) });
|
||||
}
|
||||
}
|
||||
|
||||
let mut steps = Vec::new();
|
||||
let mut tokens = tokens;
|
||||
while let Some((s, remaining)) = parse_step(tokens)? {
|
||||
steps.push(s);
|
||||
tokens = remaining;
|
||||
}
|
||||
Ok(path::Expr::Steps(steps))
|
||||
}
|
||||
|
||||
fn parse_step(tokens: &[IOValue]) -> Result<Option<(path::Step, &[IOValue])>, CompilationError> {
|
||||
if tokens.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let remainder = &tokens[1..];
|
||||
|
||||
if tokens[0].value().is_sequence() {
|
||||
return Ok(Some((
|
||||
path::Step::Expr(Box::new(parse_expr(tokens[0].value().as_sequence().unwrap())?)),
|
||||
remainder)));
|
||||
}
|
||||
|
||||
match tokens[0].value().as_symbol() {
|
||||
None => return Err(CompilationError::InvalidStep),
|
||||
Some(t) => match t.as_str() {
|
||||
".=" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Nop)), remainder))),
|
||||
"/" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Values)), remainder))),
|
||||
"//" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Descendants)), remainder))),
|
||||
"." => {
|
||||
let (key, remainder) = pop_step_arg(remainder)?;
|
||||
Ok(Some((path::Step::Axis(Box::new(path::Axis::At { key })), remainder)))
|
||||
}
|
||||
".^" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Label)), remainder))),
|
||||
".keys" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Keys)), remainder))),
|
||||
".length" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Length)), remainder))),
|
||||
".annotations" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Annotations)), remainder))),
|
||||
".embedded" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Embedded)), remainder))),
|
||||
|
||||
"=*" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Nop)), remainder))),
|
||||
"=!" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Fail)), remainder))),
|
||||
"=" => {
|
||||
let (literal, remainder) = pop_step_arg(remainder)?;
|
||||
Ok(Some((path::Step::Filter(Box::new(path::Filter::Eq { literal })), remainder)))
|
||||
}
|
||||
"=r" => {
|
||||
let (regex_val, remainder) = pop_step_arg(remainder)?;
|
||||
let regex = regex_val.value().to_string().map_err(|_| CompilationError::InvalidStep)?.clone();
|
||||
let _ = regex::Regex::new(®ex)?;
|
||||
Ok(Some((path::Step::Filter(Box::new(path::Filter::Regex { regex })), remainder)))
|
||||
}
|
||||
"?" => {
|
||||
let (expr_val, remainder) = pop_step_arg(remainder)?;
|
||||
let expr = Box::new(parse_expr(&vec![expr_val])?);
|
||||
Ok(Some((path::Step::Filter(Box::new(path::Filter::Test { expr })), remainder)))
|
||||
}
|
||||
"^" => {
|
||||
let (literal, remainder) = pop_step_arg(remainder)?;
|
||||
Ok(Some((path::Step::Filter(Box::new(path::Filter::Test {
|
||||
expr: Box::new(path::Expr::Steps(vec![
|
||||
path::Step::Axis(Box::new(path::Axis::Label)),
|
||||
path::Step::Filter(Box::new(path::Filter::Eq { literal })),
|
||||
])),
|
||||
})), remainder)))
|
||||
}
|
||||
|
||||
"bool" => Ok(Some((path::Step::from(path::ValueKind::Boolean), remainder))),
|
||||
"float" => Ok(Some((path::Step::from(path::ValueKind::Float), remainder))),
|
||||
"double" => Ok(Some((path::Step::from(path::ValueKind::Double), remainder))),
|
||||
"int" => Ok(Some((path::Step::from(path::ValueKind::SignedInteger), remainder))),
|
||||
"string" => Ok(Some((path::Step::from(path::ValueKind::String), remainder))),
|
||||
"bytes" => Ok(Some((path::Step::from(path::ValueKind::ByteString), remainder))),
|
||||
"symbol" => Ok(Some((path::Step::from(path::ValueKind::Symbol), remainder))),
|
||||
"rec" => Ok(Some((path::Step::from(path::ValueKind::Record), remainder))),
|
||||
"seq" => Ok(Some((path::Step::from(path::ValueKind::Sequence), remainder))),
|
||||
"set" => Ok(Some((path::Step::from(path::ValueKind::Set), remainder))),
|
||||
"dict" => Ok(Some((path::Step::from(path::ValueKind::Dictionary), remainder))),
|
||||
"embedded" => Ok(Some((path::Step::from(path::ValueKind::Embedded), remainder))),
|
||||
|
||||
_ => Err(CompilationError::InvalidStep),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<path::ValueKind> for path::Step {
    /// Wraps a value kind in a `Kind` filter step.
    fn from(k: path::ValueKind) -> Self {
        let filter = path::Filter::Kind { kind: Box::new(k) };
        path::Step::Filter(Box::new(filter))
    }
}
|
||||
|
||||
fn pop_step_arg(tokens: &[IOValue]) -> Result<(IOValue, &[IOValue]), CompilationError> {
|
||||
if tokens.is_empty() {
|
||||
return Err(CompilationError::InvalidStep);
|
||||
}
|
||||
Ok((tokens[0].clone(), &tokens[1..]))
|
||||
}
|
||||
|
||||
impl path::Expr {
|
||||
pub fn compile(&self) -> Result<Node, CompilationError> {
|
||||
self.connect(VecCollector::new())
|
||||
}
|
||||
|
||||
pub fn exec(&self, value: &IOValue) -> Result<Vec<IOValue>, CompilationError> {
|
||||
Ok(self.compile()?.exec(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for path::Expr {
|
||||
type Err = CompilationError;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
parse_expr(&(BytesBinarySource::new(s.as_bytes())
|
||||
.text_iovalues()
|
||||
.configured(false)
|
||||
.collect::<Result<Vec<_>, _>>()?))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for Node {
|
||||
type Err = CompilationError;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let expr = path::Expr::from_str(s)?;
|
||||
expr.compile()
|
||||
}
|
||||
}
|
|
@ -10,6 +10,7 @@ license = "Apache-2.0"
|
|||
|
||||
[dependencies]
|
||||
preserves = { path = "../preserves", version = "0.16.0" }
|
||||
preserves-path = { path = "../preserves-path", version = "0.1.0" }
|
||||
|
||||
bytes = "1.0"
|
||||
clap = "3.0.0-beta.2"
|
||||
|
|
|
@ -89,6 +89,9 @@ struct Convert {
|
|||
#[clap(long, arg_enum, value_name = "on/off", default_value = "on")]
|
||||
indent: Boolish,
|
||||
|
||||
#[clap(long, default_value="=*")]
|
||||
select: preserves_path::Node,
|
||||
|
||||
#[clap(long, arg_enum, value_name = "on/off", default_value = "on")]
|
||||
annotations: Boolish,
|
||||
}
|
||||
|
@ -356,7 +359,9 @@ fn convert(c: Convert) -> io::Result<()> {
|
|||
};
|
||||
while let Some(value) = vs.next() {
|
||||
let value = value?;
|
||||
w(&value)?;
|
||||
for v in c.select.exec(&value) {
|
||||
w(&v)?;
|
||||
}
|
||||
if let Some(limit) = c.limit {
|
||||
if vs.count >= limit {
|
||||
return Ok(());
|
||||
|
|
|
@ -350,6 +350,24 @@ impl<N: NestedValue<D>, D: Embeddable> Value<N, D> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn children(&self) -> Vec<N> {
|
||||
match self {
|
||||
Value::Boolean(_) |
|
||||
Value::Float(_) |
|
||||
Value::Double(_) |
|
||||
Value::SignedInteger(_) |
|
||||
Value::String(_) |
|
||||
Value::ByteString(_) |
|
||||
Value::Symbol(_) |
|
||||
Value::Embedded(_) => vec![],
|
||||
|
||||
Value::Record(r) => r.fields().to_vec(),
|
||||
Value::Sequence(vs) => vs.clone(),
|
||||
Value::Set(s) => s.iter().cloned().collect(),
|
||||
Value::Dictionary(d) => d.values().cloned().collect(),
|
||||
}
|
||||
}
|
||||
|
||||
fn expected(&self, k: ExpectedKind) -> Error {
|
||||
Error::Expected(k, Received::ReceivedOtherValue(format!("{:?}", self.clone().wrap())))
|
||||
}
|
||||
|
@ -516,6 +534,8 @@ impl<N: NestedValue<D>, D: Embeddable> Value<N, D> {
|
|||
pub fn as_i64(&self) -> Option<i64> { self.as_i().and_then(|i| i.to_i64()) }
|
||||
pub fn as_u128(&self) -> Option<u128> { self.as_u().and_then(|i| i.to_u128()) }
|
||||
pub fn as_i128(&self) -> Option<i128> { self.as_i().and_then(|i| i.to_i128()) }
|
||||
/// Returns the value as a `usize`, or `None` if `as_u` yields nothing or the number does not fit.
pub fn as_usize(&self) -> Option<usize> { self.as_u()?.to_usize() }
|
||||
/// Returns the value as an `isize`, or `None` if `as_i` yields nothing or the number does not fit.
pub fn as_isize(&self) -> Option<isize> { self.as_i()?.to_isize() }
|
||||
|
||||
pub fn to_i8(&self) -> Result<i8, Error> {
|
||||
match self.as_i() {
|
||||
|
@ -587,6 +607,20 @@ impl<N: NestedValue<D>, D: Embeddable> Value<N, D> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn to_isize(&self) -> Result<isize, Error> {
|
||||
match self.as_i() {
|
||||
Some(i) => i.to_isize().ok_or_else(|| Error::NumberOutOfRange(BigInt::from(i))),
|
||||
None => Err(self.expected(ExpectedKind::SignedInteger)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_usize(&self) -> Result<usize, Error> {
|
||||
match self.as_u() {
|
||||
Some(i) => i.to_usize().ok_or_else(|| Error::NumberOutOfRange(BigInt::from(i))),
|
||||
None => Err(self.expected(ExpectedKind::SignedInteger)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_char(&self) -> Result<char, Error> {
|
||||
let fs = self.to_simple_record("UnicodeScalar", Some(1))?;
|
||||
let c = fs[0].value().to_u32()?;
|
||||
|
|
|
@ -150,8 +150,8 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> TextRe
|
|||
|
||||
fn read_fracexp<N: NestedValue<D>>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
|
||||
let mut is_float = false;
|
||||
match self.peek()? {
|
||||
b'.' => {
|
||||
match self.peek() {
|
||||
Ok(b'.') => {
|
||||
is_float = true;
|
||||
bs.push(self.next_byte()?);
|
||||
let c = self.next_byte()?;
|
||||
|
@ -159,8 +159,8 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> TextRe
|
|||
}
|
||||
_ => ()
|
||||
}
|
||||
match self.peek()? {
|
||||
b'e' | b'E' => {
|
||||
match self.peek() {
|
||||
Ok(b'e') | Ok(b'E') => {
|
||||
bs.push(self.next_byte()?);
|
||||
self.read_sign_and_exp(bs)
|
||||
}
|
||||
|
@ -181,8 +181,8 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> TextRe
|
|||
fn finish_number<N: NestedValue<D>>(&mut self, bs: Vec<u8>, is_float: bool) -> io::Result<N> {
|
||||
let s = decode_utf8(bs)?;
|
||||
if is_float {
|
||||
match self.peek()? {
|
||||
b'f' | b'F' => {
|
||||
match self.peek() {
|
||||
Ok(b'f') | Ok(b'F') => {
|
||||
self.skip()?;
|
||||
Ok(N::new(s.parse::<f32>().map_err(
|
||||
|_| io_syntax_error(&format!(
|
||||
|
@ -206,7 +206,10 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> TextRe
|
|||
return Err(io_syntax_error("Incomplete number"));
|
||||
}
|
||||
bs.push(c);
|
||||
while (self.peek()? as char).is_digit(10) {
|
||||
while let Ok(c) = self.peek() {
|
||||
if !(c as char).is_digit(10) {
|
||||
break;
|
||||
}
|
||||
bs.push(self.next_byte()?);
|
||||
}
|
||||
Ok(())
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
TARGETS=path.bin
|
||||
|
||||
all: $(TARGETS)
|
||||
|
||||
%.bin: %.prs
|
||||
../implementations/javascript/packages/schema/bin/preserves-schemac.js --no-bundle $< > $@.tmp || (rm -f $@.tmp; false)
|
||||
mv $@.tmp $@
|
||||
|
||||
clean:
|
||||
rm -f $(TARGETS)
|
|
@ -0,0 +1,7 @@
|
|||
´³schema·³version‘³definitions·³Axis´³orµµ±nop´³rec´³lit³nop„´³tupleµ„„„„µ±values´³rec´³lit³values„´³tupleµ„„„„µ±descendants´³rec´³lit³descendants„´³tupleµ„„„„µ±at´³rec´³lit³at„´³tupleµ´³named³key³any„„„„„µ±label´³rec´³lit³label„´³tupleµ„„„„µ±keys´³rec´³lit³keys„´³tupleµ„„„„µ±length´³rec´³lit³length„´³tupleµ„„„„µ±annotations´³rec´³lit³annotations„´³tupleµ„„„„µ±embedded´³rec´³lit³embedded„´³tupleµ„„„„„„³Expr´³orµµ±steps´³seqof´³refµ„³Step„„„µ±not´³rec´³lit³not„´³tupleµ´³named³expr´³refµ„³Expr„„„„„„µ±
|
||||
interleave´³rec´³lit³
|
||||
interleave„´³tupleµ´³named³exprs´³seqof´³refµ„³Expr„„„„„„„µ±union´³rec´³lit³union„´³tupleµ´³named³exprs´³seqof´³refµ„³Expr„„„„„„„µ±intersection´³rec´³lit³intersection„´³tupleµ´³named³exprs´³seqof´³refµ„³Expr„„„„„„„„„³Step´³orµµ±Axis´³refµ„³Axis„„µ±Filter´³refµ„³Filter„„µ±Expr´³refµ„³Expr„„„„³Filter´³orµµ±nop´³rec´³lit³nop„´³tupleµ„„„„µ±fail´³rec´³lit³fail„´³tupleµ„„„„µ±eq´³rec´³lit³eq„´³tupleµ´³named³literal³any„„„„„µ±regex´³rec´³lit³regex„´³tupleµ´³named³regex´³atom³String„„„„„„µ±test´³rec´³lit³test„´³tupleµ´³named³expr´³refµ„³Expr„„„„„„µ±kind´³rec´³lit³kind„´³tupleµ´³named³kind´³refµ„³ ValueKind„„„„„„„„³ ValueKind´³orµµ±Boolean´³lit³Boolean„„µ±Float´³lit³Float„„µ±Double´³lit³Double„„µ±
SignedInteger´³lit³
SignedInteger„„µ±String´³lit³String„„µ±
|
||||
ByteString´³lit³
|
||||
ByteString„„µ±Symbol´³lit³Symbol„„µ±Record´³lit³Record„„µ±Sequence´³lit³Sequence„„µ±Set´³lit³Set„„µ±
|
||||
Dictionary´³lit³
|
||||
Dictionary„„µ±Embedded´³lit³Embedded„„„„„³embeddedType€„„
|
|
@ -0,0 +1,38 @@
|
|||
version 1 .
|
||||
|
||||
Expr =
|
||||
/ @steps [Step ...]
|
||||
/ <not @expr Expr>
|
||||
/ <interleave @exprs [Expr ...]>
|
||||
/ <union @exprs [Expr ...]>
|
||||
/ <intersection @exprs [Expr ...]>
|
||||
.
|
||||
|
||||
Step = Axis / Filter / Expr .
|
||||
|
||||
Axis =
|
||||
/ <nop>
|
||||
/ <values>
|
||||
/ <descendants>
|
||||
/ <at @key any>
|
||||
/ <label>
|
||||
/ <keys>
|
||||
/ <length>
|
||||
/ <annotations>
|
||||
/ <embedded>
|
||||
.
|
||||
|
||||
Filter =
|
||||
/ <nop>
|
||||
/ <fail>
|
||||
/ <eq @literal any>
|
||||
/ <regex @regex string>
|
||||
/ <test @expr Expr>
|
||||
/ <kind @kind ValueKind>
|
||||
.
|
||||
|
||||
ValueKind =
|
||||
/ =Boolean / =Float / =Double / =SignedInteger / =String / =ByteString / =Symbol
|
||||
/ =Record / =Sequence / =Set / =Dictionary
|
||||
/ =Embedded
|
||||
.
|
|
@ -0,0 +1,91 @@
|
|||
---
|
||||
no_site_title: true
|
||||
title: "Preserves Path"
|
||||
---
|
||||
|
||||
Tony Garnock-Jones <tonyg@leastfixedpoint.com>
|
||||
August 2021. Version 0.1.0.
|
||||
|
||||
XML documents can move into attributes, into text, or into children.
|
||||
|
||||
Preserves documents don't have attributes, but they do have children
|
||||
generally and keyed children in particular. You might want to move
|
||||
into the child with a particular key (number, for sequences, or
|
||||
general-value for dictionaries); into all keys; into all
|
||||
mapped-to-values, i.e. children (n.b. not just for sequences and
|
||||
dicts, but also for sets).
|
||||
|
||||
## Expressions
|
||||
|
||||
Expressions: compute a sequence or set (or dictionary?) of results
|
||||
from a stream of input values.
|
||||
|
||||
Precedence groupings from highest to lowest. Within a grouping, no
|
||||
mixed precedence is permitted.
|
||||
|
||||
step ... ;; Applies steps one after the other, flatmap-style
|
||||
|
||||
! expr ;; If no nodes, yields a dummy #t node; if some, yields none
|
||||
|
||||
expr ~ expr ~ ... ;; "interleave" of expressions (sequence-valued, duplicates allowed)
|
||||
expr + expr + ... ;; "union" of expressions (set-valued)
|
||||
expr & expr & ... ;; "intersection" of expressions (set-valued)
|
||||
|
||||
A step is an axis, a filter, or `[expr]`, a parenthesis for overriding precedence.
|
||||
|
||||
## Axes
|
||||
|
||||
Axes: move around, applying filters after moving
|
||||
|
||||
.= ;; Doesn't move anywhere
|
||||
/ ;; Moves into immediate children (values / fields)
|
||||
// ;; Flattens children recursively
|
||||
. key ;; Moves into named child
|
||||
.^ ;; Moves into record label
|
||||
.keys ;; Moves into *keys* rather than values
|
||||
.length ;; Moves into the number of keys
|
||||
.annotations ;; Moves into any annotations that might be present
|
||||
.embedded ;; Moves into the representation of an embedded value
|
||||
|
||||
## Filters
|
||||
|
||||
Filters: narrow down a selection without moving
|
||||
|
||||
=* ;; Accepts all
|
||||
=! ;; Rejects all
|
||||
|
||||
= literal ;; Matches values equal to the literal
|
||||
=r regex ;; Matches strings and symbols by regular expression
|
||||
|
||||
?[expr] ;; Applies the expression to each node; keeps nodes that yield nonempty
|
||||
|
||||
    ^ literal       ;; Matches a record having the literal as its label -- equivalent to ?[.^ = literal]
|
||||
|
||||
bool ;; Type filters
|
||||
float
|
||||
double
|
||||
int
|
||||
string
|
||||
bytes
|
||||
symbol
|
||||
rec
|
||||
seq
|
||||
set
|
||||
dict
|
||||
embedded
|
||||
|
||||
## Transformers
|
||||
|
||||
e.g. stringify results; sequenceify results (see "~" operator); setify
|
||||
results (see "+" and "&" operators); join stringified results with a
|
||||
separator
|
||||
|
||||
## Tool design
|
||||
|
||||
When processing multiple input documents sequentially, will sometimes
|
||||
want a list of results for each document, a set of results for each
|
||||
document, or a list flattened into a sequence of outputs for all input
|
||||
documents in the sequence. (A flattened set doesn't make sense for
|
||||
streaming since the input documents come in a sequence; if the inputs
|
||||
were treated as a set represented as a sequence, and outputs were
|
||||
buffered in a single large set, that could work out...)
|
Loading…
Reference in New Issue