Initial stab at preserves-path; repair error wrt EOF immediately following a number

This commit is contained in:
Tony Garnock-Jones 2021-08-08 14:26:17 -04:00
parent be10924118
commit 137cc63a97
14 changed files with 900 additions and 9 deletions

View File

@ -31,9 +31,10 @@ automatic, perfect-fidelity conversion between syntaxes.
- [Syrup](https://github.com/ocapn/syrup#pseudo-specification), a
hybrid binary/human-readable syntax for the Preserves data model
### Preserves schema
### Preserves schema and queries
- [Preserves Schema specification](preserves-schema.html)
- [Preserves Path specification](preserves-path.html)
## Implementations

View File

@ -1,6 +1,7 @@
[workspace]
members = [
"preserves",
"preserves-path",
"preserves-schema",
"preserves-tools",
]

View File

@ -0,0 +1 @@
src/schemas/**/*.rs

View File

@ -0,0 +1,19 @@
[package]
name = "preserves-path"
version = "0.1.0"
authors = ["Tony Garnock-Jones <tonyg@leastfixedpoint.com>"]
edition = "2018"
description = "Implementation of preserves-path, a query language for Preserves documents."
homepage = "https://preserves.gitlab.io/"
repository = "https://gitlab.com/preserves/preserves"
license = "Apache-2.0"
[build-dependencies]
preserves-schema = { path = "../preserves-schema", version = "0.3.0" }
[dependencies]
preserves = { path = "../preserves", version = "0.16.0" }
preserves-schema = { path = "../preserves-schema", version = "0.3.0" }
regex = "1.5"
thiserror = "1.0"

View File

@ -0,0 +1,16 @@
use preserves_schema::compiler::*;
use std::io::Error;
/// Build-script entry point: runs the Preserves schema compiler over the
/// pre-compiled path schema and writes the generated Rust modules into
/// `src/schemas` (relative to the directory the script is run from).
fn main() -> Result<(), Error> {
    let gen_dir = std::env::current_dir()?.join("src/schemas");
    let mut c = CompilerConfig::new(gen_dir, "crate::schemas".to_owned());

    let schema_files = vec!["../../../path/path.bin".to_owned()];
    let inputs = expand_inputs(&schema_files)?;
    c.load_schemas_and_bundles(&inputs)?;

    compile(&c)
}

View File

@ -0,0 +1,664 @@
pub mod schemas;
// Paths operate on IOValues because the AST includes keys of IOValue type.
// If we could make Schemas produce generics...
pub use crate::schemas::path;
use preserves::value::AtomClass;
use preserves::value::BinarySource;
use preserves::value::BytesBinarySource;
use preserves::value::CompoundClass;
use preserves::value::IOValue;
use preserves::value::Map;
use preserves::value::NestedValue;
use preserves::value::Reader;
use preserves::value::Value;
use preserves::value::ValueClass;
use std::cell::RefCell;
use std::collections::VecDeque;
use std::iter::Iterator;
use std::io;
use std::rc::Rc;
use thiserror::Error;
/// Binary operator that may separate the alternatives of a path expression.
///
/// Produced by `split_binop` and turned into the matching `path::Expr`
/// variant by `parse_expr`.
#[derive(Debug)]
enum Binop {
/// The `~` separator; becomes `path::Expr::Interleave`.
Interleave,
/// The `+` separator; becomes `path::Expr::Union`.
Union,
/// The `&` separator; becomes `path::Expr::Intersection`.
Intersection,
}
/// Errors raised while parsing a path expression or compiling it into a
/// graph of `Node`s.
#[derive(Error, Debug)]
pub enum CompilationError {
/// An underlying I/O error, converted automatically via `From<io::Error>`.
#[error(transparent)]
IoError(#[from] io::Error),
/// More than one kind of binary operator (`~`, `+`, `&`) was used at the
/// same nesting level; see `split_binop`.
#[error("Cannot mix binary operators")]
MixedOperators,
/// A token did not denote a known step, or a step that requires an
/// argument was not followed by one; see `parse_step` and `pop_step_arg`.
#[error("Invalid step")]
InvalidStep,
/// A regular expression failed to compile, converted automatically via
/// `From<regex::Error>`.
#[error(transparent)]
RegexError(#[from] regex::Error),
}
/// The chain of values walked through to reach the value currently being
/// offered to a step.
///
/// Stored as a reverse linked list: each `Step` holds one value on the way
/// and a shared pointer to the remainder of the path back towards `Root`.
#[derive(Debug)]
pub enum Path {
    /// The empty path; the starting point of every traversal.
    Root,
    /// A value on the path, followed by the rest of the path towards the root.
    Step(IOValue, Rc<Path>),
}
/// One stage of a compiled path expression.
///
/// `Node::exec` drives a step by calling `accept`, then `finish`, then
/// `reset`, in that order.
pub trait Step: std::fmt::Debug {
/// Offers `value` to this step, along with the `path` leading to it.
fn accept(&mut self, path: Rc<Path>, value: &IOValue);
/// Signals that no more values will be offered for the current run.
fn finish(&mut self);
/// Clears per-run state and returns the results collected at the end of
/// the chain (intermediate steps in this file delegate to their
/// downstream step).
fn reset(&mut self) -> Vec<IOValue>;
}
// Expands, inside an `impl Step for ...` block, to `finish` and `reset`
// methods that simply forward to `$target` (typically the downstream
// `self.step`). `$self` must be passed in so the generated methods can name
// the receiver despite macro hygiene.
macro_rules! delegate_finish_and_reset {
($self:ident, $target:expr) => {
fn finish(&mut $self) { $target.finish() }
fn reset(&mut $self) -> Vec<IOValue> { $target.reset() }
}
}
/// Shared, interior-mutable handle to a compiled [`Step`].
///
/// Cloning a `Node` clones the `Rc`, so all clones refer to the same step.
#[derive(Clone, Debug)]
pub struct Node(pub Rc<RefCell<dyn Step>>);

impl Node {
    /// Wraps a concrete step in a fresh shared handle.
    fn new<S: Step + 'static>(s: S) -> Self {
        Self(Rc::new(RefCell::new(s)))
    }

    /// Offers `value` (reached via `path`) to the wrapped step.
    pub fn accept(&self, path: Rc<Path>, value: &IOValue) {
        let mut inner = self.0.borrow_mut();
        inner.accept(path, value)
    }

    /// Tells the wrapped step that the current run has no more input.
    pub fn finish(&self) {
        let mut inner = self.0.borrow_mut();
        inner.finish()
    }

    /// Resets the wrapped step, returning whatever results it hands back.
    pub fn reset(&self) -> Vec<IOValue> {
        let mut inner = self.0.borrow_mut();
        inner.reset()
    }

    /// Runs the whole pipeline over a single input value: accept it at the
    /// root path, finish, then reset and return the collected results.
    pub fn exec(&self, value: &IOValue) -> Vec<IOValue> {
        let start = Path::root();
        self.accept(start, value);
        self.finish();
        self.reset()
    }
}
/// Something that can be compiled into a `Node` feeding a given downstream
/// node.
pub trait StepMaker {
/// Builds the node for `self`, wired so that its output goes to `step`,
/// and returns the new head of the chain.
fn connect(&self, step: Node) -> Result<Node, CompilationError>;
}
impl Path {
fn root() -> Rc<Self> {
Rc::new(Path::Root)
}
fn step(self: &Rc<Self>, v: &IOValue) -> Rc<Self> {
Rc::new(Path::Step(v.clone(), Rc::clone(self)))
}
}
impl<S: StepMaker> StepMaker for Vec<S> {
    /// Chains the makers so that the first element receives input first:
    /// the list is wired back to front, each element feeding the node built
    /// for its successor, with the last one feeding `step`.
    fn connect(&self, step: Node) -> Result<Node, CompilationError> {
        self.iter()
            .rev()
            .try_fold(step, |downstream, maker| maker.connect(downstream))
    }
}
impl StepMaker for path::Expr {
    /// Compiles an expression so that its results flow into `step`.
    fn connect(&self, step: Node) -> Result<Node, CompilationError> {
        // Branch compiler used when a fork/join branch needs no extra wrapping.
        let plain = |e: &path::Expr, s: Node| e.connect(s);

        match self {
            path::Expr::Steps(s) => s.connect(step),
            path::Expr::Not { expr } => {
                let negation = NotStep { seen_value: false, step };
                expr.connect(Node::new(negation))
            }
            path::Expr::Interleave { exprs } => ForkJoinStep::new(exprs, plain, step),
            path::Expr::Union { exprs } => {
                // Threshold 1: only the first occurrence of each value passes.
                let first_only = ThresholdStep::new(1, step)?;
                ForkJoinStep::new(exprs, plain, first_only)
            }
            path::Expr::Intersection { exprs } => {
                // Each branch is de-duplicated on its own, then a value passes
                // once it has been seen from every branch.
                let in_every_branch = ThresholdStep::new(exprs.len(), step)?;
                ForkJoinStep::new(
                    exprs,
                    |e, s| e.connect(ThresholdStep::new(1, s)?),
                    in_every_branch,
                )
            }
        }
    }
}
impl StepMaker for path::Step {
    /// Dispatches to the boxed axis, filter or nested expression.
    fn connect(&self, step: Node) -> Result<Node, CompilationError> {
        match self {
            path::Step::Axis(axis) => axis.connect(step),
            path::Step::Filter(filter) => filter.connect(step),
            path::Step::Expr(expr) => expr.connect(step),
        }
    }
}
/// Step that moves from each incoming value to related values, as selected
/// by `axis`, and offers those to the downstream `step`.
#[derive(Debug)]
struct AxisStep {
// Downstream node receiving the values the axis moves to.
step: Node,
// Which movement to perform.
axis: path::Axis,
}
impl StepMaker for path::Axis {
    /// Wraps `step` in an [`AxisStep`] for this axis; the no-op axis needs no
    /// node of its own, so `step` is returned unchanged in that case.
    fn connect(&self, step: Node) -> Result<Node, CompilationError> {
        match self {
            path::Axis::Nop => Ok(step),
            _ => Ok(Node::new(AxisStep { step, axis: self.clone() })),
        }
    }
}
impl Step for AxisStep {
// Moves from `value` along `self.axis` and offers every value reached to the
// downstream step. Values for which the axis does not apply produce nothing
// (except for `Length`, see below).
fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
match &self.axis {
// Stay where we are. (`connect` never builds an AxisStep for Nop, but the
// match must be exhaustive.)
path::Axis::Nop => self.step.accept(path, value),
// Every immediate child, with the path extended by the parent `value`.
path::Axis::Values => {
let path = path.step(value);
for c in value.value().children() {
self.step.accept(Rc::clone(&path), &c)
}
}
// Breadth-first walk (FIFO queue) over `value` itself and everything
// nested inside it; each visited value is offered downstream.
// NOTE(review): here the path handed downstream already includes the
// offered value `c` itself, whereas `Values` hands over a path ending at
// the parent — confirm which is intended.
path::Axis::Descendants => {
let mut q = VecDeque::new();
q.push_back((path, value.clone()));
while let Some((p, c)) = q.pop_front() {
let p = p.step(&c);
for cc in c.value().children() {
q.push_back((Rc::clone(&p), cc.clone()));
}
self.step.accept(p, &c)
}
}
// Keyed access: positional index into record fields or sequence items,
// dictionary lookup by key; nothing for any other kind of value.
path::Axis::At { key } => match value.value() {
Value::Record(r) => step_index(path.step(value), r.fields(), &key, &mut self.step),
Value::Sequence(vs) => step_index(path.step(value), vs, &key, &mut self.step),
Value::Dictionary(d) => if let Some(v) = d.get(&key) {
self.step.accept(path.step(value), v)
},
_ => (),
},
// The label of a record; nothing for non-records.
path::Axis::Label => if let Some(r) = value.value().as_record(None) {
self.step.accept(path.step(value), r.label())
},
// The keys rather than the values: indices 0..n for records and
// sequences, the actual keys for dictionaries.
path::Axis::Keys => match value.value() {
Value::Record(r) => step_keys(path.step(value), r.arity(), &mut self.step),
Value::Sequence(vs) => step_keys(path.step(value), vs.len(), &mut self.step),
Value::Dictionary(d) => {
let path = path.step(value);
for k in d.keys() {
self.step.accept(Rc::clone(&path), k)
}
},
_ => (),
},
// The number of fields/items/entries. Every other value — including
// sets, which are not matched explicitly — yields 0 rather than nothing.
path::Axis::Length => match value.value() {
Value::Record(r) => self.step.accept(path.step(value), &IOValue::new(r.arity())),
Value::Sequence(vs) => self.step.accept(path.step(value), &IOValue::new(vs.len())),
Value::Dictionary(d) => self.step.accept(path.step(value), &IOValue::new(d.len())),
_ => self.step.accept(path.step(value), &IOValue::new(0)),
},
// Each annotation attached to `value`.
path::Axis::Annotations => {
let path = path.step(value);
for c in value.annotations().slice() {
self.step.accept(Rc::clone(&path), &c)
}
}
// The value wrapped by an embedded value; nothing for non-embedded values.
path::Axis::Embedded => if let Some(d) = value.value().as_embedded() {
self.step.accept(path.step(value), d)
},
}
}
// This step keeps no per-run state of its own.
delegate_finish_and_reset!(self, self.step);
}
/// Offers `vs[key]` to `step` when `key` is an unsigned integer that is a
/// valid index into `vs`; otherwise does nothing.
fn step_index(p: Rc<Path>, vs: &[IOValue], key: &IOValue, step: &mut Node) {
    let selected = key.value().as_usize().and_then(|i| vs.get(i));
    if let Some(v) = selected {
        step.accept(p, v)
    }
}
/// Offers each index `0..count`, wrapped as a value, to `step` in ascending
/// order, all sharing the same path `p`.
fn step_keys(p: Rc<Path>, count: usize, step: &mut Node) {
    (0..count).for_each(|i| step.accept(Rc::clone(&p), &IOValue::new(i)));
}
impl StepMaker for path::Filter {
    /// Builds the node implementing this filter in front of `step`.
    ///
    /// Fails if a regex filter's pattern does not compile or if the
    /// sub-expression of a test filter fails to compile.
    fn connect(&self, step: Node) -> Result<Node, CompilationError> {
        let node = match self {
            // Accept-all needs no node of its own.
            path::Filter::Nop => step,
            // Reject-all never forwards anything, so `step` is simply dropped.
            path::Filter::Fail => Node::new(InertStep),
            path::Filter::Eq { literal } => {
                Node::new(EqStep { literal: literal.clone(), step })
            }
            path::Filter::Regex { regex } => {
                let compiled = regex::Regex::new(regex)?;
                Node::new(RegexStep { regex: compiled, step })
            }
            path::Filter::Test { expr } => {
                // The predicate runs as its own little pipeline ending in a
                // collector that only records whether anything arrived.
                let predicate = expr.connect(BoolCollector::new())?;
                Node::new(TestStep { expr: predicate, step })
            }
            path::Filter::Kind { kind } => {
                let class = match &**kind {
                    path::ValueKind::Boolean => ValueClass::Atomic(AtomClass::Boolean),
                    path::ValueKind::Float => ValueClass::Atomic(AtomClass::Float),
                    path::ValueKind::Double => ValueClass::Atomic(AtomClass::Double),
                    path::ValueKind::SignedInteger => {
                        ValueClass::Atomic(AtomClass::SignedInteger)
                    }
                    path::ValueKind::String => ValueClass::Atomic(AtomClass::String),
                    path::ValueKind::ByteString => ValueClass::Atomic(AtomClass::ByteString),
                    path::ValueKind::Symbol => ValueClass::Atomic(AtomClass::Symbol),
                    path::ValueKind::Record => ValueClass::Compound(CompoundClass::Record),
                    path::ValueKind::Sequence => ValueClass::Compound(CompoundClass::Sequence),
                    path::ValueKind::Set => ValueClass::Compound(CompoundClass::Set),
                    path::ValueKind::Dictionary => {
                        ValueClass::Compound(CompoundClass::Dictionary)
                    }
                    path::ValueKind::Embedded => ValueClass::Embedded,
                };
                Node::new(KindStep { kind: class, step })
            }
        };
        Ok(node)
    }
}
/// Step that swallows every value and never produces a result.
#[derive(Debug)]
struct InertStep;

impl Step for InertStep {
    fn accept(&mut self, _path: Rc<Path>, _value: &IOValue) {}

    fn finish(&mut self) {}

    fn reset(&mut self) -> Vec<IOValue> {
        Vec::new()
    }
}
/// Negation: forwards nothing while values arrive, and on `finish` emits a
/// single `true` value downstream if, and only if, no value was ever seen.
#[derive(Debug)]
struct NotStep {
    seen_value: bool,
    step: Node,
}

impl Step for NotStep {
    fn accept(&mut self, _path: Rc<Path>, _value: &IOValue) {
        self.seen_value = true;
    }

    fn finish(&mut self) {
        // Raising the flag here also keeps a repeated finish() from emitting
        // the dummy value a second time.
        let saw_nothing = !std::mem::replace(&mut self.seen_value, true);
        if saw_nothing {
            self.step.accept(Path::root(), &IOValue::new(true));
        }
        self.step.finish()
    }

    fn reset(&mut self) -> Vec<IOValue> {
        self.seen_value = false;
        self.step.reset()
    }
}
/// Filter that forwards only values equal to `literal`.
#[derive(Debug)]
struct EqStep {
    literal: IOValue,
    step: Node,
}

impl Step for EqStep {
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        if value != &self.literal {
            return;
        }
        self.step.accept(path, value)
    }

    delegate_finish_and_reset!(self, self.step);
}
/// Filter that forwards strings and symbols whose text matches `regex`;
/// values of any other kind are dropped.
#[derive(Debug)]
struct RegexStep {
    regex: regex::Regex,
    step: Node,
}

impl Step for RegexStep {
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        let matched = match value.value() {
            Value::String(text) => self.regex.is_match(text),
            Value::Symbol(name) => self.regex.is_match(name),
            _ => false,
        };
        if matched {
            self.step.accept(path, value)
        }
    }

    delegate_finish_and_reset!(self, self.step);
}
/// Filter that runs the sub-pipeline `expr` on each value and forwards the
/// value itself when that sub-pipeline yields at least one result.
#[derive(Debug)]
struct TestStep {
    expr: Node,
    step: Node,
}

impl Step for TestStep {
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        // Run the predicate to completion for this one value, resetting it
        // so the next value starts from a clean slate.
        self.expr.accept(Rc::clone(&path), value);
        self.expr.finish();
        let outcome = self.expr.reset();

        if !outcome.is_empty() {
            self.step.accept(path, value)
        }
    }

    delegate_finish_and_reset!(self, self.step);
}
/// Terminal step that gathers every value it receives, in arrival order.
#[derive(Debug)]
struct VecCollector {
    accumulator: Vec<IOValue>,
}

impl VecCollector {
    /// Creates an empty collector wrapped in a `Node`.
    fn new() -> Node {
        let empty = VecCollector { accumulator: Vec::new() };
        Node::new(empty)
    }
}

impl Step for VecCollector {
    fn accept(&mut self, _path: Rc<Path>, value: &IOValue) {
        self.accumulator.push(value.clone())
    }

    fn finish(&mut self) {}

    /// Hands back everything collected so far and starts over empty.
    fn reset(&mut self) -> Vec<IOValue> {
        std::mem::replace(&mut self.accumulator, Vec::new())
    }
}
/// Terminal step that only remembers whether any value arrived at all.
#[derive(Debug)]
struct BoolCollector {
    seen_value: bool,
}

impl BoolCollector {
    /// Creates a collector that has seen nothing yet, wrapped in a `Node`.
    fn new() -> Node {
        let fresh = BoolCollector { seen_value: false };
        Node::new(fresh)
    }
}

impl Step for BoolCollector {
    fn accept(&mut self, _path: Rc<Path>, _value: &IOValue) {
        self.seen_value = true
    }

    fn finish(&mut self) {}

    /// Returns a one-element vector holding `true` if anything was seen,
    /// an empty vector otherwise, and clears the flag.
    fn reset(&mut self) -> Vec<IOValue> {
        let seen = std::mem::replace(&mut self.seen_value, false);
        if seen {
            vec![IOValue::new(true)]
        } else {
            Vec::new()
        }
    }
}
/// Type filter: forwards only values whose class equals `kind`.
#[derive(Debug)]
struct KindStep {
    kind: ValueClass,
    step: Node,
}

impl Step for KindStep {
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        let actual = value.value_class();
        if actual == self.kind {
            self.step.accept(path, value)
        }
    }

    delegate_finish_and_reset!(self, self.step);
}
/// Feeds every incoming value to each of several branches, all of which
/// drain into the same shared downstream `step`.
#[derive(Debug)]
struct ForkJoinStep {
    branches: Vec<Node>,
    step: Node,
}

impl ForkJoinStep {
    /// Compiles one branch per expression using `f`, handing each a clone of
    /// the shared `step` handle; stops at the first branch that fails.
    fn new<F: Fn(&path::Expr, Node) -> Result<Node, CompilationError>>(
        exprs: &Vec<path::Expr>,
        f: F,
        step: Node,
    ) -> Result<Node, CompilationError> {
        let mut branches = Vec::with_capacity(exprs.len());
        for e in exprs {
            branches.push(f(e, step.clone())?);
        }
        Ok(Node::new(Self { branches, step }))
    }
}

impl Step for ForkJoinStep {
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        self.branches
            .iter()
            .for_each(|branch| branch.accept(Rc::clone(&path), value));
    }

    fn finish(&mut self) {
        self.branches.iter().for_each(|branch| branch.finish());
        self.step.finish()
    }

    /// Takes the results from the shared downstream step first, then resets
    /// every branch, discarding whatever the branches return.
    fn reset(&mut self) -> Vec<IOValue> {
        let collected = self.step.reset();
        for branch in &self.branches {
            branch.reset();
        }
        collected
    }
}
/// Counts how often each distinct value arrives and forwards a value at the
/// moment its count reaches `threshold` exactly; earlier and later
/// occurrences are not forwarded.
#[derive(Debug)]
struct ThresholdStep {
    threshold: usize,
    accumulator: Map<IOValue, usize>,
    step: Node,
}

impl ThresholdStep {
    /// Creates a threshold step with empty counts, wrapped in a `Node`.
    fn new(threshold: usize, step: Node) -> Result<Node, CompilationError> {
        let counter = Self { threshold, accumulator: Map::new(), step };
        Ok(Node::new(counter))
    }
}

impl Step for ThresholdStep {
    fn accept(&mut self, path: Rc<Path>, value: &IOValue) {
        let occurrences = {
            let slot = self.accumulator.entry(value.clone()).or_insert(0);
            *slot += 1;
            *slot
        };
        if occurrences == self.threshold {
            self.step.accept(path, value)
        }
    }

    fn finish(&mut self) {
        self.step.finish()
    }

    /// Forgets all counts, then resets the downstream step.
    fn reset(&mut self) -> Vec<IOValue> {
        self.accumulator.clear();
        self.step.reset()
    }
}
/// Splits `tokens` at every token that is a symbol spelled exactly
/// `separator`, returning the pieces between separators as owned vectors.
///
/// The separators themselves are dropped. As with `slice::split`, there is
/// always at least one piece: a token list without any separator (including
/// an empty one) yields a single piece, and adjacent separators yield empty
/// pieces.
///
/// Takes a slice rather than `&Vec` so that any contiguous run of tokens can
/// be split; `&Vec<IOValue>` arguments still coerce automatically.
fn split_values_by_symbol(tokens: &[IOValue], separator: &str) -> Vec<Vec<IOValue>> {
    tokens
        .split(|t| matches!(t.value().as_symbol(), Some(s) if s == separator))
        .map(|piece| piece.to_vec())
        .collect()
}
/// Determines which binary operator (if any) is used at the top level of
/// `tokens` and returns the operand token lists together with that operator.
///
/// The tokens are split on `~`, `+` and `&` independently; at most one of
/// those splits may produce more than one piece. If several do, the
/// expression mixes operators and `CompilationError::MixedOperators` is
/// returned.
fn split_binop(tokens: &Vec<IOValue>) -> Result<(Vec<Vec<IOValue>>, Option<Binop>), CompilationError> {
    let by_interleave = split_values_by_symbol(tokens, "~");
    let by_union = split_values_by_symbol(tokens, "+");
    let by_intersection = split_values_by_symbol(tokens, "&");

    let (i, u, n) = (by_interleave.len(), by_union.len(), by_intersection.len());

    if i == 1 && u == 1 && n == 1 {
        Ok((by_interleave, None))
    } else if i > 1 && u == 1 && n == 1 {
        Ok((by_interleave, Some(Binop::Interleave)))
    } else if i == 1 && u > 1 && n == 1 {
        Ok((by_union, Some(Binop::Union)))
    } else if i == 1 && u == 1 && n > 1 {
        Ok((by_intersection, Some(Binop::Intersection)))
    } else {
        Err(CompilationError::MixedOperators)
    }
}
/// Parses a token list into a path expression.
///
/// If the tokens are separated by one of the binary operators, each operand
/// is parsed separately and the results are combined into the corresponding
/// `Interleave`, `Union` or `Intersection` expression; otherwise the whole
/// list is parsed as a single operator-free expression.
pub fn parse_expr(tokens: &Vec<IOValue>) -> Result<path::Expr, CompilationError> {
    let (pieces, binop) = split_binop(tokens)?;

    let op = match binop {
        Some(op) => op,
        None => return parse_non_binop(&pieces[0]),
    };

    let mut exprs = Vec::with_capacity(pieces.len());
    for piece in &pieces {
        exprs.push(parse_non_binop(piece)?);
    }

    let combined = match op {
        Binop::Interleave => path::Expr::Interleave { exprs },
        Binop::Union => path::Expr::Union { exprs },
        Binop::Intersection => path::Expr::Intersection { exprs },
    };
    Ok(combined)
}
/// Parses tokens that contain no top-level binary operator.
///
/// A leading `!` symbol negates the expression parsed from the remaining
/// tokens; anything else is parsed as a (possibly empty) sequence of steps.
fn parse_non_binop(tokens: &[IOValue]) -> Result<path::Expr, CompilationError> {
    if let Some((first, rest)) = tokens.split_first() {
        let negated = matches!(first.value().as_symbol().map(|s| s.as_str()), Some("!"));
        if negated {
            let inner = parse_non_binop(rest)?;
            return Ok(path::Expr::Not { expr: Box::new(inner) });
        }
    }

    let mut steps = Vec::new();
    let mut pending = tokens;
    loop {
        match parse_step(pending)? {
            Some((s, remaining)) => {
                steps.push(s);
                pending = remaining;
            }
            None => break,
        }
    }
    Ok(path::Expr::Steps(steps))
}
/// Parses a single step from the front of `tokens`.
///
/// Returns `Ok(None)` when `tokens` is empty, otherwise the parsed step
/// together with the tokens left over after it. Steps that take an argument
/// (`.`, `=`, `=r`, `?`, `^`) consume the following token as well.
///
/// # Errors
///
/// `CompilationError::InvalidStep` if the first token is neither a sequence
/// nor a recognised symbol, if a required argument is missing, or if the
/// argument of `=r` cannot be read as a string;
/// `CompilationError::RegexError` if the argument of `=r` is not a valid
/// regular expression; and any error from parsing a nested expression.
fn parse_step(tokens: &[IOValue]) -> Result<Option<(path::Step, &[IOValue])>, CompilationError> {
if tokens.is_empty() {
return Ok(None);
}
let remainder = &tokens[1..];
// A sequence token is a bracketed sub-expression used for grouping.
if tokens[0].value().is_sequence() {
return Ok(Some((
path::Step::Expr(Box::new(parse_expr(tokens[0].value().as_sequence().unwrap())?)),
remainder)));
}
match tokens[0].value().as_symbol() {
None => return Err(CompilationError::InvalidStep),
Some(t) => match t.as_str() {
// --- Axes ---
".=" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Nop)), remainder))),
"/" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Values)), remainder))),
"//" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Descendants)), remainder))),
"." => {
let (key, remainder) = pop_step_arg(remainder)?;
Ok(Some((path::Step::Axis(Box::new(path::Axis::At { key })), remainder)))
}
".^" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Label)), remainder))),
".keys" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Keys)), remainder))),
".length" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Length)), remainder))),
".annotations" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Annotations)), remainder))),
".embedded" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Embedded)), remainder))),
// --- Filters ---
"=*" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Nop)), remainder))),
"=!" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Fail)), remainder))),
"=" => {
let (literal, remainder) = pop_step_arg(remainder)?;
Ok(Some((path::Step::Filter(Box::new(path::Filter::Eq { literal })), remainder)))
}
"=r" => {
let (regex_val, remainder) = pop_step_arg(remainder)?;
let regex = regex_val.value().to_string().map_err(|_| CompilationError::InvalidStep)?.clone();
// Compile once here purely to report a bad pattern at parse time; the
// AST stores the pattern text, which is compiled again in `connect`.
let _ = regex::Regex::new(&regex)?;
Ok(Some((path::Step::Filter(Box::new(path::Filter::Regex { regex })), remainder)))
}
"?" => {
let (expr_val, remainder) = pop_step_arg(remainder)?;
// The argument is parsed as a one-token expression, so a bracketed
// `[expr]` argument goes through the sequence case above.
let expr = Box::new(parse_expr(&vec![expr_val])?);
Ok(Some((path::Step::Filter(Box::new(path::Filter::Test { expr })), remainder)))
}
// `^ literal` is shorthand for the test `?[.^ = literal]`.
"^" => {
let (literal, remainder) = pop_step_arg(remainder)?;
Ok(Some((path::Step::Filter(Box::new(path::Filter::Test {
expr: Box::new(path::Expr::Steps(vec![
path::Step::Axis(Box::new(path::Axis::Label)),
path::Step::Filter(Box::new(path::Filter::Eq { literal })),
])),
})), remainder)))
}
// --- Type filters ---
"bool" => Ok(Some((path::Step::from(path::ValueKind::Boolean), remainder))),
"float" => Ok(Some((path::Step::from(path::ValueKind::Float), remainder))),
"double" => Ok(Some((path::Step::from(path::ValueKind::Double), remainder))),
"int" => Ok(Some((path::Step::from(path::ValueKind::SignedInteger), remainder))),
"string" => Ok(Some((path::Step::from(path::ValueKind::String), remainder))),
"bytes" => Ok(Some((path::Step::from(path::ValueKind::ByteString), remainder))),
"symbol" => Ok(Some((path::Step::from(path::ValueKind::Symbol), remainder))),
"rec" => Ok(Some((path::Step::from(path::ValueKind::Record), remainder))),
"seq" => Ok(Some((path::Step::from(path::ValueKind::Sequence), remainder))),
"set" => Ok(Some((path::Step::from(path::ValueKind::Set), remainder))),
"dict" => Ok(Some((path::Step::from(path::ValueKind::Dictionary), remainder))),
"embedded" => Ok(Some((path::Step::from(path::ValueKind::Embedded), remainder))),
_ => Err(CompilationError::InvalidStep),
}
}
}
impl From<path::ValueKind> for path::Step {
    /// Wraps a value kind in the filter step that accepts only that kind.
    fn from(k: path::ValueKind) -> Self {
        let filter = path::Filter::Kind { kind: Box::new(k) };
        Self::Filter(Box::new(filter))
    }
}
/// Takes the argument of a step: returns a clone of the first token and the
/// tokens after it, or `CompilationError::InvalidStep` if none are left.
fn pop_step_arg(tokens: &[IOValue]) -> Result<(IOValue, &[IOValue]), CompilationError> {
    match tokens.split_first() {
        Some((arg, rest)) => Ok((arg.clone(), rest)),
        None => Err(CompilationError::InvalidStep),
    }
}
impl path::Expr {
    /// Compiles this expression into a pipeline that ends in a collector
    /// gathering every result.
    pub fn compile(&self) -> Result<Node, CompilationError> {
        let sink = VecCollector::new();
        self.connect(sink)
    }

    /// Compiles this expression and runs it once over `value`, returning the
    /// collected results. The expression is recompiled on every call.
    pub fn exec(&self, value: &IOValue) -> Result<Vec<IOValue>, CompilationError> {
        let pipeline = self.compile()?;
        Ok(pipeline.exec(value))
    }
}
impl std::str::FromStr for path::Expr {
type Err = CompilationError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
parse_expr(&(BytesBinarySource::new(s.as_bytes())
.text_iovalues()
.configured(false)
.collect::<Result<Vec<_>, _>>()?))
}
}
impl std::str::FromStr for Node {
    type Err = CompilationError;

    /// Parses `s` as a path expression and compiles it into a runnable node.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = s.parse::<path::Expr>()?;
        parsed.compile()
    }
}

View File

@ -10,6 +10,7 @@ license = "Apache-2.0"
[dependencies]
preserves = { path = "../preserves", version = "0.16.0" }
preserves-path = { path = "../preserves-path", version = "0.1.0" }
bytes = "1.0"
clap = "3.0.0-beta.2"

View File

@ -89,6 +89,9 @@ struct Convert {
#[clap(long, arg_enum, value_name = "on/off", default_value = "on")]
indent: Boolish,
#[clap(long, default_value="=*")]
select: preserves_path::Node,
#[clap(long, arg_enum, value_name = "on/off", default_value = "on")]
annotations: Boolish,
}
@ -356,7 +359,9 @@ fn convert(c: Convert) -> io::Result<()> {
};
while let Some(value) = vs.next() {
let value = value?;
w(&value)?;
for v in c.select.exec(&value) {
w(&v)?;
}
if let Some(limit) = c.limit {
if vs.count >= limit {
return Ok(());

View File

@ -350,6 +350,24 @@ impl<N: NestedValue<D>, D: Embeddable> Value<N, D> {
}
}
/// Returns clones of the values immediately contained in this value.
///
/// Records yield their fields, sequences and sets their elements, and
/// dictionaries their mapped-to values (not their keys). Atoms and embedded
/// values contain nothing and yield an empty vector.
pub fn children(&self) -> Vec<N> {
    match self {
        Value::Record(r) => r.fields().to_vec(),
        Value::Sequence(vs) => vs.to_vec(),
        Value::Set(s) => s.iter().cloned().collect(),
        Value::Dictionary(d) => d.values().cloned().collect(),
        Value::Boolean(_)
        | Value::Float(_)
        | Value::Double(_)
        | Value::SignedInteger(_)
        | Value::String(_)
        | Value::ByteString(_)
        | Value::Symbol(_)
        | Value::Embedded(_) => Vec::new(),
    }
}
fn expected(&self, k: ExpectedKind) -> Error {
Error::Expected(k, Received::ReceivedOtherValue(format!("{:?}", self.clone().wrap())))
}
@ -516,6 +534,8 @@ impl<N: NestedValue<D>, D: Embeddable> Value<N, D> {
pub fn as_i64(&self) -> Option<i64> { self.as_i().and_then(|i| i.to_i64()) }
pub fn as_u128(&self) -> Option<u128> { self.as_u().and_then(|i| i.to_u128()) }
pub fn as_i128(&self) -> Option<i128> { self.as_i().and_then(|i| i.to_i128()) }
/// Returns this value as a `usize`, or `None` if `as_u` yields nothing or the number does not fit.
pub fn as_usize(&self) -> Option<usize> { self.as_u()?.to_usize() }
/// Returns this value as an `isize`, or `None` if `as_i` yields nothing or the number does not fit.
pub fn as_isize(&self) -> Option<isize> { self.as_i()?.to_isize() }
pub fn to_i8(&self) -> Result<i8, Error> {
match self.as_i() {
@ -587,6 +607,20 @@ impl<N: NestedValue<D>, D: Embeddable> Value<N, D> {
}
}
pub fn to_isize(&self) -> Result<isize, Error> {
match self.as_i() {
Some(i) => i.to_isize().ok_or_else(|| Error::NumberOutOfRange(BigInt::from(i))),
None => Err(self.expected(ExpectedKind::SignedInteger)),
}
}
pub fn to_usize(&self) -> Result<usize, Error> {
match self.as_u() {
Some(i) => i.to_usize().ok_or_else(|| Error::NumberOutOfRange(BigInt::from(i))),
None => Err(self.expected(ExpectedKind::SignedInteger)),
}
}
pub fn to_char(&self) -> Result<char, Error> {
let fs = self.to_simple_record("UnicodeScalar", Some(1))?;
let c = fs[0].value().to_u32()?;

View File

@ -150,8 +150,8 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> TextRe
fn read_fracexp<N: NestedValue<D>>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
let mut is_float = false;
match self.peek()? {
b'.' => {
match self.peek() {
Ok(b'.') => {
is_float = true;
bs.push(self.next_byte()?);
let c = self.next_byte()?;
@ -159,8 +159,8 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> TextRe
}
_ => ()
}
match self.peek()? {
b'e' | b'E' => {
match self.peek() {
Ok(b'e') | Ok(b'E') => {
bs.push(self.next_byte()?);
self.read_sign_and_exp(bs)
}
@ -181,8 +181,8 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> TextRe
fn finish_number<N: NestedValue<D>>(&mut self, bs: Vec<u8>, is_float: bool) -> io::Result<N> {
let s = decode_utf8(bs)?;
if is_float {
match self.peek()? {
b'f' | b'F' => {
match self.peek() {
Ok(b'f') | Ok(b'F') => {
self.skip()?;
Ok(N::new(s.parse::<f32>().map_err(
|_| io_syntax_error(&format!(
@ -206,7 +206,10 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> TextRe
return Err(io_syntax_error("Incomplete number"));
}
bs.push(c);
while (self.peek()? as char).is_digit(10) {
while let Ok(c) = self.peek() {
if !(c as char).is_digit(10) {
break;
}
bs.push(self.next_byte()?);
}
Ok(())

10
path/Makefile Normal file
View File

@ -0,0 +1,10 @@
# Builds the compiled (binary) form of each schema listed in TARGETS from its
# textual .prs source.
TARGETS=path.bin

# `all` and `clean` name actions, not files: declare them phony so a stray
# file called "all" or "clean" can never stop them from running.
.PHONY: all clean

all: $(TARGETS)

# Compile into a temporary file and rename it only on success, so that a
# failed compilation never leaves a truncated target behind.
%.bin: %.prs
	../implementations/javascript/packages/schema/bin/preserves-schemac.js --no-bundle $< > $@.tmp || (rm -f $@.tmp; false)
	mv $@.tmp $@

clean:
	rm -f $(TARGETS)

7
path/path.bin Normal file
View File

@ -0,0 +1,7 @@
´³schema·³version³ definitions·³Axis´³orµµ±nop´³rec´³lit³nop„´³tupleµ„„„„µ±values´³rec´³lit³values„´³tupleµ„„„„µ± descendants´³rec´³lit³ descendants„´³tupleµ„„„„µ±at´³rec´³lit³at„´³tupleµ´³named³key³any„„„„„µ±label´³rec´³lit³label„´³tupleµ„„„„µ±keys´³rec´³lit³keys„´³tupleµ„„„„µ±length´³rec´³lit³length„´³tupleµ„„„„µ± annotations´³rec´³lit³ annotations„´³tupleµ„„„„µ±embedded´³rec´³lit³embedded„´³tupleµ„„„„„„³Expr´³orµµ±steps´³seqof´³refµ„³Step„„„µ±not´³rec´³lit³not„´³tupleµ´³named³expr´³refµ„³Expr„„„„„„µ±
interleave´³rec´³lit³
interleave„´³tupleµ´³named³exprs´³seqof´³refµ„³Expr„„„„„„„µ±union´³rec´³lit³union„´³tupleµ´³named³exprs´³seqof´³refµ„³Expr„„„„„„„µ± intersection´³rec´³lit³ intersection„´³tupleµ´³named³exprs´³seqof´³refµ„³Expr„„„„„„„„„³Step´³orµµ±Axis´³refµ„³Axis„„µ±Filter´³refµ„³Filter„„µ±Expr´³refµ„³Expr„„„„³Filter´³orµµ±nop´³rec´³lit³nop„´³tupleµ„„„„µ±fail´³rec´³lit³fail„´³tupleµ„„„„µ±eq´³rec´³lit³eq„´³tupleµ´³named³literal³any„„„„„µ±regex´³rec´³lit³regex„´³tupleµ´³named³regex´³atom³String„„„„„„µ±test´³rec´³lit³test„´³tupleµ´³named³expr´³refµ„³Expr„„„„„„µ±kind´³rec´³lit³kind„´³tupleµ´³named³kind´³refµ„³ ValueKind„„„„„„„„³ ValueKind´³orµµ±Boolean´³lit³Boolean„„µ±Float´³lit³Float„„µ±Double´³lit³Double„„µ± SignedInteger´³lit³ SignedInteger„„µ±String´³lit³String„„µ±
ByteString´³lit³
ByteString„„µ±Symbol´³lit³Symbol„„µ±Record´³lit³Record„„µ±Sequence´³lit³Sequence„„µ±Set´³lit³Set„„µ±
Dictionary´³lit³
Dictionary„„µ±Embedded´³lit³Embedded„„„„„³ embeddedType€„„

38
path/path.prs Normal file
View File

@ -0,0 +1,38 @@
version 1 .
Expr =
/ @steps [Step ...]
/ <not @expr Expr>
/ <interleave @exprs [Expr ...]>
/ <union @exprs [Expr ...]>
/ <intersection @exprs [Expr ...]>
.
Step = Axis / Filter / Expr .
Axis =
/ <nop>
/ <values>
/ <descendants>
/ <at @key any>
/ <label>
/ <keys>
/ <length>
/ <annotations>
/ <embedded>
.
Filter =
/ <nop>
/ <fail>
/ <eq @literal any>
/ <regex @regex string>
/ <test @expr Expr>
/ <kind @kind ValueKind>
.
ValueKind =
/ =Boolean / =Float / =Double / =SignedInteger / =String / =ByteString / =Symbol
/ =Record / =Sequence / =Set / =Dictionary
/ =Embedded
.

91
preserves-path.md Normal file
View File

@ -0,0 +1,91 @@
---
no_site_title: true
title: "Preserves Path"
---
Tony Garnock-Jones <tonyg@leastfixedpoint.com>
August 2021. Version 0.1.0.
Queries over XML documents can move into attributes, into text, or into children.
Preserves documents don't have attributes, but they do have children
generally and keyed children in particular. You might want to move
into the child with a particular key (number, for sequences, or
general-value for dictionaries); into all keys; into all
mapped-to-values, i.e. children (n.b. not just for sequences and
dicts, but also for sets).
## Expressions
Expressions: compute a sequence or set (or dictionary?) of results
from a stream of input values.
Precedence groupings from highest to lowest. Within a grouping, no
mixed precedence is permitted.
step ... ;; Applies steps one after the other, flatmap-style
! expr ;; If no nodes, yields a dummy #t node; if some, yields none
expr ~ expr ~ ... ;; "interleave" of expressions (sequence-valued, duplicates allowed)
expr + expr + ... ;; "union" of expressions (set-valued)
expr & expr & ... ;; "intersection" of expressions (set-valued)
A step is an axis, a filter, or `[expr]`, a parenthesis for overriding precedence.
## Axes
Axes: move around, applying filters after moving
.= ;; Doesn't move anywhere
/ ;; Moves into immediate children (values / fields)
// ;; Flattens children recursively
. key ;; Moves into named child
.^ ;; Moves into record label
.keys ;; Moves into *keys* rather than values
.length ;; Moves into the number of keys
.annotations ;; Moves into any annotations that might be present
.embedded ;; Moves into the representation of an embedded value
## Filters
Filters: narrow down a selection without moving
=* ;; Accepts all
=! ;; Rejects all
= literal ;; Matches values equal to the literal
=r regex ;; Matches strings and symbols by regular expression
?[expr] ;; Applies the expression to each node; keeps nodes that yield nonempty
^ literal ;; Matches a record having the literal as its label -- equivalent to ?[.^ = literal]
bool ;; Type filters
float
double
int
string
bytes
symbol
rec
seq
set
dict
embedded
## Transformers
e.g. stringify results; sequenceify results (see the "~" operator); setify
results (see the "+" and "&" operators); join stringified results with a
separator
## Tool design
When processing multiple input documents sequentially, will sometimes
want a list of results for each document, a set of results for each
document, or a list flattened into a sequence of outputs for all input
documents in the sequence. (A flattened set doesn't make sense for
streaming since the input documents come in a sequence; if the inputs
were treated as a set represented as a sequence, and outputs were
buffered in a single large set, that could work out...)