diff --git a/implementations/python/preserves/path.py b/implementations/python/preserves/path.py index 4b0ad15..7381212 100644 --- a/implementations/python/preserves/path.py +++ b/implementations/python/preserves/path.py @@ -1,23 +1,139 @@ -"""TODO (document __main__ behaviour)""" +"""The [preserves.path][] module implements [Preserves +Path](https://preserves.dev/preserves-path.html). + +Preserves Path is roughly analogous to +[XPath](https://www.w3.org/TR/2017/REC-xpath-31-20170321/), but for Preserves values: just as +XPath selects portions of an XML document, a Preserves Path uses *path expressions* to select +portions of a `Value`. + +Use [parse][preserves.path.parse] to compile a path expression, and then use the +[exec][preserves.path.exec] method on the result to apply it to a given input: + +```python +parse(PATH_EXPRESSION_STRING).exec(PRESERVES_VALUE) + -> SEQUENCE_OF_PRESERVES_VALUES +``` + +## Command-line usage + +When [preserves.path][] is run as a `__main__` module, `sys.argv[1]` is +[parsed][preserves.path.parse], interpreted as a path expression, and +[run][preserves.path.exec] against [human-readable values][preserves.text] read from standard +input. Each matching result is passed to [stringify][preserves.text.stringify] and printed to +standard output. + +## Examples + +### Setup: Loading test data + +The following examples use `testdata`: + +```python +>>> with open('tests/samples.bin', 'rb') as f: +... testdata = decode_with_annotations(f.read()) + +``` + +Recall that `samples.bin` contains a binary-syntax form of the human-readable +[`samples.pr`](https://preserves.dev/tests/samples.pr) test data file, intended to exercise most +of the features of Preserves. In particular, the root `Value` in the file has a number of +annotations (for documentation and other purposes). + +### Example 1: Selecting string-valued documentation annotations + +The path expression `.annotations ^ Documentation . 0 / string` proceeds in five steps: + +1. 
`.annotations` selects each annotation on the root document +2. `^ Documentation` retains only those values (each an annotation of the root) that are `Record`s with label equal to the symbol `Documentation` +3. `. 0` moves into the first child (the first field) of each such `Record`, which in our case is a list of other `Value`s +4. `/` selects all immediate children of these lists +5. `string` retains only those values that are strings + +The result of evaluating it on `testdata` is as follows: + +```python +>>> selector = parse('.annotations ^ Documentation . 0 / string') +>>> for result in selector.exec(testdata): +... print(stringify(result)) +"Individual test cases may be any of the following record types:" +"In each test, let value = strip(annotatedValue)," +" forward = value," +" back = value," +"except where test-case-specific values of `forward` and/or `back`" +"are provided by the executing harness, and check the following" +"numbered expectations according to the table above:" +"Implementations may vary in their treatment of the difference between expectations" +"13/14 and 16/17, depending on how they wish to treat end-of-stream conditions." + +``` + +### Example 2: Selecting tests with `Record`s as their `annotatedValue`s + +The path expression `// [.^ [= Test + = NondeterministicTest]] [. 1 rec]` proceeds in three steps: + +1. `//` recursively decomposes the input, yielding all direct and indirect descendants of each input value + +2. `[.^ [= Test + = NondeterministicTest]]` retains only those inputs (each a descendant of the root) that yield more than zero results when executed against the expression within the brackets: + 1. `.^` selects only labels of values that are `Record`s, filtering by type and transforming in a single step + 2. `[= Test + = NondeterministicTest]` again filters by a path expression: + 1. the infix `+` operator takes the *union* of matches of its arguments + 2. 
the left-hand argument, `= Test` selects values (remember, record labels) equal to the symbol `Test` + 3. the right-hand argument `= NondeterministicTest` selects values equal to `NondeterministicTest` + + The result is thus all `Record`s anywhere inside `testdata` that have either `Test` or `NondeterministicTest` as their labels. + +3. `[. 1 rec]` filters these `Record`s by another path expression: + 1. `. 1` selects their second field (fields are numbered from 0) + 2. `rec` retains only values that are `Record`s + +Evaluating the expression against `testdata` yields the following: + +```python +>>> selector = parse('// [.^ [= Test + = NondeterministicTest]] [. 1 rec]') +>>> for result in selector.exec(testdata): +... print(stringify(result)) +>> + >>>> + "Dr">> +> +> +> +> + 3 4>> +> +> + +``` + +""" from . import * from .schema import load_schema_file, extend from .values import _unwrap from .compat import basestring_ +from . import compare as preserves_compare import pathlib import re syntax = load_schema_file(pathlib.Path(__file__).parent / 'path.prb').path -"""TODO""" +"""This value is a Python representation of a [Preserves Schema][preserves.schema] definition +for the Preserves Path expression language. The language is defined in the file +[path.prs](https://preserves.dev/path/path.prs).""" Selector = syntax.Selector -"""TODO""" +"""Schema definition for representing a sequence of Preserves Path `Step`s.""" Predicate = syntax.Predicate -"""TODO""" +"""Schema definition for representing a Preserves Path `Predicate`.""" def parse(s): - """TODO""" + """Parse `s` as a Preserves Path path expression, yielding a + [Selector][preserves.path.Selector] object. Selectors (and Predicates etc.) have an + [exec][preserves.path.exec] method defined on them. + + Raises `ValueError` if `s` is not a valid path expression. 
+ + """ return parse_selector(Parser(s)) def parse_selector(tokens): @@ -277,6 +393,7 @@ def exec(self, v): @extend(syntax.Axis.embedded) def exec(self, v): + v = preserve(_unwrap(v)) return (v.embeddedValue,) if isinstance(v, Embedded) else () @extend(syntax.Filter.nop) @@ -285,35 +402,37 @@ def exec(self, v): @extend(syntax.Filter.compare) def exec(self, v): + v = preserve(_unwrap(v)) return (v,) if self.op.compare(v, self.literal) else () @extend(syntax.Comparison.eq) def compare(self, lhs, rhs): - return lhs == rhs + return preserves_compare.eq(lhs, rhs) @extend(syntax.Comparison.ne) def compare(self, lhs, rhs): - return lhs != rhs + return not preserves_compare.eq(lhs, rhs) @extend(syntax.Comparison.lt) def compare(self, lhs, rhs): - return lhs < rhs + return preserves_compare.lt(lhs, rhs) @extend(syntax.Comparison.ge) def compare(self, lhs, rhs): - return lhs >= rhs + return not preserves_compare.lt(lhs, rhs) @extend(syntax.Comparison.gt) def compare(self, lhs, rhs): - return lhs > rhs + return not preserves_compare.le(lhs, rhs) @extend(syntax.Comparison.le) def compare(self, lhs, rhs): - return lhs <= rhs + return preserves_compare.le(lhs, rhs) @extend(syntax.Filter.regex) def exec(self, v): r = re.compile(self.regex) + v = preserve(_unwrap(v)) if isinstance(v, Symbol): return (v,) if r.match(v.name) else () if isinstance(v, basestring_): @@ -326,6 +445,7 @@ def exec(self, v): @extend(syntax.Filter.real) def exec(self, v): + v = preserve(_unwrap(v)) if isinstance(v, Float): return (v.value,) if type(v) == float: @@ -336,6 +456,7 @@ def exec(self, v): @extend(syntax.Filter.int) def exec(self, v): + v = preserve(_unwrap(v)) if isinstance(v, Float): return (int(v.value()),) if type(v) == float: @@ -346,6 +467,7 @@ def exec(self, v): @extend(syntax.Filter.kind) def exec(self, v): + v = preserve(_unwrap(v)) return self.kind.exec(v) @extend(syntax.ValueKind.Boolean) @@ -398,8 +520,22 @@ def exec(self, v): @extend(syntax.Function) def exec(self, v): + """WARNING: 
This is not a *function*: it is a *method* on + [Selector][preserves.path.Selector], [Predicate][preserves.path.Predicate], and so on. + + ```python + >>> sel = parse('/ [.length gt 1]') + >>> sel.exec(['', 'a', 'ab', 'abc', 'abcd', 'bcd', 'cd', 'd', '']) + ('ab', 'abc', 'abcd', 'bcd', 'cd') + + ``` + + """ return (len(self.selector.exec(v)),) +### NOTE WELL: the *LAST* definition of exec in this file is the one that needs the docstring +### attached! + if __name__ == '__main__': import sys sel = parse(sys.argv[1]) diff --git a/implementations/python/preserves/schema.py b/implementations/python/preserves/schema.py index 04233fa..1142dd3 100644 --- a/implementations/python/preserves/schema.py +++ b/implementations/python/preserves/schema.py @@ -7,6 +7,7 @@ TODO from . import * import pathlib import keyword +from functools import wraps AND = Symbol('and') ANY = Symbol('any') @@ -509,6 +510,7 @@ def load_schema_file(filename): # a decorator def extend(cls): """TODO""" + @wraps(cls) def extender(f): setattr(cls, f.__name__, f) return f diff --git a/implementations/python/setup.py b/implementations/python/setup.py index 443dad1..308008b 100644 --- a/implementations/python/setup.py +++ b/implementations/python/setup.py @@ -2,7 +2,7 @@ from setuptools import setup setup( name="preserves", - version="0.18.0", + version="0.18.1", author="Tony Garnock-Jones", author_email="tonyg@leastfixedpoint.com", license="Apache Software License",