Textual parser
This commit is contained in:
parent
30bfaa8c00
commit
99b0ddbb13
|
@ -1,2 +1,3 @@
|
|||
tests/test_rfc8259
|
||||
tests/test_integers
|
||||
tests/test_parser
|
||||
tests/test_rfc8259
|
||||
|
|
|
@ -2,5 +2,5 @@ Nim implementation of the [Preserves data language](https://preserves.gitlab.io/
|
|||
|
||||
Missing features:
|
||||
* embedded values
|
||||
* parsing from human-readable encoding
|
||||
* ordering of compound values
|
||||
* schemas
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# SPDX-License-Identifier: ISC
|
||||
|
||||
import bigints
|
||||
import std/[base64, endians, hashes, macros, sets, streams, tables, typetraits]
|
||||
import std/[base64, endians, hashes, macros, sets, streams, strutils, tables, typetraits]
|
||||
|
||||
import json except `%`, `%*`
|
||||
|
||||
|
@ -151,9 +151,7 @@ proc `==`*(x, y: Preserve): bool =
|
|||
of pkSymbol:
|
||||
result = x.symbol == y.symbol
|
||||
of pkRecord:
|
||||
for i, val in x.record:
|
||||
if y.record[i] != val: return false
|
||||
result = true
|
||||
result = x.record == y.record
|
||||
of pkSequence:
|
||||
for i, val in x.sequence:
|
||||
if y.sequence[i] != val: return false
|
||||
|
@ -171,26 +169,32 @@ proc `==`*(x, y: Preserve): bool =
|
|||
of pkEmbedded:
|
||||
result = x.embedded == y.embedded
|
||||
|
||||
proc `$`*(prs: Preserve): string =
|
||||
proc concat(result: var string; prs: Preserve) =
|
||||
case prs.kind:
|
||||
of pkBoolean:
|
||||
case prs.bool
|
||||
of false: result = "#f"
|
||||
of true: result = "#t"
|
||||
of false: result.add "#f"
|
||||
of true: result.add "#t"
|
||||
of pkFloat:
|
||||
result = $prs.float & "f"
|
||||
result.add($prs.float & "f")
|
||||
of pkDouble:
|
||||
result = $prs.double
|
||||
result.add $prs.double
|
||||
of pkSignedInteger:
|
||||
result = $prs.int
|
||||
result.add $prs.int
|
||||
of pkBigInteger:
|
||||
result = $prs.bigint
|
||||
result.add $prs.bigint
|
||||
of pkString:
|
||||
result = escapeJson(prs.string)
|
||||
result.add escapeJson(prs.string)
|
||||
of pkByteString:
|
||||
result.add("#[")
|
||||
result.add(base64.encode(prs.bytes))
|
||||
result.add(']')
|
||||
for b in prs.bytes:
|
||||
# Printable ASCII except '"' and '\\'. Nim's '\20' is a decimal escape, so the
# range bounds are written in hex to really mean space (0x20) and '!' (0x21).
if b.char notin {'\x20'..'\x21', '#'..'[', ']'..'~'}:
|
||||
result.add("#[")
|
||||
result.add(base64.encode(prs.bytes))
|
||||
result.add(']')
|
||||
return
|
||||
result.add("#\"")
|
||||
result.add(cast[string](prs.bytes))
|
||||
result.add('"')
|
||||
of pkSymbol:
|
||||
result.add(escapeJsonUnquoted(prs.symbol))
|
||||
of pkRecord:
|
||||
|
@ -199,36 +203,38 @@ proc `$`*(prs: Preserve): string =
|
|||
result.add($prs.record[prs.record.high])
|
||||
for i in 0..<prs.record.high:
|
||||
result.add(' ')
|
||||
result.add($prs.record[i])
|
||||
result.concat(prs.record[i])
|
||||
result.add('>')
|
||||
of pkSequence:
|
||||
result.add('[')
|
||||
for i, val in prs.sequence:
|
||||
if i > 0:
|
||||
result.add(' ')
|
||||
result.add($val)
|
||||
result.concat(val)
|
||||
result.add(']')
|
||||
of pkSet:
|
||||
result.add("#{")
|
||||
for val in prs.set.items:
|
||||
result.add($val)
|
||||
result.concat(val)
|
||||
result.add(' ')
|
||||
if result.len > 2:
|
||||
# Every element appended a trailing space; drop it for any non-empty set.
if prs.set.len > 0:
|
||||
result.setLen(result.high)
|
||||
result.add('}')
|
||||
of pkDictionary:
|
||||
result.add('{')
|
||||
for (key, value) in prs.dict.pairs:
|
||||
result.add($key)
|
||||
result.concat(key)
|
||||
result.add(": ")
|
||||
result.add($value)
|
||||
result.concat(value)
|
||||
result.add(' ')
|
||||
if result.len > 1:
|
||||
# Every entry appended a trailing space; drop it for any non-empty dictionary.
if prs.dict.len > 0:
|
||||
result.setLen(result.high)
|
||||
result.add('}')
|
||||
of pkEmbedded:
|
||||
result.add(prs.embedded.repr)
|
||||
|
||||
proc `$`*(prs: Preserve): string =
  ## Render `prs` as text by appending its representation to the
  ## initially empty result string.
  result.concat(prs)
|
||||
|
||||
iterator items*(prs: Preserve): Preserve =
|
||||
case prs.kind
|
||||
of pkRecord:
|
||||
|
@ -353,7 +359,7 @@ proc write*(str: Stream; prs: Preserve) =
|
|||
of pkByteString:
|
||||
str.write(0xb2'u8)
|
||||
str.writeVarint(prs.bytes.len)
|
||||
str.write(prs.bytes)
|
||||
str.write(cast[string](prs.bytes))
|
||||
of pkSymbol:
|
||||
str.write(0xb3'u8)
|
||||
str.writeVarint(prs.symbol.len)
|
||||
|
@ -385,7 +391,13 @@ proc write*(str: Stream; prs: Preserve) =
|
|||
str.write(0x86'u8)
|
||||
raiseAssert("binary representation of embedded values is undefined")
|
||||
|
||||
proc parsePreserve*(s: Stream): Preserve =
|
||||
proc encode*(prs: Preserve): string =
  ## Return the bytes that `write` emits for `prs`, collected in a string.
  var buffer = newStringStream()
  buffer.write(prs)
  # Rewind so that the read below starts at the first written byte.
  buffer.setPosition(0)
  buffer.readAll()
|
||||
|
||||
proc decodePreserves*(s: Stream): Preserve =
|
||||
proc assertStream(check: bool) =
|
||||
if not check:
|
||||
raise newException(ValueError, "invalid Preserves stream")
|
||||
|
@ -423,26 +435,26 @@ proc parsePreserve*(s: Stream): Preserve =
|
|||
result = symbol(s.readStr(len))
|
||||
of 0xb4:
|
||||
result = Preserve(kind: pkRecord)
|
||||
var label = s.parsePreserve()
|
||||
var label = s.decodePreserves()
|
||||
while s.peekUint8() != endMarker:
|
||||
result.record.add(s.parsePreserve())
|
||||
result.record.add(s.decodePreserves())
|
||||
result.record.add(label)
|
||||
discard s.readUint8()
|
||||
of 0xb5:
|
||||
result = Preserve(kind: pkSequence)
|
||||
while s.peekUint8() != endMarker:
|
||||
result.sequence.add(s.parsePreserve())
|
||||
result.sequence.add(s.decodePreserves())
|
||||
discard s.readUint8()
|
||||
of 0xb6:
|
||||
result = Preserve(kind: pkSet)
|
||||
while s.peekUint8() != endMarker:
|
||||
result.set.incl(s.parsePreserve())
|
||||
result.set.incl(s.decodePreserves())
|
||||
discard s.readUint8()
|
||||
of 0xb7:
|
||||
result = Preserve(kind: pkDictionary)
|
||||
while s.peekUint8() != endMarker:
|
||||
let key = s.parsePreserve()
|
||||
let val = s.parsePreserve()
|
||||
let key = s.decodePreserves()
|
||||
let val = s.decodePreserves()
|
||||
result.dict[key] = val
|
||||
discard s.readUint8()
|
||||
of 0xb0:
|
||||
|
@ -470,6 +482,12 @@ proc parsePreserve*(s: Stream): Preserve =
|
|||
else:
|
||||
assertStream(false)
|
||||
|
||||
proc decodePreserves*(s: string): Preserve =
  ## Decode a value from the encoded data held in the string `s` by
  ## wrapping it in a string stream and using the stream decoder.
  let stream = newStringStream(s)
  result = decodePreserves(stream)
|
||||
|
||||
proc decodePreserves*(s: seq[byte]): Preserve =
  ## Decode a value from the encoded data held in the byte sequence `s`.
  ## The bytes are reinterpreted as a string so the stream decoder can
  ## be reused.
  let stream = newStringStream(cast[string](s))
  result = decodePreserves(stream)
|
||||
|
||||
proc initDictionary*(): Preserve =
  ## Create a `Preserve` of kind `pkDictionary` with no other fields set.
  Preserve(kind: pkDictionary)
|
||||
|
||||
proc `%`*(b: bool): Preserve =
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
# SPDX-License-Identifier: ISC
|
||||
|
||||
import std/[base64, parseutils, sets, strutils, tables]
|
||||
import npeg
|
||||
import ../preserves, ./pegs
|
||||
|
||||
type
  Frame = tuple
    ## One entry of the parser's value stack.
    value: Preserve  # value produced by a grammar action
    pos: int         # `capture[0].si` of the match that produced `value`
  Stack = seq[Frame]
    ## Values produced so far, in the order they were pushed.
|
||||
|
||||
proc shrink(stack: var Stack; n: int) =
  ## Discard the last `n` frames of `stack`.
  let remaining = stack.len - n
  stack.setLen(remaining)
|
||||
|
||||
template pushStack(v: Preserve) =
  ## Push `v` together with the start index of the current match.
  ## Relies on `stack` and `capture` being in scope at the expansion
  ## site, as they are inside the grammar actions below.
  add(stack, (v, capture[0].si))
|
||||
|
||||
const pegParser = peg("Document", stack: Stack):
  # Override rules from pegs.nim
  #
  # Each override re-uses the grammar rule of the same name and attaches an
  # action that pushes the parsed value onto `stack`. Compound rules collect
  # the frames pushed by their children (recognised by a `pos` at or after the
  # start of the compound's own match) and replace them with a single frame.

  Document <- Preserves.Document

  Preserves.Record <- Preserves.Record:
    var
      record: seq[Preserve]
      labelOff: int
    # Skip frames that were pushed before this record started.
    while stack[labelOff].pos < capture[0].si:
      inc labelOff
    # Fields first, label last.
    for i in labelOff.succ..stack.high:
      record.add(move stack[i].value)
    record.add(move stack[labelOff].value)
    stack.shrink record.len
    pushStack Preserve(kind: pkRecord, record: move record)

  Preserves.Sequence <- Preserves.Sequence:
    var sequence: seq[Preserve]
    for frame in stack.mitems:
      if frame.pos > capture[0].si:
        sequence.add(move frame.value)
    stack.shrink sequence.len
    pushStack Preserve(kind: pkSequence, sequence: move sequence)

  Preserves.Dictionary <- Preserves.Dictionary:
    var
      dict: Table[Preserve, Preserve]
      pairCount: int
    # Walk key/value frames from the top of the stack downwards until a frame
    # that predates this dictionary is reached.
    for i in countDown(stack.high.pred, 0, 2):
      if stack[i].pos < capture[0].si: break
      dict[move stack[i].value] = move stack[i.succ].value
      inc pairCount
    # Shrink by the number of frames actually consumed: `dict.len` would
    # undercount when the text repeats a key and leave stale frames behind.
    stack.shrink 2*pairCount
    pushStack Preserve(kind: pkDictionary, dict: move dict)

  Preserves.Set <- Preserves.Set:
    var
      set: HashSet[Preserve]
      consumed: int
    for frame in stack.mitems:
      if frame.pos > capture[0].si:
        set.incl(move frame.value)
        inc consumed
    # Shrink by the number of frames actually consumed: `set.len` would
    # undercount when the text repeats an element and leave stale frames
    # behind.
    stack.shrink consumed
    pushStack Preserve(kind: pkSet, set: move set)

  Preserves.Boolean <- Preserves.Boolean:
    case $0
    of "#f": pushStack Preserve(kind: pkBoolean)
    of "#t": pushStack Preserve(kind: pkBoolean, bool: true)
    else: discard

  Preserves.Float <- Preserves.Float:
    # $1 is the numeric text without the trailing 'f'.
    pushStack Preserve(kind: pkFloat, float: parseFloat($1))

  Preserves.Double <- Preserves.Double:
    # Push a placeholder, then parse straight into its `double` field.
    pushStack Preserve(kind: pkDouble)
    let i = stack.high
    discard parseBiggestFloat($0, stack[i].value.double)

  Preserves.SignedInteger <- Preserves.SignedInteger:
    pushStack Preserve(kind: pkSignedInteger, int: parseInt($0))

  Preserves.String <- Preserves.String:
    pushStack Preserve(kind: pkString, string: unescape($0))

  Preserves.charByteString <- Preserves.charByteString:
    # $1 still includes the surrounding double quotes, which `unescape`
    # strips by default.
    let s = unescape($1)
    pushStack Preserve(kind: pkByteString, bytes: cast[seq[byte]](s))

  Preserves.hexByteString <- Preserves.hexByteString:
    pushStack Preserve(kind: pkByteString, bytes: cast[seq[byte]](parseHexStr($1)))

  Preserves.b64ByteString <- Preserves.b64ByteString:
    pushStack Preserve(kind: pkByteString, bytes: cast[seq[byte]](base64.decode($1)))

  Preserves.Symbol <- Preserves.Symbol:
    pushStack Preserve(kind: pkSymbol, symbol: $0)

  Preserves.Compact <- Preserves.Compact:
    # The byte string just pushed holds a binary-encoded value; replace it
    # with the decoded value.
    pushStack decodePreserves(stack.pop.value.bytes)
|
||||
|
||||
proc parsePreserves*(text: string): Preserve {.gcsafe.} =
  ## Parse the textual Preserves document `text` and return its value.
  ##
  ## Raises `ValueError` when the grammar does not match; the message
  ## carries the input from the furthest matched position onwards.
  var frames: Stack
  let outcome = pegParser.match(text, frames)
  if outcome.ok:
    # A successful match is expected to leave exactly one value behind.
    assert(frames.len == 1)
    result = frames.pop.value
  else:
    let remainder = text[outcome.matchMax..text.high]
    raise newException(ValueError, "failed to parse Preserves:\n" & remainder)
|
|
@ -0,0 +1,72 @@
|
|||
# SPDX-FileCopyrightText: ☭ 2021 Emery Hemingway
|
||||
# SPDX-License-Identifier: ISC
|
||||
|
||||
import npeg, npeg/lib/utf8
|
||||
|
||||
when defined(nimHasUsed): {.used.}
|
||||
|
||||
grammar "Preserves":
  # Grammar for the textual Preserves syntax. The rules carry no actions;
  # importers attach them by overriding rules by name.

  # A document is a single value followed only by whitespace.
  Document <- Value * ws * !1

  Value <-
      (ws * (Record | Collection | Atom | Embedded | Compact)) |
      (ws * '@' * Value * Value) |
      (ws * ';' * @'\n' * Value)

  Collection <- Sequence | Dictionary | Set

  Atom <- Boolean | Float | Double | SignedInteger | String | ByteString | Symbol

  Record <- '<' * Value * *Value * ws * '>'

  Sequence <- '[' * ws * *(Value * ws) * ']'

  Dictionary <- '{' * ws * *(Value * ws * ':' * ws * Value * ws) * '}'

  Set <- "#{" * ws * *(Value * ws) * '}'

  Boolean <- "#f" | "#t"

  Float <- >flt * 'f'
  Double <- flt
  SignedInteger <- int

  nat <- '0' | (Digit-'0') * *Digit
  int <- ?'-' * nat
  frac <- '.' * +Digit
  exp <- 'e' * ?('-'|'+') * +Digit
  flt <- int * ((frac * exp) | frac | exp)

  stringBody <- ?escape * *( +( {'\x20'..'\xff'} - {'"'} - {'\\'}) * *escape)
  String <- '"' * stringBody * '"'

  ByteString <- charByteString | hexByteString | b64ByteString
  charByteString <- '#' * >('"' * >(*binchar) * '"')
  hexByteString <- "#x\"" * ws * >(*(Xdigit[2] * ws)) * '"'
  b64ByteString <- "#[" * ws * >(*(base64char * ws)) * ']'

  binchar <- binunescaped | (escape * (escaped | '"' | ('x' * Xdigit[2])))
  # Printable ASCII except '"' and '\\'. The bounds are written in hex because
  # Nim reads '\20' as the character with DECIMAL value 20, not 0x20 (space).
  binunescaped <- {'\x20'..'\x21', '#'..'[', ']'..'~'}
  base64char <- {'A'..'Z', 'a'..'z', '0'..'9', '+', '/', '-', '_', '='}

  Symbol <- (symstart * *symcont) | ('|' * *symchar * '|')

  symstart <- Alpha | sympunct | symustart
  symcont <- Alpha | sympunct | symustart | symucont | Digit | '-'
  sympunct <- {'~', '!', '$', '%', '^', '&', '*', '?', '_', '=', '+', '/', '.'}
  # NOTE(review): 'u' is followed by a single Xdigit here while unicodeEscaped
  # below uses Xdigit[4] - confirm which is intended.
  symchar <- unescaped | '"' | (escape * (escaped | '|' | ('u' * Xdigit)))
  symustart <- utf8.any - {0..127}
  symucont <- utf8.any - {0..127}
  # TODO: exclude some unicode ranges

  Embedded <- "#!" * Value

  Compact <- "#=" * ws * ByteString

  unescaped <- utf8.any - escaped
  unicodeEscaped <- 'u' * Xdigit[4]
  escaped <- '\\' * ({'{', '"', '|', '\\', 'b', 'f', 'n', 'r', 't'} | unicodeEscaped)
  escape <- '\\'

  # Commas count as whitespace.
  ws <- *(' ' | '\t' | '\r' | '\n' | ',')
|
|
@ -43,7 +43,7 @@ suite "native":
|
|||
check(b == a)
|
||||
block:
|
||||
stream.setPosition(0)
|
||||
let y = stream.parsePreserve()
|
||||
let y = stream.decodePreserves()
|
||||
let a = num
|
||||
let b = y.int
|
||||
check(b == a)
|
||||
|
@ -67,7 +67,7 @@ suite "big":
|
|||
check(b == a)
|
||||
block:
|
||||
stream.setPosition(0)
|
||||
let y = stream.parsePreserve()
|
||||
let y = stream.decodePreserves()
|
||||
let a = big
|
||||
let b = y.bigint
|
||||
check(b == a)
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
# SPDX-License-Identifier: ISC
|
||||
|
||||
import std/[strutils, unittest]
|
||||
import preserves, preserves/parse
|
||||
|
||||
const examples = [
  # Pairs of (textual syntax, expected binary encoding).

  # records and collections
  ("""<capture <discard>>""",
    "\xB4\xB3\x07capture\xB4\xB3\x07discard\x84\x84"),
  ("""[1 2 3 4]""",
    "\xB5\x91\x92\x93\x94\x84"),
  ("""[-2 -1 0 1]""",
    "\xB5\x9E\x9F\x90\x91\x84"),
  (""""hello"""",
    "\xB1\x05hello"),
  ("""["a" b #"c" [] #{} #t #f]""",
    "\xB5\xB1\x01a\xB3\x01b\xB2\x01c\xB5\x84\xB6\x84\x81\x80\x84"),

  # integers
  ("""-257""",
    "\xA1\xFE\xFF"),
  ("""-1""",
    "\x9F"),
  ("""0""",
    "\x90"),
  ("""1""",
    "\x91"),
  ("""255""",
    "\xA1\x00\xFF"),

  # floats and doubles
  ("""1.0f""",
    "\x82\x3F\x80\x00\x00"),
  ("""1.0""",
    "\x83\x3F\xF0\x00\x00\x00\x00\x00\x00"),
  ("""-1.202e300""",
    "\x83\xFE\x3C\xB7\xB7\x59\xBF\x04\x26"),

  # binary data embedded in text, and a lone boolean
  ("""#=#x"B4B30763617074757265B4B307646973636172648484"""",
    "\xB4\xB3\x07capture\xB4\xB3\x07discard\x84\x84"),
  ("""#f""",
    "\x80")
  ]
|
||||
|
||||
suite "parse":
  for (txt, bin) in examples:
    test txt:
      checkpoint(txt)
      let parsed = parsePreserves(txt)
      checkpoint($parsed)
      # The textual parse must equal the value decoded from the binary form.
      let decoded = decodePreserves(bin)
      check(parsed == decoded)
      # Re-encoding the parsed value must reproduce the binary form; compare
      # as hex so that a failure prints readably.
      let encoded = encode(parsed)
      check(encoded.toHex == bin.toHex)
|
|
@ -62,7 +62,7 @@ for i, jsText in testVectors:
|
|||
stream.write(x)
|
||||
stream.setPosition(0)
|
||||
let
|
||||
y = stream.parsePreserve()
|
||||
y = stream.decodePreserves()
|
||||
test = y.toJson
|
||||
check(y == x)
|
||||
check(test == control)
|
||||
|
|
Loading…
Reference in New Issue