preserves-nim/src/preserves/pegs.nim

77 lines
2.2 KiB
Nim

# SPDX-FileCopyrightText: 2021 ☭ Emery Hemingway
# SPDX-License-Identifier: Unlicense
## NPEG rules for Preserves.
import npeg, npeg/lib/utf8
when defined(nimHasUsed): {.used.}
grammar "Preserves":
Document <- Value * ws * !1
Value <-
(ws * (Record | Collection | Atom | Embedded | Compact)) |
(ws * Annotation) |
(ws * ';' * @'\n' * Value)
Collection <- Sequence | Dictionary | Set
Atom <- Boolean | Float | Double | SignedInteger | String | ByteString | Symbol
Record <- '<' * Value * *Value * ws * '>'
Sequence <- '[' * ws * *(Value * ws) * ']'
Dictionary <- '{' * ws * *(Value * ws * ':' * ws * Value * ws) * '}'
Set <- "#{" * ws * *(Value * ws) * '}'
Boolean <- "#f" | "#t"
Float <- >flt * 'f'
Double <- flt
SignedInteger <- int
nat <- '0' | (Digit-'0') * *Digit
int <- ?'-' * nat
frac <- '.' * +Digit
exp <- 'e' * ?('-'|'+') * +Digit
flt <- int * ((frac * exp) | frac | exp)
char <- unescaped | '|' | (escape * (escaped | '"' | ('u' * Xdigit[4])))
String <- '"' * >(*char) * '"'
ByteString <- charByteString | hexByteString | b64ByteString
charByteString <- "#\"" * >(*binchar) * '"'
hexByteString <- "#x\"" * ws * >(*(Xdigit[2] * ws)) * '"'
b64ByteString <- "#[" * ws * >(*(base64char * ws)) * ']'
binchar <- binunescaped | (escape * (escaped | '"' | ('x' * Xdigit[2])))
binunescaped <- {' '..'!', '#'..'[', ']'..'~'}
base64char <- {'A'..'Z', 'a'..'z', '0'..'9', '+', '/', '-', '_', '='}
Symbol <- >(symstart * *symcont) | ('|' * >(*symchar) * '|')
symstart <- Alpha | sympunct | symustart
symcont <- Alpha | sympunct | symustart | symucont | Digit | '-'
sympunct <- {'~', '!', '$', '%', '^', '&', '*', '?', '_', '=', '+', '/', '.'}
symchar <- unescaped | '"' | (escape * (escaped | '|' | ('u' * Xdigit)))
symustart <- utf8.any - {0..127}
symucont <- utf8.any - {0..127}
# TODO: exclude some unicode ranges
Embedded <- "#!" * Value
Annotation <- '@' * Value * Value
Compact <- "#=" * ws * ByteString
unescaped <- utf8.any - { '\x00'..'\x19', '"', '\\', '|' }
unicodeEscaped <- 'u' * Xdigit[4]
escaped <- {'\\', '/', 'b', 'f', 'n', 'r', 't'}
escape <- '\\'
ws <- *(' ' | '\t' | '\r' | '\n' | ',')