preserves-nim/src/preserves/schemaparse.nim

324 lines
9.1 KiB
Nim

# SPDX-FileCopyrightText: ☭ Emery Hemingway
# SPDX-License-Identifier: Unlicense
import std/[strutils, tables]
from std/os import absolutePath, isAbsolute, getCurrentDir, parentDir
import npeg
import ../preserves, ./schema, ./pegs
type
Stack = seq[tuple[node: Value, pos: int]]
ParseState = object
schema: SchemaField0
stack: Stack
directory: string
template takeStackAt(): seq[Value] =
var nodes = newSeq[Value]()
let pos = capture[0].si
var i: int
while i < p.stack.len and p.stack[i].pos < pos:
inc i
let stop = i
while i < p.stack.len:
nodes.add(move p.stack[i].node)
inc i
p.stack.setLen(stop)
nodes
template takeStackAfter(): seq[Value] =
var nodes = newSeq[Value]()
let pos = capture[0].si
var i: int
while i < p.stack.len and p.stack[i].pos <= pos:
inc i
let stop = i
while i < p.stack.len:
nodes.add(move p.stack[i].node)
inc i
p.stack.setLen(stop)
nodes
template popStack(): Value =
assert(p.stack.len > 0, capture[0].s)
assert(capture[0].si <= p.stack[p.stack.high].pos, capture[0].s)
p.stack.pop.node
template pushStack(n: Value) =
let pos = capture[0].si
var i: int
while i < p.stack.len and p.stack[i].pos < pos:
inc i
p.stack.setLen(i)
p.stack.add((n, pos))
assert(p.stack.len > 0, capture[0].s)
proc toSymbolLit(s: string): Value =
initRecord(toSymbol"lit", toSymbol s)
proc match(text: string; p: var ParseState)
const parser = peg("Schema", p: ParseState):
Schema <- S * +Clause * !1
Clause <- (Version | EmbeddedTypeName | Include | Definition | +LineComment) * S * '.' * S
Version <- "version" * S * >(*Digit):
if parseInt($1) != 1: fail()
EmbeddedTypeName <- "embeddedType" * S * ("#f" | Ref):
if capture.len == 1:
var r = popStack()
p.schema.embeddedType =
EmbeddedTypeName(orKind: EmbeddedTypeNameKind.Ref)
validate p.schema.embeddedType.`ref`.fromPreserves(r)
Include <- "include" * S * '"' * >(+Preserves.char) * '"':
var path: string
unescape(path, $1)
path = absolutePath(path, p.directory)
var state = ParseState(
schema: move p.schema,
directory: parentDir path)
match(readFile path, state)
p.schema = move state.schema
Definition <- ?Annotation * *LineComment * id * '=' * S * (OrPattern | AndPattern | Pattern):
if p.schema.definitions.hasKey(Symbol $1):
raise newException(ValueError, "duplicate definition of " & $0)
var
node = popStack()
def: Definition
if not def.fromPreserves(node):
raise newException(ValueError, "failed to convert " & $1 & " to a Definition: " & $node)
p.schema.definitions[Symbol $1] = def
p.stack.setLen(0)
OrDelim <- *LineComment * '/' * S * *LineComment
OrPattern <- ?OrDelim * AltPattern * +(S * OrDelim * AltPattern):
var node = initRecord(toSymbol("or"), takeStackAt().toPreserves)
pushStack node
AltPattern <-
AltNamed |
AltRecord |
AltRef |
AltLiteralPattern
AltNamed <- atId * ?Annotation * Pattern:
var n = toPreserves(@[toPreserves $1] & takeStackAt())
pushStack n
AltRecord <- '<' * id * *NamedPattern * '>':
var n = toPreserves @[
toPreserves $1,
initRecord(
toSymbol"rec",
toSymbolLit $1,
initRecord(toSymbol"tuple", toPreserves takeStackAt()))]
pushStack n
AltRef <- Ref:
var r = popStack()
var n = toPreserves @[r[1].symbol.string.toPreserves, r]
pushStack n
AltLiteralPattern <-
>Preserves.Boolean |
>Preserves.Float |
>Preserves.Double |
>Preserves.SignedInteger |
>Preserves.String |
'=' * >Preserves.Symbol:
var id = case $1
of "#f": "false"
of "#t": "true"
else: $1
var n = toPreserves @[
toPreserves id,
initRecord(toSymbol"lit", parsePreserves $1)]
pushStack n
AndPattern <- ?'&' * S * NamedPattern * +('&' * S * NamedPattern):
var node = initRecord(toSymbol("and"), toPreserves takeStackAt())
pushStack node
Pattern <- SimplePattern | CompoundPattern
SimplePattern <- (
AnyPattern |
AtomKindPattern |
EmbeddedPattern |
LiteralPattern |
SequenceOfPattern |
SetOfPattern |
DictOfPattern |
Ref ) * S
AnyPattern <- "any":
pushStack toSymbol"any"
AtomKindPattern <- Boolean | Float | Double | SignedInteger | String | ByteString | Symbol
Boolean <- "bool":
pushStack initRecord(toSymbol"atom", toSymbol"Boolean")
Float <- "float":
pushStack initRecord(toSymbol"atom", toSymbol"Float")
Double <- "double":
pushStack initRecord(toSymbol"atom", toSymbol"Double")
SignedInteger <- "int":
pushStack initRecord(toSymbol"atom", toSymbol"SignedInteger")
String <- "string":
pushStack initRecord(toSymbol"atom", toSymbol"String")
ByteString <- "bytes":
pushStack initRecord(toSymbol"atom", toSymbol"ByteString")
Symbol <- "symbol":
pushStack initRecord(toSymbol"atom", toSymbol"Symbol")
EmbeddedPattern <- "#!" * SimplePattern:
var n = initRecord(toSymbol"embedded", popStack())
pushStack n
LiteralPattern <- ('=' * >symbol) | ("<<lit>" * >Preserves.Value * ">") | >nonSymbolAtom:
pushStack initRecord(toSymbol"lit", parsePreserves($1))
SequenceOfPattern <- '[' * S * SimplePattern * "..." * S * ']':
var n = initRecord(toSymbol"seqof", popStack())
pushStack n
SetOfPattern <- "#{" * S * SimplePattern * '}':
var n = initRecord(toSymbol"setof", popStack())
pushStack n
DictOfPattern <-
'{' * S *
?Annotation * SimplePattern * ':' * S * SimplePattern * "...:..." * S *
'}':
var
val = popStack()
key = popStack()
var n = initRecord(toSymbol"dictof", key, val)
pushStack n
Ref <- >(Alpha * *Alnum) * *('.' * >(*Alnum)):
var path = initSequence()
for i in 1..<capture.len: path.sequence.add(toSymbol capture[i].s)
var name = pop(path.sequence)
var n = initRecord(toSymbol"ref", path, name)
pushStack n
CompoundPattern <- (
RecordPattern |
VariableRecordPattern |
TuplePattern |
VariableTuplePattern |
DictionaryPattern ) * S
RecordPattern <-
("<<rec>" * S * NamedPattern * *NamedPattern * '>') |
('<' * >Value * *(S * NamedPattern) * '>'):
if capture.len == 2:
var n = initRecord(toSymbol"rec",
toSymbolLit $1,
initRecord(toSymbol"tuple", toPreserves takeStackAfter()))
pushStack n
else:
var n = initRecord(toSymbol"rec", takeStackAfter())
pushStack n
VariableRecordPattern <- '<' * >Value * S * *(NamedPattern) * "..." * S * '>':
var fields = takeStackAfter()
var tail = fields.pop
tail[1] = initRecord(toSymbol"seqof", tail[1])
var n = initRecord(
toSymbol"rec",
toSymbolLit $1,
initRecord(toSymbol"tuplePrefix", toPreserves fields, tail))
pushStack n
TuplePattern <-
'[' * S * *NamedPattern * ']':
var n = initRecord(toSymbol"tuple", toPreserves takeStackAfter())
pushStack n
VariableTuplePattern <-
'[' * S * *NamedPattern * ?Pattern * "..." * S * ']':
var fields = takeStackAfter()
var tail = fields.pop
tail[1] = initRecord(toSymbol"seqof", tail[1])
var node = initRecord(toSymbol"tuplePrefix", toPreserves fields, tail)
pushStack node
DictionaryPattern <- '{' * *(S * >Value * S * ':' * S * NamedSimplePattern * ?',') * S * '}':
var dict = initDictionary()
for i in countDown(pred capture.len, 1):
let key = toSymbol capture[i].s
dict[key] = initRecord("named", key, popStack())
var n = initRecord(toSymbol"dict", dict)
pushStack n
NamedPattern <- ((atId * ?Annotation * SimplePattern) | Pattern):
if capture.len == 2:
var n = initRecord(toSymbol"named", toSymbol $1, popStack())
pushStack n
NamedSimplePattern <- ((atId * ?Annotation * SimplePattern) | SimplePattern):
if capture.len == 2:
var n = initRecord(toSymbol"named", toSymbol $1, popStack())
pushStack n
id <- >(Alpha * *Alnum) * S
atId <- ?Annotation * '@' * id
symbol <- Preserves.Symbol
nonSymbolAtom <-
Preserves.Boolean |
Preserves.Float |
Preserves.Double |
Preserves.SignedInteger |
Preserves.String |
Preserves.ByteString
Value <- Preserves.Value:
discard
Annotation <- '@' * (Preserves.String | Preserves.Record) * S
S <- *{ ' ', '\t', '\r', '\n' }
LineComment <- '#' * @'\n' * S
proc match(text: string; p: var ParseState) =
let match = parser.match(text, p)
if not match.ok:
raise newException(ValueError, "failed to parse.\n" & text[0..<match.matchMax])
proc parsePreservesSchema*(text: string; directory = getCurrentDir()): Schema =
## Parse a Preserves schema.
##
## Schemas in binary encoding should instead be parsed as Preserves
## and converted to `Schema` with `fromPreserve` or `preserveTo`.
assert directory != ""
var p = ParseState(schema: SchemaField0(), directory: directory)
match(text, p)
Schema(field0: p.schema)
when isMainModule:
import std/streams
let txt = readAll stdin
if txt != "":
let
scm = parsePreservesSchema(txt)
pr = toPreserves scm
stdout.newFileStream.writeText(pr, textPreserves)