2023-12-19 20:07:44 +00:00
|
|
|
# SPDX-FileCopyrightText: ☭ Emery Hemingway
|
2021-08-31 15:00:00 +00:00
|
|
|
# SPDX-License-Identifier: Unlicense
|
2021-07-16 17:11:19 +00:00
|
|
|
|
2023-12-22 15:55:23 +00:00
|
|
|
import std/[base64, options, parseutils, strutils, unicode]
|
2023-12-19 20:07:44 +00:00
|
|
|
from std/sequtils import insert
|
2023-06-11 20:34:16 +00:00
|
|
|
|
2023-12-22 15:55:23 +00:00
|
|
|
import bigints, npeg
|
|
|
|
|
2022-03-18 16:26:52 +00:00
|
|
|
import ../pegs
|
2023-12-19 20:07:44 +00:00
|
|
|
import ./decoding, ./values
|
2021-07-16 17:11:19 +00:00
|
|
|
|
|
|
|
type
  Frame = tuple
    # A parsed value together with the input position it was pushed at.
    value: Value
    pos: int
  Stack = seq[Frame]
    # Working stack shared by the grammar actions.
|
|
|
|
|
|
|
|
proc shrink(stack: var Stack; n: int) =
  ## Remove the last `n` frames from `stack`.
  setLen(stack, len(stack) - n)
|
|
|
|
|
2021-09-23 13:20:06 +00:00
|
|
|
template pushStack(v: Value) =
  ## Append `v` to `stack`, tagged with the start index of capture 0.
  ## Both `stack` and `capture` are resolved at the instantiation site.
  add(stack, (v, capture[0].si))
|
2021-07-16 17:11:19 +00:00
|
|
|
|
2022-10-28 18:40:35 +00:00
|
|
|
proc joinWhitespace(s: string): string =
  ## Return a copy of `s` with every whitespace character and every
  ## comma removed.
  result = newStringOfCap(len(s))
  for ch in s:
    if ch notin Whitespace + {','}:
      result.add ch
|
|
|
|
|
2022-11-20 20:12:23 +00:00
|
|
|
template unescape*(buf: var string; capture: string) =
  ## Append `capture` to `buf` with backslash escape sequences decoded.
  ##
  ## Recognised escapes are `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`, `\"`
  ## and `\uXXXX`. A `\uXXXX` that is not a UTF-16 surrogate is appended as
  ## the UTF-8 encoding of that code point. A high surrogate must be
  ## followed immediately by a `\uXXXX` low surrogate; the pair is combined
  ## into a single code point and appended as four UTF-8 bytes.
  ##
  ## Raises `ValueError` for a truncated or mismatched surrogate pair and
  ## for a low surrogate that is not preceded by a high surrogate. Any other
  ## escape character is rejected with `validate(false)`; `validate` is not
  ## defined here and is resolved at the instantiation site.
  var i: int
  while i < len(capture):
    if capture[i] == '\\':
      inc(i)
      case capture[i]
      of '\\': add(buf, char 0x5c)
      of '/': add(buf, char 0x2f)
      of 'b': add(buf, char 0x08)
      of 'f': add(buf, char 0x0c)
      of 'n': add(buf, char 0x0a)
      of 'r': add(buf, char 0x0d)
      of 't': add(buf, char 0x09)
      of '"': add(buf, char 0x22)
      of 'u':
        var short: uint16
        inc(i)
        discard parseHex(capture, short, i, 4)
        # Leave `i` on the last hex digit; the loop tail steps past it.
        inc(i, 3)
        if (short shr 11) != 0b11011:
          # Anything outside 0xD800..0xDFFF is a complete code point,
          # including the upper half of the BMP (0x8000..0xFFFF).
          add(buf, Rune(short).toUtf8)
        elif (short shr 10) == 0b110110:
          # High surrogate: the following six characters must be a
          # `\uXXXX` low surrogate.
          if i+6 >= capture.len:
            raise newException(ValueError, "Invalid UTF-16 surrogate pair")
          # Mask off the surrogate tag and widen before shifting; shifting
          # the uint16 directly would discard the upper payload bits.
          var rune = (uint32(short and 0b1111111111) shl 10) + 0x10000
          validate(capture[i+1] == '\\')
          validate(capture[i+2] == 'u')
          inc(i, 3)
          discard parseHex(capture, short, i, 4)
          if (short shr 10) != 0b110111:
            raise newException(ValueError, "Invalid UTF-16 surrogate pair")
          inc(i, 3)
          rune = rune or uint32(short and 0b1111111111)
          # Code points from U+10000 upwards always take four UTF-8 bytes.
          let j = buf.len
          buf.setLen(buf.len+4)
          rune.Rune.fastToUTF8Copy(buf, j, false)
        else:
          # Low surrogate without a preceding high surrogate.
          raise newException(ValueError, "Invalid UTF-16 escape sequence " & capture)
      else:
        validate(false)
    else:
      add(buf, capture[i])
    inc(i)
|
|
|
|
|
|
|
|
template unescape(buf: var seq[byte]; capture: string) =
  ## Append the bytes of `capture` to `buf`, decoding backslash escapes.
  ##
  ## Recognised escapes are `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`, `\"`
  ## and `\xHH` (two hex digits giving one byte). Any other escape
  ## character is rejected with `validate(false)`; `validate` is resolved
  ## at the instantiation site.
  var pos = 0
  while pos < capture.len:
    if capture[pos] != '\\':
      buf.add(byte capture[pos])
    else:
      pos.inc
      case capture[pos]
      of '\\', '/', '"':
        # These escapes stand for the character itself.
        buf.add(byte capture[pos])
      of 'b': buf.add(0x08'u8)
      of 'f': buf.add(0x0c'u8)
      of 'n': buf.add(0x0a'u8)
      of 'r': buf.add(0x0d'u8)
      of 't': buf.add(0x09'u8)
      of 'x':
        var octet: byte
        discard parseHex(capture, octet, pos + 1, 2)
        # Land on the second hex digit; the loop tail steps past it.
        pos.inc(2)
        buf.add(octet)
      else:
        validate(false)
    pos.inc
|
|
|
|
|
2023-12-20 09:45:00 +00:00
|
|
|
proc pushHexNibble[T](result: var T; c: char) =
  ## Shift `result` left by four bits and OR in the value of the hex
  ## digit `c`. Characters that are not hex digits leave `result` unchanged.
  const
    decimal = {'0'..'9'}
    lower = {'a'..'f'}
    upper = {'A'..'F'}
  if c notin decimal + lower + upper:
    return
  # Offset to subtract from `ord(c)` to obtain the digit value.
  let base =
    if c in decimal: ord('0')
    elif c in lower: ord('a') - 10
    else: ord('A') - 10
  result = (result shl 4) or T(ord(c) - base)
|
|
|
|
|
2023-12-27 15:21:11 +00:00
|
|
|
proc parsePreserves*(text: string): Value =
  ## Parse a text-encoded Preserves `string` to a Preserves `Value`.
  ##
  ## Rule actions build values on a `Stack` of `(value, pos)` frames, where
  ## `pos` is the input index at which the frame's capture began. Compound
  ## rules collect the frames that belong to them by comparing `pos` with
  ## the start of their own capture.
  ##
  ## Raises `ValueError` if `text` does not match the grammar or if a
  ## string literal decodes to invalid UTF-8.
  let pegParser = peg("Document", stack: Stack):
    # Override rules from pegs.nim

    Document <- Preserves.Document

    Preserves.Record <- Preserves.Record:
      var
        record: seq[Value]
        labelOff: int
      # Find the first frame that was pushed at or after the start of
      # this capture.
      while stack[labelOff].pos < capture[0].si:
        inc labelOff
      # Move the frames after that one into `record` first...
      for i in labelOff.succ..stack.high:
        record.add(move stack[i].value)
      # ...and append the frame at `labelOff` (presumably the record
      # label) as the final element.
      record.add(move stack[labelOff].value)
      stack.shrink record.len
      pushStack Value(kind: pkRecord, record: move record)

    Preserves.Sequence <- Preserves.Sequence:
      var sequence: seq[Value]
      # Collect every frame pushed after the start of this capture.
      for frame in stack.mitems:
        if frame.pos > capture[0].si:
          sequence.add(move frame.value)
      stack.shrink sequence.len
      pushStack Value(kind: pkSequence, sequence: move sequence)

    Preserves.Dictionary <- Preserves.Dictionary:
      var prs = Value(kind: pkDictionary)
      # Walk down the stack two frames at a time. Each pass pops two
      # frames, so `i` always indexes the frame just below the top.
      for i in countDown(stack.high.pred, 0, 2):
        # Stop at the first frame pushed before this capture began.
        if stack[i].pos < capture[0].si: break
        var
          val = stack.pop.value
          key = stack.pop.value
        # Fail validation on a duplicate key.
        for j in 0..prs.dict.high:
          validate(prs.dict[j].key != key)
        prs[key] = val
      pushStack prs

    Preserves.Set <- Preserves.Set:
      var prs = Value(kind: pkSet)
      for frame in stack.mitems:
        if frame.pos > capture[0].si:
          # Fail validation on a duplicate element.
          for e in prs.set: validate(e != frame.value)
          prs.incl(move frame.value)
      stack.shrink prs.set.len
      pushStack prs

    Preserves.Boolean <- Preserves.Boolean:
      case $0
      of "#f": pushStack Value(kind: pkBoolean)
      of "#t": pushStack Value(kind: pkBoolean, bool: true)
      else: discard

    Preserves.Float <- Preserves.Float:
      pushStack Value(kind: pkFloat, float: parseFloat($1))

    Preserves.Double <- Preserves.Double:
      # Push a default double, then parse the capture into it in place.
      pushStack Value(kind: pkDouble)
      let i = stack.high
      discard parseBiggestFloat($0, stack[i].value.double)

    Preserves.FloatRaw <- Preserves.FloatRaw:
      # Accumulate the hex digits of capture 1 into a 32-bit register and
      # reinterpret those bits as a float32.
      var reg: uint32
      for c in $1: pushHexNibble(reg, c)
      pushStack Value(kind: pkFloat, float: cast[float32](reg))

    Preserves.DoubleRaw <- Preserves.DoubleRaw:
      # Same as FloatRaw with a 64-bit register and float64.
      var reg: uint64
      for c in $1: pushHexNibble(reg, c)
      pushStack Value(kind: pkDouble, double: cast[float64](reg))

    Preserves.SignedInteger <- Preserves.SignedInteger:
      var
        big = initBigInt($0)
        small = toInt[int](big)
      # Use an `int` register when the conversion yields a value,
      # otherwise keep the big integer.
      if small.isSome:
        pushStack Value(kind: pkRegister, register: small.get)
      else:
        pushStack Value(kind: pkBigInt, bigint: big)

    Preserves.String <- Preserves.String:
      var v = Value(kind: pkString, string: newStringOfCap(len($1)))
      unescape(v.string, $1)
      # `validateUtf8` returns -1 when the whole string is valid.
      if validateUtf8(v.string) != -1:
        raise newException(ValueError, "Preserves text contains an invalid UTF-8 sequence")
      pushStack v

    Preserves.charByteString <- Preserves.charByteString:
      var v = Value(kind: pkByteString, bytes: newSeqOfCap[byte](len($1)))
      unescape(v.bytes, $1)
      pushStack v

    Preserves.hexByteString <- Preserves.hexByteString:
      # Strip whitespace and commas before decoding the hex digits.
      pushStack Value(kind: pkByteString, bytes: cast[seq[byte]](parseHexStr(joinWhitespace($1))))

    Preserves.b64ByteString <- Preserves.b64ByteString:
      # Strip whitespace and commas before decoding the base64 text.
      pushStack Value(kind: pkByteString, bytes: cast[seq[byte]](base64.decode(joinWhitespace($1))))

    Preserves.Symbol <- Preserves.Symbol:
      var buf = newStringOfCap(len($1))
      unescape(buf, $1)
      pushStack Value(kind: pkSymbol, symbol: Symbol buf)

    Preserves.Embedded <- Preserves.Embedded:
      # Re-push the top value with its `embedded` flag set.
      var v = stack.pop.value
      v.embedded = true
      pushStack v

    Preserves.Annotation <- Preserves.Annotation:
      # Keep the top value and drop the frame beneath it (presumably the
      # annotation itself).
      var val = stack.pop.value
      discard stack.pop.value
      pushStack val

    Preserves.Compact <- Preserves.Compact:
      # Replace the byte string on top of the stack with the value that
      # `decodePreserves` produces from its bytes.
      pushStack decodePreserves(stack.pop.value.bytes)

  var stack: Stack
  let match = pegParser.match(text, stack)
  if not match.ok:
    # Report the input from the furthest position the parser reached.
    raise newException(ValueError, "failed to parse Preserves:\n" & text[match.matchMax..text.high])
  # A successful parse is expected to leave exactly one frame.
  assert(stack.len == 1)
  stack.pop.value
|
2021-08-28 10:48:50 +00:00
|
|
|
|
2023-12-27 15:21:11 +00:00
|
|
|
proc parsePreservesAtom*(text: string): Atom =
  ## Parse a text-encoded Preserves `string` to a Preserves `Atom`.
  ##
  ## `result` is passed to the parser as its user-data argument `a`, so
  ## each rule action assigns the parsed atom directly into the return
  ## value.
  ##
  ## Raises `ValueError` if `text` does not match the grammar or if a
  ## string literal decodes to invalid UTF-8.
  let pegParser = peg("Atom", a: Atom):
    # Override rules from pegs.nim

    # An atom may be preceded by an optional "#!".
    Atom <- ?"#!" * Preserves.Atom

    Preserves.Boolean <- Preserves.Boolean:
      case $0
      of "#f": a = Atom(kind: pkBoolean)
      of "#t": a = Atom(kind: pkBoolean, bool: true)
      else: discard

    Preserves.Float <- Preserves.Float:
      a = Atom(kind: pkFloat, float: parseFloat($1))

    Preserves.Double <- Preserves.Double:
      # Start from a default double, then parse the capture into it.
      a = Atom(kind: pkDouble)
      discard parseBiggestFloat($0, a.double)

    Preserves.FloatRaw <- Preserves.FloatRaw:
      # Accumulate the hex digits of capture 1 into a 32-bit register and
      # reinterpret those bits as a float32.
      var reg: uint32
      for c in $1: pushHexNibble(reg, c)
      a = Atom(kind: pkFloat, float: cast[float32](reg))

    Preserves.DoubleRaw <- Preserves.DoubleRaw:
      # Same as FloatRaw with a 64-bit register and float64.
      var reg: uint64
      for c in $1: pushHexNibble(reg, c)
      a = Atom(kind: pkDouble, double: cast[float64](reg))

    Preserves.SignedInteger <- Preserves.SignedInteger:
      var
        big = initBigInt($0)
        small = toInt[int](big)
      # Use an `int` register when the conversion yields a value,
      # otherwise keep the big integer.
      if small.isSome:
        a = Atom(kind: pkRegister, register: small.get)
      else:
        a = Atom(kind: pkBigInt, bigint: big)

    Preserves.String <- Preserves.String:
      a = Atom(kind: pkString, string: newStringOfCap(len($1)))
      unescape(a.string, $1)
      # `validateUtf8` returns -1 when the whole string is valid.
      if validateUtf8(a.string) != -1:
        raise newException(ValueError, "Preserves text contains an invalid UTF-8 sequence")

    Preserves.charByteString <- Preserves.charByteString:
      a = Atom(kind: pkByteString, bytes: newSeqOfCap[byte](len($1)))
      unescape(a.bytes, $1)

    Preserves.hexByteString <- Preserves.hexByteString:
      # Strip whitespace and commas before decoding the hex digits.
      a = Atom(kind: pkByteString, bytes: cast[seq[byte]](parseHexStr(joinWhitespace($1))))

    Preserves.b64ByteString <- Preserves.b64ByteString:
      # Strip whitespace and commas before decoding the base64 text.
      a = Atom(kind: pkByteString, bytes: cast[seq[byte]](base64.decode(joinWhitespace($1))))

    Preserves.Symbol <- Preserves.Symbol:
      var buf = newStringOfCap(len($1))
      unescape(buf, $1)
      a = Atom(kind: pkSymbol, symbol: Symbol buf)

  if not pegParser.match(text, result).ok:
    raise newException(ValueError, "failed to parse Preserves atom: " & text)
|