# SPDX-FileCopyrightText: ☭ Emery Hemingway # SPDX-License-Identifier: Unlicense import std/[base64, options, parseutils, strutils, unicode] from std/sequtils import insert import bigints, npeg import ../pegs import ./decoding, ./values type Frame = tuple[value: Value, pos: int] Stack = seq[Frame] proc shrink(stack: var Stack; n: int) = stack.setLen(stack.len - n) template pushStack(v: Value) = stack.add((v, capture[0].si)) proc joinWhitespace(s: string): string = result = newStringOfCap(s.len) for token, isSep in tokenize(s, Whitespace + {','}): if not isSep: add(result, token) template unescape*(buf: var string; capture: string) = var i: int while i < len(capture): if capture[i] == '\\': inc(i) case capture[i] of '\\': add(buf, char 0x5c) of '/': add(buf, char 0x2f) of 'b': add(buf, char 0x08) of 'f': add(buf, char 0x0c) of 'n': add(buf, char 0x0a) of 'r': add(buf, char 0x0d) of 't': add(buf, char 0x09) of '"': add(buf, char 0x22) of 'u': var short: uint16 inc(i) discard parseHex(capture, short, i, 4) inc(i, 3) if (short shr 15) == 0: add(buf, Rune(short).toUtf8) elif (short shr 10) == 0b110110: if i+6 >= capture.len: raise newException(ValueError, "Invalid UTF-16 surrogate pair") var rune = uint32(short shl 10) + 0x10000 validate(capture[i+1] == '\\') validate(capture[i+2] == 'u') inc(i, 3) discard parseHex(capture, short, i, 4) if (short shr 10) != 0b110111: raise newException(ValueError, "Invalid UTF-16 surrogate pair") inc(i, 3) rune = rune or (short and 0b1111111111) #add(buf, Rune(rune).toUTF8) let j = buf.len buf.setLen(buf.len+4) rune.Rune.fastToUTF8Copy(buf, j, false) else: raise newException(ValueError, "Invalid UTF-16 escape sequence " & capture) else: validate(false) else: add(buf, capture[i]) inc(i) template unescape(buf: var seq[byte]; capture: string) = var i: int while i < len(capture): if capture[i] == '\\': inc(i) case capture[i] of '\\': add(buf, 0x5c'u8) of '/': add(buf, 0x2f'u8) of 'b': add(buf, 0x08'u8) of 'f': add(buf, 0x0c'u8) of 'n': add(buf, 0x0a'u8) of 'r': add(buf, 0x0d'u8) of 't': add(buf, 0x09'u8) of '"': add(buf, 0x22'u8) of 'x': var b: byte inc(i) discard parseHex(capture, b, i, 2) inc(i) add(buf, b) else: validate(false) else: add(buf, byte capture[i]) inc(i) proc pushHexNibble[T](result: var T; c: char) = var n = case c of '0'..'9': T(ord(c) - ord('0')) of 'a'..'f': T(ord(c) - ord('a') + 10) of 'A'..'F': T(ord(c) - ord('A') + 10) else: return result = (result shl 4) or n proc parsePreserves*(text: string): Value = ## Parse a text-encoded Preserves `string` to a Preserves `Value`. let pegParser = peg("Document", stack: Stack): # Override rules from pegs.nim Document <- Preserves.Document Preserves.Record <- Preserves.Record: var record: seq[Value] labelOff: int while stack[labelOff].pos < capture[0].si: inc labelOff for i in labelOff.succ..stack.high: record.add(move stack[i].value) record.add(move stack[labelOff].value) stack.shrink record.len pushStack Value(kind: pkRecord, record: move record) Preserves.Sequence <- Preserves.Sequence: var sequence: seq[Value] for frame in stack.mitems: if frame.pos > capture[0].si: sequence.add(move frame.value) stack.shrink sequence.len pushStack Value(kind: pkSequence, sequence: move sequence) Preserves.Dictionary <- Preserves.Dictionary: var prs = Value(kind: pkDictionary) for i in countDown(stack.high.pred, 0, 2): if stack[i].pos < capture[0].si: break var val = stack.pop.value key = stack.pop.value for j in 0..prs.dict.high: validate(prs.dict[j].key != key) prs[key] = val pushStack prs Preserves.Set <- Preserves.Set: var prs = Value(kind: pkSet) for frame in stack.mitems: if frame.pos > capture[0].si: for e in prs.set: validate(e != frame.value) prs.incl(move frame.value) stack.shrink prs.set.len pushStack prs Preserves.Boolean <- Preserves.Boolean: case $0 of "#f": pushStack Value(kind: pkBoolean) of "#t": pushStack Value(kind: pkBoolean, bool: true) else: discard Preserves.Double <- Preserves.Double: pushStack Value(kind: pkFloat, float: parseFloat($1)) Preserves.DoubleRaw <- Preserves.DoubleRaw: var reg: uint64 for c in $1: pushHexNibble(reg, c) pushStack Value(kind: pkFloat, float: cast[float64](reg)) Preserves.SignedInteger <- Preserves.SignedInteger: var big = initBigInt($0) small = toInt[int](big) if small.isSome: pushStack Value(kind: pkRegister, register: small.get) else: pushStack Value(kind: pkBigInt, bigint: big) Preserves.String <- Preserves.String: var v = Value(kind: pkString, string: newStringOfCap(len($1))) unescape(v.string, $1) if validateUtf8(v.string) != -1: raise newException(ValueError, "Preserves text contains an invalid UTF-8 sequence") pushStack v Preserves.charByteString <- Preserves.charByteString: var v = Value(kind: pkByteString, bytes: newSeqOfCap[byte](len($1))) unescape(v.bytes, $1) pushStack v Preserves.hexByteString <- Preserves.hexByteString: pushStack Value(kind: pkByteString, bytes: cast[seq[byte]](parseHexStr(joinWhitespace($1)))) Preserves.b64ByteString <- Preserves.b64ByteString: pushStack Value(kind: pkByteString, bytes: cast[seq[byte]](base64.decode(joinWhitespace($1)))) Preserves.Symbol <- Preserves.Symbol: var buf = newStringOfCap(len($1)) unescape(buf, $1) pushStack Value(kind: pkSymbol, symbol: Symbol buf) Preserves.Embedded <- Preserves.Embedded: var v = stack.pop.value v.embedded = true pushStack v Preserves.Annotation <- Preserves.Annotation: var val = stack.pop.value discard stack.pop.value pushStack val Preserves.Compact <- Preserves.Compact: pushStack decodePreserves(stack.pop.value.bytes) var stack: Stack let match = pegParser.match(text, stack) if not match.ok: raise newException(ValueError, "failed to parse Preserves:\n" & text[match.matchMax..text.high]) assert(stack.len == 1) stack.pop.value proc parsePreservesAtom*(text: string): Atom = ## Parse a text-encoded Preserves `string` to a Preserves `Atom`. let pegParser = peg("Atom", a: Atom): # Override rules from pegs.nim Atom <- ?"#!" * Preserves.Atom Preserves.Boolean <- Preserves.Boolean: case $0 of "#f": a = Atom(kind: pkBoolean) of "#t": a = Atom(kind: pkBoolean, bool: true) else: discard Preserves.Float <- Preserves.Float: a = Atom(kind: pkFloat) validate(parseBiggestFloat($0, a.float) == len($0)) Preserves.FloatRaw <- Preserves.FloatRaw: var reg: uint64 for c in $1: pushHexNibble(reg, c) a = Atom(kind: pkFloat, float: cast[float64](reg)) Preserves.SignedInteger <- Preserves.SignedInteger: var big = initBigInt($0) small = toInt[int](big) if small.isSome: a = Atom(kind: pkRegister, register: small.get) else: a = Atom(kind: pkBigInt, bigint: big) Preserves.String <- Preserves.String: a = Atom(kind: pkString, string: newStringOfCap(len($1))) unescape(a.string, $1) if validateUtf8(a.string) != -1: raise newException(ValueError, "Preserves text contains an invalid UTF-8 sequence") Preserves.charByteString <- Preserves.charByteString: a = Atom(kind: pkByteString, bytes: newSeqOfCap[byte](len($1))) unescape(a.bytes, $1) Preserves.hexByteString <- Preserves.hexByteString: a = Atom(kind: pkByteString, bytes: cast[seq[byte]](parseHexStr(joinWhitespace($1)))) Preserves.b64ByteString <- Preserves.b64ByteString: a = Atom(kind: pkByteString, bytes: cast[seq[byte]](base64.decode(joinWhitespace($1)))) Preserves.Symbol <- Preserves.Symbol: var buf = newStringOfCap(len($1)) unescape(buf, $1) a = Atom(kind: pkSymbol, symbol: Symbol buf) if not pegParser.match(text, result).ok: raise newException(ValueError, "failed to parse Preserves atom: " & text)