From 489d6b31d5b00745e9bf554daeea457c4045c9e7 Mon Sep 17 00:00:00 2001 From: Emery Hemingway Date: Sat, 29 Oct 2022 18:34:01 -0500 Subject: [PATCH] Fix string escaping --- preserves.nimble | 2 +- src/preserves/pegs.nim | 9 ++-- src/preserves/private/parse.nim | 88 ++++++++++++++++++++++----------- 3 files changed, 66 insertions(+), 33 deletions(-) diff --git a/preserves.nimble b/preserves.nimble index 9e42c43..6382aff 100644 --- a/preserves.nimble +++ b/preserves.nimble @@ -1,6 +1,6 @@ # Package -version = "20221027" +version = "20221030" author = "Emery Hemingway" description = "data model and serialization format" license = "Unlicense" diff --git a/src/preserves/pegs.nim b/src/preserves/pegs.nim index 9c63fb7..b64b5d4 100644 --- a/src/preserves/pegs.nim +++ b/src/preserves/pegs.nim @@ -40,7 +40,8 @@ grammar "Preserves": exp <- 'e' * ?('-'|'+') * +Digit flt <- int * ((frac * exp) | frac | exp) - String <- '"' * *(escape * (escaped | unicodeEscaped) | (utf8.any - '"')) * '"' + char <- unescaped | '|' | (escape * (escaped | '"' | ('u' * Xdigit[4]))) + String <- '"' * >(*char) * '"' ByteString <- charByteString | hexByteString | b64ByteString charByteString <- "#\"" * >(*binchar) * '"' @@ -48,7 +49,7 @@ grammar "Preserves": b64ByteString <- "#[" * ws * >(*(base64char * ws)) * ']' binchar <- binunescaped | (escape * (escaped | '"' | ('x' * Xdigit[2]))) - binunescaped <- {'\20'..'\21', '#'..'[', ']'..'~'} + binunescaped <- {' '..'!', '#'..'[', ']'..'~'} base64char <- {'A'..'Z', 'a'..'z', '0'..'9', '+', '/', '-', '_', '='} Symbol <- (symstart * *symcont) | ('|' * *symchar * '|') @@ -65,9 +66,9 @@ grammar "Preserves": Compact <- "#=" * ws * ByteString - unescaped <- utf8.any - escaped + unescaped <- utf8.any - { '\x00'..'\x19', '"', '\\', '|' } unicodeEscaped <- 'u' * Xdigit[4] - escaped <- {'{', '"', '|', '\\', 'b', 'f', 'n', 'r', 't'} + escaped <- {'\\', '/', 'b', 'f', 'n', 'r', 't'} escape <- '\\' ws <- *(' ' | '\t' | '\r' | '\n' | ',') diff --git a/src/preserves/private/parse.nim b/src/preserves/private/parse.nim index e00e3cb..6c8f8fb 100644 --- a/src/preserves/private/parse.nim +++ b/src/preserves/private/parse.nim @@ -3,7 +3,9 @@ # this module is included in ../../preserves.nim -import std/[parseutils, strutils] +import std/[parseutils, unicode] +from std/sequtils import insert +from std/strutils import Whitespace, parseFloat, parseHexStr, parseInt, tokenize import npeg import ../pegs @@ -21,6 +23,58 @@ proc joinWhitespace(s: string): string = for token, isSep in tokenize(s, Whitespace + {','}): if not isSep: add(result, token) +template unescape(buf: var string; capture: string) = + var i: int + while i < len(capture): + if capture[i] == '\\': + inc(i) + case capture[i] + of '\\': add(buf, char 0x5c) + of '/': add(buf, char 0x2f) + of 'b': add(buf, char 0x08) + of 'f': add(buf, char 0x0c) + of 'n': add(buf, char 0x0a) + of 'r': add(buf, char 0x0d) + of 't': add(buf, char 0x09) + of '"': add(buf, char 0x22) + of 'u': + var r: int32 + inc(i) + discard parseHex(capture, r, i, 4) + inc(i, 3) + add(buf, Rune r) + else: + validate(false) + else: + add(buf, capture[i]) + inc(i) + +template unescape(buf: var seq[byte]; capture: string) = + var i: int + while i < len(capture): + if capture[i] == '\\': + inc(i) + case capture[i] + of '\\': add(buf, 0x5c'u8) + of '/': add(buf, 0x2f'u8) + of 'b': add(buf, 0x08'u8) + of 'f': add(buf, 0x0c'u8) + of 'n': add(buf, 0x0a'u8) + of 'r': add(buf, 0x0d'u8) + of 't': add(buf, 0x09'u8) + of '"': add(buf, 0x22'u8) + of 'x': + var b: byte + inc(i) + discard parseHex(capture, b, i, 2) + inc(i) + add(buf, b) + else: + validate(false) + else: + add(buf, byte capture[i]) + inc(i) + proc parsePreserves*(text: string): Preserve[void] {.gcsafe.} = ## Parse a text-encoded Preserves `string` to a `Preserve` value. runnableExamples: @@ -89,35 +143,13 @@ proc parsePreserves*(text: string): Preserve[void] {.gcsafe.} = pushStack Value(kind: pkSignedInteger, int: parseInt($0)) Preserves.String <- Preserves.String: - pushStack Value(kind: pkString, string: unescape($0).replace("\\n", "\n")) + var v = Value(kind: pkString, string: newStringOfCap(len($1))) + unescape(v.string, $1) + pushStack v Preserves.charByteString <- Preserves.charByteString: - let chars = $1 - var - v = Value(kind: pkByteString, bytes: newSeqOfCap[byte](chars.len)) - i: int - while i < len(chars): - if chars[i] == '\\': - inc(i) - case chars[i] - of '\\': add(v.bytes, 0x5c'u8) - of '/': add(v.bytes, 0x2f'u8) - of 'b': add(v.bytes, 0x08'u8) - of 'f': add(v.bytes, 0x0c'u8) - of 'n': add(v.bytes, 0x0a'u8) - of 'r': add(v.bytes, 0x0d'u8) - of 't': add(v.bytes, 0x09'u8) - of '"': add(v.bytes, 0x22'u8) - of 'x': - var b: byte - inc(i) - discard parseHex(chars, b, i, 2) - inc(i) - add(v.bytes, b) - else: discard - else: - add(v.bytes, byte chars[i]) - inc(i) + var v = Value(kind: pkByteString, bytes: newSeqOfCap[byte](len($1))) + unescape(v.bytes, $1) pushStack v Preserves.hexByteString <- Preserves.hexByteString: