Fix string escaping

This commit is contained in:
Emery Hemingway 2022-10-29 18:34:01 -05:00
parent 201cb7c68e
commit 489d6b31d5
3 changed files with 66 additions and 33 deletions

View File

@ -1,6 +1,6 @@
# Package
version = "20221027"
version = "20221030"
author = "Emery Hemingway"
description = "data model and serialization format"
license = "Unlicense"

View File

@ -40,7 +40,8 @@ grammar "Preserves":
exp <- 'e' * ?('-'|'+') * +Digit
flt <- int * ((frac * exp) | frac | exp)
String <- '"' * *(escape * (escaped | unicodeEscaped) | (utf8.any - '"')) * '"'
char <- unescaped | '|' | (escape * (escaped | '"' | ('u' * Xdigit[4])))
String <- '"' * >(*char) * '"'
ByteString <- charByteString | hexByteString | b64ByteString
charByteString <- "#\"" * >(*binchar) * '"'
@ -48,7 +49,7 @@ grammar "Preserves":
b64ByteString <- "#[" * ws * >(*(base64char * ws)) * ']'
binchar <- binunescaped | (escape * (escaped | '"' | ('x' * Xdigit[2])))
binunescaped <- {'\20'..'\21', '#'..'[', ']'..'~'}
binunescaped <- {' '..'!', '#'..'[', ']'..'~'}
base64char <- {'A'..'Z', 'a'..'z', '0'..'9', '+', '/', '-', '_', '='}
Symbol <- (symstart * *symcont) | ('|' * *symchar * '|')
@ -65,9 +66,9 @@ grammar "Preserves":
Compact <- "#=" * ws * ByteString
unescaped <- utf8.any - escaped
unescaped <- utf8.any - { '\x00'..'\x19', '"', '\\', '|' }
unicodeEscaped <- 'u' * Xdigit[4]
escaped <- {'{', '"', '|', '\\', 'b', 'f', 'n', 'r', 't'}
escaped <- {'\\', '/', 'b', 'f', 'n', 'r', 't'}
escape <- '\\'
ws <- *(' ' | '\t' | '\r' | '\n' | ',')

View File

@ -3,7 +3,9 @@
# this module is included in ../../preserves.nim
import std/[parseutils, strutils]
import std/[parseutils, unicode]
from std/sequtils import insert
from std/strutils import Whitespace, parseFloat, parseHexStr, parseInt, tokenize
import npeg
import ../pegs
@ -21,6 +23,58 @@ proc joinWhitespace(s: string): string =
for token, isSep in tokenize(s, Whitespace + {','}):
if not isSep: add(result, token)
template unescape(buf: var string; capture: string) =
  ## Append `capture` to `buf`, decoding backslash escape sequences.
  ##
  ## Recognised escapes are `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`, `\"`
  ## and `\uXXXX` (four hex digits). A `\uXXXX` high surrogate followed by
  ## a `\uXXXX` low surrogate is combined into a single code point before
  ## being appended as UTF-8. Every other byte is copied through unchanged.
  ##
  ## Malformed input (unknown escape, dangling backslash, unpaired
  ## surrogate) is rejected through `validate`.
  ## NOTE(review): `validate` is resolved at the expansion site - presumably
  ## the one npeg provides inside grammar code blocks; confirm before
  ## expanding this template anywhere else.
  let src = capture # bind once: a template argument is re-evaluated per use
  var i: int
  while i < len(src):
    if src[i] == '\\':
      inc(i)
      validate(i < len(src)) # a trailing backslash has nothing to escape
      case src[i]
      of '\\': add(buf, char 0x5c)
      of '/': add(buf, char 0x2f)
      of 'b': add(buf, char 0x08)
      of 'f': add(buf, char 0x0c)
      of 'n': add(buf, char 0x0a)
      of 'r': add(buf, char 0x0d)
      of 't': add(buf, char 0x09)
      of '"': add(buf, char 0x22)
      of 'u':
        var unit: int32
        inc(i) # step onto the first hex digit
        discard parseHex(src, unit, i, 4)
        inc(i, 3) # leave `i` on the last hex digit of this escape
        if unit in 0xD800'i32 .. 0xDBFF'i32:
          # High surrogate: only meaningful when immediately followed by a
          # `\uXXXX` low surrogate; the pair encodes one code point.
          var low: int32
          validate(i + 6 < len(src) and src[i + 1] == '\\' and
              src[i + 2] == 'u')
          validate(parseHex(src, low, i + 3, 4) == 4)
          validate(low in 0xDC00'i32 .. 0xDFFF'i32)
          unit = 0x10000'i32 + ((unit - 0xD800'i32) shl 10) +
              (low - 0xDC00'i32)
          inc(i, 6) # leave `i` on the last hex digit of the low surrogate
        else:
          # A low surrogate with no preceding high surrogate is not a
          # valid code point.
          validate(unit notin 0xDC00'i32 .. 0xDFFF'i32)
        add(buf, Rune unit)
      else:
        validate(false)
    else:
      add(buf, src[i])
    inc(i)
template unescape(buf: var seq[byte]; capture: string) =
  ## Append the bytes of `capture` to `buf`, decoding backslash escape
  ## sequences.
  ##
  ## Recognised escapes are `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`, `\"`
  ## and `\xXX` (two hex digits, yielding one byte). Every other character
  ## is appended as its byte value.
  ##
  ## Malformed input (unknown escape, dangling backslash) is rejected
  ## through `validate`.
  ## NOTE(review): `validate` is resolved at the expansion site - presumably
  ## the one npeg provides inside grammar code blocks; confirm before
  ## expanding this template anywhere else.
  let src = capture # bind once: a template argument is re-evaluated per use
  var i: int
  while i < len(src):
    if src[i] == '\\':
      inc(i)
      validate(i < len(src)) # a trailing backslash has nothing to escape
      case src[i]
      of '\\': add(buf, 0x5c'u8)
      of '/': add(buf, 0x2f'u8)
      of 'b': add(buf, 0x08'u8)
      of 'f': add(buf, 0x0c'u8)
      of 'n': add(buf, 0x0a'u8)
      of 'r': add(buf, 0x0d'u8)
      of 't': add(buf, 0x09'u8)
      of '"': add(buf, 0x22'u8)
      of 'x':
        var b: byte
        inc(i) # step onto the first hex digit
        discard parseHex(src, b, i, 2)
        inc(i) # leave `i` on the second hex digit
        add(buf, b)
      else:
        validate(false)
    else:
      add(buf, byte src[i])
    inc(i)
proc parsePreserves*(text: string): Preserve[void] {.gcsafe.} =
## Parse a text-encoded Preserves `string` to a `Preserve` value.
runnableExamples:
@ -89,35 +143,13 @@ proc parsePreserves*(text: string): Preserve[void] {.gcsafe.} =
pushStack Value(kind: pkSignedInteger, int: parseInt($0))
Preserves.String <- Preserves.String:
pushStack Value(kind: pkString, string: unescape($0).replace("\\n", "\n"))
var v = Value(kind: pkString, string: newStringOfCap(len($1)))
unescape(v.string, $1)
pushStack v
Preserves.charByteString <- Preserves.charByteString:
let chars = $1
var
v = Value(kind: pkByteString, bytes: newSeqOfCap[byte](chars.len))
i: int
while i < len(chars):
if chars[i] == '\\':
inc(i)
case chars[i]
of '\\': add(v.bytes, 0x5c'u8)
of '/': add(v.bytes, 0x2f'u8)
of 'b': add(v.bytes, 0x08'u8)
of 'f': add(v.bytes, 0x0c'u8)
of 'n': add(v.bytes, 0x0a'u8)
of 'r': add(v.bytes, 0x0d'u8)
of 't': add(v.bytes, 0x09'u8)
of '"': add(v.bytes, 0x22'u8)
of 'x':
var b: byte
inc(i)
discard parseHex(chars, b, i, 2)
inc(i)
add(v.bytes, b)
else: discard
else:
add(v.bytes, byte chars[i])
inc(i)
var v = Value(kind: pkByteString, bytes: newSeqOfCap[byte](len($1)))
unescape(v.bytes, $1)
pushStack v
Preserves.hexByteString <- Preserves.hexByteString: