Fix string escaping
This commit is contained in:
parent
201cb7c68e
commit
489d6b31d5
|
@ -1,6 +1,6 @@
|
|||
# Package
|
||||
|
||||
version = "20221027"
|
||||
version = "20221030"
|
||||
author = "Emery Hemingway"
|
||||
description = "data model and serialization format"
|
||||
license = "Unlicense"
|
||||
|
|
|
@ -40,7 +40,8 @@ grammar "Preserves":
|
|||
exp <- 'e' * ?('-'|'+') * +Digit
|
||||
flt <- int * ((frac * exp) | frac | exp)
|
||||
|
||||
String <- '"' * *(escape * (escaped | unicodeEscaped) | (utf8.any - '"')) * '"'
|
||||
char <- unescaped | '|' | (escape * (escaped | '"' | ('u' * Xdigit[4])))
|
||||
String <- '"' * >(*char) * '"'
|
||||
|
||||
ByteString <- charByteString | hexByteString | b64ByteString
|
||||
charByteString <- "#\"" * >(*binchar) * '"'
|
||||
|
@ -48,7 +49,7 @@ grammar "Preserves":
|
|||
b64ByteString <- "#[" * ws * >(*(base64char * ws)) * ']'
|
||||
|
||||
binchar <- binunescaped | (escape * (escaped | '"' | ('x' * Xdigit[2])))
|
||||
binunescaped <- {'\20'..'\21', '#'..'[', ']'..'~'}
|
||||
binunescaped <- {' '..'!', '#'..'[', ']'..'~'}
|
||||
base64char <- {'A'..'Z', 'a'..'z', '0'..'9', '+', '/', '-', '_', '='}
|
||||
|
||||
Symbol <- (symstart * *symcont) | ('|' * *symchar * '|')
|
||||
|
@ -65,9 +66,9 @@ grammar "Preserves":
|
|||
|
||||
Compact <- "#=" * ws * ByteString
|
||||
|
||||
unescaped <- utf8.any - escaped
|
||||
unescaped <- utf8.any - { '\x00'..'\x19', '"', '\\', '|' }
|
||||
unicodeEscaped <- 'u' * Xdigit[4]
|
||||
escaped <- {'{', '"', '|', '\\', 'b', 'f', 'n', 'r', 't'}
|
||||
escaped <- {'\\', '/', 'b', 'f', 'n', 'r', 't'}
|
||||
escape <- '\\'
|
||||
|
||||
ws <- *(' ' | '\t' | '\r' | '\n' | ',')
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
|
||||
# this module is included in ../../preserves.nim
|
||||
|
||||
import std/[parseutils, strutils]
|
||||
import std/[parseutils, unicode]
|
||||
from std/sequtils import insert
|
||||
from std/strutils import Whitespace, parseFloat, parseHexStr, parseInt, tokenize
|
||||
import npeg
|
||||
import ../pegs
|
||||
|
||||
|
@ -21,6 +23,58 @@ proc joinWhitespace(s: string): string =
|
|||
for token, isSep in tokenize(s, Whitespace + {','}):
|
||||
if not isSep: add(result, token)
|
||||
|
||||
template unescape(buf: var string; capture: string) =
  ## Append `capture` to `buf`, decoding backslash escapes along the way.
  ##
  ## Recognised escapes are `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`, `\"`
  ## and `\uXXXX` (four hex digits). A `\uXXXX` high surrogate immediately
  ## followed by a `\uXXXX` low surrogate is combined into one code point, so
  ## that characters outside the Basic Multilingual Plane are emitted as
  ## valid UTF-8. Unpaired surrogates, malformed hex digits, unknown escape
  ## selectors and a trailing backslash are reported with `validate(false)`.
  ##
  ## `validate` is not defined here; it is resolved where the template is
  ## expanded (presumably an npeg code block - confirm against the caller).
  var i: int
  while i < len(capture):
    if capture[i] == '\\':
      inc(i)
      # A backslash must be followed by an escape selector.
      validate(i < len(capture))
      case capture[i]
      of '\\': add(buf, char 0x5c)
      of '/': add(buf, char 0x2f)
      of 'b': add(buf, char 0x08)
      of 'f': add(buf, char 0x0c)
      of 'n': add(buf, char 0x0a)
      of 'r': add(buf, char 0x0d)
      of 't': add(buf, char 0x09)
      of '"': add(buf, char 0x22)
      of 'u':
        var code: int
        inc(i)
        validate(parseHex(capture, code, i, 4) == 4)
        # Leave `i` on the last hex digit; the shared `inc(i)` at the bottom
        # of the loop steps past it.
        inc(i, 3)
        if code in 0xD800..0xDBFF:
          # High surrogate: a `\uXXXX` low surrogate must follow directly.
          var low: int
          validate(
            i + 6 < len(capture) and
            capture[i + 1] == '\\' and
            capture[i + 2] == 'u' and
            parseHex(capture, low, i + 3, 4) == 4 and
            low in 0xDC00..0xDFFF)
          code = 0x10000 + ((code - 0xD800) shl 10) + (low - 0xDC00)
          # Skip the six characters of the second escape, again stopping on
          # its last hex digit.
          inc(i, 6)
        else:
          # A low surrogate without a preceding high surrogate cannot be
          # encoded as UTF-8.
          validate(code notin 0xDC00..0xDFFF)
        add(buf, Rune code)
      else:
        validate(false)
    else:
      add(buf, capture[i])
    inc(i)
|
||||
|
||||
template unescape(buf: var seq[byte]; capture: string) =
  ## Append the bytes of `capture` to `buf`, decoding backslash escapes.
  ##
  ## Handles `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`, `\"` and `\xHH`
  ## (two hex digits yielding one byte). Any other escape selector is
  ## reported with `validate(false)`; `validate` is resolved where the
  ## template is expanded.
  var pos: int
  while pos < len(capture):
    let current = capture[pos]
    inc(pos)
    if current != '\\':
      # Ordinary character: copy it through unchanged.
      add(buf, byte current)
      continue
    # `pos` now indexes the escape selector following the backslash.
    let selector = capture[pos]
    inc(pos)
    case selector
    of '\\': add(buf, 0x5c'u8)
    of '/': add(buf, 0x2f'u8)
    of 'b': add(buf, 0x08'u8)
    of 'f': add(buf, 0x0c'u8)
    of 'n': add(buf, 0x0a'u8)
    of 'r': add(buf, 0x0d'u8)
    of 't': add(buf, 0x09'u8)
    of '"': add(buf, 0x22'u8)
    of 'x':
      var octet: byte
      discard parseHex(capture, octet, pos, 2)
      # Step over both hex digits.
      inc(pos, 2)
      add(buf, octet)
    else:
      validate(false)
|
||||
|
||||
proc parsePreserves*(text: string): Preserve[void] {.gcsafe.} =
|
||||
## Parse a text-encoded Preserves `string` to a `Preserve` value.
|
||||
runnableExamples:
|
||||
|
@ -89,35 +143,13 @@ proc parsePreserves*(text: string): Preserve[void] {.gcsafe.} =
|
|||
pushStack Value(kind: pkSignedInteger, int: parseInt($0))
|
||||
|
||||
Preserves.String <- Preserves.String:
|
||||
pushStack Value(kind: pkString, string: unescape($0).replace("\\n", "\n"))
|
||||
var v = Value(kind: pkString, string: newStringOfCap(len($1)))
|
||||
unescape(v.string, $1)
|
||||
pushStack v
|
||||
|
||||
Preserves.charByteString <- Preserves.charByteString:
|
||||
let chars = $1
|
||||
var
|
||||
v = Value(kind: pkByteString, bytes: newSeqOfCap[byte](chars.len))
|
||||
i: int
|
||||
while i < len(chars):
|
||||
if chars[i] == '\\':
|
||||
inc(i)
|
||||
case chars[i]
|
||||
of '\\': add(v.bytes, 0x5c'u8)
|
||||
of '/': add(v.bytes, 0x2f'u8)
|
||||
of 'b': add(v.bytes, 0x08'u8)
|
||||
of 'f': add(v.bytes, 0x0c'u8)
|
||||
of 'n': add(v.bytes, 0x0a'u8)
|
||||
of 'r': add(v.bytes, 0x0d'u8)
|
||||
of 't': add(v.bytes, 0x09'u8)
|
||||
of '"': add(v.bytes, 0x22'u8)
|
||||
of 'x':
|
||||
var b: byte
|
||||
inc(i)
|
||||
discard parseHex(chars, b, i, 2)
|
||||
inc(i)
|
||||
add(v.bytes, b)
|
||||
else: discard
|
||||
else:
|
||||
add(v.bytes, byte chars[i])
|
||||
inc(i)
|
||||
var v = Value(kind: pkByteString, bytes: newSeqOfCap[byte](len($1)))
|
||||
unescape(v.bytes, $1)
|
||||
pushStack v
|
||||
|
||||
Preserves.hexByteString <- Preserves.hexByteString:
|
||||
|
|
Loading…
Reference in New Issue