Fix string escaping
This commit is contained in:
parent
201cb7c68e
commit
489d6b31d5
|
@ -1,6 +1,6 @@
|
||||||
# Package
|
# Package
|
||||||
|
|
||||||
version = "20221027"
|
version = "20221030"
|
||||||
author = "Emery Hemingway"
|
author = "Emery Hemingway"
|
||||||
description = "data model and serialization format"
|
description = "data model and serialization format"
|
||||||
license = "Unlicense"
|
license = "Unlicense"
|
||||||
|
|
|
@ -40,7 +40,8 @@ grammar "Preserves":
|
||||||
exp <- 'e' * ?('-'|'+') * +Digit
|
exp <- 'e' * ?('-'|'+') * +Digit
|
||||||
flt <- int * ((frac * exp) | frac | exp)
|
flt <- int * ((frac * exp) | frac | exp)
|
||||||
|
|
||||||
String <- '"' * *(escape * (escaped | unicodeEscaped) | (utf8.any - '"')) * '"'
|
char <- unescaped | '|' | (escape * (escaped | '"' | ('u' * Xdigit[4])))
|
||||||
|
String <- '"' * >(*char) * '"'
|
||||||
|
|
||||||
ByteString <- charByteString | hexByteString | b64ByteString
|
ByteString <- charByteString | hexByteString | b64ByteString
|
||||||
charByteString <- "#\"" * >(*binchar) * '"'
|
charByteString <- "#\"" * >(*binchar) * '"'
|
||||||
|
@ -48,7 +49,7 @@ grammar "Preserves":
|
||||||
b64ByteString <- "#[" * ws * >(*(base64char * ws)) * ']'
|
b64ByteString <- "#[" * ws * >(*(base64char * ws)) * ']'
|
||||||
|
|
||||||
binchar <- binunescaped | (escape * (escaped | '"' | ('x' * Xdigit[2])))
|
binchar <- binunescaped | (escape * (escaped | '"' | ('x' * Xdigit[2])))
|
||||||
binunescaped <- {'\20'..'\21', '#'..'[', ']'..'~'}
|
binunescaped <- {' '..'!', '#'..'[', ']'..'~'}
|
||||||
base64char <- {'A'..'Z', 'a'..'z', '0'..'9', '+', '/', '-', '_', '='}
|
base64char <- {'A'..'Z', 'a'..'z', '0'..'9', '+', '/', '-', '_', '='}
|
||||||
|
|
||||||
Symbol <- (symstart * *symcont) | ('|' * *symchar * '|')
|
Symbol <- (symstart * *symcont) | ('|' * *symchar * '|')
|
||||||
|
@ -65,9 +66,9 @@ grammar "Preserves":
|
||||||
|
|
||||||
Compact <- "#=" * ws * ByteString
|
Compact <- "#=" * ws * ByteString
|
||||||
|
|
||||||
unescaped <- utf8.any - escaped
|
unescaped <- utf8.any - { '\x00'..'\x19', '"', '\\', '|' }
|
||||||
unicodeEscaped <- 'u' * Xdigit[4]
|
unicodeEscaped <- 'u' * Xdigit[4]
|
||||||
escaped <- {'{', '"', '|', '\\', 'b', 'f', 'n', 'r', 't'}
|
escaped <- {'\\', '/', 'b', 'f', 'n', 'r', 't'}
|
||||||
escape <- '\\'
|
escape <- '\\'
|
||||||
|
|
||||||
ws <- *(' ' | '\t' | '\r' | '\n' | ',')
|
ws <- *(' ' | '\t' | '\r' | '\n' | ',')
|
||||||
|
|
|
@ -3,7 +3,9 @@
|
||||||
|
|
||||||
# this module is included in ../../preserves.nim
|
# this module is included in ../../preserves.nim
|
||||||
|
|
||||||
import std/[parseutils, strutils]
|
import std/[parseutils, unicode]
|
||||||
|
from std/sequtils import insert
|
||||||
|
from std/strutils import Whitespace, parseFloat, parseHexStr, parseInt, tokenize
|
||||||
import npeg
|
import npeg
|
||||||
import ../pegs
|
import ../pegs
|
||||||
|
|
||||||
|
@ -21,6 +23,58 @@ proc joinWhitespace(s: string): string =
|
||||||
for token, isSep in tokenize(s, Whitespace + {','}):
|
for token, isSep in tokenize(s, Whitespace + {','}):
|
||||||
if not isSep: add(result, token)
|
if not isSep: add(result, token)
|
||||||
|
|
||||||
|
template unescape(buf: var string; capture: string) =
  ## Append `capture` to `buf`, decoding backslash escapes on the way.
  ##
  ## Recognised escapes are `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`, `\"`
  ## and `\uXXXX` (four hex digits). A `\uXXXX` high surrogate must be
  ## followed immediately by a `\uXXXX` low surrogate; the pair is combined
  ## into a single code point before it is appended as UTF-8, as in JSON
  ## strings. Every other character is copied through unchanged.
  ##
  ## Malformed input (unknown escape, trailing backslash, short hex run,
  ## unpaired surrogate) is reported with `validate`, which is looked up at
  ## the expansion site.
  ## NOTE(review): presumably this is npeg's `validate`, which fails the
  ## current match and leaves the code block -- confirm.
  ##
  ## `capture` is substituted textually, so the argument expression is
  ## evaluated each time the template body mentions it.
  var i: int
  while i < len(capture):
    if capture[i] != '\\':
      add(buf, capture[i])
    else:
      inc(i)
      # A lone backslash at the very end has nothing to escape.
      validate(i < len(capture))
      case capture[i]
      of '\\': add(buf, char 0x5c)
      of '/': add(buf, char 0x2f)
      of 'b': add(buf, char 0x08)
      of 'f': add(buf, char 0x0c)
      of 'n': add(buf, char 0x0a)
      of 'r': add(buf, char 0x0d)
      of 't': add(buf, char 0x09)
      of '"': add(buf, char 0x22)
      of 'u':
        var r: int32
        inc(i)
        validate(parseHex(capture, r, i, 4) == 4)
        # Park `i` on the last hex digit; the shared `inc(i)` at the bottom
        # of the loop steps past it.
        inc(i, 3)
        if r >= 0xD800 and r <= 0xDBFF:
          # High surrogate: the next six characters must be `\uDC00`..`\uDFFF`.
          var lo: int32
          validate(i + 6 < len(capture) and
                   capture[i + 1] == '\\' and capture[i + 2] == 'u' and
                   parseHex(capture, lo, i + 3, 4) == 4 and
                   lo >= 0xDC00 and lo <= 0xDFFF)
          r = 0x10000'i32 + ((r - 0xD800) shl 10) + (lo - 0xDC00)
          # Skip the second escape, again stopping on its last hex digit.
          inc(i, 6)
        elif r >= 0xDC00 and r <= 0xDFFF:
          # A low surrogate without a preceding high surrogate is not a
          # Unicode scalar value and cannot be encoded as UTF-8.
          validate(false)
        add(buf, Rune r)
      else:
        validate(false)
    inc(i)
|
||||||
|
|
||||||
|
template unescape(buf: var seq[byte]; capture: string) =
  ## Append the bytes of `capture` to `buf`, decoding backslash escapes.
  ##
  ## Recognised escapes are `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`, `\"`
  ## and `\xHH`, where the two hex digits give one raw byte. Every other
  ## character is appended as its byte value.
  ##
  ## Malformed input (unknown escape, trailing backslash, short hex run) is
  ## reported with `validate`, which is looked up at the expansion site.
  ## NOTE(review): presumably this is npeg's `validate`, which fails the
  ## current match and leaves the code block -- confirm.
  ##
  ## `capture` is substituted textually, so the argument expression is
  ## evaluated each time the template body mentions it.
  var i: int
  while i < len(capture):
    if capture[i] != '\\':
      add(buf, byte capture[i])
    else:
      inc(i)
      # A lone backslash at the very end has nothing to escape.
      validate(i < len(capture))
      case capture[i]
      of '\\': add(buf, 0x5c'u8)
      of '/': add(buf, 0x2f'u8)
      of 'b': add(buf, 0x08'u8)
      of 'f': add(buf, 0x0c'u8)
      of 'n': add(buf, 0x0a'u8)
      of 'r': add(buf, 0x0d'u8)
      of 't': add(buf, 0x09'u8)
      of '"': add(buf, 0x22'u8)
      of 'x':
        var b: byte
        inc(i)
        # Require both hex digits instead of silently accepting a short run.
        validate(parseHex(capture, b, i, 2) == 2)
        # Park `i` on the second hex digit; the shared `inc(i)` at the
        # bottom of the loop steps past it.
        inc(i)
        add(buf, b)
      else:
        validate(false)
    inc(i)
|
||||||
|
|
||||||
proc parsePreserves*(text: string): Preserve[void] {.gcsafe.} =
|
proc parsePreserves*(text: string): Preserve[void] {.gcsafe.} =
|
||||||
## Parse a text-encoded Preserves `string` to a `Preserve` value.
|
## Parse a text-encoded Preserves `string` to a `Preserve` value.
|
||||||
runnableExamples:
|
runnableExamples:
|
||||||
|
@ -89,35 +143,13 @@ proc parsePreserves*(text: string): Preserve[void] {.gcsafe.} =
|
||||||
pushStack Value(kind: pkSignedInteger, int: parseInt($0))
|
pushStack Value(kind: pkSignedInteger, int: parseInt($0))
|
||||||
|
|
||||||
Preserves.String <- Preserves.String:
|
Preserves.String <- Preserves.String:
|
||||||
pushStack Value(kind: pkString, string: unescape($0).replace("\\n", "\n"))
|
var v = Value(kind: pkString, string: newStringOfCap(len($1)))
|
||||||
|
unescape(v.string, $1)
|
||||||
|
pushStack v
|
||||||
|
|
||||||
Preserves.charByteString <- Preserves.charByteString:
|
Preserves.charByteString <- Preserves.charByteString:
|
||||||
let chars = $1
|
var v = Value(kind: pkByteString, bytes: newSeqOfCap[byte](len($1)))
|
||||||
var
|
unescape(v.bytes, $1)
|
||||||
v = Value(kind: pkByteString, bytes: newSeqOfCap[byte](chars.len))
|
|
||||||
i: int
|
|
||||||
while i < len(chars):
|
|
||||||
if chars[i] == '\\':
|
|
||||||
inc(i)
|
|
||||||
case chars[i]
|
|
||||||
of '\\': add(v.bytes, 0x5c'u8)
|
|
||||||
of '/': add(v.bytes, 0x2f'u8)
|
|
||||||
of 'b': add(v.bytes, 0x08'u8)
|
|
||||||
of 'f': add(v.bytes, 0x0c'u8)
|
|
||||||
of 'n': add(v.bytes, 0x0a'u8)
|
|
||||||
of 'r': add(v.bytes, 0x0d'u8)
|
|
||||||
of 't': add(v.bytes, 0x09'u8)
|
|
||||||
of '"': add(v.bytes, 0x22'u8)
|
|
||||||
of 'x':
|
|
||||||
var b: byte
|
|
||||||
inc(i)
|
|
||||||
discard parseHex(chars, b, i, 2)
|
|
||||||
inc(i)
|
|
||||||
add(v.bytes, b)
|
|
||||||
else: discard
|
|
||||||
else:
|
|
||||||
add(v.bytes, byte chars[i])
|
|
||||||
inc(i)
|
|
||||||
pushStack v
|
pushStack v
|
||||||
|
|
||||||
Preserves.hexByteString <- Preserves.hexByteString:
|
Preserves.hexByteString <- Preserves.hexByteString:
|
||||||
|
|
Loading…
Reference in New Issue