diff --git a/preserves.nimble b/preserves.nimble
index d008e2f..b4642ef 100644
--- a/preserves.nimble
+++ b/preserves.nimble
@@ -1,6 +1,6 @@
 # Package
 
-version = "20231222"
+version = "20231224"
 author = "Emery Hemingway"
 description = "data model and serialization format"
 license = "Unlicense"
diff --git a/src/preserves/private/decoding.nim b/src/preserves/private/decoding.nim
index e22af86..d9e06b5 100644
--- a/src/preserves/private/decoding.nim
+++ b/src/preserves/private/decoding.nim
@@ -76,12 +76,10 @@ proc decodePreserves*(s: Stream; E = void): Preserve[E] =
       else:
         result.bigint.fromBytes(buf, bigEndian)
   of 0xb1:
-    var data = newString(s.readVarint())
-    if data.len > 0:
-      let n = s.readData(unsafeAddr data[0], data.len)
-      if n != data.len:
+    result = Preserve[E](kind: pkString, string: newString(s.readVarint()))
+    if result.string.len > 0:
+      if s.readData(addr result.string[0], result.string.len) != result.string.len:
         raise newException(IOError, "short read")
-    result = Preserve[E](kind: pkString, string: data)
   of 0xb2:
     var data = newSeq[byte](s.readVarint())
     if data.len > 0:
diff --git a/src/preserves/private/parsing.nim b/src/preserves/private/parsing.nim
index 544c091..2bd421c 100644
--- a/src/preserves/private/parsing.nim
+++ b/src/preserves/private/parsing.nim
@@ -38,19 +38,37 @@ template unescape*(buf: var string; capture: string) =
       of 't': add(buf, char 0x09)
       of '"': add(buf, char 0x22)
       of 'u':
-        var
-          long: uint32
-          short: uint16
+        # \uXXXX carries one UTF-16 code unit; a code point above
+        # U+FFFF arrives as a high/low surrogate pair of escapes.
+        var short: uint16
         inc(i)
         discard parseHex(capture, short, i, 4)
         inc(i, 3)
-        long = uint32(short)
-        if capture[i] == 'u':
-          inc(i)
-          discard parseHex(capture, short, i, 4)
+        if short < 0xd800 or 0xdfff < short:
+          # Any code unit outside the surrogate range is a scalar value.
+          add(buf, Rune(short).toUtf8)
+        elif short <= 0xdbff:
+          # High surrogate: "\uXXXX" for the low half must follow.
+          if i+6 >= capture.len:
+            raise newException(ValueError, "Invalid UTF-16 surrogate pair")
+          # Widen and mask before shifting; a uint16 shift would drop
+          # the upper payload bits of the high surrogate.
+          var rune = ((uint32(short) and 0x3ff) shl 10) + 0x10000
+          validate(capture[i+1] == '\\')
+          validate(capture[i+2] == 'u')
           inc(i, 3)
-          long = (long shl 16) or uint32(short)
-        add(buf, long.Rune.toUtf8)
+          discard parseHex(capture, short, i, 4)
+          if (short shr 10) != 0b110111:
+            raise newException(ValueError, "Invalid UTF-16 surrogate pair")
+          inc(i, 3)
+          rune = rune or (uint32(short) and 0x3ff)
+          # Supplementary code points always take four UTF-8 bytes.
+          let j = buf.len
+          buf.setLen(buf.len+4)
+          rune.Rune.fastToUTF8Copy(buf, j, false)
+        else:
+          # A low surrogate with no preceding high surrogate.
+          raise newException(ValueError, "Invalid UTF-16 escape sequence " & capture)
       else:
         validate(false)
     else:
diff --git a/src/preserves/private/texts.nim b/src/preserves/private/texts.nim
index de62564..e1d2af3 100644
--- a/src/preserves/private/texts.nim
+++ b/src/preserves/private/texts.nim
@@ -13,32 +13,26 @@ template writeEscaped(stream: Stream; text: string; delim: char) =
   const escaped = { '"', '\\', '\b', '\f', '\n', '\r', '\t' }
   var
     i: int
-    r: Rune
     c: char
   while i < text.len:
     c = text[i]
-    if (c.ord and 0x80) == 0x00:
-      case c
-      of delim:
-        write(stream, '\\')
-        write(stream, delim)
-      of '\\': write(stream, "\\\\")
-      of '\b': write(stream, "\\b")
-      of '\f': write(stream, "\\f")
-      of '\n': write(stream, "\\n")
-      of '\r': write(stream, "\\r")
-      of '\t': write(stream, "\\t")
-      of { '\x00'..'\x1f', '\x7f' } - escaped:
-        # do not use \x__ notation because
-        # it is a subset of \u____.
-        write(stream, "\\u00")
-        write(stream, c.uint8.toHex(2))
-      else: write(stream, c)
-      inc i
-    else:
-      fastRuneAt(text, i, r)
-      write(stream, "\\u")
-      write(stream, r.uint16.toHex(4))
+    case c
+    of delim:
+      write(stream, '\\')
+      write(stream, delim)
+    of '\\': write(stream, "\\\\")
+    of '\b': write(stream, "\\b")
+    of '\f': write(stream, "\\f")
+    of '\n': write(stream, "\\n")
+    of '\r': write(stream, "\\r")
+    of '\t': write(stream, "\\t")
+    of { '\x00'..'\x1f', '\x7f' } - escaped:
+      # do not use \x__ notation because
+      # it is a subset of \u____.
+      write(stream, "\\u00")
+      write(stream, c.uint8.toHex(2))
+    else: write(stream, c)
+    inc i
 
 proc writeSymbol(stream: Stream; sym: string) =
   if sym.len > 0 and sym[0] in {'A'..'z'} and not sym.anyIt(char(it) in { '\x00'..'\x19', '"', '\\', '|' }):