From 44f98163d91b33c48c9d1f57b141a0c0bd6fd22a Mon Sep 17 00:00:00 2001
From: Emery Hemingway
Date: Sun, 24 Dec 2023 21:07:05 +0200
Subject: [PATCH] Unify string and symbol escaping

---
Review notes (not part of the commit message):
 * writeEscaped: code points above U+FFFF are now written as a UTF-16
   surrogate pair instead of being narrowed to one 16-bit \u escape.
 * writeSymbol: bare output now uses an ASCII allowlist; previously a
   symbol containing a space, or starting with `[` or `]`, was written
   unquoted.
 * NOTE(review): line breaks and the indentation of context lines were
   reconstructed from a whitespace-collapsed copy of this patch; verify
   them against the repository before applying. The `index` line is
   unchanged from the original patch.

 src/preserves/private/texts.nim | 131 ++++++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 57 deletions(-)

diff --git a/src/preserves/private/texts.nim b/src/preserves/private/texts.nim
index 247d659..de62564 100644
--- a/src/preserves/private/texts.nim
+++ b/src/preserves/private/texts.nim
@@ -1,42 +1,76 @@
 # SPDX-FileCopyrightText: ☭ Emery Hemingway
 # SPDX-License-Identifier: Unlicense
 
-import std/[base64, endians, json, math, options, sets, sequtils, streams, strutils, tables, typetraits]
+import std/[base64, bitops, endians, math, sequtils, streams, strutils, unicode]
+import bigints
 import ./values
 
-proc `$`*(s: Symbol): string =
-  let sym = string s
-  if sym.len > 0 and sym[0] in {'A'..'z'} and not sym.anyIt(char(it) in { '\x00'..'\x19', '"', '\\', '|' }):
-    result = sym
-  else:
-    result = newStringOfCap(sym.len shl 1)
-    result.add('|')
-    for c in sym:
-      case c
-      of '\\':
-        result.add("\\\\")
-      of '/':
-        result.add("\\/")
-      of '\x08':
-        result.add("\\b")
-      of '\x0c':
-        result.add("\\f")
-      of '\x0a':
-        result.add("\\n")
-      of '\x0d':
-        result.add("\\r")
-      of '\x09':
-        result.add("\\t")
-      of '|':
-        result.add("\\|")
-      else:
-        result.add(c)
-    result.add('|')
-
 const hexAlphabet = "0123456789abcdef"
 
 type TextMode* = enum textPreserves, textJson
 
+template writeEscaped(stream: Stream; text: string; delim: char) =
+  ## Write `text` to `stream` with backslash escapes so that it can be
+  ## placed between a pair of `delim` characters.
+  ## ASCII control characters and all non-ASCII code points are written
+  ## as `\u____` escapes, so the output is plain ASCII.
+  ## `delim` is used as a `case` label and must be a constant expression.
+  const escaped = { '"', '\\', '\b', '\f', '\n', '\r', '\t' }
+  var
+    i: int
+    r: Rune
+    c: char
+  while i < text.len:
+    c = text[i]
+    if (c.ord and 0x80) == 0x00:
+      case c
+      of delim:
+        write(stream, '\\')
+        write(stream, delim)
+      of '\\': write(stream, "\\\\")
+      of '\b': write(stream, "\\b")
+      of '\f': write(stream, "\\f")
+      of '\n': write(stream, "\\n")
+      of '\r': write(stream, "\\r")
+      of '\t': write(stream, "\\t")
+      of { '\x00'..'\x1f', '\x7f' } - escaped:
+        # do not use \x__ notation because
+        # it is a subset of \u____.
+        write(stream, "\\u00")
+        write(stream, c.uint8.toHex(2))
+      else: write(stream, c)
+      inc i
+    else:
+      fastRuneAt(text, i, r)
+      if r.int32 <= 0xffff:
+        write(stream, "\\u")
+        write(stream, r.uint16.toHex(4))
+      else:
+        # Code points above U+FFFF do not fit in one \u____ escape;
+        # write them as a UTF-16 surrogate pair.
+        let cp = r.int32 - 0x10000
+        write(stream, "\\u")
+        write(stream, uint16(0xd800 + ((cp shr 10) and 0x3ff)).toHex(4))
+        write(stream, "\\u")
+        write(stream, uint16(0xdc00 + (cp and 0x3ff)).toHex(4))
+
+proc writeSymbol(stream: Stream; sym: string) =
+  ## Write `sym` bare when every character is safe outside of quotes,
+  ## otherwise write it in the escaped `|…|` form.
+  # Bare output is limited to an ASCII allowlist that starts with a letter,
+  # so whitespace, brackets and control characters always get quoted.
+  # NOTE(review): confirm the punctuation set against the Preserves text syntax.
+  const
+    bareStart = {'A'..'Z', 'a'..'z'}
+    bareCont = bareStart + {'0'..'9'} +
+      {'~', '!', '$', '%', '^', '&', '*', '?', '_', '=', '+', '-', '/', '.'}
+  if sym.len > 0 and sym[0] in bareStart and sym.allIt(it in bareCont):
+    write(stream, sym)
+  else:
+    write(stream, '|')
+    writeEscaped(stream, sym, '|')
+    write(stream, '|')
+
 proc writeText*[E](stream: Stream; pr: Preserve[E]; mode = textPreserves) =
   ## Encode Preserves to a `Stream` as text.
   if pr.embedded: write(stream, "#!")
@@ -75,7 +109,9 @@ proc writeText*[E](stream: Stream; pr: Preserve[E]; mode = textPreserves) =
   of pkBigInt:
     write(stream, $pr.bigint)
   of pkString:
-    write(stream, escapeJson(pr.string))
+    write(stream, '"')
+    writeEscaped(stream, pr.string, '"')
+    write(stream, '"')
   of pkByteString:
     if pr.bytes.allIt(char(it) in {' '..'!', '#'..'~'}):
       write(stream, "#\"")
@@ -93,32 +129,7 @@ proc writeText*[E](stream: Stream; pr: Preserve[E]; mode = textPreserves) =
         write(stream, hexAlphabet[b.int and 0xf])
       write(stream, '"')
   of pkSymbol:
-    let sym = pr.symbol.string
-    if sym.len > 0 and sym[0] in {'A'..'z'} and not sym.anyIt(char(it) in { '\x00'..'\x19', '"', '\\', '|' }):
-      write(stream, sym)
-    else:
-      write(stream, '|')
-      for c in sym:
-        case c
-        of '\\':
-          write(stream, "\\\\")
-        of '/':
-          write(stream, "\\/")
-        of '\x08':
-          write(stream, "\\b")
-        of '\x0c':
-          write(stream, "\\f")
-        of '\x0a':
-          write(stream, "\\n")
-        of '\x0d':
-          write(stream, "\\r")
-        of '\x09':
-          write(stream, "\\t")
-        of '|':
-          write(stream, "\\|")
-        else:
-          write(stream, c)
-      write(stream, '|')
+    writeSymbol(stream, pr.symbol.string)
   of pkRecord:
     assert(pr.record.len > 0)
     write(stream, '<')
@@ -176,6 +187,12 @@ proc writeText*[E](stream: Stream; pr: Preserve[E]; mode = textPreserves) =
     else:
       write(stream, "…")
 
+proc `$`*(sym: Symbol): string =
+  ## Generate the textual representation of ``sym``.
+  var stream = newStringStream()
+  writeSymbol(stream, sym.string)
+  result = move stream.data
+
 proc `$`*[E](pr: Preserve[E]): string =
   ## Generate the textual representation of ``pr``.
   var stream = newStringStream()