Unify string and symbol escaping

This commit is contained in:
Emery Hemingway 2023-12-24 21:07:05 +02:00
parent 32ed35adce
commit 44f98163d9
1 changed file with 50 additions and 57 deletions

View File

@ -1,42 +1,53 @@
# SPDX-FileCopyrightText: ☭ Emery Hemingway
# SPDX-License-Identifier: Unlicense
import std/[base64, endians, json, math, options, sets, sequtils, streams, strutils, tables, typetraits]
import std/[base64, bitops, endians, math, sequtils, streams, strutils, unicode]
import bigints
import ./values
proc `$`*(s: Symbol): string =
  ## Render a symbol in Preserves text syntax.
  ##
  ## The symbol is returned bare only when it is non-empty, starts with an
  ## ASCII letter and consists solely of characters that are safe in a bare
  ## symbol token. Anything else is wrapped in `|...|` with escaping.
  const
    # First character of a bare symbol: ASCII letters only. The range
    # 'A'..'z' would also admit `[`, `\`, `]`, `^`, `_` and the backtick.
    bareStart = {'A'..'Z', 'a'..'z'}
    # Deliberately conservative whitelist: whitespace, control characters,
    # delimiters and non-ASCII bytes all force the quoted form, which is
    # always valid.
    bareChars = bareStart +
      {'0'..'9', '-', '_', '.', '/', '?', '!', '*', '+', '='}
    shortEscaped = {'\b', '\f', '\n', '\r', '\t'}
  let sym = string s
  if sym.len > 0 and sym[0] in bareStart and sym.allIt(it in bareChars):
    result = sym
  else:
    result = newStringOfCap(sym.len shl 1)
    result.add('|')
    for c in sym:
      case c
      of '\\': result.add("\\\\")
      of '/': result.add("\\/")
      of '\b': result.add("\\b")
      of '\f': result.add("\\f")
      of '\n': result.add("\\n")
      of '\r': result.add("\\r")
      of '\t': result.add("\\t")
      of '|': result.add("\\|")
      of {'\x00'..'\x1f', '\x7f'} - shortEscaped:
        # Remaining control characters have no short escape; emit them as
        # \u00XX instead of writing raw control bytes into the output.
        result.add("\\u00")
        result.add(toHex(c.uint8, 2))
      else:
        result.add(c)
    result.add('|')
# Lowercase hexadecimal digits; indexed with a nibble value (0..15) when
# byte strings are written out in hex form.
const hexAlphabet = "0123456789abcdef"
# Selector for the `mode` parameter of `writeText` (default `textPreserves`).
# NOTE(review): presumably `textJson` restricts output to JSON-compatible
# syntax; confirm against the parts of `writeText` that read `mode`.
type TextMode* = enum textPreserves, textJson
template writeEscaped(stream: Stream; text: string; delim: char) =
  ## Write `text` to `stream` with escaping applied; the surrounding
  ## delimiters are NOT written and must be emitted by the caller.
  ##
  ## * `delim` is the quoting character in use (`'"'` or `'|'`) and is
  ##   written as a backslash followed by itself. It must be a compile-time
  ##   constant because it is used as a `case` label.
  ## * Backslash and the control characters with a short form are written
  ##   as `\\`, `\b`, `\f`, `\n`, `\r`, `\t`.
  ## * Other ASCII control characters and DEL are written as `\u00XX`.
  ## * Non-ASCII text is decoded as UTF-8 and written as `\uXXXX`; code
  ##   points above U+FFFF are written as a UTF-16 surrogate pair.
  const escaped = { '"', '\\', '\b', '\f', '\n', '\r', '\t' }
  var
    i: int
    r: Rune
    c: char
  while i < text.len:
    c = text[i]
    if (c.ord and 0x80) == 0x00:
      # Single-byte (ASCII) character.
      case c
      of delim:
        write(stream, '\\')
        write(stream, delim)
      of '\\': write(stream, "\\\\")
      of '\b': write(stream, "\\b")
      of '\f': write(stream, "\\f")
      of '\n': write(stream, "\\n")
      of '\r': write(stream, "\\r")
      of '\t': write(stream, "\\t")
      of { '\x00'..'\x1f', '\x7f' } - escaped:
        # do not use \x__ notation because
        # it is a subset of \u____.
        write(stream, "\\u00")
        write(stream, c.uint8.toHex(2))
      else: write(stream, c)
      inc i
    else:
      # Multi-byte sequence: decode one rune; `fastRuneAt` advances `i`.
      fastRuneAt(text, i, r)
      let codepoint = r.int32
      if codepoint <= 0xFFFF:
        write(stream, "\\u")
        write(stream, toHex(uint16(codepoint), 4))
      else:
        # A single \uXXXX holds only 16 bits, so truncating would emit a
        # different character. Split into high and low surrogates.
        let offset = codepoint - 0x10000
        write(stream, "\\u")
        write(stream, toHex(uint16(0xD800 + (offset shr 10)), 4))
        write(stream, "\\u")
        write(stream, toHex(uint16(0xDC00 + (offset and 0x3FF)), 4))
proc writeSymbol(stream: Stream; sym: string) =
  ## Write `sym` to `stream` as a Preserves text symbol.
  ##
  ## The symbol is written bare only when it is non-empty, starts with an
  ## ASCII letter and consists solely of characters that are safe in a bare
  ## symbol token. Otherwise it is written in the quoted `|...|` form with
  ## its content passed through `writeEscaped`.
  const
    # First character of a bare symbol: ASCII letters only. The range
    # 'A'..'z' would also admit `[`, `\`, `]`, `^`, `_` and the backtick.
    bareStart = {'A'..'Z', 'a'..'z'}
    # Deliberately conservative whitelist: whitespace (including space,
    # which a check against '\x00'..'\x19' lets through), control
    # characters, delimiters and non-ASCII bytes all force the quoted
    # form, which is always valid.
    bareChars = bareStart +
      {'0'..'9', '-', '_', '.', '/', '?', '!', '*', '+', '='}
  if sym.len > 0 and sym[0] in bareStart and sym.allIt(it in bareChars):
    write(stream, sym)
  else:
    write(stream, '|')
    writeEscaped(stream, sym, '|')
    write(stream, '|')
proc writeText*[E](stream: Stream; pr: Preserve[E]; mode = textPreserves) =
## Encode Preserves to a `Stream` as text.
if pr.embedded: write(stream, "#!")
@ -75,7 +86,9 @@ proc writeText*[E](stream: Stream; pr: Preserve[E]; mode = textPreserves) =
of pkBigInt:
write(stream, $pr.bigint)
of pkString:
write(stream, escapeJson(pr.string))
write(stream, '"')
writeEscaped(stream, pr.string, '"')
write(stream, '"')
of pkByteString:
if pr.bytes.allIt(char(it) in {' '..'!', '#'..'~'}):
write(stream, "#\"")
@ -93,32 +106,7 @@ proc writeText*[E](stream: Stream; pr: Preserve[E]; mode = textPreserves) =
write(stream, hexAlphabet[b.int and 0xf])
write(stream, '"')
of pkSymbol:
let sym = pr.symbol.string
if sym.len > 0 and sym[0] in {'A'..'z'} and not sym.anyIt(char(it) in { '\x00'..'\x19', '"', '\\', '|' }):
write(stream, sym)
else:
write(stream, '|')
for c in sym:
case c
of '\\':
write(stream, "\\\\")
of '/':
write(stream, "\\/")
of '\x08':
write(stream, "\\b")
of '\x0c':
write(stream, "\\f")
of '\x0a':
write(stream, "\\n")
of '\x0d':
write(stream, "\\r")
of '\x09':
write(stream, "\\t")
of '|':
write(stream, "\\|")
else:
write(stream, c)
write(stream, '|')
writeSymbol(stream, pr.symbol.string)
of pkRecord:
assert(pr.record.len > 0)
write(stream, '<')
@ -176,6 +164,11 @@ proc writeText*[E](stream: Stream; pr: Preserve[E]; mode = textPreserves) =
else:
write(stream, "")
proc `$`*(sym: Symbol): string =
  ## Textual form of `sym`: whatever `writeSymbol` emits for the
  ## underlying string, collected from an in-memory stream.
  var buffer = newStringStream()
  buffer.writeSymbol(string(sym))
  # Take ownership of the stream's buffer instead of copying it.
  move(buffer.data)
proc `$`*[E](pr: Preserve[E]): string =
## Generate the textual representation of ``pr``.
var stream = newStringStream()