Store embedded values as pointers

Reduce Preserve from a generic to a concrete type.
This commit is contained in:
Emery Hemingway 2021-06-15 12:48:20 +02:00
parent 32ce6a71fe
commit 6ebb752690
4 changed files with 260 additions and 222 deletions

View File

@ -4,4 +4,3 @@ Missing features:
* embedded values
* parsing from human-readable encoding
* ordering of compound values
* conversion of Nim tuples and objects

View File

@ -9,4 +9,4 @@ srcDir = "src"
# Dependencies
requires "nim >= 1.4.6", "bigints"
requires "nim >= 1.4.8", "bigints"

View File

@ -5,12 +5,11 @@ import bigints
type
PreserveKind* = enum
pkBoolean, pkFloat, pkDouble, pkSignedInteger, pkBigInteger, pkString,
pkByteString, pkSymbol, pkRecord, pkSequence, pkSet, pkDictionary, pkEmbedded
pkBoolean, pkFloat, pkDouble, pkSignedInteger, pkBigInteger, pkString, pkByteString,
pkSymbol, pkRecord, pkSequence, pkSet, pkDictionary, pkEmbedded
Preserve*[T] {.acyclic.} = object
Preserve* {.acyclic.} = object
## Type that stores a Preserves value.
## ``T`` is the domain-specific type of "embedded" values, otherwise ``void``.
case kind*: PreserveKind
of pkBoolean:
bool*: bool
@ -29,17 +28,124 @@ type
of pkSymbol:
symbol*: string
of pkRecord:
record*: seq[Preserve[T]] # record[0] is the label
record*: seq[Preserve] # record[0] is the label
of pkSequence:
seq*: seq[Preserve[T]]
sequence*: seq[Preserve]
of pkSet:
set*: HashSet[Preserve[T]]
set*: HashSet[Preserve]
of pkDictionary:
dict*: Table[Preserve[T], Preserve[T]]
dict*: Table[Preserve, Preserve]
of pkEmbedded:
embedded*: T
embedded*: pointer
proc `$`*[T](prs: Preserve[T]): string =
proc `<`(x, y: string | seq[byte]): bool =
  ## Lexicographic "less than" for strings and byte strings.
  ##
  ## The first position at which the operands differ decides the result. When
  ## one operand is a prefix of the other, the shorter one orders first.
  for i in 0 .. min(x.high, y.high):
    if x[i] != y[i]:
      # Stop at the first difference; continuing to scan would let a later,
      # smaller element override an earlier, larger one.
      return x[i] < y[i]
  x.len < y.len
proc `<`*(x, y: Preserve): bool =
  ## Ordering over Preserves values.
  ##
  ## Values of different kinds order by the declaration order of
  ## ``PreserveKind``, except that the two integer representations
  ## (``pkSignedInteger`` and ``pkBigInteger``) are compared numerically with
  ## each other. Atomic values of the same kind compare by their payload.
  ## Compound and embedded values of the same kind have no ordering defined
  ## here and always compare as not-less.
  if x.kind != y.kind:
    if x.kind == pkSignedInteger and y.kind == pkBigInteger:
      result = x.int.initBigInt < y.bigint
    elif x.kind == pkBigInteger and y.kind == pkSignedInteger:
      result = x.bigint < y.int.initBigInt
    else:
      result = x.kind < y.kind
  else:
    case x.kind
    of pkBoolean:
      # false orders before true
      result = (not x.bool) and y.bool
    of pkFloat:
      result = x.float < y.float
    of pkDouble:
      result = x.double < y.double
    of pkSignedInteger:
      result = x.int < y.int
    of pkBigInteger:
      result = x.bigint < y.bigint
    of pkString:
      result = x.string < y.string
    of pkByteString:
      result = x.bytes < y.bytes
    of pkSymbol:
      result = x.symbol < y.symbol
    of pkRecord, pkSequence, pkSet, pkDictionary, pkEmbedded:
      # No ordering is implemented for these kinds.
      result = false
proc hash*(prs: Preserve): Hash =
  ## Hash a Preserves value so that it can be a ``HashSet`` member or a
  ## ``Table`` key.
  ##
  ## The hash has to agree with ``==``. Sets and dictionaries compare equal
  ## regardless of iteration order, so their members are combined with a
  ## commutative operation (xor) instead of the order-sensitive ``!&``.
  ## Dictionary entries contribute both their key and their value.
  var h = hash(prs.kind.int)
  case prs.kind
  of pkBoolean:
    h = h !& hash(prs.bool)
  of pkFloat:
    h = h !& hash(prs.float)
  of pkDouble:
    h = h !& hash(prs.double)
  of pkSignedInteger:
    h = h !& hash(prs.int)
  of pkBigInteger:
    h = h !& hash(prs.bigint.flags)
    h = h !& hash(prs.bigint)
  of pkString:
    h = h !& hash(prs.string)
  of pkByteString:
    h = h !& hash(prs.bytes)
  of pkSymbol:
    h = h !& hash(prs.symbol)
  of pkRecord:
    for val in prs.record:
      h = h !& hash(val)
  of pkSequence:
    for val in prs.sequence:
      h = h !& hash(val)
  of pkSet:
    var members: Hash
    for val in prs.set.items:
      members = members xor hash(val)
    h = h !& members
  of pkDictionary:
    var entries: Hash
    for (key, val) in prs.dict.pairs:
      # Finish each key/value pair on its own before the xor so that swapping
      # values between keys changes the result.
      entries = entries xor (!$(hash(key) !& hash(val)))
    h = h !& entries
  of pkEmbedded:
    h = h !& hash(prs.embedded)
  !$h
proc `==`*(x, y: Preserve): bool =
  ## Structural equality of two Preserves values.
  ##
  ## Values of different kinds are never equal. Records and sequences must
  ## have the same length and equal elements at every position. Sets and
  ## dictionaries must have the same size and the same members, in any order.
  ## Embedded values compare by pointer.
  if x.kind != y.kind:
    return false
  case x.kind
  of pkBoolean:
    result = x.bool == y.bool
  of pkFloat:
    result = x.float == y.float
  of pkDouble:
    result = x.double == y.double
  of pkSignedInteger:
    result = x.int == y.int
  of pkBigInteger:
    result = x.bigint == y.bigint
  of pkString:
    result = x.string == y.string
  of pkByteString:
    result = x.bytes == y.bytes
  of pkSymbol:
    result = x.symbol == y.symbol
  of pkRecord:
    # Without the length check a shorter `y` is indexed out of bounds and a
    # longer `y` compares equal to its own prefix.
    if x.record.len != y.record.len: return false
    for i, val in x.record:
      if y.record[i] != val: return false
    result = true
  of pkSequence:
    if x.sequence.len != y.sequence.len: return false
    for i, val in x.sequence:
      if y.sequence[i] != val: return false
    result = true
  of pkSet:
    # Equal sizes plus "every member of x is in y" implies the same members.
    if x.set.len != y.set.len: return false
    for val in x.set.items:
      if not y.set.contains(val): return false
    result = true
  of pkDictionary:
    if x.dict.len != y.dict.len: return false
    for (key, val) in x.dict.pairs:
      # Test membership first: `[]` raises KeyError for a missing key.
      if key notin y.dict or y.dict[key] != val: return false
    result = true
  of pkEmbedded:
    result = x.embedded == y.embedded
proc `$`*(prs: Preserve): string =
case prs.kind:
of pkBoolean:
case prs.bool
@ -69,14 +175,14 @@ proc `$`*[T](prs: Preserve[T]): string =
result.add('>')
of pkSequence:
result.add('[')
for i, val in prs.seq:
for i, val in prs.sequence:
if i > 0:
result.add(' ')
result.add($val)
result.add(']')
of pkSet:
result.add("#{")
for val in prs.set:
for val in prs.set.items:
result.add($val)
result.add(' ')
if result.len > 1:
@ -86,178 +192,50 @@ proc `$`*[T](prs: Preserve[T]): string =
result.add('{')
for (key, value) in prs.dict.pairs:
result.add($key)
result.add(" :")
result.add(": ")
result.add($value)
result.add(' ')
if result.len > 1:
result.setLen(result.high)
result.add('}')
of pkEmbedded:
when not T is void:
$prs.embedded
result.add(prs.embedded.repr)
proc toPreserve*(b: bool): Preserve[void] =
Preserve[void](kind: pkBoolean, bool: b)
# Type traits bound directly to the compiler's "TypeTrait" magic.
# NOTE(review): these look like local copies of the std/typetraits
# declarations of the same names — confirm why the module is not imported.
proc isNamedTuple(T: typedesc): bool {.magic: "TypeTrait".}
proc distinctBase(T: typedesc): typedesc {.magic: "TypeTrait".}
# Value-level form: converts `a` to the base type of its distinct type.
template distinctBase[T](a: T): untyped = distinctBase(type(a))(a)
proc toPreserve*(n: SomeInteger): Preserve[void] =
Preserve[void](kind: pkSignedInteger, int: n.BiggestInt)
proc toPreserve*(n: BigInt): Preserve[void] =
if initBigInt(low(BiggestInt)) < n and n < initBigInt(high(BiggestInt)):
var tmp: BiggestUint
for limb in n.limbs:
tmp = (tmp shl 32) or limb
if Negative in n.flags:
tmp = (not tmp) + 1
result = Preserve[void](kind: pkSignedInteger, int: cast[BiggestInt](tmp))
else:
result = Preserve[void](kind: pkBigInteger, bigint: n)
proc toPreserve*(s: string): Preserve[void] =
Preserve[void](kind: pkString, string: s)
proc symbol*[T](s: string): Preserve[void] {.inline.} =
proc symbol*(s: string): Preserve {.inline.} =
## Symbol constructor.
Preserve[T](kind: pkSymbol, symbol: s)
Preserve(kind: pkSymbol, symbol: s)
proc record*[T](label: Preserve[T], args: varargs[Preserve[T]]): Preserve[T] =
proc initRecord*(label: Preserve, args: varargs[Preserve]): Preserve =
## Record constructor.
result = Preserve[T](kind: pkRecord, record: newSeqOfCap(1+args.len))
result = Preserve(kind: pkRecord,
record: newSeqOfCap[Preserve](1+args.len))
result.record.add(label)
for arg in args: result.record.add(arg)
proc record*[T](label: string, args: varargs[Preserve[T]]): Preserve[T] {.inline.} =
proc initRecord*(label: string, args: varargs[Preserve]): Preserve {.inline.} =
## Record constructor that converts ``label`` to a symbol.
record(symbol[T](label), args)
initRecord(symbol(label), args)
proc label*[T](prs: Preserve[T]): Preserve[T] {.inline.} =
proc label*(prs: Preserve): Preserve {.inline.} =
  ## The label of a record value, which is stored as the first element of
  ## its ``record`` sequence.
  result = prs.record[0]
proc arity*[T](prs: Preserve[T]): int {.inline.} =
proc arity*(prs: Preserve): int {.inline.} =
  ## Field count of a record value: the length of its ``record`` sequence
  ## minus the slot taken by the label.
  result = prs.record.len - 1
proc fields*[T](prs: Preserve[T]): seq[Preserve[T]] {.inline.} =
proc fields*(prs: Preserve): seq[Preserve] {.inline.} =
  ## A copy of the fields of a record value, i.e. everything after the label.
  result = prs.record[1 .. ^1]
iterator fields*[T](prs: Preserve[T]): Preserve[T] =
iterator fields*(prs: Preserve): Preserve =
  ## Yield each field of a record value in order, skipping the label.
  var idx = 1
  while idx <= prs.record.high:
    yield prs.record[idx]
    inc idx
iterator setItems*[T](prs: Preserve[T]): Preserve[T] =
for v in prs.set.keys: yield v
proc `<`(x, y: string | seq[byte]): bool =
for i in 0 .. min(x.high, y.high):
if x[i] < y[i]:
return true
x.len < y.len
proc `<`*[T](x, y: Preserve[T]): bool =
if x.kind != y.kind:
if x.kind == pkSignedInteger and y.kind == pkBigInteger:
result = x.int < y.bigint
elif x.kind == pkBigInteger and y.kind == pkSignedInteger:
result = x.bigint < y.int
else:
result = x.kind < y.kind
else:
case x.kind
of pkBoolean:
result = (not x.bool) and y.bool
of pkSignedInteger:
result = x.int < y.int
of pkBigInteger:
result = x.bigint < y.bigint
of pkString:
result = x.string < y.string
of pkByteString:
result = x.bytes < y.bytes
of pkSymbol:
result = x.symbol < y.symbol
else:
discard
proc `==`*[T](x, y: Preserve[T]): bool =
if x.kind == y.kind:
case x.kind
of pkBoolean:
result = x.bool == y.bool
of pkFloat:
result = x.float == y.float
of pkDouble:
result = x.double == y.double
of pkSignedInteger:
result = x.int == y.int
of pkBigInteger:
result = x.bigint == y.bigint
of pkString:
result = x.string == y.string
of pkByteString:
result = x.bytes == y.bytes
of pkSymbol:
result = x.symbol == y.symbol
of pkRecord:
for i, val in x.record:
if y.record[i] != val: return false
result = true
of pkSequence:
for i, val in x.seq:
if y.seq[i] != val: return false
result = true
of pkSet:
for val in x.set:
if not y.set.contains(val): return false
for val in y.set:
if not x.set.contains(val): return false
result = true
of pkDictionary:
for (key, val) in x.dict.pairs:
if y.dict[key] != val: return false
result = true
of pkEmbedded:
when not T is void:
result = x.embedded == y.embedded
proc hash*[T](prs: Preserve[T]): Hash =
var h = hash(prs.kind.int)
case prs.kind
of pkBoolean:
h = h !& hash(prs.bool)
of pkFloat:
h = h !& hash(prs.float)
of pkDouble:
h = h !& hash(prs.double)
of pkSignedInteger:
h = h !& hash(prs.int)
of pkBigInteger:
h = h !& hash(prs.bigint.flags)
h = h !& hash(prs.bigint)
of pkString:
h = h !& hash(prs.string)
of pkByteString:
h = h !& hash(prs.bytes)
of pkSymbol:
h = h !& hash(prs.symbol)
of pkRecord:
for val in prs.record:
h = h !& hash(val)
of pkSequence:
for val in prs.seq:
h = h !& hash(val)
of pkSet:
for val in prs.set:
h = h !& hash(val)
of pkDictionary:
for (key, val) in prs.dict.pairs:
h = h !& hash(val)
of pkEmbedded:
when not T is void:
h = h !& hash(prs.embedded)
!$h
proc writeVarint(s: Stream; n: int) =
var n = n
while true:
@ -278,7 +256,7 @@ proc readVarint(s: Stream): int =
break
shift.inc 7
proc write*[T](str: Stream; prs: Preserve[T]) =
proc write*(str: Stream; prs: Preserve) =
case prs.kind:
of pkBoolean:
case prs.bool
@ -354,7 +332,7 @@ proc write*[T](str: Stream; prs: Preserve[T]) =
str.write(0x84'u8)
of pkSequence:
str.write(0xb5'u8)
for e in prs.seq:
for e in prs.sequence:
str.write(e)
str.write(0x84'u8)
of pkSet:
@ -369,65 +347,63 @@ proc write*[T](str: Stream; prs: Preserve[T]) =
str.write(value)
str.write(0x84'u8)
of pkEmbedded:
when not T is void:
str.write(0x86'u8)
str.write(prs.embedded)
str.write(0x86'u8)
raiseAssert("binary representation of embedded values is undefined")
proc parsePreserve*(s: Stream): Preserve[void] =
proc parsePreserve*(s: Stream): Preserve =
proc assertStream(check: bool) =
if not check:
raise newException(ValueError, "invalid Preserves stream")
const endMarker = 0x84
let tag = s.readUint8()
case tag
of 0x80: result = Preserve[void](kind: pkBoolean, bool: false)
of 0x81: result = Preserve[void](kind: pkBoolean, bool: true)
of 0x80: result = Preserve(kind: pkBoolean, bool: false)
of 0x81: result = Preserve(kind: pkBoolean, bool: true)
of 0x82:
when system.cpuEndian == bigEndian:
result = Preserve[void](kind: pkFloat, float: s.readFloat32())
result = Preserve(kind: pkFloat, float: s.readFloat32())
else:
result = Preserve[void](kind: pkFloat)
result = Preserve(kind: pkFloat)
var be = s.readFloat32()
swapEndian32(result.float.addr, be.addr)
of 0x83:
when system.cpuEndian == bigEndian:
result = Preserve[void](kind: pkDouble, double: s.readFloat64())
result = Preserve(kind: pkDouble, double: s.readFloat64())
else:
result = Preserve[void](kind: pkDouble)
result = Preserve(kind: pkDouble)
var be = s.readFloat64()
swapEndian64(result.double.addr, be.addr)
of 0x84:
assertStream(false)
of 0xb1:
result = Preserve[void](kind: pkString)
result = Preserve(kind: pkString)
let len = s.readVarint()
result.string = s.readStr(len)
of 0xb2:
result = Preserve[void](kind: pkByteString)
result = Preserve(kind: pkByteString)
let len = s.readVarint()
result.bytes = cast[seq[byte]](s.readStr(len))
of 0xb3:
let len = s.readVarint()
result = symbol[void](s.readStr(len))
result = symbol(s.readStr(len))
of 0xb4:
result = Preserve[void](kind: pkRecord)
result = Preserve(kind: pkRecord)
while s.peekUint8() != endMarker:
result.record.add(s.parsePreserve())
discard s.readUint8()
assertStream(result.record.len > 0)
of 0xb5:
result = Preserve[void](kind: pkSequence)
result = Preserve(kind: pkSequence)
while s.peekUint8() != endMarker:
result.seq.add(s.parsePreserve())
result.sequence.add(s.parsePreserve())
discard s.readUint8()
of 0xb6:
result = Preserve[void](kind: pkSet)
result = Preserve(kind: pkSet)
while s.peekUint8() != endMarker:
let val = s.parsePreserve()
result.set.incl(val)
result.set.incl(s.parsePreserve())
discard s.readUint8()
of 0xb7:
result = Preserve[void](kind: pkDictionary)
result = Preserve(kind: pkDictionary)
while s.peekUint8() != endMarker:
let key = s.parsePreserve()
let val = s.parsePreserve()
@ -435,54 +411,100 @@ proc parsePreserve*(s: Stream): Preserve[void] =
discard s.readUint8()
of 0xb0:
let len = s.readVarint()
result = Preserve[void](kind: pkBigInteger)
result = Preserve(kind: pkBigInteger)
for _ in 1..len:
result.bigint = (result.bigint shl 8) + s.readUint8().int32
else:
case 0xf0 and tag
of 0x90:
var n = tag.BiggestInt
result = Preserve[void](kind: pkSignedInteger,
result = Preserve(kind: pkSignedInteger,
int: n - (if n > 0x9c: 0xa0 else: 0x90))
of 0xa0:
let len = (tag.int and 0x0f) + 1
if len <= 8:
result = Preserve[void](kind: pkSignedInteger, int: s.readUint8().BiggestInt)
result = Preserve(kind: pkSignedInteger, int: s.readUint8().BiggestInt)
if (result.int and 0x80) != 0: result.int.dec(0x100)
for i in 1..<len:
result.int = (result.int shl 8) or s.readUint8().BiggestInt
else:
result = Preserve[void](kind: pkBigInteger)
result = Preserve(kind: pkBigInteger)
for i in 0..<len:
result.bigint = (result.bigint shl 8) + s.readUint8().int32
else:
assertStream(false)
proc toPreserve*(js: JsonNode): Preserve[void] =
proc toPreserveHook*(n: BigInt): Preserve =
  ## Convert a ``BigInt`` to a Preserves integer.
  ##
  ## Values that fit in ``BiggestInt`` (bounds included) use the compact
  ## ``pkSignedInteger`` form; anything larger is kept as ``pkBigInteger``.
  if initBigInt(low(BiggestInt)) <= n and n <= initBigInt(high(BiggestInt)):
    # `limbs` holds 32-bit digits least-significant first, so fold from the
    # most significant limb down to rebuild the magnitude.
    var tmp: BiggestUint
    for i in countdown(n.limbs.high, 0):
      tmp = (tmp shl 32) or n.limbs[i]
    if Negative in n.flags:
      # Two's complement of the magnitude gives the negative value's bits.
      tmp = (not tmp) + 1
    result = Preserve(kind: pkSignedInteger, int: cast[BiggestInt](tmp))
  else:
    result = Preserve(kind: pkBigInteger, bigint: n)
proc toPreserveHook*(js: JsonNode): Preserve =
case js.kind
of JString:
result = js.str.toPreserve
result = Preserve(kind: pkString, string: js.str)
of JInt:
result = Preserve[void](kind: pkSignedInteger, int: js.num)
result = Preserve(kind: pkSignedInteger, int: js.num)
of JFloat:
result = Preserve[void](kind: pkDouble, double: js.fnum)
result = Preserve(kind: pkDouble, double: js.fnum)
of JBool:
result = case js.bval
of false: symbol[void]"false"
of true: symbol[void]"true"
of false: symbol"false"
of true: symbol"true"
of JNull:
result = symbol[void]"null"
result = symbol"null"
of JObject:
result = Preserve[void](kind: pkDictionary)
result = Preserve(kind: pkDictionary)
for key, val in js.fields.pairs:
result.dict[key.toPreserve] = val.toPreserve
result.dict[Preserve(kind: pkString, string: key)] = toPreserveHook(val)
of JArray:
result = Preserve[void](kind: pkSequence, seq: newSeq[Preserve[void]](
js.elems.len))
result = Preserve(kind: pkSequence,
sequence: newSeq[Preserve](js.elems.len))
for i, e in js.elems:
result.seq[i] = e.toPreserve
result.sequence[i] = toPreserveHook(e)
proc toJson*[T](prs: Preserve[T]): JsonNode =
proc toPreserve*[T](x: T): Preserve =
  ## Serializes `x` to Preserves; uses `toPreserveHook(x: A)` if it's in scope to
  ## customize serialization.
  ##
  ## Without a hook the mapping is chosen at compile time from ``T``:
  ## arrays and seqs become sequences, ``float32`` becomes ``pkFloat``,
  ## ``float64`` (and therefore ``float``) becomes ``pkDouble``, objects become
  ## dictionaries keyed by field-name symbols, ordinals and integers become
  ## ``pkSignedInteger``, ``nil`` pointers and refs become the symbol ``null``
  ## while non-nil ones are dereferenced, and tuples become records with an
  ## empty-symbol label. Any other type fails with an assertion.
  when T is Preserve: result = x
  elif compiles(toPreserveHook(x)):
    result = toPreserveHook(x)
  elif T is array | seq:
    result = Preserve(kind: pkSequence)
    for v in x.items: result.sequence.add(toPreserve(v))
  elif T is bool:
    result = Preserve(kind: pkBoolean, bool: x)
  elif T is distinct:
    result = toPreserve(x.distinctBase)
  elif T is float32:
    # `float` is an alias of `float64`, so only `float32` may take the
    # single-precision slot.
    result = Preserve(kind: pkFloat, float: x)
  elif T is float64:
    result = Preserve(kind: pkDouble, double: x)
  elif T is object:
    result = Preserve(kind: pkDictionary)
    for k, v in x.fieldPairs: result.dict[symbol(k)] = toPreserve(v)
  elif T is Ordinal:
    result = Preserve(kind: pkSignedInteger, int: x.ord.BiggestInt)
  elif T is ptr | ref:
    # Use the system comparison explicitly so a user-defined `==` on the
    # pointee type cannot intercept the nil check.
    if system.`==`(x, nil): result = symbol"null"
    else: result = toPreserve(x[])
  elif T is string:
    result = Preserve(kind: pkString, string: x)
  elif T is SomeInteger:
    result = Preserve(kind: pkSignedInteger, int: x.BiggestInt)
  elif T is tuple:
    result = Preserve(kind: pkRecord, record: @[symbol("")])
    for v in x.fields: result.record.add(toPreserve(v))
  else:
    raiseAssert("cannot convert to Preserves: " & $T)
proc toJsonHook*(prs: Preserve): JsonNode =
case prs.kind:
of pkBoolean:
result = newJBool(prs.bool)
@ -512,8 +534,8 @@ proc toJson*[T](prs: Preserve[T]): JsonNode =
raise newException(ValueError, "cannot convert record to JSON")
of pkSequence:
result = newJArray()
for val in prs.seq:
result.add(val.toJSON)
for val in prs.sequence:
result.add(toJsonHook(val))
of pkSet:
raise newException(ValueError, "cannot convert set to JSON")
of pkDictionary:
@ -521,7 +543,7 @@ proc toJson*[T](prs: Preserve[T]): JsonNode =
for (key, val) in prs.dict.pairs:
if key.kind != pkString:
raise newException(ValueError, "cannot convert non-string dictionary key to JSON")
result[key.string] = val.toJson
result[key.string] = toJsonHook(val)
of pkEmbedded:
raise newException(ValueError, "cannot convert embedded value to JSON")
@ -530,20 +552,36 @@ type Record* = object
label*: string
arity*: Natural
proc init*[T](rec: Record, fields: varargs[Preserve[T]]): Preserve[T] =
proc init*(rec: Record, fields: varargs[Preserve]): Preserve =
## Initialize a new record value.
assert(fields.len == rec.arity)
record(rec.label, fields)
initRecord(rec.label, fields)
proc isClassOf*[T](rec: Record, val: Preserve[T]): bool =
proc isClassOf*(rec: Record, val: Preserve): bool =
  ## True when ``val`` is a record whose label is the symbol ``rec.label`` and
  ## whose number of fields equals ``rec.arity``.
  if val.kind != pkRecord:
    return false
  let lbl = val.label
  lbl.kind == pkSymbol and lbl.symbol == rec.label and rec.arity == val.arity
proc classOf*[T](val: Preserve[T]): Record =
proc classOf*(val: Preserve): Record =
  ## Derive the ``Record`` type of ``val``.
  ##
  ## Raises ``ValueError`` unless ``val`` is a record whose label is a symbol.
  # The label must be a symbol for `.symbol` below to be a valid field access.
  if val.kind != pkRecord or val.label.kind != pkSymbol:
    raise newException(ValueError, "cannot derive class of non-record value")
  Record(label: val.label.symbol, arity: val.arity)
proc len*(prs: Preserve): int =
  ## Number of values directly contained in ``prs``: the fields of a record
  ## (label excluded), the elements of a sequence or set, or the entries of a
  ## dictionary. Every other kind reports zero.
  if prs.kind == pkRecord:
    result = pred(prs.record.len)
  elif prs.kind == pkSequence:
    result = prs.sequence.len
  elif prs.kind == pkSet:
    result = prs.set.len
  elif prs.kind == pkDictionary:
    result = prs.dict.len
  else:
    result = 0
proc `[]`*(prs: Preserve; i: int): Preserve =
  ## Positional access: the ``i``-th field of a record (the label is not
  ## counted) or the ``i``-th element of a sequence.
  ##
  ## Raises ``ValueError`` for every other kind.
  if prs.kind == pkRecord:
    return prs.record[succ(i)]
  if prs.kind == pkSequence:
    return prs.sequence[i]
  raise newException(ValueError, "`[]` is not valid for " & $prs.kind)

View File

@ -1,8 +1,7 @@
# SPDX-License-Identifier: ISC
import json, streams, unittest
import preserves
import std/[json,jsonutils,streams, unittest]
let testVectors = [
"""
@ -29,24 +28,24 @@ let testVectors = [
"""
[
{
"precision": "zip",
"Latitude": 37.7668,
"Longitude": -122.3959,
"Address": "",
"City": "SAN FRANCISCO",
"State": "CA",
"Zip": "94107",
"Country": "US"
"space": "C3D2",
"logo": "https://www.c3d2.de/images/ck.png",
"url": "https://www.c3d2.de/",
"location": {
"address": "Raum 1.04.01, Haus B, Zentralwerk, Riesaer Straße 32, 01127 Dresden, Germany",
"lat": 51.0810791,
"lon": 13.7286123
}
},
{
"precision": "zip",
"Latitude": 37.371991,
"Longitude": -122.02602,
"Address": "",
"City": "SUNNYVALE",
"State": "CA",
"Zip": "94085",
"Country": "US"
"space": "LAG",
"logo": "http://laglab.org/logo.png",
"url": "http://laglab.org",
"location": {
"address": "Eerste Schinkelstraat 16, 1075 TX Amsterdam, The Netherlands",
"lat": 52.35406,
"lon": 4.85423
}
}
]
"""
@ -54,9 +53,11 @@ let testVectors = [
for i, jsText in testVectors:
test $i:
checkpoint(jsText)
let
control = parseJson jsText
x = control.toPreserve
checkpoint($x)
var stream = newStringStream()
stream.write(x)
stream.setPosition(0)