commit d63dfad954cde75c2d2f499f066b9171dc3d62d7
Author: Emery Hemingway
Date:   Wed Jun 2 15:51:36 2021 +0200

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b03728b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+tests/test_rfc8259
+tests/test_integers
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5def74a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+Nim implementation of the [Preserves data language](https://preserves.gitlab.io/preserves/preserves.html).
+
+Missing features:
+* embedded values
+* parsing from human-readable encoding
+* ordering of compound values
+* conversion of Nim tuples and objects
diff --git a/preserves.nimble b/preserves.nimble
new file mode 100644
index 0000000..aadf4bf
--- /dev/null
+++ b/preserves.nimble
@@ -0,0 +1,12 @@
+# Package
+
+version = "0.1.0"
+author = "Emery Hemingway"
+description = "data model and serialization format"
+license = "ISC"
+srcDir = "src"
+
+
+# Dependencies
+
+requires "nim >= 1.4.6", "bigints"
diff --git a/src/preserves.nim b/src/preserves.nim
new file mode 100644
index 0000000..d9eaad8
--- /dev/null
+++ b/src/preserves.nim
@@ -0,0 +1,495 @@
+# SPDX-License-Identifier: ISC
+
+import base64, endians, json, hashes, tables, streams
+import bigints
+
+type
+  PreserveKind = enum ## Discriminator selecting the active branch of `Preserve`.
+    pkBoolean, pkFloat, pkDouble, pkSignedInteger, pkBigInteger, pkString, pkByteString,
+    pkSymbol, pkRecord, pkSequence, pkSet, pkDictionary, pkEmbedded
+
+  Preserve*[T] = object
+    ## A single Preserves value. `T` is the Nim type carried by the
+    ## `pkEmbedded` branch; `parsePreserve` produces `Preserve[void]`.
+    case kind*: PreserveKind
+    of pkBoolean:
+      bool*: bool
+    of pkFloat:
+      float*: float32
+    of pkDouble:
+      double*: float64
+    of pkSignedInteger:
+      int*: BiggestInt
+    of pkBigInteger:
+      bigint*: BigInt
+    of pkString:
+      string*: string
+    of pkByteString:
+      bytes*: seq[byte]
+    of pkSymbol:
+      symbol*: string
+    of pkRecord:
+      record*: seq[Preserve[T]] # record[0] is the label
+    of pkSequence:
+      seq*: seq[Preserve[T]]
+    of pkSet:
+      set*: CountTable[Preserve[T]]
+    of pkDictionary:
+      dict*: Table[Preserve[T], Preserve[T]]
+    of pkEmbedded:
+      embedded*: T
+
+proc label*[T](prs: Preserve[T]): Preserve[T] =
+  ## Return the label of a record, which is stored as `record[0]`.
+  prs.record[0]
+
+proc fields*[T](prs: Preserve[T]): seq[Preserve[T]] =
+  ## Return the fields of a record as a new sequence, that is every
+  ## element of `record` after the label.
+  prs.record[1..prs.record.high]
+
+iterator fields*[T](prs: Preserve[T]): Preserve[T] =
+  ## Yield the fields of a record one at a time, skipping the label.
+  # The element type is `Preserve[T]`: each iteration yields one field,
+  # not a sequence of them.
+  for i in 1..prs.record.high:
+    yield prs.record[i]
+
+proc `<`(x, y: string | seq[byte]): bool =
+  ## Lexicographic "less than": the first differing element decides,
+  ## and when one operand is a prefix of the other the shorter is less.
+  for i in 0 .. min(x.high, y.high):
+    if x[i] != y[i]:
+      # Stop at the first difference; scanning further would let a later
+      # smaller element override an earlier greater one.
+      return x[i] < y[i]
+  x.len < y.len
+
+proc `<`*[T](x, y: Preserve[T]): bool =
+  ## Order two values. Values of different kinds are ordered by the
+  ## declaration order of `PreserveKind`; atoms of the same kind are
+  ## ordered by value.
+  ##
+  ## Compound and embedded values are not ordered yet (see the README),
+  ## so the result is always `false` for two of them of the same kind.
+  # NOTE(review): pkSignedInteger and pkBigInteger are compared by kind,
+  # not numerically - confirm whether mixed comparisons can occur.
+  if x.kind != y.kind:
+    result = x.kind < y.kind
+  else:
+    case x.kind
+    of pkBoolean:
+      result = (not x.bool) and y.bool
+    of pkFloat:
+      result = x.float < y.float
+    of pkDouble:
+      result = x.double < y.double
+    of pkSignedInteger:
+      result = x.int < y.int
+    of pkBigInteger:
+      result = x.bigint < y.bigint
+    of pkString:
+      result = x.string < y.string
+    of pkByteString:
+      result = x.bytes < y.bytes
+    of pkSymbol:
+      result = x.symbol < y.symbol
+    of pkRecord, pkSequence, pkSet, pkDictionary, pkEmbedded:
+      result = false
+
+proc `==`*[T](x, y: Preserve[T]): bool =
+  ## Structural equality. Values of different kinds are never equal.
+  if x.kind == y.kind:
+    case x.kind
+    of pkBoolean:
+      result = x.bool == y.bool
+    of pkFloat:
+      result = x.float == y.float
+    of pkDouble:
+      result = x.double == y.double
+    of pkSignedInteger:
+      result = x.int == y.int
+    of pkBigInteger:
+      result = x.bigint == y.bigint
+    of pkString:
+      result = x.string == y.string
+    of pkByteString:
+      result = x.bytes == y.bytes
+    of pkSymbol:
+      result = x.symbol == y.symbol
+    of pkRecord:
+      # Compare lengths first so indexing `y` cannot go out of bounds
+      # and a strict prefix is not reported as equal.
+      if x.record.len != y.record.len: return false
+      for i, val in x.record:
+        if y.record[i] != val: return false
+      result = true
+    of pkSequence:
+      if x.seq.len != y.seq.len: return false
+      for i, val in x.seq:
+        if y.seq[i] != val: return false
+      result = true
+    of pkSet:
+      for val in x.set.keys:
+        if not y.set.hasKey(val): return false
+      for val in y.set.keys:
+        if not x.set.hasKey(val): return false
+      result = true
+    of pkDictionary:
+      if x.dict.len != y.dict.len: return false
+      for (key, val) in x.dict.pairs:
+        # Test membership before indexing: `[]` raises KeyError on a
+        # missing key.
+        if (key notin y.dict) or (y.dict[key] != val): return false
+      result = true
+    of pkEmbedded:
+      # `not T is void` would parse as `(not T) is void`.
+      when T isnot void:
+        result = x.embedded == y.embedded
+
+proc hash*[T](prs: Preserve[T]): Hash =
+  ## Hash a value so that it can be used as a `Table` or `CountTable` key.
+  var h = hash(prs.kind.int)
+  case prs.kind
+  of pkBoolean:
+    h = h !& hash(prs.bool)
+  of pkFloat:
+    h = h !& hash(prs.float)
+  of pkDouble:
+    h = h !& hash(prs.double)
+  of pkSignedInteger:
+    h = h !& hash(prs.int)
+  of pkBigInteger:
+    h = h !& hash(prs.bigint)
+  of pkString:
+    h = h !& hash(prs.string)
+  of pkByteString:
+    h = h !& hash(prs.bytes)
+  of pkSymbol:
+    h = h !& hash(prs.symbol)
+  of pkRecord:
+    for val in prs.record:
+      h = h !& hash(val)
+  of pkSequence:
+    for val in prs.seq:
+      h = h !& hash(val)
+  of pkSet:
+    # Table iteration order is not part of a value's identity, so the
+    # members are combined with xor, which does not depend on order.
+    var members: Hash
+    for val in prs.set.keys:
+      members = members xor hash(val)
+    h = h !& members
+  of pkDictionary:
+    # Hash key and value together per entry, then combine the entries
+    # without depending on iteration order.
+    var entries: Hash
+    for (key, val) in prs.dict.pairs:
+      entries = entries xor (!$(hash(key) !& hash(val)))
+    h = h !& entries
+  of pkEmbedded:
+    when T isnot void:
+      h = h !& hash(prs.embedded)
+  !$h
+
+proc `$`*[T](prs: Preserve[T]): string =
+  ## Render a value as text.
+  case prs.kind:
+  of pkBoolean:
+    case prs.bool
+    of false: result = "#f"
+    of true: result = "#t"
+  of pkFloat:
+    result = $prs.float & "f"
+  of pkDouble:
+    result = $prs.double
+  of pkSignedInteger:
+    result = $prs.int
+  of pkBigInteger:
+    result = $prs.bigint
+  of pkString:
+    result = escapeJson(prs.string)
+  of pkByteString:
+    result.add("#[")
+    result.add(base64.encode(prs.bytes))
+    result.add(']')
+  of pkSymbol:
+    result.add('|')
+    result.add(escapeJsonUnquoted(prs.symbol))
+    result.add('|')
+  of pkRecord:
+    result.add('<')
+    result.add($prs.label)
+    for val in prs.fields:
+      result.add(' ')
+      result.add($val)
+    result.add('>')
+  of pkSequence:
+    result.add('[')
+    for i, val in prs.seq:
+      if i > 0:
+        result.add(' ')
+      result.add($val)
+    result.add(']')
+  of pkSet:
+    result.add("#{")
+    for val in prs.set.keys:
+      result.add($val)
+      result.add(' ')
+    # Drop the trailing separator, but only if a member was written:
+    # the opening "#{" is itself two characters long.
+    if result.len > 2:
+      result.setLen(result.high)
+    result.add('}')
+  of pkDictionary:
+    result.add('{')
+    for (key, value) in prs.dict.pairs:
+      result.add($key)
+      result.add(": ")
+      result.add($value)
+      result.add(' ')
+    # Drop the trailing separator if an entry was written.
+    if result.len > 1:
+      result.setLen(result.high)
+    result.add('}')
+  of pkEmbedded:
+    when T isnot void:
+      result = $prs.embedded
+
+proc writeVarint(s: Stream; n: int) =
+  ## Write `n` as a base-128 varint: seven bits per byte, least
+  ## significant group first, with the high bit set on every byte
+  ## except the last.
+  # Shift as unsigned so the loop terminates for any input; `shr` on a
+  # negative signed integer never reaches zero.
+  var rest = cast[uint](n)
+  while true:
+    let group = uint8(rest and 0x7f)
+    rest = rest shr 7
+    if rest == 0:
+      s.write(group)
+      break
+    else:
+      s.write(group or 0x80'u8)
+
+proc readVarint(s: Stream): int =
+  ## Read a base-128 varint as written by `writeVarint`.
+  var shift = 0
+  # Stop once the shift would exceed the width of `int`.
+  while shift < 8 * sizeof(int):
+    let c = s.readChar.int
+    result = result or ((c and 0x7f) shl shift)
+    if (c and 0x80) == 0:
+      break
+    shift.inc 7
+
+proc write*[T](str: Stream; prs: Preserve[T]) =
+  ## Write `prs` to `str` in the binary encoding read by `parsePreserve`.
+  case prs.kind:
+  of pkBoolean:
+    case prs.bool
+    of false: str.write(0x80'u8)
+    of true: str.write(0x81'u8)
+  of pkFloat:
+    str.write(0x82'u8)
+    when system.cpuEndian == bigEndian:
+      str.write(prs.float)
+    else:
+      # Floats are written big-endian.
+      var be: float32
+      swapEndian32(be.addr, prs.float.unsafeAddr)
+      str.write(be)
+  of pkDouble:
+    str.write(0x83'u8)
+    when system.cpuEndian == bigEndian:
+      str.write(prs.double)
+    else:
+      var be: float64
+      swapEndian64(be.addr, prs.double.unsafeAddr)
+      str.write(be)
+  of pkSignedInteger:
+    if (-3 <= prs.int) and (prs.int <= 12):
+      # Small integers live in the low nibble of the tag byte:
+      # 0..12 as-is, -3..-1 as 13..15.
+      str.write(0x90'u8 or uint8(if prs.int < 0: prs.int + 16 else: prs.int))
+    else:
+      # Count the significant bits of the magnitude; one more bit is
+      # needed for the sign, hence the `+ 8` when rounding up to bytes.
+      var bitCount = 1
+      if prs.int < 0:
+        while ((not prs.int) shr bitCount) != 0:
+          inc(bitCount)
+      else:
+        while (prs.int shr bitCount) != 0:
+          inc(bitCount)
+      let byteCount = (bitCount + 8) div 8
+      str.write(0xa0'u8 or uint8(byteCount - 1))
+      # Most significant byte first.
+      for i in countdown(byteCount - 1, 0):
+        str.write(uint8((prs.int shr (i * 8)) and 0xff))
+  of pkBigInteger:
+    # Collect the magnitude as big-endian bytes, skipping leading zeros.
+    # The limbs are read as 32-bit words.
+    var bytes = newSeqOfCap[uint8](prs.bigint.limbs.len * 4 + 1)
+    var begun = false
+    for i in countdown(prs.bigint.limbs.high, 0):
+      let limb = prs.bigint.limbs[i]
+      for j in countdown(24, 0, 8):
+        let b = uint8((limb shr j) and 0xff)
+        begun = begun or (b != 0)
+        if begun:
+          bytes.add(b)
+    if bytes.len == 0:
+      # Zero has no magnitude bytes; write it as the small integer 0.
+      str.write(0x90'u8)
+    else:
+      if Negative in prs.bigint.flags:
+        # Two's complement of the whole magnitude: invert every byte
+        # and add one, propagating the carry from the last byte up.
+        var carry = true
+        for i in countdown(bytes.high, 0):
+          bytes[i] = not bytes[i]
+          if carry:
+            carry = bytes[i] == 0xff'u8
+            bytes[i] = bytes[i] + 1 # unsigned arithmetic wraps
+        # A negative number must start with a set sign bit.
+        if bytes[0] < 0x80'u8:
+          bytes.insert(0xff'u8, 0)
+      elif bytes[0] >= 0x80'u8:
+        # A non-negative number must start with a clear sign bit.
+        bytes.insert(0x00'u8, 0)
+      if bytes.len <= 16:
+        str.write(0xa0'u8 or uint8(bytes.len - 1))
+      else:
+        str.write(0xb0'u8)
+        str.writeVarint(bytes.len)
+      str.writeData(addr bytes[0], bytes.len)
+  of pkString:
+    str.write(0xb1'u8)
+    str.writeVarint(prs.string.len)
+    str.write(prs.string)
+  of pkByteString:
+    str.write(0xb2'u8)
+    str.writeVarint(prs.bytes.len)
+    # The generic `streams.write` would emit the seq descriptor rather
+    # than its payload, so the bytes are written explicitly.
+    if prs.bytes.len > 0:
+      str.writeData(unsafeAddr prs.bytes[0], prs.bytes.len)
+  of pkSymbol:
+    str.write(0xb3'u8)
+    str.writeVarint(prs.symbol.len)
+    str.write(prs.symbol)
+  of pkRecord:
+    # The label is record[0], so it is written first.
+    str.write(0xb4'u8)
+    for val in prs.record:
+      str.write(val)
+    str.write(0x84'u8)
+  of pkSequence:
+    str.write(0xb5'u8)
+    for e in prs.seq:
+      str.write(e)
+    str.write(0x84'u8)
+  of pkSet:
+    str.write(0xb6'u8)
+    for key in prs.set.keys:
+      str.write(key)
+    str.write(0x84'u8)
+  of pkDictionary:
+    str.write(0xb7'u8)
+    for (key, value) in prs.dict.pairs:
+      str.write(key)
+      str.write(value)
+    str.write(0x84'u8)
+  of pkEmbedded:
+    when T isnot void:
+      str.write(0x86'u8)
+      str.write(prs.embedded)
+
+proc parsePreserve*(s: Stream): Preserve[void] =
+  proc assertStream(check: bool) =
+    if not check:
+      raise newException(ValueError, "invalid Preserves stream")
+  const endMarker = 0x84
+  let tag = s.readUint8()
+  case tag
+  of 0x80: result = Preserve[void](kind: pkBoolean, bool: false)
+  of 0x81: result = Preserve[void](kind: pkBoolean, bool: true)
+  of 0x82:
+    when system.cpuEndian == bigEndian:
+      result = Preserve[void](kind: pkFloat, float: s.readFloat32())
+    else:
+      result = Preserve[void](kind: pkFloat)
+      var be = s.readFloat32()
+      swapEndian32(result.float.addr, be.addr)
+  of 0x83:
+    when system.cpuEndian == bigEndian:
+      result = Preserve[void](kind: pkDouble, double: s.readFloat64())
+    else:
+      result = Preserve[void](kind: pkDouble)
+      var be = s.readFloat64()
+      swapEndian64(result.double.addr, be.addr)
+  of 0x84:
+    assertStream(false)
+  of 0xb1:
+    result = Preserve[void](kind: pkString)
+    let len = s.readVarint()
+    result.string = s.readStr(len)
+  of 0xb2:
+    result = Preserve[void](kind: pkByteString)
+    let len = s.readVarint()
+    result.bytes = cast[seq[byte]](s.readStr(len))
+  of 0xb3:
+    result = Preserve[void](kind: pkSymbol)
+    let len = s.readVarint()
+    result.symbol = s.readStr(len)
+  of 0xb4:
+    result = Preserve[void](kind: pkRecord #[, label: s.parsePreserve()]#)
+    while s.peekUint8() != endMarker:
+      result.record.add(s.parsePreserve())
+    discard s.readUint8()
+    assertStream(result.record.len > 0)
+  of 0xb5:
+    result = Preserve[void](kind: pkSequence)
+    while s.peekUint8() != endMarker:
+      result.seq.add(s.parsePreserve())
+    discard s.readUint8()
+  of 0xb6:
+    result = Preserve[void](kind: pkSet)
+    while s.peekUint8() != endMarker:
+      let key = s.parsePreserve()
+      result.set[key] = 0
+    discard s.readUint8()
+  of 0xb7:
+    result = Preserve[void](kind: pkDictionary)
+    while s.peekUint8() != endMarker:
+      let key = s.parsePreserve()
+      let val = s.parsePreserve()
+      result.dict[key] = val
+    discard s.readUint8()
+  of 0xb0:
+    let len = s.readVarint()
+    let initial = s.readInt8()
+    result = Preserve[void](kind: pkBigInteger, bigint: initBigInt(initial))
+    for _ in 2..len:
+      result.bigint = (result.bigint shl 8) + s.readUint8().int32
+    if initial < 0:
+      result.bigint.flags = {Negative}
+  else:
+    case 0xf0 and tag
+    of 0x90:
+      var n = tag.BiggestInt
+      result = Preserve[void](kind: pkSignedInteger,
+          int: n - (if n > 0x9c: 0xa0 else: 0x90))
+    of 0xa0:
+      let len = (tag.int and 0x0f) + 1
+      if len <= 8:
+        result = Preserve[void](kind: pkSignedInteger, int: s.readUint8().BiggestInt)
+        if (result.int and 0x80) != 0: result.int.dec(0x100)
+        for i in 1..