diff --git a/implementations/racket/preserves/preserves/iolist.rkt b/implementations/racket/preserves/preserves/iolist.rkt new file mode 100644 index 0000000..b294fdc --- /dev/null +++ b/implementations/racket/preserves/preserves/iolist.rkt @@ -0,0 +1,69 @@ +#lang racket/base + +(provide counted-iolist? + counted-iolist-value + counted-iolist-length + count-iolist + iolist-length + iolist->bytes + write-iolist) + +(struct counted-iolist (value length) #:prefab) + +(define (bad-iolist who v) + (error who "Invalid iolist: ~v" v)) + +(define (iolist-length i [acc 0]) + (cond [(byte? i) (+ acc 1)] + [(bytes? i) (+ acc (bytes-length i))] + [(pair? i) (iolist-length (cdr i) (iolist-length (car i) acc))] + [(null? i) acc] + [(void? i) acc] + [(counted-iolist? i) (+ acc (counted-iolist-length i))] + [else (bad-iolist 'iolist-length i)])) + +(define (count-iolist i) + (if (counted-iolist? i) + i + (counted-iolist i (iolist-length i)))) + +(define (iolist->bytes i) + (if (bytes? i) + i + (let ((buffer (make-bytes (iolist-length i)))) + (let walk ((i i) (offset 0)) + (cond [(byte? i) (bytes-set! buffer offset i) (+ offset 1)] + [(bytes? i) (bytes-copy! buffer offset i) (+ offset (bytes-length i))] + [(pair? i) (walk (cdr i) (walk (car i) offset))] + [(null? i) offset] + [(void? i) offset] + [(counted-iolist? i) (walk (counted-iolist-value i) offset)])) + buffer))) + +(define (write-iolist i [out-port (current-output-port)]) + (cond [(byte? i) (write-byte i out-port)] + [(bytes? i) (write-bytes i out-port)] + [(pair? i) (write-iolist (car i) out-port) (write-iolist (cdr i) out-port)] + [(null? i) (void)] + [(void? i) (void)] + [(counted-iolist? i) (write-iolist (counted-iolist-value i) out-port)] + [else (bad-iolist 'write-iolist i)])) + +(module+ test + (require rackunit) + (require (only-in racket/port with-output-to-bytes)) + + (check-equal? (iolist-length '((1 . 1) (1 . #"xxx") . 1)) 7) + (check-equal? (iolist-length '((1 . 1) (1 . #"xxx"))) 6) + (check-equal? (iolist-length 123) 1) + (check-equal? (iolist-length #"123") 3) + (check-equal? (iolist-length (cons 123 #"123")) 4) + + (check-equal? (iolist->bytes '((1 . 1) (1 . #"xxx") . 1)) (bytes 1 1 1 120 120 120 1)) + (check-equal? (iolist->bytes '((1 . 1) (1 . #"xxx"))) (bytes 1 1 1 120 120 120)) + (check-equal? (iolist->bytes 123) (bytes 123)) + (check-equal? (iolist->bytes #"123") (bytes 49 50 51)) + (check-equal? (iolist->bytes (cons 123 #"123")) (bytes 123 49 50 51)) + + (check-equal? (with-output-to-bytes (lambda () (write-iolist '((1 . 1) (1 . #"xxx") . 1)))) + (bytes 1 1 1 120 120 120 1))) diff --git a/implementations/racket/preserves/preserves/read-binary.rkt b/implementations/racket/preserves/preserves/read-binary.rkt index 0ec1f28..a6e446e 100644 --- a/implementations/racket/preserves/preserves/read-binary.rkt +++ b/implementations/racket/preserves/preserves/read-binary.rkt @@ -1,7 +1,8 @@ #lang racket/base (provide read-preserve/binary - bytes->preserve) + bytes->preserve + preserve-sequence-reader) (require racket/match) (require "record.rkt") @@ -10,7 +11,7 @@ (require "annotation.rkt") (require "varint.rkt") (require racket/set) -(require (only-in racket/port call-with-input-bytes)) +(require (only-in racket/port call-with-input-bytes port->bytes)) (define (default-on-short) (error 'read-preserve/binary "Short Preserves binary")) (define (default-on-fail message . args) (error 'read-preserve/binary (apply format message args))) @@ -21,7 +22,8 @@ #:read-syntax? [read-syntax? #f] #:decode-embedded [decode-embedded #f] #:on-short [on-short default-on-short] - [on-fail default-on-fail]) + [on-fail default-on-fail] + [expected-input-length #f]) (call-with-input-bytes bs (lambda (p) @@ -29,27 +31,43 @@ #:read-syntax? read-syntax? #:decode-embedded decode-embedded #:on-short on-short - on-fail) + on-fail + expected-input-length) [(? eof-object?) (on-short)] [v v])))) -(define ((between lo hi) v) (<= lo v hi)) - (define (read-preserve/binary [in-port (current-input-port)] #:read-syntax? [read-syntax? #f] #:decode-embedded [decode-embedded0 #f] #:on-short [on-short default-on-short] - [on-fail default-on-fail]) + [on-fail default-on-fail] + [expected-input-length #f]) (define read-annotations? read-syntax?) (define decode-embedded (or decode-embedded0 default-decode-embedded)) (let/ec return - (define (next) (wrap (pos) (next* (next-byte)))) + (define count expected-input-length) - (define (next* lead-byte) - (match (next** lead-byte) - ['#:end (return (on-fail "Unexpected sequence end marker"))] - [v v])) + (define (eof-guard v) + (if (eof-object? v) + (return (on-short)) + v)) + + (define (next-byte) (eof-guard (next-byte*))) + (define (next-byte*) + (cond [(not count) (read-byte in-port)] + [(zero? count) eof] + [else (begin0 (read-byte in-port) (set! count (- count 1)))])) + + (define (remaining-bytes) + (if (not count) + (port->bytes in-port) + (let ((bs (eof-guard (read-bytes count in-port)))) + (if (< (bytes-length bs) count) + (return (on-short)) + (begin0 bs (set! count 0)))))) + + (define (next) (wrap (pos) (next* (next-byte)))) (define pos (if read-syntax? @@ -66,61 +84,109 @@ (annotated '() (srcloc #f #f #f pos0 (- (pos) pos0)) v))) (lambda (pos0 v) v))) - (define (next** lead-byte) - (match lead-byte - [#x80 #f] - [#x81 #t] - [#x82 (float (floating-point-bytes->real (next-bytes 4) #t 0 4))] - [#x83 (floating-point-bytes->real (next-bytes 8) #t 0 8)] - [#x84 '#:end] - [#x85 (let ((a (next))) - (if read-annotations? - (annotate (next) a) - (next)))] - [#x86 (embedded (decode-embedded (next)))] - [(? (between #x90 #x9C) v) (- v #x90)] - [(? (between #x9D #x9F) v) (- v #xA0)] - [(? (between #xA0 #xAF) v) (next-integer (- v #xA0 -1))] - [#xB0 (next-integer (next-varint))] - [#xB1 (bytes->string/utf-8 (next-bytes (next-varint)))] - [#xB2 (next-bytes (next-varint))] - [#xB3 (string->symbol (bytes->string/utf-8 (next-bytes (next-varint))))] - [#xB4 (apply (lambda (label . fields) (record label fields)) (next-items))] - [#xB5 (next-items)] - [#xB6 (list->set (next-items))] - [#xB7 (build-dictionary (next-items))] - [_ (return (on-fail "Invalid Preserves binary tag: ~v" lead-byte))])) + (define (next* tag) + (match tag + [#xA0 #f] + [#xA1 #t] + [#xA2 (let ((bs (remaining-bytes))) + (match (bytes-length bs) + [4 (float (floating-point-bytes->real bs #t 0 4))] + [8 (floating-point-bytes->real bs #t 0 8)] + [n (return (on-fail "Invalid floating-point length: ~v" n))]))] + [#xA3 (let* ((acc0 (initial-integer)) + (acc (if (< acc0 128) acc0 (- acc0 256)))) + (for/fold [(acc acc)] [(b (remaining-bytes))] (+ (* acc 256) b)))] + [#xA4 (let* ((bs (remaining-bytes)) + (n (bytes-length bs))) + (if (or (zero? n) (not (zero? (bytes-ref bs (- n 1))))) + (return (on-fail "String not NUL terminated")) + (bytes->string/utf-8 (subbytes bs 0 (- n 1)))))] + [#xA5 (remaining-bytes)] + [#xA6 (string->symbol (bytes->string/utf-8 (remaining-bytes)))] + [#xA7 (apply (lambda (label . fields) (record label fields)) (next-items))] + [#xA8 (next-items)] + [#xA9 (list->set (next-items))] + [#xAA (build-dictionary (next-items))] + [#xAB (embedded (decode-embedded (next)))] + [#xBF (if read-annotations? + (apply annotate (next-items)) + (begin0 (next-item (next-byte)) (remaining-bytes)))] + [_ (return (on-fail "Invalid Preserves binary tag: ~v" tag))])) - (define (eof-guard v) - (if (eof-object? v) - (return (on-short)) - v)) + (define (initial-integer) + (cond [(not count) (match (read-byte in-port) [(? eof-object?) 0] [n n])] + [(zero? count) 0] + [else (next-byte)])) - (define (next-byte) (eof-guard (read-byte in-port))) - - (define (next-bytes n) - (define bs (eof-guard (read-bytes n in-port))) - (if (< (bytes-length bs) n) (return (on-short)) bs)) - - (define (next-varint) (eof-guard (read-varint in-port))) - - (define (next-integer n) - (cond [(zero? n) 0] - [else (define acc0 (next-byte)) - (define acc (if (< acc0 128) acc0 (- acc0 256))) - (for/fold [(acc acc)] [(n (in-range (- n 1)))] (+ (* acc 256) (next-byte)))])) + (define (next-item first-varint-byte) + (define block-len (eof-guard (read-varint in-port first-varint-byte))) + (define next-count (and count (- count block-len))) + (set! count block-len) + (begin0 (next) (set! count next-count))) (define (next-items) - (define pos0 (pos)) - (match (next** (next-byte)) - ['#:end '()] - [v (cons (wrap pos0 v) (next-items))])) + (cond [(not count) (match (read-byte in-port) + [(? eof-object?) '()] + [n (cons (next-item n) (next-items))])] + [(zero? count) '()] + [else (cons (next-item (next-byte)) (next-items))])) (define (build-dictionary items) (when (not (even? (length items))) (return (on-fail "Odd number of items in dictionary"))) (apply hash items)) - (let ((pos0 (pos))) - (match (read-byte in-port) - [(? eof-object?) eof] - [lead-byte (wrap pos0 (next* lead-byte))])))) + (if (not count) + (let ((pos0 (pos))) + (match (next-byte*) + [(? eof-object?) eof] + [tag (wrap pos0 (next* tag))])) + (next)))) + +(define (read-preserve/binary/length-prefix [in-port (current-input-port)] + #:read-syntax? [read-syntax? #f] + #:decode-embedded [decode-embedded #f] + #:on-short [on-short default-on-short] + [on-fail default-on-fail]) + (if (eof-object? (peek-byte in-port)) + eof + (match (read-varint in-port) + [(? eof-object?) (on-short)] + [block-len (read-preserve/binary in-port + #:read-syntax? read-syntax? + #:decode-embedded decode-embedded + #:on-short on-short + on-fail + block-len)]))) + +(define (preserve-sequence-reader [in-port (current-input-port)] + #:read-syntax? [read-syntax? #f] + #:decode-embedded [decode-embedded #f] + #:on-short [on-short default-on-short] + [on-fail default-on-fail]) + (if (eqv? (peek-byte in-port) #xA8) + (begin (read-byte in-port) + (lambda () (read-preserve/binary/length-prefix in-port + #:read-syntax? read-syntax? + #:decode-embedded decode-embedded + #:on-short on-short + on-fail))) + #f)) + +(module+ test + (require rackunit) + (require (only-in file/sha1 hex-string->bytes)) + + (let ((r (preserve-sequence-reader (open-input-bytes + (hex-string->bytes "a882a30182a30281a381"))))) + (check-equal? (r) 1) + (check-equal? (r) 2) + (check-equal? (r) 0) + (check-exn #px"Short Preserves binary" (lambda () (r)))) + + (check-equal? (read-preserve/binary/length-prefix (open-input-bytes (hex-string->bytes "82a301"))) 1) + (check-equal? (read-preserve/binary/length-prefix (open-input-bytes (hex-string->bytes "82a302"))) 2) + (check-equal? (read-preserve/binary/length-prefix (open-input-bytes (hex-string->bytes "81a3"))) 0) + + (check-equal? (read-preserve/binary/length-prefix (open-input-bytes (hex-string->bytes "81")) + #:on-short (lambda () 'short)) + 'short)) diff --git a/implementations/racket/preserves/preserves/tests/samples.pr b/implementations/racket/preserves/preserves/tests/samples.pr index 9450d49..2154093 100644 --- a/implementations/racket/preserves/preserves/tests/samples.pr +++ b/implementations/racket/preserves/preserves/tests/samples.pr @@ -43,134 +43,132 @@ "13/14 and 16/17, depending on how they wish to treat end-of-stream conditions." ]> - annotation2: - annotation3: - annotation4: + annotation2: + annotation3: + annotation4: - annotation5: > - annotation6: > + annotation5: > + annotation6: > annotation7: ;Stop reading symbols at @ -- this test has three separate annotations - - bytes2: - bytes2a: - bytes3: - bytes4: - bytes5: + + bytes2: + bytes2a: + bytes3: + bytes4: + bytes5: bytes6: @"Bytes must be 2-digits entire" - bytes7: - bytes8: - bytes9: - bytes10: - bytes11: + bytes7: + bytes8: + bytes9: + bytes10: + bytes11: bytes12: @"Bytes syntax only supports \\x, not \\u" - bytes13: + bytes13: - dict0: - dict1: + dict0: + dict1: dict2: @"Missing close brace" dict2a: @"Missing close brace" dict3: @"Duplicate key" dict4: @"Unexpected close brace" - dict5: @"Missing value" - double1: - double2: - float1: - int-257: - int-256: - int-255: - int-254: - int-129: - int-128: - int-127: - int-4: - int-3: - int-2: - int-1: - int0: - int1: - int12: - int13: - int127: - int128: - int255: - int256: - int32767: - int32768: - int65535: - int65536: - int131072: - int2500000000: - int87112285931760246646623899502532662132736: - list0: - list4: - list4a: - list5: - list6: - list7: + dict5: @"Missing value" + double1: + double2: + float1: + int-257: + int-256: + int-255: + int-254: + int-129: + int-128: + int-127: + int-4: + int-3: + int-2: + int-1: + int0: + int1: + int12: + int13: + int127: + int128: + int255: + int256: + int32767: + int32768: + int65535: + int65536: + int131072: + int2500000000: + int87112285931760246646623899502532662132736: + list0: + list4: + list4a: + list5: + list6: + list7: list8: @"Missing close bracket" list9: @"Unexpected close bracket" - list10: @"Missing end byte" + list10: @"Missing tag" noinput0: @"No input at all" - embed0: - embed1: - embed2: - record1: >> - record2: , >>>> - record3: "Dr">> - record4: > - record5: > - record6: > - record7: > - record8: 3 4>> + embed0: + embed1: + embed2: + record1: >> + record2: , >>>> + record3: "Dr">> + record4: > + record5: > + record6: > + record7: > + record8: 3 4>> record9: @"Missing record label" "> record10: @"Missing close-angle-bracket" record11: @"Unexpected close-angle-bracket" "> - set0: - set1: + set0: + set1: set2: @"Missing close brace" set2a: @"Missing close brace" set3: @"Duplicate value" - string0: - string3: - string4: - string5: - symbol0: - symbol2: - tag0: @"Unexpected end tag" + string0: + string3: + string4: + string5: + symbol0: + symbol2: tag1: @"Invalid tag" - tag2: @"Invalid tag" + tag2: @"Invalid tag" whitespace0: @"Leading spaces have to eventually yield something" whitespace1: @"No input at all" - value1: - value2: - value3: - value4: - value5: - value6: + value1: + value2: + value3: + value4: + value5: + value6: - longlist14: - longlist15: longlist100: - longlist200: - rfc8259-example1: rfc8259-example2: preserve (subbytes bs 0 i) #:decode-embedded strip-annotations - #:on-short (lambda () 'short) void)) + #:on-short (lambda () 'short) + void + (bytes-length bs))) (when (and (not (eq? result 'short)) (not (and allow-invalid-prefix? (void? result)))) (error 'd "~a-byte prefix of ~v does not read as short; result: ~v" i bs result))) diff --git a/implementations/racket/preserves/preserves/varint.rkt b/implementations/racket/preserves/preserves/varint.rkt index 2e84492..d17db06 100644 --- a/implementations/racket/preserves/preserves/varint.rkt +++ b/implementations/racket/preserves/preserves/varint.rkt @@ -1,33 +1,31 @@ #lang racket/base -;; "varints" from Google Protocol Buffers, -;; https://developers.google.com/protocol-buffers/docs/encoding#varints -;; -;; "Each byte in a varint, except the last byte, has the most -;; significant bit (msb) set – this indicates that there are further -;; bytes to come. The lower 7 bits of each byte are used to store the -;; two's complement representation of the number in groups of 7 bits, -;; least significant group first." -(provide write-varint +(provide varint->iolist + write-varint read-varint encode-varint decode-varint) (require racket/port) -(define (write-varint v out-port) - (if (< v 128) - (write-byte v out-port) - (begin (write-byte (+ 128 (modulo v 128)) out-port) - (write-varint (quotient v 128) out-port)))) +(define (varint->iolist n) + (let wr ((n n) (d 128)) + (if (< n 128) + (+ d n) + (cons (wr (quotient n 128) 0) (+ d (modulo n 128)))))) -(define (read-varint in-port) - (let/ec return - (let loop () - (define b (read-byte in-port)) - (cond [(eof-object? b) (return b)] - [(< b 128) b] - [else (+ (* (loop) 128) (- b 128))])))) +(define (write-varint n out-port) + (let wr ((n n) (d 128)) + (if (< n 128) + (write-byte (+ d n) out-port) + (begin (wr (quotient n 128) 0) + (write-byte (+ d (modulo n 128)) out-port))))) + +(define (read-varint in-port [first-byte (read-byte in-port)]) + (let rd ((acc 0) (b first-byte)) + (cond [(eof-object? b) eof] + [(< b 128) (rd (+ (* acc 128) b) (read-byte in-port))] + [else (+ (* acc 128) (- b 128))]))) (define (encode-varint v) (call-with-output-bytes (lambda (p) (write-varint v p)))) @@ -42,28 +40,28 @@ (module+ test (require rackunit) - (check-equal? (encode-varint 0) (bytes 0)) - (check-equal? (encode-varint 1) (bytes 1)) - (check-equal? (encode-varint 127) (bytes 127)) - (check-equal? (encode-varint 128) (bytes 128 1)) - (check-equal? (encode-varint 255) (bytes 255 1)) - (check-equal? (encode-varint 256) (bytes 128 2)) - (check-equal? (encode-varint 300) (bytes #b10101100 #b00000010)) - (check-equal? (encode-varint 1000000000) (bytes 128 148 235 220 3)) + (check-equal? (encode-varint 0) (bytes 128)) + (check-equal? (encode-varint 1) (bytes 129)) + (check-equal? (encode-varint 127) (bytes 255)) + (check-equal? (encode-varint 128) (bytes 1 128)) + (check-equal? (encode-varint 255) (bytes 1 255)) + (check-equal? (encode-varint 256) (bytes 2 128)) + (check-equal? (encode-varint 300) (bytes #b00000010 #b10101100)) + (check-equal? (encode-varint 1000000000) (bytes 3 92 107 20 128)) (define (ks* v rest) (list v rest)) (define (kf* [short? #f]) (if short? 'short (void))) (check-equal? (decode-varint (bytes) ks* kf*) 'short) - (check-equal? (decode-varint (bytes 0) ks* kf*) (list 0 (bytes))) - (check-equal? (decode-varint (bytes 0 99) ks* kf*) (list 0 (bytes 99))) - (check-equal? (decode-varint (bytes 1) ks* kf*) (list 1 (bytes))) - (check-equal? (decode-varint (bytes 127) ks* kf*) (list 127 (bytes))) - (check-equal? (decode-varint (bytes 128) ks* kf*) 'short) - (check-equal? (decode-varint (bytes 128 1) ks* kf*) (list 128 (bytes))) - (check-equal? (decode-varint (bytes 128 1 99) ks* kf*) (list 128 (bytes 99))) - (check-equal? (decode-varint (bytes 255 1) ks* kf*) (list 255 (bytes))) - (check-equal? (decode-varint (bytes 128 2) ks* kf*) (list 256 (bytes))) - (check-equal? (decode-varint (bytes #b10101100 #b00000010) ks* kf*) (list 300 (bytes))) - (check-equal? (decode-varint (bytes 128 148 235 220 3) ks* kf*) (list 1000000000 (bytes))) - (check-equal? (decode-varint (bytes 128 148 235 220 3 99) ks* kf*) (list 1000000000 (bytes 99)))) + (check-equal? (decode-varint (bytes 128) ks* kf*) (list 0 (bytes))) + (check-equal? (decode-varint (bytes 128 99) ks* kf*) (list 0 (bytes 99))) + (check-equal? (decode-varint (bytes 129) ks* kf*) (list 1 (bytes))) + (check-equal? (decode-varint (bytes 255) ks* kf*) (list 127 (bytes))) + (check-equal? (decode-varint (bytes 0) ks* kf*) 'short) + (check-equal? (decode-varint (bytes 1 128) ks* kf*) (list 128 (bytes))) + (check-equal? (decode-varint (bytes 1 128 99) ks* kf*) (list 128 (bytes 99))) + (check-equal? (decode-varint (bytes 1 255) ks* kf*) (list 255 (bytes))) + (check-equal? (decode-varint (bytes 2 128) ks* kf*) (list 256 (bytes))) + (check-equal? (decode-varint (bytes #b00000010 #b10101100) ks* kf*) (list 300 (bytes))) + (check-equal? (decode-varint (bytes 3 92 107 20 128) ks* kf*) (list 1000000000 (bytes))) + (check-equal? (decode-varint (bytes 3 92 107 20 128 99) ks* kf*) (list 1000000000 (bytes 99)))) diff --git a/implementations/racket/preserves/preserves/write-binary.rkt b/implementations/racket/preserves/preserves/write-binary.rkt index 664b433..0cdf70e 100644 --- a/implementations/racket/preserves/preserves/write-binary.rkt +++ b/implementations/racket/preserves/preserves/write-binary.rkt @@ -1,7 +1,8 @@ #lang racket/base -(provide write-preserve/binary - preserve->bytes) +(provide preserve->iolist + preserve->bytes + write-preserve/binary) (require racket/match) (require (only-in racket/port call-with-output-bytes)) @@ -11,120 +12,97 @@ (require "annotation.rkt") (require "varint.rkt") (require "object-id.rkt") +(require "iolist.rkt") (require racket/set) (require racket/dict) (require (only-in racket/list flatten)) +(define (preserve->iolist v + #:canonicalizing? [canonicalizing? #t] + #:encode-embedded [encode-embedded0 #f] + #:write-annotations? [write-annotations? (not canonicalizing?)]) + (define encode-embedded (or encode-embedded0 object-id)) + + (define (prepare v) + (iolist->bytes (preserve->iolist v #:canonicalizing? #t #:encode-embedded encode-embedded0))) + + (define (length-prefixed-iolist i) + (define c (count-iolist i)) + (cons (varint->iolist (counted-iolist-length c)) c)) + + (define (length-prefixed v) (length-prefixed-iolist (encode v))) + + (define (encode v) + (match v + [#f #xA0] + [#t #xA1] + [(float v) (cons #xA2 (real->floating-point-bytes v 4 #t))] + [(? flonum?) (cons #xA2 (real->floating-point-bytes v 8 #t))] + [(? integer?) (cons #xA3 (when (not (zero? v)) + (define nbits (bitwise-and (+ (integer-length v) 8) -8)) + (for/list [(shift (in-range nbits 0 -8))] + (bitwise-bit-field v (- shift 8) shift))))] + [(? string?) (list #xA4 (string->bytes/utf-8 v) 0)] + [(? bytes?) (cons #xA5 v)] + [(? symbol?) (cons #xA6 (string->bytes/utf-8 (symbol->string v)))] + [(record label fields) (list #xA7 (length-prefixed label) (map length-prefixed fields))] + [(? list?) (cons #xA8 (map length-prefixed v))] + [(? set?) (cons #xA9 (set->iolist v))] + [(? dict?) (cons #xAA (dict->iolist v))] + [(embedded value) (cons #xAB (encode (encode-embedded value)))] + + [(annotated as _ v) + (if (and write-annotations? (pair? as)) + (list #xBF (length-prefixed v) (map length-prefixed as)) + (encode v))] + + [other (error 'preserve->iolist "Attempt to serialize non-preserve: ~v" other)])) + + (define set->iolist + (match* [canonicalizing? write-annotations?] + [[#t #f] (lambda (v) + (map length-prefixed-iolist + (sort (for/list [(e (in-set v))] (prepare e)) bytesiolist + (match* [canonicalizing? write-annotations?] + [[#t #f] (lambda (v) + (map (match-lambda [(list kb _ v) (cons (length-prefixed-iolist kb) + (length-prefixed v))]) + (prepare-dict v)))] + [[#t #t] (lambda (v) + (map (match-lambda [(list _ k v) (cons (length-prefixed k) + (length-prefixed v))]) + (prepare-dict v)))] + [[#f _] (lambda (v) + (for/list [((k v) (in-dict v))] (cons (length-prefixed k) (length-prefixed v))))])) + + (encode v)) + (define (preserve->bytes v #:canonicalizing? [canonicalizing? #t] #:encode-embedded [encode-embedded #f] #:write-annotations? [write-annotations? (not canonicalizing?)]) - (call-with-output-bytes - (lambda (p) (write-preserve/binary v p - #:canonicalizing? canonicalizing? - #:encode-embedded encode-embedded - #:write-annotations? write-annotations?)))) + (iolist->bytes (preserve->iolist v + #:canonicalizing? canonicalizing? + #:encode-embedded encode-embedded + #:write-annotations? write-annotations?))) (define (write-preserve/binary v [out-port (current-output-port)] #:canonicalizing? [canonicalizing? #t] - #:encode-embedded [encode-embedded0 #f] + #:encode-embedded [encode-embedded #f] #:write-annotations? [write-annotations? (not canonicalizing?)]) - (define encode-embedded (or encode-embedded0 object-id)) - - (define (output-byte b) - (write-byte b out-port)) - - (define (output-bytes bs) - (write-bytes bs out-port)) - - (define (output-varint v) - (write-varint v out-port)) - - (define-syntax-rule (with-seq tag body ...) - (begin (output-byte (+ tag #xB0)) - body ... - (output-byte #x84))) - - (define (count-bytes tag bs) - (output-byte (+ tag #xB0)) - (output-varint (bytes-length bs)) - (output-bytes bs)) - - (define (prepare v) (preserve->bytes v #:canonicalizing? #t)) - - (define (output-all vs) - (for [(v (in-list vs))] (output v))) - - (define output-set - (match* [canonicalizing? write-annotations?] - [[#t #f] (lambda (v) - (for-each output-bytes - (sort (for/list [(e (in-set v))] (prepare e)) bytesfloating-point-bytes v 4 #t))] - [(? flonum?) - (output-byte #x83) - (output-bytes (real->floating-point-bytes v 8 #t))] - - [(annotated as _ v) - (when write-annotations? - (for [(a (in-list as))] - (output-byte #x85) - (output a))) - (output v)] - - [(? integer?) - (cond [(<= -3 v -1) (output-byte (+ v #xA0))] - [(<= 0 v 12) (output-byte (+ v #x90))] - [else (define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit - (define byte-count (quotient (+ raw-bit-count 7) 8)) - (if (<= byte-count 16) - (output-byte (+ byte-count #xA0 -1)) - (begin (output-byte #xB0) - (output-varint byte-count))) - (for [(shift (in-range (* byte-count 8) 0 -8))] - (output-byte (bitwise-bit-field v (- shift 8) shift)))])] - - [(? string?) (count-bytes 1 (string->bytes/utf-8 v))] - [(? bytes?) (count-bytes 2 v)] - [(? symbol?) (count-bytes 3 (string->bytes/utf-8 (symbol->string v)))] - - [(record label fields) (with-seq 4 (output label) (output-all fields))] - [(? list?) (with-seq 5 (output-all v))] - [(? set?) (with-seq 6 (output-set v))] - [(? dict?) (with-seq 7 (output-dict v))] - - [(embedded value) - (output-byte #x86) - (output (encode-embedded value))] - - [other (error 'write-preserve/binary "Attempt to serialize non-preserve: ~v" other)])) - - (output v)) + (write-iolist (preserve->iolist v + #:canonicalizing? canonicalizing? + #:encode-embedded encode-embedded + #:write-annotations? write-annotations?) + out-port))