#lang racket/base ;; Preserve, as in Fruit Preserve, as in a remarkably weak pun on pickling/dehydration etc (provide (struct-out stream-of) (struct-out record) short-form-labels encode decode wire-value) (require racket/bytes) (require racket/dict) (require racket/generator) (require racket/match) (require racket/set) (require bitsyntax) (require syndicate/support/struct) (require imperative-syndicate/assertions) (require imperative-syndicate/pattern) (struct stream-of (kind generator) #:transparent) (struct record (label fields) #:transparent) (define short-form-labels (make-parameter (vector 'discard 'capture 'observe))) (define version 1) (define (encode v) (bit-string->bytes (bit-string (version :: bits 8) (v :: (wire-value))))) (define (decode bs [on-fail (lambda () (error 'decode "Invalid encoding: ~v" bs))]) (bit-string-case bs ([ (= version :: bits 8) (v :: (wire-value)) ] v) (else (on-fail)))) (define-syntax wire-value (syntax-rules () [(_ #t input ks kf) (decode-value input ks kf)] [(_ #f v) (encode-value v)])) (define-syntax wire-length (syntax-rules () [(_ #t input ks kf) (decode-wire-length input ks kf)] [(_ #f v) (encode-wire-length v)])) (define (encode-wire-length v) (when (negative? v) (error 'encode-wire-length "Cannot encode negative wire-length ~v" v)) (if (< v #b1111) (bit-string (v :: bits 4)) (bit-string (#b1111 :: bits 4) ((encode-varint v) :: binary)))) (define (encode-varint v) (if (< v 128) (bytes v) (bit-string ((+ (modulo v 128) 128) :: bits 8) ((encode-varint (quotient v 128)) :: binary)))) (define (encode-array-like major minor fields) (bit-string (major :: bits 2) (minor :: bits 2) ((length fields) :: (wire-length)) ((apply bit-string-append (map encode-value fields)) :: binary))) (define (encode-binary-like major minor bs) (bit-string (major :: bits 2) (minor :: bits 2) ((bytes-length bs) :: (wire-length)) (bs :: binary))) (define (encode-start-byte major minor) (bit-string (#b0010 :: bits 4) (major :: bits 2) (minor :: bits 2))) (define (encode-end-byte major minor) (bit-string (#b0011 :: bits 4) (major :: bits 2) (minor :: bits 2))) (define (encode-stream major minor chunk-ok? generator) (bit-string-append (encode-start-byte major minor) (let loop () (match (generator) [(? void?) #""] [(? chunk-ok? v) (bit-string-append (encode-value v) (loop))] [bad (error 'encode-stream "Cannot encode chunk: ~v" bad)])) (encode-end-byte major minor))) (define (dict-keys-and-values d) (reverse (for/fold [(acc '())] [((k v) (in-dict d))] (cons k (cons v acc))))) (define (short-form-for-label key) (let ((labels (short-form-labels))) (let loop ((i 0)) (cond [(= i 3) #f] [(equal? (vector-ref labels i) key) i] [else (loop (+ i 1))])))) (define (encode-record key fields) (define short (short-form-for-label key)) (if short (encode-array-like 2 short fields) (encode-array-like 2 3 (cons key fields)))) (define (encode-value v) (match v [#f (bytes #b00000000)] [#t (bytes #b00000001)] [(? single-flonum?) (bit-string #b00000010 (v :: float bits 32))] [(? double-flonum?) (bit-string #b00000011 (v :: float bits 64))] [(? integer? x) #:when (<= -3 x 12) (bit-string (#b0001 :: bits 4) (x :: bits 4))] [(stream-of 'string p) (encode-stream 1 1 bytes? p)] [(stream-of 'byte-string p) (encode-stream 1 2 bytes? p)] [(stream-of 'symbol p) (encode-stream 1 3 bytes? p)] [(stream-of 'sequence p) (encode-stream 3 0 (lambda (x) #t) p)] [(stream-of 'set p) (encode-stream 3 1 (lambda (x) #t) p)] [(stream-of 'dictionary p) (encode-stream 3 2 (lambda (x) #t) p)] ;; [0 (bytes #b10000000)] [(? integer?) (define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit (define byte-count (quotient (+ raw-bit-count 7) 8)) (bit-string (#b0100 :: bits 4) (byte-count :: (wire-length)) (v :: integer bytes byte-count))] [(? string?) (encode-binary-like 1 1 (string->bytes/utf-8 v))] [(? bytes?) (encode-binary-like 1 2 v)] [(? symbol?) (encode-binary-like 1 3 (string->bytes/utf-8 (symbol->string v)))] [(record label fields) (encode-record label fields)] [(? non-object-struct?) (define key (prefab-struct-key v)) (when (not key) (error 'encode-value "Cannot encode non-prefab struct ~v" v)) (encode-record key (cdr (vector->list (struct->vector v))))] [(? list?) (encode-array-like 3 0 v)] [(? set?) (encode-array-like 3 1 (set->list v))] [(? dict?) (encode-array-like 3 2 (dict-keys-and-values v))] [_ (error 'encode-value "Cannot encode value ~v" v)])) ;;--------------------------------------------------------------------------- (define (decode-wire-length bs ks kf) (bit-string-case bs ([ (= #b1111 :: bits 4) (rest :: binary) ] (decode-varint rest (lambda (v tail) (if (< v #b1111) (kf) (ks v tail))) kf)) ([ (v :: bits 4) (rest :: binary) ] (ks v rest)) (else (kf)))) (define (decode-varint bs ks kf) (bit-string-case bs ([ (= 1 :: bits 1) (v :: bits 7) (rest :: binary) ] (decode-varint rest (lambda (acc tail) (ks (+ (* acc 128) v) tail)) kf)) ([ (= 0 :: bits 1) (v :: bits 7) (rest :: binary) ] (ks v rest)) (else (kf)))) (define (decode-values n acc-rev bs ks kf) (if (zero? n) (ks (reverse acc-rev) bs) (bit-string-case bs ([ (v :: (wire-value)) (rest :: binary) ] (decode-values (- n 1) (cons v acc-rev) rest ks kf)) (else (kf))))) (define (decode-binary minor bs rest ks kf) (match minor [0 (if (positive? (bit-string-length bs)) (ks (bit-string->signed-integer bs #t) rest) (ks 0 rest))] [2 (ks bs rest)] [(or 1 3) ((with-handlers [(exn:fail:contract? (lambda (e) kf))] (define s (bytes->string/utf-8 bs)) (lambda () (ks (if (= minor 3) (string->symbol s) s) rest))))])) (define (decode-record minor fields rest ks kf) (define (build key fs) (ks (with-handlers [(exn:fail:contract? (lambda (e) (record key fs)))] (apply make-prefab-struct key fs)) rest)) (match* (minor fields) [(3 (list* key fs)) (build key fs)] [(3 '()) (kf)] [(n fs) (build (vector-ref (short-form-labels) n) fs)])) (define (decode-collection minor vs rest ks kf) (match minor [0 (ks vs rest)] [1 (ks (list->set vs) rest)] [2 (if (even? (length vs)) (ks (apply hash vs) rest) (kf))] [_ (kf)])) (define (decode-stream major minor chunk-ok? join-chunks decode rest ks kf) (let loop ((acc-rev '()) (rest rest)) (bit-string-case rest ([ (= #b0011 :: bits 4) (emajor :: bits 2) (eminor :: bits 2) (rest :: binary) ] (if (and (= major emajor) (= minor eminor)) (decode minor (join-chunks (reverse acc-rev)) rest ks kf) (kf))) (else (decode-value rest (lambda (chunk rest) (if (chunk-ok? chunk) (loop (cons chunk acc-rev) rest) (kf))) kf))))) (define (decode-value bs ks kf) (bit-string-case bs ([ (= #b00000000 :: bits 8) (rest :: binary) ] (ks #f rest)) ([ (= #b00000001 :: bits 8) (rest :: binary) ] (ks #t rest)) ([ (= #b00000010 :: bits 8) (v :: float bits 32) (rest :: binary) ] (ks (real->single-flonum v) rest)) ([ (= #b00000011 :: bits 8) (v :: float bits 64) (rest :: binary) ] (ks v rest)) ([ (= #b0001 :: bits 4) (x :: bits 4) (rest :: binary) ] (ks (if (> x 12) (- x 16) x) rest)) ([ (= #b001001 :: bits 6) (minor :: bits 2) (rest :: binary) ] (decode-stream 1 minor bytes? bytes-append* decode-binary rest ks kf)) ([ (= #b001010 :: bits 6) (minor :: bits 2) (rest :: binary) ] (decode-stream 2 minor (lambda (x) #t) values decode-record rest ks kf)) ([ (= #b001011 :: bits 6) (minor :: bits 2) (rest :: binary) ] (decode-stream 3 minor (lambda (x) #t) values decode-collection rest ks kf)) ([ (= #b01 :: bits 2) (minor :: bits 2) (byte-count :: (wire-length)) (bits :: binary bytes byte-count) (rest :: binary) ] (decode-binary minor (bit-string->bytes bits) rest ks kf)) ([ (= #b10 :: bits 2) (minor :: bits 2) (field-count :: (wire-length)) (rest :: binary) ] (decode-values field-count '() rest (lambda (fields rest) (decode-record minor fields rest ks kf)) kf)) ([ (= #b11 :: bits 2) (minor :: bits 2) (count :: (wire-length)) (rest :: binary) ] (decode-values count '() rest (lambda (vs rest) (decode-collection minor vs rest ks kf)) kf)) (else (kf)))) ;;--------------------------------------------------------------------------- (module+ test (require rackunit) (require (for-syntax racket syntax/srcloc)) (check-equal? (bit-string->bytes (encode-varint 0)) (bytes 0)) (check-equal? (bit-string->bytes (encode-varint 1)) (bytes 1)) (check-equal? (bit-string->bytes (encode-varint 127)) (bytes 127)) (check-equal? (bit-string->bytes (encode-varint 128)) (bytes 128 1)) (check-equal? (bit-string->bytes (encode-varint 255)) (bytes 255 1)) (check-equal? (bit-string->bytes (encode-varint 256)) (bytes 128 2)) (check-equal? (bit-string->bytes (encode-varint 300)) (bytes #b10101100 #b00000010)) (check-equal? (bit-string->bytes (encode-varint 1000000000)) (bytes 128 148 235 220 3)) (define (ks* v rest) (list v (bit-string->bytes rest))) (define (kf*) (void)) (check-equal? (decode-varint (bytes 0) ks* kf*) (list 0 (bytes))) (check-equal? (decode-varint (bytes 0 99) ks* kf*) (list 0 (bytes 99))) (check-equal? (decode-varint (bytes 1) ks* kf*) (list 1 (bytes))) (check-equal? (decode-varint (bytes 127) ks* kf*) (list 127 (bytes))) (check-equal? (decode-varint (bytes 128 1) ks* kf*) (list 128 (bytes))) (check-equal? (decode-varint (bytes 128 1 99) ks* kf*) (list 128 (bytes 99))) (check-equal? (decode-varint (bytes 255 1) ks* kf*) (list 255 (bytes))) (check-equal? (decode-varint (bytes 128 2) ks* kf*) (list 256 (bytes))) (check-equal? (decode-varint (bytes #b10101100 #b00000010) ks* kf*) (list 300 (bytes))) (check-equal? (decode-varint (bytes 128 148 235 220 3) ks* kf*) (list 1000000000 (bytes))) (check-equal? (decode-varint (bytes 128 148 235 220 3 99) ks* kf*) (list 1000000000 (bytes 99))) (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (0 :: (wire-length)))) (bytes 0)) (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (3 :: (wire-length)))) (bytes 3)) (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (14 :: (wire-length)))) (bytes 14)) (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (15 :: (wire-length)))) (bytes 15 15)) (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (100 :: (wire-length)))) (bytes 15 100)) (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (300 :: (wire-length)))) (bytes 15 #b10101100 #b00000010)) (define (dwl bs) (bit-string-case bs ([ (= 0 :: bits 4) (w :: (wire-length)) ] w) (else (void)))) (check-equal? (dwl (bytes 0)) 0) (check-equal? (dwl (bytes 3)) 3) (check-equal? (dwl (bytes 14)) 14) (check-equal? (dwl (bytes 15)) (void)) (check-equal? (dwl (bytes 15 9)) (void)) ;; not canonical (check-equal? (dwl (bytes 15 15)) 15) (check-equal? (dwl (bytes 15 100)) 100) (check-equal? (dwl (bytes 15 #b10101100 #b00000010)) 300) (struct speak (who what) #:prefab) (define (expected . pieces) (bit-string->bytes (apply bit-string-append (map (match-lambda [(? byte? b) (bytes b)] [(? bytes? bs) bs] [(? string? s) (string->bytes/utf-8 s)]) pieces)))) (define (d bs) (decode bs void)) (define-syntax (check-both-directions stx) (syntax-case stx () ((_ v (b ...)) #'(let ((val v)) (check-both-directions v v (b ...)))) ((_ forward back (b ...)) #`(let ((loc #,(source-location->string #'forward))) (check-equal? (d (encode forward)) back loc) (check-equal? (d (encode back)) back loc) (check-equal? (d (expected version b ...)) back loc) (check-equal? (encode forward) (expected version b ...) loc) )))) (check-both-directions (capture (discard)) (#x91 #x80)) (check-both-directions (observe (speak (discard) (capture (discard)))) (#xA1 #xB3 #x75 "speak" #x80 #x91 #x80)) (check-both-directions '(1 2 3 4) (#xC4 #x11 #x12 #x13 #x14)) (check-both-directions (stream-of 'sequence (sequence->generator '(1 2 3 4))) '(1 2 3 4) (#x2C #x11 #x12 #x13 #x14 #x3C)) (check-both-directions '(-2 -1 0 1) (#xC4 #x1E #x1F #x10 #x11)) (check-both-directions "hello" (#x55 "hello")) (check-both-directions (stream-of 'string (sequence->generator '(#"he" #"llo"))) "hello" (#x25 #x62 "he" #x63 "llo" #x35)) (check-both-directions (stream-of 'string (sequence->generator '(#"he" #"ll" #"" #"" #"o"))) "hello" (#x25 #x62 "he" #x62 "ll" #x60 #x60 #x61 "o" #x35)) (check-both-directions (stream-of 'byte-string (sequence->generator '(#"he" #"ll" #"" #"" #"o"))) #"hello" (#x26 #x62 "he" #x62 "ll" #x60 #x60 #x61 "o" #x36)) (check-both-directions (stream-of 'symbol (sequence->generator '(#"he" #"ll" #"" #"" #"o"))) 'hello (#x27 #x62 "he" #x62 "ll" #x60 #x60 #x61 "o" #x37)) (check-both-directions `("hello" there #"world" () ,(set) #t #f) (#xC7 #x55 "hello" #x75 "there" #x65 "world" #xC0 #xD0 #x01 #x00)) (check-both-directions -257 (#x42 #xFE #xFF)) (check-both-directions -256 (#x42 #xFF #x00)) (check-both-directions -255 (#x42 #xFF #x01)) (check-both-directions -254 (#x42 #xFF #x02)) (check-both-directions -129 (#x42 #xFF #x7F)) (check-both-directions -128 (#x41 #x80)) (check-both-directions -127 (#x41 #x81)) (check-both-directions -4 (#x41 #xFC)) (check-both-directions -3 (#x1D)) (check-both-directions -2 (#x1E)) (check-both-directions -1 (#x1F)) (check-both-directions 0 (#x10)) (check-both-directions 1 (#x11)) (check-both-directions 12 (#x1C)) (check-both-directions 13 (#x41 #x0D)) (check-both-directions 127 (#x41 #x7F)) (check-both-directions 128 (#x42 #x00 #x80)) (check-both-directions 255 (#x42 #x00 #xFF)) (check-both-directions 256 (#x42 #x01 #x00)) (check-both-directions 32767 (#x42 #x7F #xFF)) (check-both-directions 32768 (#x43 #x00 #x80 #x00)) (check-both-directions 65535 (#x43 #x00 #xFF #xFF)) (check-both-directions 65536 (#x43 #x01 #x00 #x00)) (check-both-directions 131072 (#x43 #x02 #x00 #x00)) (check-both-directions 1.0f0 (#b00000010 #b00111111 #b10000000 0 0)) (check-both-directions 1.0 (#b00000011 #b00111111 #b11110000 0 0 0 0 0 0)) (check-both-directions -1.202e300 (#x03 #xFE #x3C #xB7 #xB7 #x59 #xBF #x04 #x26)) (check-equal? (d (bytes (+ version 1) #x91 #x80)) (void)) (check-equal? (d (expected version #x25 #x51 "a" #x35)) (void)) ;; Bad chunk type: must be bytes (check-equal? (d (expected version #x25 #x71 "a" #x35)) (void)) ;; Bad chunk type: must be bytes (check-equal? (d (expected version #x26 #x51 "a" #x36)) (void)) ;; Bad chunk type: must be bytes (check-equal? (d (expected version #x26 #x71 "a" #x36)) (void)) ;; Bad chunk type: must be bytes (check-equal? (d (expected version #x27 #x51 "a" #x37)) (void)) ;; Bad chunk type: must be bytes (check-equal? (d (expected version #x27 #x71 "a" #x37)) (void)) ;; Bad chunk type: must be bytes (check-equal? (d (expected version #x25 #x61 "a" #x35)) "a") (check-equal? (d (expected version #x26 #x61 "a" #x36)) #"a") (check-equal? (d (expected version #x27 #x61 "a" #x37)) 'a) (struct date (year month day) #:prefab) (struct thing (id) #:prefab) (struct person thing (name date-of-birth) #:prefab) (struct titled person (title) #:prefab) (check-both-directions (titled 101 "Blackwell" (date 1821 2 3) "Dr") (#xB5 ;; Record, generic, 4+1 #xC5 ;; Sequence, 5 #x76 #x74 #x69 #x74 #x6C #x65 #x64 ;; Symbol, "titled" #x76 #x70 #x65 #x72 #x73 #x6F #x6E ;; Symbol, "person" #x12 ;; SignedInteger, "2" #x75 #x74 #x68 #x69 #x6E #x67 ;; Symbol, "thing" #x11 ;; SignedInteger, "1" #x41 #x65 ;; SignedInteger, "101" #x59 #x42 #x6C #x61 #x63 #x6B #x77 #x65 #x6C #x6C ;; String, "Blackwell" #xB4 ;; Record, generic, 3+1 #x74 #x64 #x61 #x74 #x65 ;; Symbol, "date" #x42 #x07 #x1D ;; SignedInteger, "1821" #x12 ;; SignedInteger, "2" #x13 ;; SignedInteger, "3" #x52 #x44 #x72 ;; String, "Dr" )) (check-both-directions (record 'discard '()) (discard) (#x80)) (check-both-directions (record 'discard '(surprise)) '#s(discard surprise) (#x81 #x78 "surprise")) (check-both-directions (record 'capture '(x)) (capture 'x) (#x91 #x71 "x")) (check-both-directions (record 'observe '(x)) (observe 'x) (#xA1 #x71 "x")) (check-both-directions (record 'observe '(x y)) '#s(observe x y) (#xA2 #x71 "x" #x71 "y")) (check-both-directions (record 'other '(x y)) '#s(other x y) (#xB3 #x75 "other" #x71 "x" #x71 "y")) (check-both-directions (record "aString" '(3 4)) (#xB3 #x57 "aString" #x13 #x14)) (check-both-directions (record (discard) '(3 4)) (#xB3 #x80 #x13 #x14)) )