Optionally-canonicalizing binary output.
This commit is contained in:
parent
02c02b641f
commit
0832b94ce9
|
@ -2,6 +2,7 @@
|
||||||
;; Preserve, as in Fruit Preserve, as in a remarkably weak pun on pickling/dehydration etc
|
;; Preserve, as in Fruit Preserve, as in a remarkably weak pun on pickling/dehydration etc
|
||||||
|
|
||||||
(provide (struct-out stream-of)
|
(provide (struct-out stream-of)
|
||||||
|
stream-of->preserve
|
||||||
(all-from-out "record.rkt")
|
(all-from-out "record.rkt")
|
||||||
(struct-out annotated)
|
(struct-out annotated)
|
||||||
annotate
|
annotate
|
||||||
|
@ -22,9 +23,13 @@
|
||||||
encode
|
encode
|
||||||
decode
|
decode
|
||||||
decode-syntax
|
decode-syntax
|
||||||
wire-value)
|
wire-value
|
||||||
|
preserve-order
|
||||||
|
preserve<?
|
||||||
|
canonicalize-preserves?)
|
||||||
|
|
||||||
(require racket/bytes)
|
(require racket/bytes)
|
||||||
|
(require (only-in racket/contract any/c))
|
||||||
(require racket/dict)
|
(require racket/dict)
|
||||||
(require (only-in racket/format ~a))
|
(require (only-in racket/format ~a))
|
||||||
(require racket/generator)
|
(require racket/generator)
|
||||||
|
@ -37,9 +42,22 @@
|
||||||
(require (only-in syntax/readerr raise-read-error raise-read-eof-error))
|
(require (only-in syntax/readerr raise-read-error raise-read-eof-error))
|
||||||
(require net/base64)
|
(require net/base64)
|
||||||
(require (for-syntax racket/base))
|
(require (for-syntax racket/base))
|
||||||
|
(require data/order)
|
||||||
|
|
||||||
(struct stream-of (kind generator-thunk) #:transparent)
|
(struct stream-of (kind generator-thunk) #:transparent)
|
||||||
|
|
||||||
|
(define (stream-of->preserve s)
|
||||||
|
(match-define (stream-of kind generator-thunk) s)
|
||||||
|
(define g (generator-thunk))
|
||||||
|
(define pieces (for/list [(p (in-producer g (void)))] p))
|
||||||
|
(match kind
|
||||||
|
['string (bytes->string/utf-8 (bytes-append* pieces))]
|
||||||
|
['byte-string (bytes-append* pieces)]
|
||||||
|
['symbol (string->symbol (bytes->string/utf-8 (bytes-append* pieces)))]
|
||||||
|
['sequence pieces]
|
||||||
|
['set (list->set pieces)]
|
||||||
|
['dictionary (apply hash pieces)]))
|
||||||
|
|
||||||
;; Syntax properties and syntax objects would be almost perfect for
|
;; Syntax properties and syntax objects would be almost perfect for
|
||||||
;; representing annotations, plus position/source tracking as
|
;; representing annotations, plus position/source tracking as
|
||||||
;; lagniappe, but unfortunately they don't play nicely with data much
|
;; lagniappe, but unfortunately they don't play nicely with data much
|
||||||
|
@ -159,43 +177,53 @@
|
||||||
(reverse (for/fold [(acc '())] [((k v) (in-dict d))] (cons v (cons k acc)))))
|
(reverse (for/fold [(acc '())] [((k v) (in-dict d))] (cons v (cons k acc)))))
|
||||||
|
|
||||||
(define (encode-value v)
|
(define (encode-value v)
|
||||||
(match ((current-value->placeholder) v)
|
(define canonicalizing? (canonicalize-preserves?))
|
||||||
|
(match (and (not canonicalizing?) ((current-value->placeholder) v))
|
||||||
[(? integer? n)
|
[(? integer? n)
|
||||||
(bit-string (#b0001 :: bits 4) (n :: (wire-length)))]
|
(bit-string (#b0001 :: bits 4) (n :: (wire-length)))]
|
||||||
[#f
|
[#f
|
||||||
(match v
|
(let restart ((v v))
|
||||||
[#f (bytes #b00000000)]
|
(match v
|
||||||
[#t (bytes #b00000001)]
|
[#f (bytes #b00000000)]
|
||||||
[(? single-flonum?) (bit-string #b00000010 (v :: float bits 32))]
|
[#t (bytes #b00000001)]
|
||||||
[(? double-flonum?) (bit-string #b00000011 (v :: float bits 64))]
|
[(? single-flonum?) (bit-string #b00000010 (v :: float bits 32))]
|
||||||
[(annotated annotations _ item)
|
[(? double-flonum?) (bit-string #b00000011 (v :: float bits 64))]
|
||||||
(bit-string ((apply bit-string-append
|
[(annotated annotations _ item)
|
||||||
(map (lambda (a) (bit-string #b00000101 ((encode-value a) :: binary)))
|
(if canonicalizing?
|
||||||
annotations)) :: binary)
|
(restart item)
|
||||||
((encode-value item) :: binary))]
|
(bit-string ((apply bit-string-append
|
||||||
[(stream-of 'string p) (encode-stream 1 1 bytes? (p))]
|
(map (lambda (a) (bit-string #b00000101 ((encode-value a) :: binary)))
|
||||||
[(stream-of 'byte-string p) (encode-stream 1 2 bytes? (p))]
|
annotations)) :: binary)
|
||||||
[(stream-of 'symbol p) (encode-stream 1 3 bytes? (p))]
|
((encode-value item) :: binary)))]
|
||||||
[(stream-of 'sequence p) (encode-stream 2 1 (lambda (x) #t) (p))]
|
[(? stream-of?) #:when canonicalizing?
|
||||||
[(stream-of 'set p) (encode-stream 2 2 (lambda (x) #t) (p))]
|
(restart (stream-of->preserve v))]
|
||||||
[(stream-of 'dictionary p) (encode-stream 2 3 (lambda (x) #t) (p))]
|
[(stream-of 'string p) (encode-stream 1 1 bytes? (p))]
|
||||||
|
[(stream-of 'byte-string p) (encode-stream 1 2 bytes? (p))]
|
||||||
|
[(stream-of 'symbol p) (encode-stream 1 3 bytes? (p))]
|
||||||
|
[(stream-of 'sequence p) (encode-stream 2 1 (lambda (x) #t) (p))]
|
||||||
|
[(stream-of 'set p) (encode-stream 2 2 (lambda (x) #t) (p))]
|
||||||
|
[(stream-of 'dictionary p) (encode-stream 2 3 (lambda (x) #t) (p))]
|
||||||
|
|
||||||
[(? integer? x) #:when (<= -3 x 12) (bit-string (#b0011 :: bits 4) (x :: bits 4))]
|
[(? integer? x) #:when (<= -3 x 12) (bit-string (#b0011 :: bits 4) (x :: bits 4))]
|
||||||
;; [0 (bytes #b10000000)]
|
;; [0 (bytes #b10000000)]
|
||||||
[(? integer?)
|
[(? integer?)
|
||||||
(define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit
|
(define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit
|
||||||
(define byte-count (quotient (+ raw-bit-count 7) 8))
|
(define byte-count (quotient (+ raw-bit-count 7) 8))
|
||||||
(bit-string (#b0100 :: bits 4) (byte-count :: (wire-length)) (v :: integer bytes byte-count))]
|
(bit-string (#b0100 :: bits 4) (byte-count :: (wire-length)) (v :: integer bytes byte-count))]
|
||||||
[(? string?) (encode-binary-like 1 (string->bytes/utf-8 v))]
|
[(? string?) (encode-binary-like 1 (string->bytes/utf-8 v))]
|
||||||
[(? bytes?) (encode-binary-like 2 v)]
|
[(? bytes?) (encode-binary-like 2 v)]
|
||||||
[(? symbol?) (encode-binary-like 3 (string->bytes/utf-8 (symbol->string v)))]
|
[(? symbol?) (encode-binary-like 3 (string->bytes/utf-8 (symbol->string v)))]
|
||||||
|
|
||||||
[(record label fields) (encode-array-like 0 (cons label fields))]
|
[(record label fields) (encode-array-like 0 (cons label fields))]
|
||||||
[(? list?) (encode-array-like 1 v)]
|
[(? list?) (encode-array-like 1 v)]
|
||||||
[(? set?) (encode-array-like 2 (set->list v))]
|
[(? set?) (encode-array-like 2 (if canonicalizing?
|
||||||
[(? dict?) (encode-array-like 3 (dict-keys-and-values v))]
|
(canonical-set-elements v)
|
||||||
|
(set->list v)))]
|
||||||
|
[(? dict?) (encode-array-like 3 (if canonicalizing?
|
||||||
|
(canonical-dict-keys-and-values v)
|
||||||
|
(dict-keys-and-values v)))]
|
||||||
|
|
||||||
[_ (error 'encode-value "Cannot encode value ~v" v)])]))
|
[_ (error 'encode-value "Cannot encode value ~v" v)]))]))
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
;;---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -654,6 +682,7 @@
|
||||||
[#t 2] ;; a default
|
[#t 2] ;; a default
|
||||||
[other other]))
|
[other other]))
|
||||||
(define indenting? (and indent-amount0 #t))
|
(define indenting? (and indent-amount0 #t))
|
||||||
|
(define canonicalizing? (canonicalize-preserves?))
|
||||||
|
|
||||||
(define-syntax-rule (! fmt arg ...) (fprintf o fmt arg ...))
|
(define-syntax-rule (! fmt arg ...) (fprintf o fmt arg ...))
|
||||||
|
|
||||||
|
@ -776,22 +805,13 @@
|
||||||
(define (write-value distance v)
|
(define (write-value distance v)
|
||||||
(match v
|
(match v
|
||||||
[(annotated annotations _ item)
|
[(annotated annotations _ item)
|
||||||
(for [(a (in-list annotations))]
|
(when (not canonicalizing?)
|
||||||
(! "@")
|
(for [(a (in-list annotations))]
|
||||||
(write-value (+ distance 1) a)
|
(! "@")
|
||||||
(!indent* distance))
|
(write-value (+ distance 1) a)
|
||||||
|
(!indent* distance)))
|
||||||
(write-value distance item)]
|
(write-value distance item)]
|
||||||
[(stream-of kind generator-thunk)
|
[(? stream-of?) (write-value distance (stream-of->preserve v))]
|
||||||
(define g (generator-thunk))
|
|
||||||
(define pieces (for/list [(p (in-producer g (void)))] p))
|
|
||||||
(write-value distance
|
|
||||||
(match kind
|
|
||||||
['string (bytes->string/utf-8 (bytes-append* pieces))]
|
|
||||||
['byte-string (bytes-append* pieces)]
|
|
||||||
['symbol (string->symbol (bytes->string/utf-8 (bytes-append* pieces)))]
|
|
||||||
['sequence pieces]
|
|
||||||
['set (list->set pieces)]
|
|
||||||
['dictionary (apply hash pieces)]))]
|
|
||||||
[#f (! "#false")]
|
[#f (! "#false")]
|
||||||
[#t (! "#true")]
|
[#t (! "#true")]
|
||||||
[(? single-flonum?) (! "~vf" (real->double-flonum v))]
|
[(? single-flonum?) (! "~vf" (real->double-flonum v))]
|
||||||
|
@ -818,8 +838,12 @@
|
||||||
(! "|")))]
|
(! "|")))]
|
||||||
[(record label fields) (write-record distance label fields)]
|
[(record label fields) (write-record distance label fields)]
|
||||||
[(? list?) (write-sequence distance "[" "," "]" write-value v)]
|
[(? list?) (write-sequence distance "[" "," "]" write-value v)]
|
||||||
[(? set?) (write-sequence distance "#set{" "," "}" write-value (set->list v))]
|
[(? set?) (write-sequence distance "#set{" "," "}" write-value (if canonicalizing?
|
||||||
[(? dict?) (write-sequence distance "{" "," "}" write-key-value (dict->list v))]
|
(canonical-set-elements v)
|
||||||
|
(set->list v)))]
|
||||||
|
[(? dict?) (write-sequence distance "{" "," "}" write-key-value (if canonicalizing?
|
||||||
|
(canonical-dict-entries v)
|
||||||
|
(dict->list v)))]
|
||||||
|
|
||||||
[_ (error 'write-preserve "Cannot encode value ~v" v)]))
|
[_ (error 'write-preserve "Cannot encode value ~v" v)]))
|
||||||
|
|
||||||
|
@ -830,6 +854,94 @@
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
;;---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
(define (typecode v)
|
||||||
|
(match v
|
||||||
|
[(? boolean?) 0]
|
||||||
|
[(? single-flonum?) 1]
|
||||||
|
[(? double-flonum?) 2]
|
||||||
|
[(? integer? x) 3]
|
||||||
|
[(? string?) 4]
|
||||||
|
[(? bytes?) 5]
|
||||||
|
[(? symbol?) 6]
|
||||||
|
[(record _ _) 7]
|
||||||
|
[(? list?) 8]
|
||||||
|
[(? set?) 9]
|
||||||
|
[(? dict?) 10]
|
||||||
|
[_ (error 'preserve-order "Cannot compare value ~v" v)]))
|
||||||
|
|
||||||
|
(define-syntax chain-order
|
||||||
|
(syntax-rules ()
|
||||||
|
[(_ o) o]
|
||||||
|
[(_ o more ...) (match o
|
||||||
|
['= (chain-order more ...)]
|
||||||
|
[other other])]))
|
||||||
|
|
||||||
|
(define (prepare-for-order v)
|
||||||
|
(match v
|
||||||
|
[(annotated _ _ item) (prepare-for-order item)]
|
||||||
|
[(? stream-of?) (stream-of->preserve v)]
|
||||||
|
[_ v]))
|
||||||
|
|
||||||
|
(define preserve-order
|
||||||
|
(order 'preserve-order
|
||||||
|
any/c
|
||||||
|
(lambda (a* b*)
|
||||||
|
(define a (prepare-for-order a*))
|
||||||
|
(define b (prepare-for-order b*))
|
||||||
|
(define ta (typecode a))
|
||||||
|
(define tb (typecode b))
|
||||||
|
(cond [(< ta tb) '<]
|
||||||
|
[(> ta tb) '>]
|
||||||
|
[else (match ta ;; == tb
|
||||||
|
[7 (chain-order
|
||||||
|
(preserve-order (record-label a) (record-label b))
|
||||||
|
(preserve-order (record-fields a)) (preserve-order (record-fields b)))]
|
||||||
|
[8 (match* (a b)
|
||||||
|
[('() '()) '=]
|
||||||
|
[('() _) '<]
|
||||||
|
[(_ '()) '>]
|
||||||
|
[((cons a0 a1) (cons b0 b1))
|
||||||
|
(chain-order (preserve-order a0 b0) (preserve-order a1 b1))])]
|
||||||
|
[9 (preserve-order (canonical-set-elements a) (canonical-set-elements b))]
|
||||||
|
[10 (preserve-order (canonical-dict-keys a) (canonical-dict-keys b))]
|
||||||
|
[_ (datum-order a b)])]))))
|
||||||
|
|
||||||
|
(define preserve<? (order-<? preserve-order))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
(define canonicalize-preserves? (make-parameter #f))
|
||||||
|
|
||||||
|
(define *canonical-cache* (vector (make-weak-hasheq)
|
||||||
|
(make-weak-hasheq)
|
||||||
|
(make-weak-hasheq)
|
||||||
|
(make-weak-hasheq)))
|
||||||
|
|
||||||
|
(define (canonical-set-elements v)
|
||||||
|
(hash-ref! (vector-ref *canonical-cache* 0)
|
||||||
|
v
|
||||||
|
(lambda () (sort (set->list v) preserve<?))))
|
||||||
|
|
||||||
|
(define (canonical-dict-entries v)
|
||||||
|
(hash-ref! (vector-ref *canonical-cache* 1)
|
||||||
|
v
|
||||||
|
(lambda () (sort (dict->list v) preserve<? #:key car))))
|
||||||
|
|
||||||
|
(define (canonical-dict-keys-and-values v)
|
||||||
|
(hash-ref! (vector-ref *canonical-cache* 2)
|
||||||
|
v
|
||||||
|
(lambda () (let loop ((xs (canonical-dict-entries v)))
|
||||||
|
(match xs
|
||||||
|
['() '()]
|
||||||
|
[(cons (cons kk vv) rest) (cons kk (cons vv (loop rest)))])))))
|
||||||
|
|
||||||
|
(define (canonical-dict-keys v)
|
||||||
|
(hash-ref! (vector-ref *canonical-cache* 3)
|
||||||
|
v
|
||||||
|
(lambda () (map car (canonical-dict-entries v)))))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
|
||||||
(module+ test
|
(module+ test
|
||||||
(require rackunit)
|
(require rackunit)
|
||||||
(require racket/runtime-path)
|
(require racket/runtime-path)
|
||||||
|
@ -994,10 +1106,10 @@
|
||||||
|
|
||||||
(define (run-test-case variety t-name loc binary-form annotated-text-form)
|
(define (run-test-case variety t-name loc binary-form annotated-text-form)
|
||||||
(define text-form (strip-annotations annotated-text-form))
|
(define text-form (strip-annotations annotated-text-form))
|
||||||
(define-values (forward back)
|
(define-values (forward back can-execute-nondet-with-canonicalization?)
|
||||||
(match (hash-ref samples-txt-expected t-name text-form)
|
(match (hash-ref samples-txt-expected t-name text-form)
|
||||||
[(asymmetric f b) (values f b)]
|
[(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations
|
||||||
[v (values v v)]))
|
[v (values v v #t)]))
|
||||||
(check-equal? text-form back loc)
|
(check-equal? text-form back loc)
|
||||||
(check-equal? (d-strip (encode text-form)) back loc)
|
(check-equal? (d-strip (encode text-form)) back loc)
|
||||||
(check-equal? (d-strip (encode forward)) back loc)
|
(check-equal? (d-strip (encode forward)) back loc)
|
||||||
|
@ -1009,8 +1121,10 @@
|
||||||
(check-equal? (string->preserve-syntax (preserve->string annotated-text-form))
|
(check-equal? (string->preserve-syntax (preserve->string annotated-text-form))
|
||||||
annotated-text-form
|
annotated-text-form
|
||||||
loc)
|
loc)
|
||||||
(unless (memq variety '(nondeterministic))
|
(when (or (not (memq variety '(nondeterministic)))
|
||||||
(check-equal? (encode forward) binary-form loc))
|
(and can-execute-nondet-with-canonicalization?))
|
||||||
|
(parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f)))
|
||||||
|
(check-equal? (encode forward) binary-form loc)))
|
||||||
(unless (memq variety '(nondeterministic streaming))
|
(unless (memq variety '(nondeterministic streaming))
|
||||||
(check-equal? (encode annotated-text-form) binary-form loc)))
|
(check-equal? (encode annotated-text-form) binary-form loc)))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue