Optionally-canonicalizing binary output.
This commit is contained in:
parent
02c02b641f
commit
0832b94ce9
|
@ -2,6 +2,7 @@
|
|||
;; Preserve, as in Fruit Preserve, as in a remarkably weak pun on pickling/dehydration etc
|
||||
|
||||
(provide (struct-out stream-of)
|
||||
stream-of->preserve
|
||||
(all-from-out "record.rkt")
|
||||
(struct-out annotated)
|
||||
annotate
|
||||
|
@ -22,9 +23,13 @@
|
|||
encode
|
||||
decode
|
||||
decode-syntax
|
||||
wire-value)
|
||||
wire-value
|
||||
preserve-order
|
||||
preserve<?
|
||||
canonicalize-preserves?)
|
||||
|
||||
(require racket/bytes)
|
||||
(require (only-in racket/contract any/c))
|
||||
(require racket/dict)
|
||||
(require (only-in racket/format ~a))
|
||||
(require racket/generator)
|
||||
|
@ -37,9 +42,22 @@
|
|||
(require (only-in syntax/readerr raise-read-error raise-read-eof-error))
|
||||
(require net/base64)
|
||||
(require (for-syntax racket/base))
|
||||
(require data/order)
|
||||
|
||||
(struct stream-of (kind generator-thunk) #:transparent)
|
||||
|
||||
(define (stream-of->preserve s)
|
||||
(match-define (stream-of kind generator-thunk) s)
|
||||
(define g (generator-thunk))
|
||||
(define pieces (for/list [(p (in-producer g (void)))] p))
|
||||
(match kind
|
||||
['string (bytes->string/utf-8 (bytes-append* pieces))]
|
||||
['byte-string (bytes-append* pieces)]
|
||||
['symbol (string->symbol (bytes->string/utf-8 (bytes-append* pieces)))]
|
||||
['sequence pieces]
|
||||
['set (list->set pieces)]
|
||||
['dictionary (apply hash pieces)]))
|
||||
|
||||
;; Syntax properties and syntax objects would be almost perfect for
|
||||
;; representing annotations, plus position/source tracking as
|
||||
;; lagniappe, but unfortunately they don't play nicely with data much
|
||||
|
@ -159,20 +177,26 @@
|
|||
(reverse (for/fold [(acc '())] [((k v) (in-dict d))] (cons v (cons k acc)))))
|
||||
|
||||
(define (encode-value v)
|
||||
(match ((current-value->placeholder) v)
|
||||
(define canonicalizing? (canonicalize-preserves?))
|
||||
(match (and (not canonicalizing?) ((current-value->placeholder) v))
|
||||
[(? integer? n)
|
||||
(bit-string (#b0001 :: bits 4) (n :: (wire-length)))]
|
||||
[#f
|
||||
(let restart ((v v))
|
||||
(match v
|
||||
[#f (bytes #b00000000)]
|
||||
[#t (bytes #b00000001)]
|
||||
[(? single-flonum?) (bit-string #b00000010 (v :: float bits 32))]
|
||||
[(? double-flonum?) (bit-string #b00000011 (v :: float bits 64))]
|
||||
[(annotated annotations _ item)
|
||||
(if canonicalizing?
|
||||
(restart item)
|
||||
(bit-string ((apply bit-string-append
|
||||
(map (lambda (a) (bit-string #b00000101 ((encode-value a) :: binary)))
|
||||
annotations)) :: binary)
|
||||
((encode-value item) :: binary))]
|
||||
((encode-value item) :: binary)))]
|
||||
[(? stream-of?) #:when canonicalizing?
|
||||
(restart (stream-of->preserve v))]
|
||||
[(stream-of 'string p) (encode-stream 1 1 bytes? (p))]
|
||||
[(stream-of 'byte-string p) (encode-stream 1 2 bytes? (p))]
|
||||
[(stream-of 'symbol p) (encode-stream 1 3 bytes? (p))]
|
||||
|
@ -192,10 +216,14 @@
|
|||
|
||||
[(record label fields) (encode-array-like 0 (cons label fields))]
|
||||
[(? list?) (encode-array-like 1 v)]
|
||||
[(? set?) (encode-array-like 2 (set->list v))]
|
||||
[(? dict?) (encode-array-like 3 (dict-keys-and-values v))]
|
||||
[(? set?) (encode-array-like 2 (if canonicalizing?
|
||||
(canonical-set-elements v)
|
||||
(set->list v)))]
|
||||
[(? dict?) (encode-array-like 3 (if canonicalizing?
|
||||
(canonical-dict-keys-and-values v)
|
||||
(dict-keys-and-values v)))]
|
||||
|
||||
[_ (error 'encode-value "Cannot encode value ~v" v)])]))
|
||||
[_ (error 'encode-value "Cannot encode value ~v" v)]))]))
|
||||
|
||||
;;---------------------------------------------------------------------------
|
||||
|
||||
|
@ -654,6 +682,7 @@
|
|||
[#t 2] ;; a default
|
||||
[other other]))
|
||||
(define indenting? (and indent-amount0 #t))
|
||||
(define canonicalizing? (canonicalize-preserves?))
|
||||
|
||||
(define-syntax-rule (! fmt arg ...) (fprintf o fmt arg ...))
|
||||
|
||||
|
@ -776,22 +805,13 @@
|
|||
(define (write-value distance v)
|
||||
(match v
|
||||
[(annotated annotations _ item)
|
||||
(when (not canonicalizing?)
|
||||
(for [(a (in-list annotations))]
|
||||
(! "@")
|
||||
(write-value (+ distance 1) a)
|
||||
(!indent* distance))
|
||||
(!indent* distance)))
|
||||
(write-value distance item)]
|
||||
[(stream-of kind generator-thunk)
|
||||
(define g (generator-thunk))
|
||||
(define pieces (for/list [(p (in-producer g (void)))] p))
|
||||
(write-value distance
|
||||
(match kind
|
||||
['string (bytes->string/utf-8 (bytes-append* pieces))]
|
||||
['byte-string (bytes-append* pieces)]
|
||||
['symbol (string->symbol (bytes->string/utf-8 (bytes-append* pieces)))]
|
||||
['sequence pieces]
|
||||
['set (list->set pieces)]
|
||||
['dictionary (apply hash pieces)]))]
|
||||
[(? stream-of?) (write-value distance (stream-of->preserve v))]
|
||||
[#f (! "#false")]
|
||||
[#t (! "#true")]
|
||||
[(? single-flonum?) (! "~vf" (real->double-flonum v))]
|
||||
|
@ -818,8 +838,12 @@
|
|||
(! "|")))]
|
||||
[(record label fields) (write-record distance label fields)]
|
||||
[(? list?) (write-sequence distance "[" "," "]" write-value v)]
|
||||
[(? set?) (write-sequence distance "#set{" "," "}" write-value (set->list v))]
|
||||
[(? dict?) (write-sequence distance "{" "," "}" write-key-value (dict->list v))]
|
||||
[(? set?) (write-sequence distance "#set{" "," "}" write-value (if canonicalizing?
|
||||
(canonical-set-elements v)
|
||||
(set->list v)))]
|
||||
[(? dict?) (write-sequence distance "{" "," "}" write-key-value (if canonicalizing?
|
||||
(canonical-dict-entries v)
|
||||
(dict->list v)))]
|
||||
|
||||
[_ (error 'write-preserve "Cannot encode value ~v" v)]))
|
||||
|
||||
|
@ -830,6 +854,94 @@
|
|||
|
||||
;;---------------------------------------------------------------------------
|
||||
|
||||
(define (typecode v)
|
||||
(match v
|
||||
[(? boolean?) 0]
|
||||
[(? single-flonum?) 1]
|
||||
[(? double-flonum?) 2]
|
||||
[(? integer? x) 3]
|
||||
[(? string?) 4]
|
||||
[(? bytes?) 5]
|
||||
[(? symbol?) 6]
|
||||
[(record _ _) 7]
|
||||
[(? list?) 8]
|
||||
[(? set?) 9]
|
||||
[(? dict?) 10]
|
||||
[_ (error 'preserve-order "Cannot compare value ~v" v)]))
|
||||
|
||||
(define-syntax chain-order
|
||||
(syntax-rules ()
|
||||
[(_ o) o]
|
||||
[(_ o more ...) (match o
|
||||
['= (chain-order more ...)]
|
||||
[other other])]))
|
||||
|
||||
(define (prepare-for-order v)
|
||||
(match v
|
||||
[(annotated _ _ item) (prepare-for-order item)]
|
||||
[(? stream-of?) (stream-of->preserve v)]
|
||||
[_ v]))
|
||||
|
||||
(define preserve-order
|
||||
(order 'preserve-order
|
||||
any/c
|
||||
(lambda (a* b*)
|
||||
(define a (prepare-for-order a*))
|
||||
(define b (prepare-for-order b*))
|
||||
(define ta (typecode a))
|
||||
(define tb (typecode b))
|
||||
(cond [(< ta tb) '<]
|
||||
[(> ta tb) '>]
|
||||
[else (match ta ;; == tb
|
||||
[7 (chain-order
|
||||
(preserve-order (record-label a) (record-label b))
|
||||
(preserve-order (record-fields a)) (preserve-order (record-fields b)))]
|
||||
[8 (match* (a b)
|
||||
[('() '()) '=]
|
||||
[('() _) '<]
|
||||
[(_ '()) '>]
|
||||
[((cons a0 a1) (cons b0 b1))
|
||||
(chain-order (preserve-order a0 b0) (preserve-order a1 b1))])]
|
||||
[9 (preserve-order (canonical-set-elements a) (canonical-set-elements b))]
|
||||
[10 (preserve-order (canonical-dict-keys a) (canonical-dict-keys b))]
|
||||
[_ (datum-order a b)])]))))
|
||||
|
||||
(define preserve<? (order-<? preserve-order))
|
||||
|
||||
;;---------------------------------------------------------------------------
|
||||
|
||||
(define canonicalize-preserves? (make-parameter #f))
|
||||
|
||||
(define *canonical-cache* (vector (make-weak-hasheq)
|
||||
(make-weak-hasheq)
|
||||
(make-weak-hasheq)
|
||||
(make-weak-hasheq)))
|
||||
|
||||
(define (canonical-set-elements v)
|
||||
(hash-ref! (vector-ref *canonical-cache* 0)
|
||||
v
|
||||
(lambda () (sort (set->list v) preserve<?))))
|
||||
|
||||
(define (canonical-dict-entries v)
|
||||
(hash-ref! (vector-ref *canonical-cache* 1)
|
||||
v
|
||||
(lambda () (sort (dict->list v) preserve<? #:key car))))
|
||||
|
||||
(define (canonical-dict-keys-and-values v)
|
||||
(hash-ref! (vector-ref *canonical-cache* 2)
|
||||
v
|
||||
(lambda () (let loop ((xs (canonical-dict-entries v)))
|
||||
(match xs
|
||||
['() '()]
|
||||
[(cons (cons kk vv) rest) (cons kk (cons vv (loop rest)))])))))
|
||||
|
||||
(define (canonical-dict-keys v)
|
||||
(hash-ref! (vector-ref *canonical-cache* 3)
|
||||
v
|
||||
(lambda () (map car (canonical-dict-entries v)))))
|
||||
|
||||
;;---------------------------------------------------------------------------
|
||||
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
(require racket/runtime-path)
|
||||
|
@ -994,10 +1106,10 @@
|
|||
|
||||
(define (run-test-case variety t-name loc binary-form annotated-text-form)
|
||||
(define text-form (strip-annotations annotated-text-form))
|
||||
(define-values (forward back)
|
||||
(define-values (forward back can-execute-nondet-with-canonicalization?)
|
||||
(match (hash-ref samples-txt-expected t-name text-form)
|
||||
[(asymmetric f b) (values f b)]
|
||||
[v (values v v)]))
|
||||
[(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations
|
||||
[v (values v v #t)]))
|
||||
(check-equal? text-form back loc)
|
||||
(check-equal? (d-strip (encode text-form)) back loc)
|
||||
(check-equal? (d-strip (encode forward)) back loc)
|
||||
|
@ -1009,8 +1121,10 @@
|
|||
(check-equal? (string->preserve-syntax (preserve->string annotated-text-form))
|
||||
annotated-text-form
|
||||
loc)
|
||||
(unless (memq variety '(nondeterministic))
|
||||
(check-equal? (encode forward) binary-form loc))
|
||||
(when (or (not (memq variety '(nondeterministic)))
|
||||
(and can-execute-nondet-with-canonicalization?))
|
||||
(parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f)))
|
||||
(check-equal? (encode forward) binary-form loc)))
|
||||
(unless (memq variety '(nondeterministic streaming))
|
||||
(check-equal? (encode annotated-text-form) binary-form loc)))
|
||||
|
||||
|
|
Loading…
Reference in New Issue