From 0832b94ce918509e3cbd58833b9ec105d737eca7 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Thu, 24 Oct 2019 14:35:27 +0100 Subject: [PATCH] Optionally-canonicalizing binary output. --- implementations/racket/preserves/main.rkt | 222 ++++++++++++++++------ 1 file changed, 168 insertions(+), 54 deletions(-) diff --git a/implementations/racket/preserves/main.rkt b/implementations/racket/preserves/main.rkt index 5579663..b77236b 100644 --- a/implementations/racket/preserves/main.rkt +++ b/implementations/racket/preserves/main.rkt @@ -2,6 +2,7 @@ ;; Preserve, as in Fruit Preserve, as in a remarkably weak pun on pickling/dehydration etc (provide (struct-out stream-of) + stream-of->preserve (all-from-out "record.rkt") (struct-out annotated) annotate @@ -22,9 +23,13 @@ encode decode decode-syntax - wire-value) + wire-value + preserve-order + preservepreserve s) + (match-define (stream-of kind generator-thunk) s) + (define g (generator-thunk)) + (define pieces (for/list [(p (in-producer g (void)))] p)) + (match kind + ['string (bytes->string/utf-8 (bytes-append* pieces))] + ['byte-string (bytes-append* pieces)] + ['symbol (string->symbol (bytes->string/utf-8 (bytes-append* pieces)))] + ['sequence pieces] + ['set (list->set pieces)] + ['dictionary (apply hash pieces)])) + ;; Syntax properties and syntax objects would be almost perfect for ;; representing annotations, plus position/source tracking as ;; lagniappe, but unfortunately they don't play nicely with data much @@ -159,43 +177,53 @@ (reverse (for/fold [(acc '())] [((k v) (in-dict d))] (cons v (cons k acc))))) (define (encode-value v) - (match ((current-value->placeholder) v) + (define canonicalizing? (canonicalize-preserves?)) + (match (and (not canonicalizing?) ((current-value->placeholder) v)) [(? integer? n) (bit-string (#b0001 :: bits 4) (n :: (wire-length)))] [#f - (match v - [#f (bytes #b00000000)] - [#t (bytes #b00000001)] - [(? single-flonum?) (bit-string #b00000010 (v :: float bits 32))] - [(? double-flonum?) (bit-string #b00000011 (v :: float bits 64))] - [(annotated annotations _ item) - (bit-string ((apply bit-string-append - (map (lambda (a) (bit-string #b00000101 ((encode-value a) :: binary))) - annotations)) :: binary) - ((encode-value item) :: binary))] - [(stream-of 'string p) (encode-stream 1 1 bytes? (p))] - [(stream-of 'byte-string p) (encode-stream 1 2 bytes? (p))] - [(stream-of 'symbol p) (encode-stream 1 3 bytes? (p))] - [(stream-of 'sequence p) (encode-stream 2 1 (lambda (x) #t) (p))] - [(stream-of 'set p) (encode-stream 2 2 (lambda (x) #t) (p))] - [(stream-of 'dictionary p) (encode-stream 2 3 (lambda (x) #t) (p))] + (let restart ((v v)) + (match v + [#f (bytes #b00000000)] + [#t (bytes #b00000001)] + [(? single-flonum?) (bit-string #b00000010 (v :: float bits 32))] + [(? double-flonum?) (bit-string #b00000011 (v :: float bits 64))] + [(annotated annotations _ item) + (if canonicalizing? + (restart item) + (bit-string ((apply bit-string-append + (map (lambda (a) (bit-string #b00000101 ((encode-value a) :: binary))) + annotations)) :: binary) + ((encode-value item) :: binary)))] + [(? stream-of?) #:when canonicalizing? + (restart (stream-of->preserve v))] + [(stream-of 'string p) (encode-stream 1 1 bytes? (p))] + [(stream-of 'byte-string p) (encode-stream 1 2 bytes? (p))] + [(stream-of 'symbol p) (encode-stream 1 3 bytes? (p))] + [(stream-of 'sequence p) (encode-stream 2 1 (lambda (x) #t) (p))] + [(stream-of 'set p) (encode-stream 2 2 (lambda (x) #t) (p))] + [(stream-of 'dictionary p) (encode-stream 2 3 (lambda (x) #t) (p))] - [(? integer? x) #:when (<= -3 x 12) (bit-string (#b0011 :: bits 4) (x :: bits 4))] - ;; [0 (bytes #b10000000)] - [(? integer?) - (define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit - (define byte-count (quotient (+ raw-bit-count 7) 8)) - (bit-string (#b0100 :: bits 4) (byte-count :: (wire-length)) (v :: integer bytes byte-count))] - [(? string?) (encode-binary-like 1 (string->bytes/utf-8 v))] - [(? bytes?) (encode-binary-like 2 v)] - [(? symbol?) (encode-binary-like 3 (string->bytes/utf-8 (symbol->string v)))] + [(? integer? x) #:when (<= -3 x 12) (bit-string (#b0011 :: bits 4) (x :: bits 4))] + ;; [0 (bytes #b10000000)] + [(? integer?) + (define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit + (define byte-count (quotient (+ raw-bit-count 7) 8)) + (bit-string (#b0100 :: bits 4) (byte-count :: (wire-length)) (v :: integer bytes byte-count))] + [(? string?) (encode-binary-like 1 (string->bytes/utf-8 v))] + [(? bytes?) (encode-binary-like 2 v)] + [(? symbol?) (encode-binary-like 3 (string->bytes/utf-8 (symbol->string v)))] - [(record label fields) (encode-array-like 0 (cons label fields))] - [(? list?) (encode-array-like 1 v)] - [(? set?) (encode-array-like 2 (set->list v))] - [(? dict?) (encode-array-like 3 (dict-keys-and-values v))] + [(record label fields) (encode-array-like 0 (cons label fields))] + [(? list?) (encode-array-like 1 v)] + [(? set?) (encode-array-like 2 (if canonicalizing? + (canonical-set-elements v) + (set->list v)))] + [(? dict?) (encode-array-like 3 (if canonicalizing? + (canonical-dict-keys-and-values v) + (dict-keys-and-values v)))] - [_ (error 'encode-value "Cannot encode value ~v" v)])])) + [_ (error 'encode-value "Cannot encode value ~v" v)]))])) ;;--------------------------------------------------------------------------- @@ -654,6 +682,7 @@ [#t 2] ;; a default [other other])) (define indenting? (and indent-amount0 #t)) + (define canonicalizing? (canonicalize-preserves?)) (define-syntax-rule (! fmt arg ...) (fprintf o fmt arg ...)) @@ -776,22 +805,13 @@ (define (write-value distance v) (match v [(annotated annotations _ item) - (for [(a (in-list annotations))] - (! "@") - (write-value (+ distance 1) a) - (!indent* distance)) + (when (not canonicalizing?) + (for [(a (in-list annotations))] + (! "@") + (write-value (+ distance 1) a) + (!indent* distance))) (write-value distance item)] - [(stream-of kind generator-thunk) - (define g (generator-thunk)) - (define pieces (for/list [(p (in-producer g (void)))] p)) - (write-value distance - (match kind - ['string (bytes->string/utf-8 (bytes-append* pieces))] - ['byte-string (bytes-append* pieces)] - ['symbol (string->symbol (bytes->string/utf-8 (bytes-append* pieces)))] - ['sequence pieces] - ['set (list->set pieces)] - ['dictionary (apply hash pieces)]))] + [(? stream-of?) (write-value distance (stream-of->preserve v))] [#f (! "#false")] [#t (! "#true")] [(? single-flonum?) (! "~vf" (real->double-flonum v))] @@ -818,8 +838,12 @@ (! "|")))] [(record label fields) (write-record distance label fields)] [(? list?) (write-sequence distance "[" "," "]" write-value v)] - [(? set?) (write-sequence distance "#set{" "," "}" write-value (set->list v))] - [(? dict?) (write-sequence distance "{" "," "}" write-key-value (dict->list v))] + [(? set?) (write-sequence distance "#set{" "," "}" write-value (if canonicalizing? + (canonical-set-elements v) + (set->list v)))] + [(? dict?) (write-sequence distance "{" "," "}" write-key-value (if canonicalizing? + (canonical-dict-entries v) + (dict->list v)))] [_ (error 'write-preserve "Cannot encode value ~v" v)])) @@ -830,6 +854,94 @@ ;;--------------------------------------------------------------------------- +(define (typecode v) + (match v + [(? boolean?) 0] + [(? single-flonum?) 1] + [(? double-flonum?) 2] + [(? integer? x) 3] + [(? string?) 4] + [(? bytes?) 5] + [(? symbol?) 6] + [(record _ _) 7] + [(? list?) 8] + [(? set?) 9] + [(? dict?) 10] + [_ (error 'preserve-order "Cannot compare value ~v" v)])) + +(define-syntax chain-order + (syntax-rules () + [(_ o) o] + [(_ o more ...) (match o + ['= (chain-order more ...)] + [other other])])) + +(define (prepare-for-order v) + (match v + [(annotated _ _ item) (prepare-for-order item)] + [(? stream-of?) (stream-of->preserve v)] + [_ v])) + +(define preserve-order + (order 'preserve-order + any/c + (lambda (a* b*) + (define a (prepare-for-order a*)) + (define b (prepare-for-order b*)) + (define ta (typecode a)) + (define tb (typecode b)) + (cond [(< ta tb) '<] + [(> ta tb) '>] + [else (match ta ;; == tb + [7 (chain-order + (preserve-order (record-label a) (record-label b)) + (preserve-order (record-fields a)) (preserve-order (record-fields b)))] + [8 (match* (a b) + [('() '()) '=] + [('() _) '<] + [(_ '()) '>] + [((cons a0 a1) (cons b0 b1)) + (chain-order (preserve-order a0 b0) (preserve-order a1 b1))])] + [9 (preserve-order (canonical-set-elements a) (canonical-set-elements b))] + [10 (preserve-order (canonical-dict-keys a) (canonical-dict-keys b))] + [_ (datum-order a b)])])))) + +(define preservelist v) preservelist v) preservepreserve-syntax (preserve->string annotated-text-form)) annotated-text-form loc) - (unless (memq variety '(nondeterministic)) - (check-equal? (encode forward) binary-form loc)) + (when (or (not (memq variety '(nondeterministic))) + (and can-execute-nondet-with-canonicalization?)) + (parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f))) + (check-equal? (encode forward) binary-form loc))) (unless (memq variety '(nondeterministic streaming)) (check-equal? (encode annotated-text-form) binary-form loc)))