Heuristics for choosing base64 vs ascii-ish binary display

This commit is contained in:
Tony Garnock-Jones 2019-08-22 20:59:37 +01:00
parent 4d73ab6d69
commit 54b33f5f13
1 changed files with 47 additions and 13 deletions

View File

@ -13,6 +13,8 @@
read-preserve-syntax
string->preserve
string->preserve-syntax
(struct-out binary-display-heuristics)
current-binary-display-heuristics
write-preserve
preserve->string
current-value->placeholder
@ -33,6 +35,7 @@
(require "record.rkt")
(require "varint.rkt")
(require (only-in syntax/readerr raise-read-error raise-read-eof-error))
(require net/base64)
(struct stream-of (kind generator-thunk) #:transparent)
@ -355,7 +358,6 @@
(define (read-preserve [i (current-input-port)]
#:read-syntax? [read-syntax? #f]
#:source [source (object-name i)])
(local-require net/base64)
(local-require file/sha1)
(define-match-expander px
@ -635,6 +637,10 @@
;;---------------------------------------------------------------------------
(struct binary-display-heuristics (printable-ascii-proportion max-length) #:transparent)
(define current-binary-display-heuristics (make-parameter (binary-display-heuristics 3/4 1024)))
(define (write-preserve v0 [o (current-output-port)] #:indent [indent-amount0 #f])
(define indent-amount (match indent-amount0
[#f 0]
@ -721,6 +727,45 @@
(<= #x23 b #x5b)
(<= #x5d b #x7e)))
(define (write-binary-stringlike v)
(! "#\"")
(for [(b (in-bytes v))]
(match b
[#x22 (! "\\\"")]
[(? binunescaped?) (write-stringlike-char (integer->char b))]
[_ (write-stringlike-char (integer->char b)
(lambda (c) (! "\\x~a" (~a #:min-width 2
#:align 'right
#:left-pad-string "0"
(number->string b 16)))))]))
(! "\""))
(define (write-binary-base64 outer-distance v)
;; Racket's encoder breaks lines after 72 characters.
;; That corresponds to 54 bytes of input binary.
(! "#base64{")
(if (and indenting? (> (bytes-length v) 54))
(let* ((inner-distance (+ outer-distance indent-amount))
(line-separator (bytes-append #"\n" (make-bytes inner-distance 32)))
(encoded (base64-encode v line-separator)))
(write-bytes line-separator o)
(write-bytes encoded o 0 (- (bytes-length encoded) indent-amount)))
(write-bytes (base64-encode v #"") o))
(! "}"))
(define (write-binary outer-distance v)
(match-define (binary-display-heuristics proportion maxlen) (current-binary-display-heuristics))
(define vlen (bytes-length v))
(if (>= vlen maxlen)
(write-binary-base64 outer-distance v)
(let* ((sample-length (min vlen maxlen))
(printable-ascii-count (for/sum [(i (in-range 0 sample-length))
(b (in-bytes v))]
(if (or (<= 32 b 126) (= b 9) (= b 10) (= b 13)) 1 0))))
(if (or (zero? vlen) (>= (/ printable-ascii-count sample-length) proportion))
(write-binary-stringlike v)
(write-binary-base64 outer-distance v)))))
(define (write-value distance v)
(match v
[(annotated annotations _ item)
@ -741,18 +786,7 @@
[#\" (! "\\\"")]
[_ (write-stringlike-char c)]))
(! "\"")]
[(? bytes?)
(! "#\"")
(for [(b (in-bytes v))]
(match b
[#x22 (! "\\\"")]
[(? binunescaped?) (write-stringlike-char (integer->char b))]
[_ (write-stringlike-char (integer->char b)
(lambda (c) (! "\\x~a" (~a #:min-width 2
#:align 'right
#:left-pad-string "0"
(number->string b 16)))))]))
(! "\"")]
[(? bytes?) (write-binary distance v)]
[(? symbol?)
(define s (symbol->string v))
;; FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.