#lang racket/base (provide write-preserve/text preserve->string (struct-out binary-display-heuristics) current-binary-display-heuristics) (require racket/match) (require racket/format) (require net/base64) (require "annotation.rkt") (require "float.rkt") (require "record.rkt") (require "object-id.rkt") (require racket/dict) (require racket/set) (require (only-in racket/port with-output-to-string)) (define PIPE #\|) (struct binary-display-heuristics (printable-ascii-proportion max-length) #:transparent) (define current-binary-display-heuristics (make-parameter (binary-display-heuristics 3/4 1024))) (define (write-preserve/text v0 [o (current-output-port)] #:indent [indent-amount0 #f] #:encode-embedded [encode-embedded0 #f] #:commas? [commas? #t] #:write-annotations? [write-annotations? #t]) (define encode-embedded (or encode-embedded0 object-id)) (define indent-amount (match indent-amount0 [#f 0] [#t 2] ;; a default [other other])) (define indenting? (and indent-amount0 #t)) (define-syntax-rule (! fmt arg ...) (fprintf o fmt arg ...)) (define (!indent distance) (when indenting? (! "\n~a" (make-string distance #\space)))) (define (!indent* distance) (if indenting? (!indent distance) (! " "))) (define (write-stringlike-char c [default (lambda (c) (! "~a" c))]) (match c [#\\ (! "\\\\")] [#\u08 (! "\\b")] [#\u0C (! "\\f")] [#\u0A (! "\\n")] [#\u0D (! "\\r")] [#\u09 (! "\\t")] [_ (default c)])) (define (write-sequence outer-distance opener comma closer item-writer vs) (define inner-distance (+ outer-distance indent-amount)) (! "~a" opener) (match vs ['() (void)] [(list v0) (item-writer outer-distance v0)] [(cons v0 vs) (!indent inner-distance) (item-writer inner-distance v0) (for [(v (in-list vs))] (! "~a" comma) (!indent* inner-distance) (item-writer inner-distance v)) (!indent outer-distance)]) (! "~a" closer)) (define (write-record outer-distance label fields) (! "<") (write-value outer-distance label) (for ([f (in-list fields)]) (! " ") (write-value outer-distance f)) (! ">")) (define (write-key-value distance kv) (match-define (cons k v) kv) (write-value distance k) (! ": ") (write-value distance v)) (define (binunescaped? b) (or (<= #x20 b #x21) (<= #x23 b #x5b) (<= #x5d b #x7e))) (define (write-binary-stringlike v) (! "#\"") (for [(b (in-bytes v))] (match b [#x22 (! "\\\"")] [(? binunescaped?) (write-stringlike-char (integer->char b))] [_ (write-stringlike-char (integer->char b) (lambda (c) (! "\\x~a" (~a #:min-width 2 #:align 'right #:left-pad-string "0" (number->string b 16)))))])) (! "\"")) (define (write-binary-base64 outer-distance v) ;; Racket's encoder breaks lines after 72 characters. ;; That corresponds to 54 bytes of input binary. (! "#[") (if (and indenting? (> (bytes-length v) 54)) (let* ((inner-distance (+ outer-distance indent-amount)) (line-separator (bytes-append #"\n" (make-bytes inner-distance 32))) (encoded (base64-encode v line-separator))) (write-bytes line-separator o) (write-bytes encoded o 0 (- (bytes-length encoded) indent-amount))) (write-bytes (base64-encode v #"") o)) (! "]")) (define (write-binary outer-distance v) (match-define (binary-display-heuristics proportion maxlen) (current-binary-display-heuristics)) (define vlen (bytes-length v)) (if (>= vlen maxlen) (write-binary-base64 outer-distance v) (let* ((sample-length (min vlen maxlen)) (printable-ascii-count (for/sum [(i (in-range 0 sample-length)) (b (in-bytes v))] (if (or (<= 32 b 126) (= b 9) (= b 10) (= b 13)) 1 0)))) (if (or (zero? vlen) (>= (/ printable-ascii-count sample-length) proportion)) (write-binary-stringlike v) (write-binary-base64 outer-distance v))))) (define (write-value distance v) (match v [(annotated annotations _ item) (when write-annotations? (for [(a (in-list annotations))] (! "@") (write-value (+ distance 1) a) (!indent* distance))) (write-value distance item)] [#f (! "#f")] [#t (! "#t")] [(float v) (! "~vf" v)] [(? flonum?) (! "~v" v)] [(? integer? x) (! "~v" v)] [(? string?) (! "\"") (for [(c (in-string v))] (match c [#\" (! "\\\"")] [_ (write-stringlike-char c)])) (! "\"")] [(? bytes?) (write-binary distance v)] [(? symbol?) (define s (symbol->string v)) ;; FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. (if (regexp-match #px"^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$" s) (! "~a" s) (begin (! "|") (for [(c (in-string s))] (match c [(== PIPE) (! "\\|")] [_ (write-stringlike-char c)])) (! "|")))] [(record label fields) (write-record distance label fields)] [(? list?) (write-sequence distance "[" (if commas? "," "") "]" write-value v)] [(? set?) (write-sequence distance "#{" (if commas? "," "") "}" write-value (set->list v))] [(? dict?) (write-sequence distance "{" (if commas? "," "") "}" write-key-value (dict->list v))] [other (! "#!") (write-value distance (encode-embedded other))])) (write-value 0 v0)) (define (preserve->string v0 #:indent [indent-amount #f] #:encode-embedded [encode-embedded #f] #:commas? [commas? #t] #:write-annotations? [write-annotations? #t]) (with-output-to-string (lambda () (write-preserve/text v0 #:indent indent-amount #:encode-embedded encode-embedded #:commas? commas? #:write-annotations? write-annotations?))))