preserves/implementations/racket/preserves/preserves/write-text.rkt

188 lines
6.7 KiB
Racket

#lang racket/base
(provide write-preserve/text
preserve->string
(struct-out binary-display-heuristics)
current-binary-display-heuristics)
(require racket/match)
(require racket/format)
(require net/base64)
(require "annotation.rkt")
(require "float.rkt")
(require "record.rkt")
(require "object-id.rkt")
(require racket/dict)
(require racket/set)
(require (only-in racket/port with-output-to-string))
(define PIPE #\|)
(struct binary-display-heuristics (printable-ascii-proportion max-length) #:transparent)
(define current-binary-display-heuristics (make-parameter (binary-display-heuristics 3/4 1024)))
(define (write-preserve/text v0 [o (current-output-port)]
#:indent [indent-amount0 #f]
#:encode-embedded [encode-embedded0 #f]
#:commas? [commas? #t]
#:write-annotations? [write-annotations? #t])
(define encode-embedded (or encode-embedded0 object-id))
(define indent-amount (match indent-amount0
[#f 0]
[#t 2] ;; a default
[other other]))
(define indenting? (and indent-amount0 #t))
(define-syntax-rule (! fmt arg ...) (fprintf o fmt arg ...))
(define (!indent distance)
(when indenting?
(! "\n~a" (make-string distance #\space))))
(define (!indent* distance)
(if indenting?
(!indent distance)
(! " ")))
(define (write-stringlike-char c [default (lambda (c) (! "~a" c))])
(match c
[#\\ (! "\\\\")]
[#\u08 (! "\\b")]
[#\u0C (! "\\f")]
[#\u0A (! "\\n")]
[#\u0D (! "\\r")]
[#\u09 (! "\\t")]
[_ (default c)]))
(define (write-sequence outer-distance opener comma closer item-writer vs)
(define inner-distance (+ outer-distance indent-amount))
(! "~a" opener)
(match vs
['() (void)]
[(list v0)
(item-writer outer-distance v0)]
[(cons v0 vs)
(!indent inner-distance)
(item-writer inner-distance v0)
(for [(v (in-list vs))]
(! "~a" comma)
(!indent* inner-distance)
(item-writer inner-distance v))
(!indent outer-distance)])
(! "~a" closer))
(define (write-record outer-distance label fields)
(! "<")
(write-value outer-distance label)
(for ([f (in-list fields)])
(! " ")
(write-value outer-distance f))
(! ">"))
(define (write-key-value distance kv)
(match-define (cons k v) kv)
(write-value distance k)
(! ": ")
(write-value distance v))
(define (binunescaped? b)
(or (<= #x20 b #x21)
(<= #x23 b #x5b)
(<= #x5d b #x7e)))
(define (write-binary-stringlike v)
(! "#\"")
(for [(b (in-bytes v))]
(match b
[#x22 (! "\\\"")]
[(? binunescaped?) (write-stringlike-char (integer->char b))]
[_ (write-stringlike-char (integer->char b)
(lambda (c) (! "\\x~a" (~a #:min-width 2
#:align 'right
#:left-pad-string "0"
(number->string b 16)))))]))
(! "\""))
(define (write-binary-base64 outer-distance v)
;; Racket's encoder breaks lines after 72 characters.
;; That corresponds to 54 bytes of input binary.
(! "#[")
(if (and indenting? (> (bytes-length v) 54))
(let* ((inner-distance (+ outer-distance indent-amount))
(line-separator (bytes-append #"\n" (make-bytes inner-distance 32)))
(encoded (base64-encode v line-separator)))
(write-bytes line-separator o)
(write-bytes encoded o 0 (- (bytes-length encoded) indent-amount)))
(write-bytes (base64-encode v #"") o))
(! "]"))
(define (write-binary outer-distance v)
(match-define (binary-display-heuristics proportion maxlen) (current-binary-display-heuristics))
(define vlen (bytes-length v))
(if (>= vlen maxlen)
(write-binary-base64 outer-distance v)
(let* ((sample-length (min vlen maxlen))
(printable-ascii-count (for/sum [(i (in-range 0 sample-length))
(b (in-bytes v))]
(if (or (<= 32 b 126) (= b 9) (= b 10) (= b 13)) 1 0))))
(if (or (zero? vlen) (>= (/ printable-ascii-count sample-length) proportion))
(write-binary-stringlike v)
(write-binary-base64 outer-distance v)))))
(define (write-value distance v)
(match v
[(annotated annotations _ item)
(when write-annotations?
(for [(a (in-list annotations))]
(! "@")
(write-value (+ distance 1) a)
(!indent* distance)))
(write-value distance item)]
[#f (! "#f")]
[#t (! "#t")]
[(float v) (! "~vf" v)]
[(? flonum?) (! "~v" v)]
[(? integer? x) (! "~v" v)]
[(? string?)
(! "\"")
(for [(c (in-string v))]
(match c
[#\" (! "\\\"")]
[_ (write-stringlike-char c)]))
(! "\"")]
[(? bytes?) (write-binary distance v)]
[(? symbol?)
(define s (symbol->string v))
;; FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
(if (regexp-match #px"^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$" s)
(! "~a" s)
(begin (! "|")
(for [(c (in-string s))]
(match c
[(== PIPE) (! "\\|")]
[_ (write-stringlike-char c)]))
(! "|")))]
[(record label fields) (write-record distance label fields)]
[(? list?) (write-sequence distance "[" (if commas? "," "") "]" write-value v)]
[(? set?) (write-sequence distance "#{" (if commas? "," "") "}" write-value (set->list v))]
[(? dict?) (write-sequence distance "{" (if commas? "," "") "}" write-key-value (dict->list v))]
[other
(! "#!")
(write-value distance (encode-embedded other))]))
(write-value 0 v0))
(define (preserve->string v0
#:indent [indent-amount #f]
#:encode-embedded [encode-embedded #f]
#:commas? [commas? #t]
#:write-annotations? [write-annotations? #t])
(with-output-to-string
(lambda () (write-preserve/text v0
#:indent indent-amount
#:encode-embedded encode-embedded
#:commas? commas?
#:write-annotations? write-annotations?))))