preserves/implementations/racket/preserves/preserves/main.rkt

1197 lines
48 KiB
Racket

#lang racket/base
;; Preserve, as in Fruit Preserve, as in a remarkably weak pun on pickling/dehydration etc
(provide (struct-out stream-of)
stream-of->preserve
(all-from-out "record.rkt")
(struct-out annotated)
annotate
strip-annotations
strip-annotations-proc
peel-annotations
peel-annotations-proc
read-preserve
read-preserve-syntax
string->preserve
string->preserve-syntax
(struct-out binary-display-heuristics)
current-binary-display-heuristics
write-preserve
preserve->string
current-value->placeholder
current-placeholder->value
prepend-noop
encode
decode
decode-syntax
wire-value
preserve-order
preserve<?
canonicalize-preserves?)
(require racket/bytes)
(require (only-in racket/contract any/c))
(require racket/dict)
(require (only-in racket/format ~a))
(require racket/generator)
(require racket/match)
(require (only-in racket/port with-output-to-string))
(require racket/set)
(require bitsyntax)
(require "record.rkt")
(require "varint.rkt")
(require (only-in syntax/readerr raise-read-error raise-read-eof-error))
(require net/base64)
(require (for-syntax racket/base))
(require data/order)
(struct stream-of (kind generator-thunk) #:transparent)
(define (stream-of->preserve s)
(match-define (stream-of kind generator-thunk) s)
(define g (generator-thunk))
(define pieces (for/list [(p (in-producer g (void)))] p))
(match kind
['string (bytes->string/utf-8 (bytes-append* pieces))]
['byte-string (bytes-append* pieces)]
['symbol (string->symbol (bytes->string/utf-8 (bytes-append* pieces)))]
['sequence pieces]
['set (list->set pieces)]
['dictionary (apply hash pieces)]))
;; Syntax properties and syntax objects would be almost perfect for
;; representing annotations, plus position/source tracking as
;; lagniappe, but unfortunately they don't play nicely with data much
;; outside of ordinary S-expressions as found in Racket source.
;;
;; So we do our own thing, for now.
;;
(struct annotated (annotations srcloc item) #:transparent
#:methods gen:equal+hash
[(define (equal-proc a b =?) (=? (annotated-item a) (annotated-item b)))
(define (hash-proc a h) (h (annotated-item a)))
(define (hash2-proc a h) (h (annotated-item a)))])
(define (annotate v . as)
(match v
[(annotated annotations srcloc item)
(annotated (append as annotations) srcloc item)]
[item
(annotated as #f item)]))
(define (strip-annotations-proc v #:depth [depth +inf.0])
(let walk* ((v v) (depth depth))
(define next-depth (- depth 1))
(define (walk v) (walk* v next-depth))
(if (zero? depth)
v
(match v
[(annotated _ _ item)
(match item
[(record label fields) (record (walk* label depth) (map walk fields))]
[(? list?) (map walk item)]
[(? set?) (for/set [(i (in-set item))] (walk i))]
[(? dict?) (for/hash [((k v) (in-dict item))] (values (walk* k depth) (walk v)))]
[(? annotated?) (error 'strip-annotations "Improper annotation structure: ~v" v)]
[_ item])]
[_ v]))))
(define (peel-annotations-proc v)
(strip-annotations-proc v #:depth 1))
(define-match-expander strip-annotations
(syntax-rules () [(_ pat extra ...) (app (lambda (v) (strip-annotations-proc v extra ...)) pat)])
(lambda (stx)
(syntax-case stx ()
[(_ args ...) #'(strip-annotations-proc args ...)]
[_ #'strip-annotations-proc])))
(define-match-expander peel-annotations
(syntax-rules () [(_ pat extra ...) (app (lambda (v) (peel-annotations-proc v extra ...)) pat)])
(lambda (stx)
(syntax-case stx ()
[(_ args ...) #'(peel-annotations-proc args ...)]
[_ #'peel-annotations-proc])))
(define current-value->placeholder (make-parameter (lambda (v) #f)))
(define current-placeholder->value (make-parameter (lambda (v) (void))))
(define (prepend-noop encoded-value)
(bit-string-append #"\xff" encoded-value))
(define (encode v)
(bit-string->bytes (bit-string (v :: (wire-value)))))
(define ((default-on-short bs)) (error 'decode "Short encoding: ~v" bs))
(define ((default-on-fail bs)) (error 'decode "Invalid encoding: ~v" bs))
(define (decode bs
#:read-syntax? [read-syntax? #f]
#:on-short [on-short (default-on-short bs)]
[on-fail (default-on-fail bs)])
(bit-string-case bs
#:on-short (lambda (fail) (on-short))
([ (v :: (wire-value #:read-syntax? read-syntax?)) ] v)
(else (on-fail))))
(define (decode-syntax bs
#:on-short [on-short (default-on-short bs)]
[on-fail (default-on-fail bs)])
(decode bs on-fail #:read-syntax? #t #:on-short on-short))
(define-syntax wire-value
(syntax-rules ()
[(_ #t input ks kf) (decode-value input ks kf #:read-syntax? #f)]
[(_ #t input ks kf #:read-syntax? rs) (decode-value input ks kf #:read-syntax? rs)]
[(_ #f v) (encode-value v)]))
(define-syntax wire-length
(syntax-rules ()
[(_ #t input ks kf) (decode-wire-length input ks kf)]
[(_ #f v) (encode-wire-length v)]))
(define (encode-wire-length v)
(when (negative? v) (error 'encode-wire-length "Cannot encode negative wire-length ~v" v))
(if (< v #b1111)
(bit-string (v :: bits 4))
(bit-string (#b1111 :: bits 4) ((encode-varint v) :: binary))))
(define (encode-array-like minor fields)
(bit-string (2 :: bits 2)
(minor :: bits 2)
((length fields) :: (wire-length))
((apply bit-string-append (map encode-value fields)) :: binary)))
(define (encode-binary-like minor bs)
(bit-string (1 :: bits 2)
(minor :: bits 2)
((bytes-length bs) :: (wire-length))
(bs :: binary)))
(define (encode-stream major minor chunk-ok? generator)
(bit-string-append (bit-string (#b0010 :: bits 4) (major :: bits 2) (minor :: bits 2))
(let loop ()
(match (generator)
[(? void?) #""]
[(? chunk-ok? v) (bit-string-append (encode-value v) (loop))]
[bad (error 'encode-stream "Cannot encode chunk: ~v" bad)]))
(bit-string #b00000100)))
(define (dict-keys-and-values d)
(reverse (for/fold [(acc '())] [((k v) (in-dict d))] (cons v (cons k acc)))))
(define (encode-value v)
(define canonicalizing? (canonicalize-preserves?))
(match (and (not canonicalizing?) ((current-value->placeholder) v))
[(? integer? n)
(bit-string (#b0001 :: bits 4) (n :: (wire-length)))]
[#f
(let restart ((v v))
(match v
[#f (bytes #b00000000)]
[#t (bytes #b00000001)]
[(? single-flonum?) (bit-string #b00000010 (v :: float bits 32))]
[(? double-flonum?) (bit-string #b00000011 (v :: float bits 64))]
[(annotated annotations _ item)
(if canonicalizing?
(restart item)
(bit-string ((apply bit-string-append
(map (lambda (a) (bit-string #b00000101 ((encode-value a) :: binary)))
annotations)) :: binary)
((encode-value item) :: binary)))]
[(? stream-of?) #:when canonicalizing?
(restart (stream-of->preserve v))]
[(stream-of 'string p) (encode-stream 1 1 bytes? (p))]
[(stream-of 'byte-string p) (encode-stream 1 2 bytes? (p))]
[(stream-of 'symbol p) (encode-stream 1 3 bytes? (p))]
[(stream-of 'sequence p) (encode-stream 2 1 (lambda (x) #t) (p))]
[(stream-of 'set p) (encode-stream 2 2 (lambda (x) #t) (p))]
[(stream-of 'dictionary p) (encode-stream 2 3 (lambda (x) #t) (p))]
[(? integer? x) #:when (<= -3 x 12) (bit-string (#b0011 :: bits 4) (x :: bits 4))]
;; [0 (bytes #b10000000)]
[(? integer?)
(define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit
(define byte-count (quotient (+ raw-bit-count 7) 8))
(bit-string (#b0100 :: bits 4) (byte-count :: (wire-length)) (v :: integer bytes byte-count))]
[(? string?) (encode-binary-like 1 (string->bytes/utf-8 v))]
[(? bytes?) (encode-binary-like 2 v)]
[(? symbol?) (encode-binary-like 3 (string->bytes/utf-8 (symbol->string v)))]
[(record label fields) (encode-array-like 0 (cons label fields))]
[(? list?) (encode-array-like 1 v)]
[(? set?) (encode-array-like 2 (if canonicalizing?
(canonical-set-elements v)
(set->list v)))]
[(? dict?) (encode-array-like 3 (if canonicalizing?
(canonical-dict-keys-and-values v)
(dict-keys-and-values v)))]
[_ (error 'encode-value "Cannot encode value ~v" v)]))]))
;;---------------------------------------------------------------------------
(define (decode-wire-length bs ks kf)
(bit-string-case bs
#:on-short (lambda (fail) (kf #t))
([ (= #b1111 :: bits 4) (rest :: binary) ]
(decode-varint rest
(lambda (v tail)
(if (< v #b1111)
(kf)
(ks v tail)))
kf))
([ (v :: bits 4) (rest :: binary) ] (ks v rest))
(else (kf))))
(define (decode-binary minor bs rest ks kf)
(match minor
[0 (if (positive? (bit-string-length bs))
(ks (bit-string->signed-integer bs #t) rest)
(ks 0 rest))]
[2 (ks bs rest)]
[(or 1 3)
((with-handlers [(exn:fail:contract? (lambda (e) kf))]
(define s (bytes->string/utf-8 bs))
(lambda () (ks (if (= minor 3) (string->symbol s) s) rest))))]))
(define (decode-compound minor vs rest ks kf)
(match* (minor vs)
[(0 (list* label fields)) (ks (record label fields) rest)]
[(0 '()) (kf)]
[(1 _) (ks vs rest)]
[(2 _) (ks (list->set vs) rest)]
[(3 _) (if (even? (length vs))
(ks (apply hash vs) rest)
(kf))]))
(define (decode-value input ks kf #:read-syntax? read-syntax?)
(define (position rest)
(- (bytes-length input) (arithmetic-shift (bit-string-length rest) -3)))
(define nil-annotation
(if read-syntax?
(lambda (ks bs)
(lambda (result rest)
(define pos0 (position bs))
(define pos1 (position rest))
(ks (annotated '()
(srcloc #f #f #f (+ pos0 1) (- pos1 pos0))
result)
rest)))
(lambda (ks bs) ks)))
(define cons-annotation
(if read-syntax?
(lambda (ks a v rest)
(match-define (annotated annotations srcloc item) v)
(ks (annotated (cons a annotations) srcloc item) rest))
(lambda (ks a v rest)
(ks v rest))))
(define (decode-values n bs ks kf)
(let loop ((n n) (acc-rev '()) (bs bs))
(if (zero? n)
(ks (reverse acc-rev) bs)
(decode-one bs (lambda (v rest) (loop (- n 1) (cons v acc-rev) rest)) kf))))
(define (decode-stream minor annotations-ok chunk-ok? join-chunks decode bs ks kf)
(let loop ((acc-rev '()) (rest bs))
(define (accept-one chunk rest)
(if (chunk-ok? chunk)
(loop (cons chunk acc-rev) rest)
(kf)))
(bit-string-case rest
#:on-short (lambda (fail) (kf #t))
([ (= #b00000100 :: bits 8) (rest :: binary) ]
(decode minor
(join-chunks (reverse acc-rev))
rest
(nil-annotation ks bs)
kf))
([ (= #b00000101 :: bits 8) (rest1 :: binary) ]
(if annotations-ok
(decode-one rest accept-one kf)
(kf)))
(else
(decode-one rest accept-one kf)))))
(define bytes-chunk?
(if read-syntax?
(lambda (v) (bytes? (annotated-item v)))
bytes?))
(define bytes-chunk-append*
(if read-syntax?
(lambda (vs) (bytes-append* (map annotated-item vs)))
bytes-append*))
(define (decode-one bs ks kf)
(bit-string-case bs
#:on-short (lambda (fail) (kf #t))
([ (= #b00000000 :: bits 8) (rest :: binary) ]
((nil-annotation ks bs) #f rest))
([ (= #b00000001 :: bits 8) (rest :: binary) ]
((nil-annotation ks bs) #t rest))
([ (= #b00000010 :: bits 8) (v :: float bits 32) (rest :: binary) ]
((nil-annotation ks bs) (real->single-flonum v) rest))
([ (= #b00000011 :: bits 8) (v :: float bits 64) (rest :: binary) ]
((nil-annotation ks bs) v rest))
([ (= #b00000101 :: bits 8) (rest :: binary) ]
(decode-one rest
(lambda (a rest)
(decode-one rest
(lambda (v rest)
(cons-annotation ks a v rest))
kf))
kf))
([ (= #b0001 :: bits 4) (placeholder :: (wire-length)) (rest :: binary) ]
(match ((current-placeholder->value) placeholder)
[(? void?) (error 'decode "Invalid Preserves placeholder: ~v" placeholder)]
[v ((nil-annotation ks bs) v rest)]))
([ (= #b001001 :: bits 6) (minor :: bits 2) (rest :: binary) ]
(decode-stream minor #f bytes-chunk? bytes-chunk-append* decode-binary rest ks kf))
([ (= #b001010 :: bits 6) (minor :: bits 2) (rest :: binary) ]
(decode-stream minor #t (lambda (x) #t) values decode-compound rest ks kf))
([ (= #b0011 :: bits 4) (x :: bits 4) (rest :: binary) ]
((nil-annotation ks bs) (if (> x 12) (- x 16) x) rest))
([ (= #b01 :: bits 2) (minor :: bits 2) (byte-count :: (wire-length))
(bits :: binary bytes byte-count)
(rest :: binary) ]
(decode-binary minor (bit-string->bytes bits) rest (nil-annotation ks bs) kf))
([ (= #b10 :: bits 2) (minor :: bits 2) (field-count :: (wire-length)) (rest :: binary) ]
(decode-values field-count
rest
(lambda (fields rest)
(decode-compound minor fields rest (nil-annotation ks bs) kf))
kf))
([ (= #b11111111 :: bits 8) (rest :: binary) ]
(decode-one rest ks kf))
(else (kf))))
(decode-one input ks kf))
;;---------------------------------------------------------------------------
(define PIPE #\|)
(define (skip-whitespace* i)
(regexp-match? #px#"^(\\s|,)*" i)) ;; side effect: consumes matched portion of input
(define (parse-error* #:raise-proc [raise-proc raise-read-error] i source fmt . args)
(define-values [line column pos] (port-next-location i))
(raise-proc (format "read-preserve: ~a" (apply format fmt args))
source
line
column
pos
#f))
(define (read-preserve [i (current-input-port)]
#:read-syntax? [read-syntax? #f]
#:source [source (object-name i)])
(local-require file/sha1)
(define-match-expander px
(syntax-rules ()
[(_ re pat) (app (lambda (v) (regexp-try-match re v)) pat)]))
(define (parse-error fmt . args)
(apply parse-error* i source fmt args))
(define (eof-guard v)
(match v
[(? eof-object?)
(parse-error* #:raise-proc raise-read-eof-error i source "Unexpected end of input")]
[v v]))
(define (peek/no-eof) (eof-guard (peek-char i)))
(define (read/no-eof) (eof-guard (read-char i)))
(define (skip-whitespace) (skip-whitespace* i))
(define (read-sequence terminator)
(sequence-fold '() (lambda (acc v) (cons v acc)) reverse terminator))
(define (set-add* s e)
(when (set-member? s e) (parse-error "Duplicate set element: ~v" e))
(set-add s e))
(define (read-dictionary-or-set seed)
(sequence-fold seed
(lambda (acc k)
(skip-whitespace)
(match (peek-char i)
[#\: (read-char i)
(when (set? acc) (parse-error "Unexpected key/value separator in set"))
(let ((acc (or acc (hash))))
(when (hash-has-key? acc k) (parse-error "Duplicate key: ~v" k))
(hash-set acc k (read-value)))]
[_ (when (hash? acc) (parse-error "Missing expected key/value separator"))
(set-add* (or acc (set)) k)]))
(lambda (acc) (or acc (hash)))
#\}))
(define (read-raw-symbol acc)
(match (peek-char i)
[(or (? eof-object?)
(? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\>
#\" #\; #\, #\@ #\# #\: (== PIPE)
(? char-whitespace?))))
(if (null? acc)
(parse-error "Invalid character ~v at start of value" (peek-char i))
(string->symbol (list->string (reverse acc))))]
[_ (read-raw-symbol (cons (read-char i) acc))]))
(define (read-base64-binary acc)
(skip-whitespace)
(define ch (read/no-eof))
(cond [(eqv? ch #\})
(base64-decode (string->bytes/latin-1 (list->string (reverse acc))))]
[(or (and (char>=? ch #\A) (char<=? ch #\Z))
(and (char>=? ch #\a) (char<=? ch #\z))
(and (char>=? ch #\0) (char<=? ch #\9))
(memv ch '(#\+ #\/ #\- #\_ #\=)))
(read-base64-binary (cons ch acc))]
[else
(parse-error "Invalid base64 character")]))
(define (hexdigit? ch)
(or (and (char>=? ch #\A) (char<=? ch #\F))
(and (char>=? ch #\a) (char<=? ch #\f))
(and (char>=? ch #\0) (char<=? ch #\9))))
(define (read-hex-binary acc)
(skip-whitespace)
(define ch (read/no-eof))
(cond [(eqv? ch #\})
(hex-string->bytes (list->string (reverse acc)))]
[(hexdigit? ch)
(define ch2 (read/no-eof))
(when (not (hexdigit? ch2))
(parse-error "Hex-encoded binary digits must come in pairs"))
(read-hex-binary (cons ch2 (cons ch acc)))]
[else
(parse-error "Invalid hex character")]))
(define (read-stringlike xform-item finish terminator-char hexescape-char hexescape-proc)
(let loop ((acc '()))
(match (read/no-eof)
[(== terminator-char) (finish (reverse acc))]
[#\\ (match (read/no-eof)
[(== hexescape-char) (loop (cons (hexescape-proc) acc))]
[(and c (or (== terminator-char) #\\ #\/)) (loop (cons (xform-item c) acc))]
[#\b (loop (cons (xform-item #\u08) acc))]
[#\f (loop (cons (xform-item #\u0C) acc))]
[#\n (loop (cons (xform-item #\u0A) acc))]
[#\r (loop (cons (xform-item #\u0D) acc))]
[#\t (loop (cons (xform-item #\u09) acc))]
[c (parse-error "Invalid escape code \\~a" c)])]
[c (loop (cons (xform-item c) acc))])))
(define (read-string terminator-char)
(read-stringlike values
list->string
terminator-char
#\u
(lambda ()
(integer->char
(match i
[(px #px#"^[a-fA-F0-9]{4}" (list hexdigits))
(define n1 (string->number (bytes->string/utf-8 hexdigits) 16))
(if (<= #xd800 n1 #xdfff) ;; surrogate pair first half
(match i
[(px #px#"^\\\\u([a-fA-F0-9]{4})" (list _ hexdigits2))
(define n2 (string->number (bytes->string/utf-8 hexdigits2) 16))
(if (<= #xdc00 n2 #xdfff)
(+ (arithmetic-shift (- n1 #xd800) 10)
(- n2 #xdc00)
#x10000)
(parse-error "Bad second half of surrogate pair"))]
[_ (parse-error "Missing second half of surrogate pair")])
n1)]
[_ (parse-error "Bad string \\u escape")])))))
(define (read-literal-binary)
(read-stringlike (lambda (c)
(define b (char->integer c))
(when (>= b 256)
(parse-error "Invalid code point ~a (~v) in literal binary" b c))
b)
list->bytes
#\"
#\x
(lambda ()
(match i
[(px #px#"^[a-fA-F0-9]{2}" (list hexdigits))
(string->number (bytes->string/utf-8 hexdigits) 16)]
[_ (parse-error "Bad binary \\x escape")]))))
(define (read-intpart acc-rev)
(match (peek-char i)
[#\0 (read-fracexp (cons (read-char i) acc-rev))]
[_ (read-digit+ acc-rev read-fracexp)]))
(define (read-digit* acc-rev k)
(match (peek-char i)
[(? char? (? char-numeric?)) (read-digit* (cons (read-char i) acc-rev) k)]
[_ (k acc-rev)]))
(define (read-digit+ acc-rev k)
(match (peek-char i)
[(? char? (? char-numeric?)) (read-digit* (cons (read-char i) acc-rev) k)]
[_ (parse-error "Incomplete number")]))
(define (read-fracexp acc-rev)
(match (peek-char i)
[#\. (read-digit+ (cons (read-char i) acc-rev) read-exp)]
[_ (read-exp acc-rev)]))
(define (read-exp acc-rev)
(match (peek-char i)
[(or #\e #\E) (read-sign-and-exp (cons (read-char i) acc-rev))]
[_ (finish-number acc-rev)]))
(define (read-sign-and-exp acc-rev)
(match (peek-char i)
[(or #\+ #\-) (read-digit+ (cons (read-char i) acc-rev) finish-number)]
[_ (read-digit+ acc-rev finish-number)]))
(define (finish-number acc-rev)
(define s (list->string (reverse acc-rev)))
(define n (string->number s))
(when (not n) (parse-error "Invalid number: ~v" s))
(if (flonum? n)
(match (peek-char i)
[(or #\f #\F) (read-char i) (real->single-flonum n)]
[_ n])
n))
(define (read-number)
(match (peek/no-eof)
[#\- (read-intpart (list (read-char i)))]
[_ (read-intpart (list))]))
(define (sequence-fold acc accumulate-one finish terminator-char)
(let loop ((acc acc))
(skip-whitespace)
(match (peek/no-eof)
[(== terminator-char) (read-char i) (finish acc)]
[_ (loop (accumulate-one acc (read-value)))])))
(define nil-annotation
(if read-syntax?
(lambda (thunk)
(define-values (line0 col0 pos0) (port-next-location i))
(define v (thunk))
(define-values (line1 col1 pos1) (port-next-location i))
(define loc (and line0 col0 pos0 pos1 (srcloc source line0 col0 pos0 (- pos1 pos0))))
(match v
[(annotated annotations _ item) (annotated annotations loc item)]
[item (annotated '() loc item)]))
(lambda (thunk) (thunk))))
(define cons-annotation
(if read-syntax?
(lambda (a v)
(match-define (annotated annotations srcloc item) v)
(annotated (cons a annotations) srcloc item))
(lambda (a v) v)))
(define (read-value)
(skip-whitespace)
(define sigil (peek-char i))
(match sigil
[(? eof-object? o) o]
[#\@ (read-char i)
(define a (eof-guard (read-value)))
(define v (eof-guard (read-value)))
(cons-annotation a v)]
[_
(nil-annotation
(lambda ()
(match sigil
[#\{ (read-char i) (read-dictionary-or-set #f)]
[#\[ (read-char i) (read-sequence #\])]
[#\< (read-char i)
(match (read-sequence #\>)
['() (parse-error "Missing record label")]
[(cons head fields) (record head fields)])]
[(or #\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (read-number)]
[#\" (read-char i) (read-string #\")]
[(== PIPE) (read-char i) (string->symbol (read-string PIPE))]
[#\# (match i
[(px #px#"^#set\\{" (list _))
(sequence-fold (set) set-add* values #\})]
[(px #px#"^#value" (list _))
(define bs (read-preserve i #:read-syntax? #t))
(when (not (bytes? (annotated-item bs)))
(parse-error "ByteString must follow #value"))
(when (not (null? (annotated-annotations bs)))
(parse-error "Annotations not permitted after #value"))
(decode (annotated-item bs) #:read-syntax? read-syntax?)]
[(px #px#"^#true" (list _))
#t]
[(px #px#"^#false" (list _))
#f]
[(px #px#"^#\"" (list _))
(read-literal-binary)]
[(px #px#"^#hex\\{" (list _))
(read-hex-binary '())]
[(px #px#"^#base64\\{" (list _))
(read-base64-binary '())]
[_
(read-char i)
(parse-error "Invalid preserve value")])]
[#\: (read-char i) (parse-error "Unexpected key/value separator between items")]
[_ (read-raw-symbol '())])))]))
(read-value))
(define (read-preserve-syntax [i (current-input-port)]
#:source [source #f])
(read-preserve i #:read-syntax? #t #:source source))
(define (string->preserve s #:read-syntax? [read-syntax? #f] #:track-position? [track-position? #t])
(define p (open-input-string s))
(define source "<string>")
(when track-position? (port-count-lines! p))
(define v (read-preserve p #:read-syntax? read-syntax? #:source source))
(when (eof-object? v)
(parse-error* #:raise-proc raise-read-eof-error p source "Unexpected end of input"))
(skip-whitespace* p)
(when (not (eof-object? (peek-char p)))
(parse-error* p source "Unexpected text following preserve"))
v)
(define (string->preserve-syntax s)
(string->preserve s #:read-syntax? #t))
;;---------------------------------------------------------------------------
(struct binary-display-heuristics (printable-ascii-proportion max-length) #:transparent)
(define current-binary-display-heuristics (make-parameter (binary-display-heuristics 3/4 1024)))
(define (write-preserve v0 [o (current-output-port)] #:indent [indent-amount0 #f])
(define indent-amount (match indent-amount0
[#f 0]
[#t 2] ;; a default
[other other]))
(define indenting? (and indent-amount0 #t))
(define canonicalizing? (canonicalize-preserves?))
(define-syntax-rule (! fmt arg ...) (fprintf o fmt arg ...))
(define (!indent distance)
(when indenting?
(! "\n~a" (make-string distance #\space))))
(define (!indent* distance)
(if indenting?
(!indent distance)
(! " ")))
(define (write-stringlike-char c [default (lambda (c) (! "~a" c))])
(match c
[#\\ (! "\\\\")]
[#\u08 (! "\\b")]
[#\u0C (! "\\f")]
[#\u0A (! "\\n")]
[#\u0D (! "\\r")]
[#\u09 (! "\\t")]
[_ (default c)]))
(define (write-sequence outer-distance opener comma closer item-writer vs)
(define inner-distance (+ outer-distance indent-amount))
(! "~a" opener)
(match vs
['() (void)]
[(cons v0 vs)
(!indent inner-distance)
(item-writer inner-distance v0)
(for [(v (in-list vs))]
(! "~a" comma)
(!indent* inner-distance)
(item-writer inner-distance v))
(!indent outer-distance)])
(! "~a" closer))
(define (write-record outer-distance label fields)
(! "<")
(write-value outer-distance label)
(for ([f (in-list fields)])
(! " ")
(write-value outer-distance f))
(! ">"))
;; (define (write-record outer-distance label fields)
;; (define simple-label? (or (boolean? label) (number? label) (string? label)
;; (bytes? label) (symbol? label)))
;; (define inner-distance (+ outer-distance
;; (if simple-label?
;; (+ 2 (string-length (preserve->string label #:indent #f)))
;; indent-amount)))
;; (define (write-fields fields)
;; (for ([f (in-list fields)])
;; (!indent* inner-distance)
;; (write-value inner-distance f)))
;;
;; (! "<")
;; (write-value inner-distance label)
;; (if simple-label?
;; (match fields
;; ['() (void)]
;; [(cons field0 fields)
;; (! " ")
;; (write-value inner-distance field0)
;; (write-fields fields)])
;; (write-fields fields))
;; (! ">"))
(define (write-key-value distance kv)
(match-define (cons k v) kv)
(write-value distance k)
(! ": ")
(write-value distance v))
(define (binunescaped? b)
(or (<= #x20 b #x21)
(<= #x23 b #x5b)
(<= #x5d b #x7e)))
(define (write-binary-stringlike v)
(! "#\"")
(for [(b (in-bytes v))]
(match b
[#x22 (! "\\\"")]
[(? binunescaped?) (write-stringlike-char (integer->char b))]
[_ (write-stringlike-char (integer->char b)
(lambda (c) (! "\\x~a" (~a #:min-width 2
#:align 'right
#:left-pad-string "0"
(number->string b 16)))))]))
(! "\""))
(define (write-binary-base64 outer-distance v)
;; Racket's encoder breaks lines after 72 characters.
;; That corresponds to 54 bytes of input binary.
(! "#base64{")
(if (and indenting? (> (bytes-length v) 54))
(let* ((inner-distance (+ outer-distance indent-amount))
(line-separator (bytes-append #"\n" (make-bytes inner-distance 32)))
(encoded (base64-encode v line-separator)))
(write-bytes line-separator o)
(write-bytes encoded o 0 (- (bytes-length encoded) indent-amount)))
(write-bytes (base64-encode v #"") o))
(! "}"))
(define (write-binary outer-distance v)
(match-define (binary-display-heuristics proportion maxlen) (current-binary-display-heuristics))
(define vlen (bytes-length v))
(if (>= vlen maxlen)
(write-binary-base64 outer-distance v)
(let* ((sample-length (min vlen maxlen))
(printable-ascii-count (for/sum [(i (in-range 0 sample-length))
(b (in-bytes v))]
(if (or (<= 32 b 126) (= b 9) (= b 10) (= b 13)) 1 0))))
(if (or (zero? vlen) (>= (/ printable-ascii-count sample-length) proportion))
(write-binary-stringlike v)
(write-binary-base64 outer-distance v)))))
(define (write-value distance v)
(match v
[(annotated annotations _ item)
(when (not canonicalizing?)
(for [(a (in-list annotations))]
(! "@")
(write-value (+ distance 1) a)
(!indent* distance)))
(write-value distance item)]
[(? stream-of?) (write-value distance (stream-of->preserve v))]
[#f (! "#false")]
[#t (! "#true")]
[(? single-flonum?) (! "~vf" (real->double-flonum v))]
[(? double-flonum?) (! "~v" v)]
[(? integer? x) (! "~v" v)]
[(? string?)
(! "\"")
(for [(c (in-string v))]
(match c
[#\" (! "\\\"")]
[_ (write-stringlike-char c)]))
(! "\"")]
[(? bytes?) (write-binary distance v)]
[(? symbol?)
(define s (symbol->string v))
;; FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
(if (regexp-match #px"[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*" s)
(! "~a" s)
(begin (! "|")
(for [(c (in-string s))]
(match c
[(== PIPE) (! "\\|")]
[_ (write-stringlike-char c)]))
(! "|")))]
[(record label fields) (write-record distance label fields)]
[(? list?) (write-sequence distance "[" "," "]" write-value v)]
[(? set?) (write-sequence distance "#set{" "," "}" write-value (if canonicalizing?
(canonical-set-elements v)
(set->list v)))]
[(? dict?) (write-sequence distance "{" "," "}" write-key-value (if canonicalizing?
(canonical-dict-entries v)
(dict->list v)))]
[_ (error 'write-preserve "Cannot encode value ~v" v)]))
(write-value 0 v0))
(define (preserve->string v0 #:indent [indent-amount #f])
(with-output-to-string (lambda () (write-preserve v0 #:indent indent-amount))))
;;---------------------------------------------------------------------------
(define (typecode v)
(match v
[(? boolean?) 0]
[(? single-flonum?) 1]
[(? double-flonum?) 2]
[(? integer? x) 3]
[(? string?) 4]
[(? bytes?) 5]
[(? symbol?) 6]
[(record _ _) 7]
[(? list?) 8]
[(? set?) 9]
[(? dict?) 10]
[_ (error 'preserve-order "Cannot compare value ~v" v)]))
(define-syntax chain-order
(syntax-rules ()
[(_ o) o]
[(_ o more ...) (match o
['= (chain-order more ...)]
[other other])]))
(define (prepare-for-order v)
(match v
[(annotated _ _ item) (prepare-for-order item)]
[(? stream-of?) (stream-of->preserve v)]
[_ v]))
(define preserve-order
(order 'preserve-order
any/c
(lambda (a* b*)
(define a (prepare-for-order a*))
(define b (prepare-for-order b*))
(define ta (typecode a))
(define tb (typecode b))
(cond [(< ta tb) '<]
[(> ta tb) '>]
[else (match ta ;; == tb
[7 (chain-order
(preserve-order (record-label a) (record-label b))
(preserve-order (record-fields a)) (preserve-order (record-fields b)))]
[8 (match* (a b)
[('() '()) '=]
[('() _) '<]
[(_ '()) '>]
[((cons a0 a1) (cons b0 b1))
(chain-order (preserve-order a0 b0) (preserve-order a1 b1))])]
[9 (preserve-order (canonical-set-elements a) (canonical-set-elements b))]
[10 (preserve-order (canonical-dict-keys a) (canonical-dict-keys b))]
[_ (datum-order a b)])]))))
(define preserve<? (order-<? preserve-order))
;;---------------------------------------------------------------------------
(define canonicalize-preserves? (make-parameter #f))
(define *canonical-cache* (vector (make-weak-hasheq)
(make-weak-hasheq)
(make-weak-hasheq)
(make-weak-hasheq)))
(define (canonical-set-elements v)
(hash-ref! (vector-ref *canonical-cache* 0)
v
(lambda () (sort (set->list v) preserve<?))))
(define (canonical-dict-entries v)
(hash-ref! (vector-ref *canonical-cache* 1)
v
(lambda () (sort (dict->list v) preserve<? #:key car))))
(define (canonical-dict-keys-and-values v)
(hash-ref! (vector-ref *canonical-cache* 2)
v
(lambda () (let loop ((xs (canonical-dict-entries v)))
(match xs
['() '()]
[(cons (cons kk vv) rest) (cons kk (cons vv (loop rest)))])))))
(define (canonical-dict-keys v)
(hash-ref! (vector-ref *canonical-cache* 3)
v
(lambda () (map car (canonical-dict-entries v)))))
;;---------------------------------------------------------------------------
(module+ test
(require rackunit)
(require racket/runtime-path)
(require syntax/srcloc)
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (0 :: (wire-length)))) (bytes 0))
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (3 :: (wire-length)))) (bytes 3))
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (14 :: (wire-length)))) (bytes 14))
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (15 :: (wire-length)))) (bytes 15 15))
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (100 :: (wire-length))))
(bytes 15 100))
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (300 :: (wire-length))))
(bytes 15 #b10101100 #b00000010))
(define (dwl bs)
(bit-string-case bs
#:on-short (lambda (fail) 'short)
([ (= 0 :: bits 4) (w :: (wire-length)) ] w)
(else (void))))
(check-equal? (dwl (bytes 0)) 0)
(check-equal? (dwl (bytes 3)) 3)
(check-equal? (dwl (bytes 14)) 14)
(check-equal? (dwl (bytes 15)) 'short)
(check-equal? (dwl (bytes 15 9)) (void)) ;; not canonical
(check-equal? (dwl (bytes 15 15)) 15)
(check-equal? (dwl (bytes 15 100)) 100)
(check-equal? (dwl (bytes 15 #b10101100 #b00000010)) 300)
(check-equal? (dwl (bytes 15 #b10101100)) 'short)
(define (d bs)
(for [(i (in-range 0 (- (bytes-length bs) 1)))]
(when (not (eq? (decode (subbytes bs 0 i)
#:on-short (lambda () 'short)
void)
'short))
(error 'd "~a-byte prefix of ~v does not read as short" i bs)))
(decode-syntax bs
#:on-short (lambda () 'short)
void))
(define (d-strip bs)
(strip-annotations (d bs)))
(struct discard () #:prefab)
(struct capture (detail) #:prefab)
(struct observe (specification) #:prefab)
(struct speak (who what) #:prefab)
(struct date (year month day) #:prefab)
(struct thing (id) #:prefab)
(struct person thing (name date-of-birth) #:prefab)
(struct titled person (title) #:prefab)
(struct asymmetric (forward back))
(define (stream-of* kind . items)
(stream-of kind (lambda () (sequence->generator items))))
(define samples-txt-expected
(hash 'record1 (capture (discard))
'record2 (observe (speak (discard) (capture (discard))))
'list4a '(1 2 3 4)
'list1 (asymmetric (stream-of* 'sequence 1 2 3 4)
'(1 2 3 4))
'list5 '(-2 -1 0 1)
'string3 "hello"
'string2 (asymmetric (stream-of* 'string #"he" #"llo")
"hello")
'string1 (asymmetric (stream-of* 'string #"he" #"ll" #"o")
"hello")
'bytes1 (asymmetric (stream-of* 'byte-string #"he" #"ll" #"o")
#"hello")
'symbol1 (asymmetric (stream-of* 'symbol #"he" #"ll" #"o")
'hello)
'list6 `("hello" there #"world" () ,(set) #t #f)
'bytes2 #"hello"
'bytes3 #"ABC"
'bytes4 #"ABC"
'bytes5 #"AJN"
'bytes7 #"corymb"
'bytes8 #"corymb"
'bytes9 #"Hi"
'bytes10 #"Hi"
'bytes11 #"Hi"
'value1 #"corymb"
'value2 #t
'value3 #t
'value4 #t
'value5 #t
'value6 (list 1 2 3)
'list0 '()
'dict0 (hash)
'string0 ""
'string0a (asymmetric (stream-of* 'string)
"")
'symbol0 '||
'set0 (set)
'set1 (set 1 2 3)
'set1a (set 1 2 3)
'string4 "abc\u6c34\u6C34\\/\"\b\f\n\r\txyz"
'bytes13 #"abc\x6c\x34\xf0\\/\"\b\f\n\r\txyz"
'string5 "\U0001D11E"
'list2 (asymmetric (stream-of* 'sequence
(stream-of* 'string #"abc")
(stream-of* 'string #"def"))
'("abc" "def"))
'list3 (asymmetric (stream-of* 'sequence '("a" 1) '("b" 2) '("c" 3))
'(("a" 1) ("b" 2) ("c" 3)))
'record1 (capture (discard))
'record2 (observe (speak (discard) (capture (discard))))
'record3 (titled 101 "Blackwell" (date 1821 2 3) "Dr")
'record4 (asymmetric (record 'discard '()) (discard))
'record5 (record 7 '(()))
'record6 (asymmetric (record 'discard '(surprise))
'#s(discard surprise))
'record7 (record "aString" '(3 4))
'record8 (record (discard) '(3 4))
'list7 (list 'abc '|...| 'def)
'dict1 (hash 'a 1
"b" #t
'(1 2 3) #"c"
(hash 'first-name "Elizabeth") (hash 'surname "Blackwell"))
'rfc8259-example1 (hash "Image"
(hash "Width" 800
"Height" 600
"Title" "View from 15th Floor"
"Thumbnail" (hash "Url" "http://www.example.com/image/481989943"
"Height" 125
"Width" 100)
"Animated" 'false
"IDs" (list 116 943 234 38793)))
'rfc8259-example2 (list (hash
"precision" "zip"
"Latitude" 37.7668
"Longitude" -122.3959
"Address" ""
"City" "SAN FRANCISCO"
"State" "CA"
"Zip" "94107"
"Country" "US")
(hash
"precision" "zip"
"Latitude" 37.371991
"Longitude" -122.026020
"Address" ""
"City" "SUNNYVALE"
"State" "CA"
"Zip" "94085"
"Country" "US"))
'annotation1 (asymmetric (annotate 9 "abc") 9)
'annotation2 (asymmetric (annotate (list '() (annotate '() "x")) "abc" "def") '(() ()))
'annotation3 (asymmetric (annotate 5 (annotate 2 1) (annotate 4 3)) 5)
'annotation4 (asymmetric (hash (annotate 'a 'ak) (annotate 1 'av)
(annotate 'b 'bk) (annotate 2 'bv))
(hash 'a 1 'b 2))
'annotation5 (asymmetric (annotate `#s(R ,(annotate 'f 'af)) 'ar) `#s(R f))
'annotation6 (asymmetric (record (annotate 'R 'ar) (list (annotate 'f 'af))) `#s(R f))
'annotation7 (asymmetric (annotate '() 'a 'b 'c) '())
))
(define (run-test-case variety t-name loc binary-form annotated-text-form)
(define text-form (strip-annotations annotated-text-form))
(define-values (forward back can-execute-nondet-with-canonicalization?)
(match (hash-ref samples-txt-expected t-name text-form)
[(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations
[v (values v v #t)]))
(check-equal? text-form back loc) ;; expectation 1
(check-equal? (d-strip (encode text-form)) back loc) ;; expectation 2
(check-equal? (d-strip (encode forward)) back loc) ;; expectation 3
(check-equal? (d-strip binary-form) back loc) ;; expectation 4
(check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5
(check-equal? (d (encode annotated-text-form)) annotated-text-form loc) ;; expectation 6
(check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7
(check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8
(check-equal? (string->preserve-syntax (preserve->string annotated-text-form)) ;; similar to 8
annotated-text-form
loc)
(when (and (not (memq variety '(decode)))
(or (not (memq variety '(nondeterministic)))
(and can-execute-nondet-with-canonicalization?)))
;; expectations 9 and 10
(parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f)))
(check-equal? (encode forward) binary-form loc)))
(unless (memq variety '(decode nondeterministic streaming))
;; expectation 11
(check-equal? (encode annotated-text-form) binary-form loc)))
(define-runtime-path tests-path "../../../../tests")
(let* ((path (build-path tests-path "samples.txt"))
(testfile (call-with-input-file path
(lambda (p)
(port-count-lines! p)
(read-preserve-syntax p #:source path)))))
(match-define (peel-annotations
`#s(TestCases
,(strip-annotations
`#s(ExpectedPlaceholderMapping ,placeholder->value-map))
,tests))
testfile)
(define value->placeholder-map (for/hash [((k v) (in-hash placeholder->value-map))]
(values v k)))
(parameterize
((current-value->placeholder (lambda (v) (hash-ref value->placeholder-map v #f)))
(current-placeholder->value (lambda (p) (hash-ref placeholder->value-map p void))))
(for [((t-name* t*) (in-hash (annotated-item tests)))]
(define t-name (strip-annotations t-name*))
(define loc (format "Test case '~a' (~a)" t-name (source-location->string (annotated-srcloc t*))))
(define (fail-test fmt . args)
(fail (format "~a: ~a" loc (apply format fmt args))))
(displayln loc)
(match (peel-annotations t*)
[`#s(Test ,(strip-annotations binary-form) ,annotated-text-form)
(run-test-case 'normal t-name loc binary-form annotated-text-form)]
[`#s(NondeterministicTest ,(strip-annotations binary-form) ,annotated-text-form)
(run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)]
[`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form)
(run-test-case 'streaming t-name loc binary-form annotated-text-form)]
[`#s(DecodeTest ,(strip-annotations binary-form) ,annotated-text-form)
(run-test-case 'decode t-name loc binary-form annotated-text-form)]
[`#s(ParseError ,(strip-annotations str))
(with-handlers [(exn:fail:read:eof?
(lambda (e) (fail-test "Unexpected EOF: ~e" e)))
(exn:fail:read?
(lambda (e) 'ok))
((lambda (e) #t)
(lambda (e) (fail-test "Unexpected exception: ~e" e)))]
(string->preserve str)
(fail-test "Unexpected success"))]
[`#s(ParseShort ,(strip-annotations str))
(with-handlers [(exn:fail:read:eof? (lambda (e) 'ok))
((lambda (e) #t)
(lambda (e) (fail-test "Unexpected exception: ~e" e)))]
(string->preserve str)
(fail-test "Unexpected success"))]
[`#s(DecodeShort ,(strip-annotations bs))
(check-eq? (d bs) 'short loc)]
[`#s(DecodeError ,(strip-annotations bs))
(check-true (void? (d bs)) loc)]
[_
(write-preserve t* #:indent #f)
(newline)])))
)
)