#lang racket/base ;; Preserve, as in Fruit Preserve, as in a remarkably weak pun on pickling/dehydration etc (provide (struct-out stream-of) stream-of->preserve (all-from-out "record.rkt") (struct-out annotated) annotate strip-annotations strip-annotations-proc peel-annotations peel-annotations-proc read-preserve read-preserve-syntax string->preserve string->preserve-syntax (struct-out binary-display-heuristics) current-binary-display-heuristics write-preserve preserve->string current-value->placeholder current-placeholder->value prepend-noop encode decode decode-syntax wire-value preserve-order preservepreserve s) (match-define (stream-of kind generator-thunk) s) (define g (generator-thunk)) (define pieces (for/list [(p (in-producer g (void)))] p)) (match kind ['string (bytes->string/utf-8 (bytes-append* pieces))] ['byte-string (bytes-append* pieces)] ['symbol (string->symbol (bytes->string/utf-8 (bytes-append* pieces)))] ['sequence pieces] ['set (list->set pieces)] ['dictionary (apply hash pieces)])) ;; Syntax properties and syntax objects would be almost perfect for ;; representing annotations, plus position/source tracking as ;; lagniappe, but unfortunately they don't play nicely with data much ;; outside of ordinary S-expressions as found in Racket source. ;; ;; So we do our own thing, for now. ;; (struct annotated (annotations srcloc item) #:transparent #:methods gen:equal+hash [(define (equal-proc a b =?) (=? (annotated-item a) (annotated-item b))) (define (hash-proc a h) (h (annotated-item a))) (define (hash2-proc a h) (h (annotated-item a)))]) (define (annotate v . as) (match v [(annotated annotations srcloc item) (annotated (append as annotations) srcloc item)] [item (annotated as #f item)])) (define (strip-annotations-proc v #:depth [depth +inf.0]) (let walk* ((v v) (depth depth)) (define next-depth (- depth 1)) (define (walk v) (walk* v next-depth)) (if (zero? depth) v (match v [(annotated _ _ item) (match item [(record label fields) (record (walk* label depth) (map walk fields))] [(? list?) (map walk item)] [(? set?) (for/set [(i (in-set item))] (walk i))] [(? dict?) (for/hash [((k v) (in-dict item))] (values (walk* k depth) (walk v)))] [(? annotated?) (error 'strip-annotations "Improper annotation structure: ~v" v)] [_ item])] [_ v])))) (define (peel-annotations-proc v) (strip-annotations-proc v #:depth 1)) (define-match-expander strip-annotations (syntax-rules () [(_ pat extra ...) (app (lambda (v) (strip-annotations-proc v extra ...)) pat)]) (lambda (stx) (syntax-case stx () [(_ args ...) #'(strip-annotations-proc args ...)] [_ #'strip-annotations-proc]))) (define-match-expander peel-annotations (syntax-rules () [(_ pat extra ...) (app (lambda (v) (peel-annotations-proc v extra ...)) pat)]) (lambda (stx) (syntax-case stx () [(_ args ...) #'(peel-annotations-proc args ...)] [_ #'peel-annotations-proc]))) (define current-value->placeholder (make-parameter (lambda (v) #f))) (define current-placeholder->value (make-parameter (lambda (v) (void)))) (define (prepend-noop encoded-value) (bit-string-append #"\xff" encoded-value)) (define (encode v) (bit-string->bytes (bit-string (v :: (wire-value))))) (define ((default-on-short bs)) (error 'decode "Short encoding: ~v" bs)) (define ((default-on-fail bs)) (error 'decode "Invalid encoding: ~v" bs)) (define (decode bs #:read-syntax? [read-syntax? #f] #:on-short [on-short (default-on-short bs)] [on-fail (default-on-fail bs)]) (bit-string-case bs #:on-short (lambda (fail) (on-short)) ([ (v :: (wire-value #:read-syntax? read-syntax?)) ] v) (else (on-fail)))) (define (decode-syntax bs #:on-short [on-short (default-on-short bs)] [on-fail (default-on-fail bs)]) (decode bs on-fail #:read-syntax? #t #:on-short on-short)) (define-syntax wire-value (syntax-rules () [(_ #t input ks kf) (decode-value input ks kf #:read-syntax? #f)] [(_ #t input ks kf #:read-syntax? rs) (decode-value input ks kf #:read-syntax? rs)] [(_ #f v) (encode-value v)])) (define-syntax wire-length (syntax-rules () [(_ #t input ks kf) (decode-wire-length input ks kf)] [(_ #f v) (encode-wire-length v)])) (define (encode-wire-length v) (when (negative? v) (error 'encode-wire-length "Cannot encode negative wire-length ~v" v)) (if (< v #b1111) (bit-string (v :: bits 4)) (bit-string (#b1111 :: bits 4) ((encode-varint v) :: binary)))) (define (encode-array-like minor fields) (bit-string (2 :: bits 2) (minor :: bits 2) ((length fields) :: (wire-length)) ((apply bit-string-append (map encode-value fields)) :: binary))) (define (encode-binary-like minor bs) (bit-string (1 :: bits 2) (minor :: bits 2) ((bytes-length bs) :: (wire-length)) (bs :: binary))) (define (encode-stream major minor chunk-ok? generator) (bit-string-append (bit-string (#b0010 :: bits 4) (major :: bits 2) (minor :: bits 2)) (let loop () (match (generator) [(? void?) #""] [(? chunk-ok? v) (bit-string-append (encode-value v) (loop))] [bad (error 'encode-stream "Cannot encode chunk: ~v" bad)])) (bit-string #b00000100))) (define (dict-keys-and-values d) (reverse (for/fold [(acc '())] [((k v) (in-dict d))] (cons v (cons k acc))))) (define (encode-value v) (define canonicalizing? (canonicalize-preserves?)) (match (and (not canonicalizing?) ((current-value->placeholder) v)) [(? integer? n) (bit-string (#b0001 :: bits 4) (n :: (wire-length)))] [#f (let restart ((v v)) (match v [#f (bytes #b00000000)] [#t (bytes #b00000001)] [(? single-flonum?) (bit-string #b00000010 (v :: float bits 32))] [(? double-flonum?) (bit-string #b00000011 (v :: float bits 64))] [(annotated annotations _ item) (if canonicalizing? (restart item) (bit-string ((apply bit-string-append (map (lambda (a) (bit-string #b00000101 ((encode-value a) :: binary))) annotations)) :: binary) ((encode-value item) :: binary)))] [(? stream-of?) #:when canonicalizing? (restart (stream-of->preserve v))] [(stream-of 'string p) (encode-stream 1 1 bytes? (p))] [(stream-of 'byte-string p) (encode-stream 1 2 bytes? (p))] [(stream-of 'symbol p) (encode-stream 1 3 bytes? (p))] [(stream-of 'sequence p) (encode-stream 2 1 (lambda (x) #t) (p))] [(stream-of 'set p) (encode-stream 2 2 (lambda (x) #t) (p))] [(stream-of 'dictionary p) (encode-stream 2 3 (lambda (x) #t) (p))] [(? integer? x) #:when (<= -3 x 12) (bit-string (#b0011 :: bits 4) (x :: bits 4))] ;; [0 (bytes #b10000000)] [(? integer?) (define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit (define byte-count (quotient (+ raw-bit-count 7) 8)) (bit-string (#b0100 :: bits 4) (byte-count :: (wire-length)) (v :: integer bytes byte-count))] [(? string?) (encode-binary-like 1 (string->bytes/utf-8 v))] [(? bytes?) (encode-binary-like 2 v)] [(? symbol?) (encode-binary-like 3 (string->bytes/utf-8 (symbol->string v)))] [(record label fields) (encode-array-like 0 (cons label fields))] [(? list?) (encode-array-like 1 v)] [(? set?) (encode-array-like 2 (if canonicalizing? (canonical-set-elements v) (set->list v)))] [(? dict?) (encode-array-like 3 (if canonicalizing? (canonical-dict-keys-and-values v) (dict-keys-and-values v)))] [_ (error 'encode-value "Cannot encode value ~v" v)]))])) ;;--------------------------------------------------------------------------- (define (decode-wire-length bs ks kf) (bit-string-case bs #:on-short (lambda (fail) (kf #t)) ([ (= #b1111 :: bits 4) (rest :: binary) ] (decode-varint rest (lambda (v tail) (if (< v #b1111) (kf) (ks v tail))) kf)) ([ (v :: bits 4) (rest :: binary) ] (ks v rest)) (else (kf)))) (define (decode-binary minor bs rest ks kf) (match minor [0 (if (positive? (bit-string-length bs)) (ks (bit-string->signed-integer bs #t) rest) (ks 0 rest))] [2 (ks bs rest)] [(or 1 3) ((with-handlers [(exn:fail:contract? (lambda (e) kf))] (define s (bytes->string/utf-8 bs)) (lambda () (ks (if (= minor 3) (string->symbol s) s) rest))))])) (define (decode-compound minor vs rest ks kf) (match* (minor vs) [(0 (list* label fields)) (ks (record label fields) rest)] [(0 '()) (kf)] [(1 _) (ks vs rest)] [(2 _) (ks (list->set vs) rest)] [(3 _) (if (even? (length vs)) (ks (apply hash vs) rest) (kf))])) (define (decode-value input ks kf #:read-syntax? read-syntax?) (define (position rest) (- (bytes-length input) (arithmetic-shift (bit-string-length rest) -3))) (define nil-annotation (if read-syntax? (lambda (ks bs) (lambda (result rest) (define pos0 (position bs)) (define pos1 (position rest)) (ks (annotated '() (srcloc #f #f #f (+ pos0 1) (- pos1 pos0)) result) rest))) (lambda (ks bs) ks))) (define cons-annotation (if read-syntax? (lambda (ks a v rest) (match-define (annotated annotations srcloc item) v) (ks (annotated (cons a annotations) srcloc item) rest)) (lambda (ks a v rest) (ks v rest)))) (define (decode-values n bs ks kf) (let loop ((n n) (acc-rev '()) (bs bs)) (if (zero? n) (ks (reverse acc-rev) bs) (decode-one bs (lambda (v rest) (loop (- n 1) (cons v acc-rev) rest)) kf)))) (define (decode-stream minor annotations-ok chunk-ok? join-chunks decode bs ks kf) (let loop ((acc-rev '()) (rest bs)) (define (accept-one chunk rest) (if (chunk-ok? chunk) (loop (cons chunk acc-rev) rest) (kf))) (bit-string-case rest #:on-short (lambda (fail) (kf #t)) ([ (= #b00000100 :: bits 8) (rest :: binary) ] (decode minor (join-chunks (reverse acc-rev)) rest (nil-annotation ks bs) kf)) ([ (= #b00000101 :: bits 8) (rest1 :: binary) ] (if annotations-ok (decode-one rest accept-one kf) (kf))) (else (decode-one rest accept-one kf))))) (define bytes-chunk? (if read-syntax? (lambda (v) (bytes? (annotated-item v))) bytes?)) (define bytes-chunk-append* (if read-syntax? (lambda (vs) (bytes-append* (map annotated-item vs))) bytes-append*)) (define (decode-one bs ks kf) (bit-string-case bs #:on-short (lambda (fail) (kf #t)) ([ (= #b00000000 :: bits 8) (rest :: binary) ] ((nil-annotation ks bs) #f rest)) ([ (= #b00000001 :: bits 8) (rest :: binary) ] ((nil-annotation ks bs) #t rest)) ([ (= #b00000010 :: bits 8) (v :: float bits 32) (rest :: binary) ] ((nil-annotation ks bs) (real->single-flonum v) rest)) ([ (= #b00000011 :: bits 8) (v :: float bits 64) (rest :: binary) ] ((nil-annotation ks bs) v rest)) ([ (= #b00000101 :: bits 8) (rest :: binary) ] (decode-one rest (lambda (a rest) (decode-one rest (lambda (v rest) (cons-annotation ks a v rest)) kf)) kf)) ([ (= #b0001 :: bits 4) (placeholder :: (wire-length)) (rest :: binary) ] (match ((current-placeholder->value) placeholder) [(? void?) (error 'decode "Invalid Preserves placeholder: ~v" placeholder)] [v ((nil-annotation ks bs) v rest)])) ([ (= #b001001 :: bits 6) (minor :: bits 2) (rest :: binary) ] (decode-stream minor #f bytes-chunk? bytes-chunk-append* decode-binary rest ks kf)) ([ (= #b001010 :: bits 6) (minor :: bits 2) (rest :: binary) ] (decode-stream minor #t (lambda (x) #t) values decode-compound rest ks kf)) ([ (= #b0011 :: bits 4) (x :: bits 4) (rest :: binary) ] ((nil-annotation ks bs) (if (> x 12) (- x 16) x) rest)) ([ (= #b01 :: bits 2) (minor :: bits 2) (byte-count :: (wire-length)) (bits :: binary bytes byte-count) (rest :: binary) ] (decode-binary minor (bit-string->bytes bits) rest (nil-annotation ks bs) kf)) ([ (= #b10 :: bits 2) (minor :: bits 2) (field-count :: (wire-length)) (rest :: binary) ] (decode-values field-count rest (lambda (fields rest) (decode-compound minor fields rest (nil-annotation ks bs) kf)) kf)) ([ (= #b11111111 :: bits 8) (rest :: binary) ] (decode-one rest ks kf)) (else (kf)))) (decode-one input ks kf)) ;;--------------------------------------------------------------------------- (define PIPE #\|) (define (skip-whitespace* i) (regexp-match? #px#"^(\\s|,)*" i)) ;; side effect: consumes matched portion of input (define (parse-error* #:raise-proc [raise-proc raise-read-error] i source fmt . args) (define-values [line column pos] (port-next-location i)) (raise-proc (format "read-preserve: ~a" (apply format fmt args)) source line column pos #f)) (define (read-preserve [i (current-input-port)] #:read-syntax? [read-syntax? #f] #:source [source (object-name i)]) (local-require file/sha1) (define-match-expander px (syntax-rules () [(_ re pat) (app (lambda (v) (regexp-try-match re v)) pat)])) (define (parse-error fmt . args) (apply parse-error* i source fmt args)) (define (eof-guard v) (match v [(? eof-object?) (parse-error* #:raise-proc raise-read-eof-error i source "Unexpected end of input")] [v v])) (define (peek/no-eof) (eof-guard (peek-char i))) (define (read/no-eof) (eof-guard (read-char i))) (define (skip-whitespace) (skip-whitespace* i)) (define (read-sequence terminator) (sequence-fold '() (lambda (acc v) (cons v acc)) reverse terminator)) (define (set-add* s e) (when (set-member? s e) (parse-error "Duplicate set element: ~v" e)) (set-add s e)) (define (read-dictionary-or-set seed) (sequence-fold seed (lambda (acc k) (skip-whitespace) (match (peek-char i) [#\: (read-char i) (when (set? acc) (parse-error "Unexpected key/value separator in set")) (let ((acc (or acc (hash)))) (when (hash-has-key? acc k) (parse-error "Duplicate key: ~v" k)) (hash-set acc k (read-value)))] [_ (when (hash? acc) (parse-error "Missing expected key/value separator")) (set-add* (or acc (set)) k)])) (lambda (acc) (or acc (hash))) #\})) (define (read-raw-symbol acc) (match (peek-char i) [(or (? eof-object?) (? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\> #\" #\; #\, #\@ #\# #\: (== PIPE) (? char-whitespace?)))) (if (null? acc) (parse-error "Invalid character ~v at start of value" (peek-char i)) (string->symbol (list->string (reverse acc))))] [_ (read-raw-symbol (cons (read-char i) acc))])) (define (read-base64-binary acc) (skip-whitespace) (define ch (read/no-eof)) (cond [(eqv? ch #\}) (base64-decode (string->bytes/latin-1 (list->string (reverse acc))))] [(or (and (char>=? ch #\A) (char<=? ch #\Z)) (and (char>=? ch #\a) (char<=? ch #\z)) (and (char>=? ch #\0) (char<=? ch #\9)) (memv ch '(#\+ #\/ #\- #\_ #\=))) (read-base64-binary (cons ch acc))] [else (parse-error "Invalid base64 character")])) (define (hexdigit? ch) (or (and (char>=? ch #\A) (char<=? ch #\F)) (and (char>=? ch #\a) (char<=? ch #\f)) (and (char>=? ch #\0) (char<=? ch #\9)))) (define (read-hex-binary acc) (skip-whitespace) (define ch (read/no-eof)) (cond [(eqv? ch #\}) (hex-string->bytes (list->string (reverse acc)))] [(hexdigit? ch) (define ch2 (read/no-eof)) (when (not (hexdigit? ch2)) (parse-error "Hex-encoded binary digits must come in pairs")) (read-hex-binary (cons ch2 (cons ch acc)))] [else (parse-error "Invalid hex character")])) (define (read-stringlike xform-item finish terminator-char hexescape-char hexescape-proc) (let loop ((acc '())) (match (read/no-eof) [(== terminator-char) (finish (reverse acc))] [#\\ (match (read/no-eof) [(== hexescape-char) (loop (cons (hexescape-proc) acc))] [(and c (or (== terminator-char) #\\ #\/)) (loop (cons (xform-item c) acc))] [#\b (loop (cons (xform-item #\u08) acc))] [#\f (loop (cons (xform-item #\u0C) acc))] [#\n (loop (cons (xform-item #\u0A) acc))] [#\r (loop (cons (xform-item #\u0D) acc))] [#\t (loop (cons (xform-item #\u09) acc))] [c (parse-error "Invalid escape code \\~a" c)])] [c (loop (cons (xform-item c) acc))]))) (define (read-string terminator-char) (read-stringlike values list->string terminator-char #\u (lambda () (integer->char (match i [(px #px#"^[a-fA-F0-9]{4}" (list hexdigits)) (define n1 (string->number (bytes->string/utf-8 hexdigits) 16)) (if (<= #xd800 n1 #xdfff) ;; surrogate pair first half (match i [(px #px#"^\\\\u([a-fA-F0-9]{4})" (list _ hexdigits2)) (define n2 (string->number (bytes->string/utf-8 hexdigits2) 16)) (if (<= #xdc00 n2 #xdfff) (+ (arithmetic-shift (- n1 #xd800) 10) (- n2 #xdc00) #x10000) (parse-error "Bad second half of surrogate pair"))] [_ (parse-error "Missing second half of surrogate pair")]) n1)] [_ (parse-error "Bad string \\u escape")]))))) (define (read-literal-binary) (read-stringlike (lambda (c) (define b (char->integer c)) (when (>= b 256) (parse-error "Invalid code point ~a (~v) in literal binary" b c)) b) list->bytes #\" #\x (lambda () (match i [(px #px#"^[a-fA-F0-9]{2}" (list hexdigits)) (string->number (bytes->string/utf-8 hexdigits) 16)] [_ (parse-error "Bad binary \\x escape")])))) (define (read-intpart acc-rev) (match (peek-char i) [#\0 (read-fracexp (cons (read-char i) acc-rev))] [_ (read-digit+ acc-rev read-fracexp)])) (define (read-digit* acc-rev k) (match (peek-char i) [(? char? (? char-numeric?)) (read-digit* (cons (read-char i) acc-rev) k)] [_ (k acc-rev)])) (define (read-digit+ acc-rev k) (match (peek-char i) [(? char? (? char-numeric?)) (read-digit* (cons (read-char i) acc-rev) k)] [_ (parse-error "Incomplete number")])) (define (read-fracexp acc-rev) (match (peek-char i) [#\. (read-digit+ (cons (read-char i) acc-rev) read-exp)] [_ (read-exp acc-rev)])) (define (read-exp acc-rev) (match (peek-char i) [(or #\e #\E) (read-sign-and-exp (cons (read-char i) acc-rev))] [_ (finish-number acc-rev)])) (define (read-sign-and-exp acc-rev) (match (peek-char i) [(or #\+ #\-) (read-digit+ (cons (read-char i) acc-rev) finish-number)] [_ (read-digit+ acc-rev finish-number)])) (define (finish-number acc-rev) (define s (list->string (reverse acc-rev))) (define n (string->number s)) (when (not n) (parse-error "Invalid number: ~v" s)) (if (flonum? n) (match (peek-char i) [(or #\f #\F) (read-char i) (real->single-flonum n)] [_ n]) n)) (define (read-number) (match (peek/no-eof) [#\- (read-intpart (list (read-char i)))] [_ (read-intpart (list))])) (define (sequence-fold acc accumulate-one finish terminator-char) (let loop ((acc acc)) (skip-whitespace) (match (peek/no-eof) [(== terminator-char) (read-char i) (finish acc)] [_ (loop (accumulate-one acc (read-value)))]))) (define nil-annotation (if read-syntax? (lambda (thunk) (define-values (line0 col0 pos0) (port-next-location i)) (define v (thunk)) (define-values (line1 col1 pos1) (port-next-location i)) (define loc (and line0 col0 pos0 pos1 (srcloc source line0 col0 pos0 (- pos1 pos0)))) (match v [(annotated annotations _ item) (annotated annotations loc item)] [item (annotated '() loc item)])) (lambda (thunk) (thunk)))) (define cons-annotation (if read-syntax? (lambda (a v) (match-define (annotated annotations srcloc item) v) (annotated (cons a annotations) srcloc item)) (lambda (a v) v))) (define (read-value) (skip-whitespace) (define sigil (peek-char i)) (match sigil [(? eof-object? o) o] [#\@ (read-char i) (define a (eof-guard (read-value))) (define v (eof-guard (read-value))) (cons-annotation a v)] [_ (nil-annotation (lambda () (match sigil [#\{ (read-char i) (read-dictionary-or-set #f)] [#\[ (read-char i) (read-sequence #\])] [#\< (read-char i) (match (read-sequence #\>) ['() (parse-error "Missing record label")] [(cons head fields) (record head fields)])] [(or #\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (read-number)] [#\" (read-char i) (read-string #\")] [(== PIPE) (read-char i) (string->symbol (read-string PIPE))] [#\# (match i [(px #px#"^#set\\{" (list _)) (sequence-fold (set) set-add* values #\})] [(px #px#"^#value" (list _)) (define bs (read-preserve i #:read-syntax? #t)) (when (not (bytes? (annotated-item bs))) (parse-error "ByteString must follow #value")) (when (not (null? (annotated-annotations bs))) (parse-error "Annotations not permitted after #value")) (decode (annotated-item bs) #:read-syntax? read-syntax?)] [(px #px#"^#true" (list _)) #t] [(px #px#"^#false" (list _)) #f] [(px #px#"^#\"" (list _)) (read-literal-binary)] [(px #px#"^#hex\\{" (list _)) (read-hex-binary '())] [(px #px#"^#base64\\{" (list _)) (read-base64-binary '())] [_ (read-char i) (parse-error "Invalid preserve value")])] [#\: (read-char i) (parse-error "Unexpected key/value separator between items")] [_ (read-raw-symbol '())])))])) (read-value)) (define (read-preserve-syntax [i (current-input-port)] #:source [source #f]) (read-preserve i #:read-syntax? #t #:source source)) (define (string->preserve s #:read-syntax? [read-syntax? #f] #:track-position? [track-position? #t]) (define p (open-input-string s)) (define source "") (when track-position? (port-count-lines! p)) (define v (read-preserve p #:read-syntax? read-syntax? #:source source)) (when (eof-object? v) (parse-error* #:raise-proc raise-read-eof-error p source "Unexpected end of input")) (skip-whitespace* p) (when (not (eof-object? (peek-char p))) (parse-error* p source "Unexpected text following preserve")) v) (define (string->preserve-syntax s) (string->preserve s #:read-syntax? #t)) ;;--------------------------------------------------------------------------- (struct binary-display-heuristics (printable-ascii-proportion max-length) #:transparent) (define current-binary-display-heuristics (make-parameter (binary-display-heuristics 3/4 1024))) (define (write-preserve v0 [o (current-output-port)] #:indent [indent-amount0 #f]) (define indent-amount (match indent-amount0 [#f 0] [#t 2] ;; a default [other other])) (define indenting? (and indent-amount0 #t)) (define canonicalizing? (canonicalize-preserves?)) (define-syntax-rule (! fmt arg ...) (fprintf o fmt arg ...)) (define (!indent distance) (when indenting? (! "\n~a" (make-string distance #\space)))) (define (!indent* distance) (if indenting? (!indent distance) (! " "))) (define (write-stringlike-char c [default (lambda (c) (! "~a" c))]) (match c [#\\ (! "\\\\")] [#\u08 (! "\\b")] [#\u0C (! "\\f")] [#\u0A (! "\\n")] [#\u0D (! "\\r")] [#\u09 (! "\\t")] [_ (default c)])) (define (write-sequence outer-distance opener comma closer item-writer vs) (define inner-distance (+ outer-distance indent-amount)) (! "~a" opener) (match vs ['() (void)] [(cons v0 vs) (!indent inner-distance) (item-writer inner-distance v0) (for [(v (in-list vs))] (! "~a" comma) (!indent* inner-distance) (item-writer inner-distance v)) (!indent outer-distance)]) (! "~a" closer)) (define (write-record outer-distance label fields) (! "<") (write-value outer-distance label) (for ([f (in-list fields)]) (! " ") (write-value outer-distance f)) (! ">")) ;; (define (write-record outer-distance label fields) ;; (define simple-label? (or (boolean? label) (number? label) (string? label) ;; (bytes? label) (symbol? label))) ;; (define inner-distance (+ outer-distance ;; (if simple-label? ;; (+ 2 (string-length (preserve->string label #:indent #f))) ;; indent-amount))) ;; (define (write-fields fields) ;; (for ([f (in-list fields)]) ;; (!indent* inner-distance) ;; (write-value inner-distance f))) ;; ;; (! "<") ;; (write-value inner-distance label) ;; (if simple-label? ;; (match fields ;; ['() (void)] ;; [(cons field0 fields) ;; (! " ") ;; (write-value inner-distance field0) ;; (write-fields fields)]) ;; (write-fields fields)) ;; (! ">")) (define (write-key-value distance kv) (match-define (cons k v) kv) (write-value distance k) (! ": ") (write-value distance v)) (define (binunescaped? b) (or (<= #x20 b #x21) (<= #x23 b #x5b) (<= #x5d b #x7e))) (define (write-binary-stringlike v) (! "#\"") (for [(b (in-bytes v))] (match b [#x22 (! "\\\"")] [(? binunescaped?) (write-stringlike-char (integer->char b))] [_ (write-stringlike-char (integer->char b) (lambda (c) (! "\\x~a" (~a #:min-width 2 #:align 'right #:left-pad-string "0" (number->string b 16)))))])) (! "\"")) (define (write-binary-base64 outer-distance v) ;; Racket's encoder breaks lines after 72 characters. ;; That corresponds to 54 bytes of input binary. (! "#base64{") (if (and indenting? (> (bytes-length v) 54)) (let* ((inner-distance (+ outer-distance indent-amount)) (line-separator (bytes-append #"\n" (make-bytes inner-distance 32))) (encoded (base64-encode v line-separator))) (write-bytes line-separator o) (write-bytes encoded o 0 (- (bytes-length encoded) indent-amount))) (write-bytes (base64-encode v #"") o)) (! "}")) (define (write-binary outer-distance v) (match-define (binary-display-heuristics proportion maxlen) (current-binary-display-heuristics)) (define vlen (bytes-length v)) (if (>= vlen maxlen) (write-binary-base64 outer-distance v) (let* ((sample-length (min vlen maxlen)) (printable-ascii-count (for/sum [(i (in-range 0 sample-length)) (b (in-bytes v))] (if (or (<= 32 b 126) (= b 9) (= b 10) (= b 13)) 1 0)))) (if (or (zero? vlen) (>= (/ printable-ascii-count sample-length) proportion)) (write-binary-stringlike v) (write-binary-base64 outer-distance v))))) (define (write-value distance v) (match v [(annotated annotations _ item) (when (not canonicalizing?) (for [(a (in-list annotations))] (! "@") (write-value (+ distance 1) a) (!indent* distance))) (write-value distance item)] [(? stream-of?) (write-value distance (stream-of->preserve v))] [#f (! "#false")] [#t (! "#true")] [(? single-flonum?) (! "~vf" (real->double-flonum v))] [(? double-flonum?) (! "~v" v)] [(? integer? x) (! "~v" v)] [(? string?) (! "\"") (for [(c (in-string v))] (match c [#\" (! "\\\"")] [_ (write-stringlike-char c)])) (! "\"")] [(? bytes?) (write-binary distance v)] [(? symbol?) (define s (symbol->string v)) ;; FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. (if (regexp-match #px"[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*" s) (! "~a" s) (begin (! "|") (for [(c (in-string s))] (match c [(== PIPE) (! "\\|")] [_ (write-stringlike-char c)])) (! "|")))] [(record label fields) (write-record distance label fields)] [(? list?) (write-sequence distance "[" "," "]" write-value v)] [(? set?) (write-sequence distance "#set{" "," "}" write-value (if canonicalizing? (canonical-set-elements v) (set->list v)))] [(? dict?) (write-sequence distance "{" "," "}" write-key-value (if canonicalizing? (canonical-dict-entries v) (dict->list v)))] [_ (error 'write-preserve "Cannot encode value ~v" v)])) (write-value 0 v0)) (define (preserve->string v0 #:indent [indent-amount #f]) (with-output-to-string (lambda () (write-preserve v0 #:indent indent-amount)))) ;;--------------------------------------------------------------------------- (define (typecode v) (match v [(? boolean?) 0] [(? single-flonum?) 1] [(? double-flonum?) 2] [(? integer? x) 3] [(? string?) 4] [(? bytes?) 5] [(? symbol?) 6] [(record _ _) 7] [(? list?) 8] [(? set?) 9] [(? dict?) 10] [_ (error 'preserve-order "Cannot compare value ~v" v)])) (define-syntax chain-order (syntax-rules () [(_ o) o] [(_ o more ...) (match o ['= (chain-order more ...)] [other other])])) (define (prepare-for-order v) (match v [(annotated _ _ item) (prepare-for-order item)] [(? stream-of?) (stream-of->preserve v)] [_ v])) (define preserve-order (order 'preserve-order any/c (lambda (a* b*) (define a (prepare-for-order a*)) (define b (prepare-for-order b*)) (define ta (typecode a)) (define tb (typecode b)) (cond [(< ta tb) '<] [(> ta tb) '>] [else (match ta ;; == tb [7 (chain-order (preserve-order (record-label a) (record-label b)) (preserve-order (record-fields a)) (preserve-order (record-fields b)))] [8 (match* (a b) [('() '()) '=] [('() _) '<] [(_ '()) '>] [((cons a0 a1) (cons b0 b1)) (chain-order (preserve-order a0 b0) (preserve-order a1 b1))])] [9 (preserve-order (canonical-set-elements a) (canonical-set-elements b))] [10 (preserve-order (canonical-dict-keys a) (canonical-dict-keys b))] [_ (datum-order a b)])])))) (define preservelist v) preservelist v) preservebytes (bit-string (0 :: bits 4) (0 :: (wire-length)))) (bytes 0)) (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (3 :: (wire-length)))) (bytes 3)) (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (14 :: (wire-length)))) (bytes 14)) (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (15 :: (wire-length)))) (bytes 15 15)) (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (100 :: (wire-length)))) (bytes 15 100)) (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (300 :: (wire-length)))) (bytes 15 #b10101100 #b00000010)) (define (dwl bs) (bit-string-case bs #:on-short (lambda (fail) 'short) ([ (= 0 :: bits 4) (w :: (wire-length)) ] w) (else (void)))) (check-equal? (dwl (bytes 0)) 0) (check-equal? (dwl (bytes 3)) 3) (check-equal? (dwl (bytes 14)) 14) (check-equal? (dwl (bytes 15)) 'short) (check-equal? (dwl (bytes 15 9)) (void)) ;; not canonical (check-equal? (dwl (bytes 15 15)) 15) (check-equal? (dwl (bytes 15 100)) 100) (check-equal? (dwl (bytes 15 #b10101100 #b00000010)) 300) (check-equal? (dwl (bytes 15 #b10101100)) 'short) (define (d bs) (for [(i (in-range 0 (- (bytes-length bs) 1)))] (when (not (eq? (decode (subbytes bs 0 i) #:on-short (lambda () 'short) void) 'short)) (error 'd "~a-byte prefix of ~v does not read as short" i bs))) (decode-syntax bs #:on-short (lambda () 'short) void)) (define (d-strip bs) (strip-annotations (d bs))) (struct discard () #:prefab) (struct capture (detail) #:prefab) (struct observe (specification) #:prefab) (struct speak (who what) #:prefab) (struct date (year month day) #:prefab) (struct thing (id) #:prefab) (struct person thing (name date-of-birth) #:prefab) (struct titled person (title) #:prefab) (struct asymmetric (forward back)) (define (stream-of* kind . items) (stream-of kind (lambda () (sequence->generator items)))) (define samples-txt-expected (hash 'record1 (capture (discard)) 'record2 (observe (speak (discard) (capture (discard)))) 'list4a '(1 2 3 4) 'list1 (asymmetric (stream-of* 'sequence 1 2 3 4) '(1 2 3 4)) 'list5 '(-2 -1 0 1) 'string3 "hello" 'string2 (asymmetric (stream-of* 'string #"he" #"llo") "hello") 'string1 (asymmetric (stream-of* 'string #"he" #"ll" #"o") "hello") 'bytes1 (asymmetric (stream-of* 'byte-string #"he" #"ll" #"o") #"hello") 'symbol1 (asymmetric (stream-of* 'symbol #"he" #"ll" #"o") 'hello) 'list6 `("hello" there #"world" () ,(set) #t #f) 'bytes2 #"hello" 'bytes3 #"ABC" 'bytes4 #"ABC" 'bytes5 #"AJN" 'bytes7 #"corymb" 'bytes8 #"corymb" 'bytes9 #"Hi" 'bytes10 #"Hi" 'bytes11 #"Hi" 'value1 #"corymb" 'value2 #t 'value3 #t 'value4 #t 'value5 #t 'value6 (list 1 2 3) 'list0 '() 'dict0 (hash) 'string0 "" 'string0a (asymmetric (stream-of* 'string) "") 'symbol0 '|| 'set0 (set) 'set1 (set 1 2 3) 'set1a (set 1 2 3) 'string4 "abc\u6c34\u6C34\\/\"\b\f\n\r\txyz" 'bytes13 #"abc\x6c\x34\xf0\\/\"\b\f\n\r\txyz" 'string5 "\U0001D11E" 'list2 (asymmetric (stream-of* 'sequence (stream-of* 'string #"abc") (stream-of* 'string #"def")) '("abc" "def")) 'list3 (asymmetric (stream-of* 'sequence '("a" 1) '("b" 2) '("c" 3)) '(("a" 1) ("b" 2) ("c" 3))) 'record1 (capture (discard)) 'record2 (observe (speak (discard) (capture (discard)))) 'record3 (titled 101 "Blackwell" (date 1821 2 3) "Dr") 'record4 (asymmetric (record 'discard '()) (discard)) 'record5 (record 7 '(())) 'record6 (asymmetric (record 'discard '(surprise)) '#s(discard surprise)) 'record7 (record "aString" '(3 4)) 'record8 (record (discard) '(3 4)) 'list7 (list 'abc '|...| 'def) 'dict1 (hash 'a 1 "b" #t '(1 2 3) #"c" (hash 'first-name "Elizabeth") (hash 'surname "Blackwell")) 'rfc8259-example1 (hash "Image" (hash "Width" 800 "Height" 600 "Title" "View from 15th Floor" "Thumbnail" (hash "Url" "http://www.example.com/image/481989943" "Height" 125 "Width" 100) "Animated" 'false "IDs" (list 116 943 234 38793))) 'rfc8259-example2 (list (hash "precision" "zip" "Latitude" 37.7668 "Longitude" -122.3959 "Address" "" "City" "SAN FRANCISCO" "State" "CA" "Zip" "94107" "Country" "US") (hash "precision" "zip" "Latitude" 37.371991 "Longitude" -122.026020 "Address" "" "City" "SUNNYVALE" "State" "CA" "Zip" "94085" "Country" "US")) 'annotation1 (asymmetric (annotate 9 "abc") 9) 'annotation2 (asymmetric (annotate (list '() (annotate '() "x")) "abc" "def") '(() ())) 'annotation3 (asymmetric (annotate 5 (annotate 2 1) (annotate 4 3)) 5) 'annotation4 (asymmetric (hash (annotate 'a 'ak) (annotate 1 'av) (annotate 'b 'bk) (annotate 2 'bv)) (hash 'a 1 'b 2)) 'annotation5 (asymmetric (annotate `#s(R ,(annotate 'f 'af)) 'ar) `#s(R f)) 'annotation6 (asymmetric (record (annotate 'R 'ar) (list (annotate 'f 'af))) `#s(R f)) 'annotation7 (asymmetric (annotate '() 'a 'b 'c) '()) )) (define (run-test-case variety t-name loc binary-form annotated-text-form) (define text-form (strip-annotations annotated-text-form)) (define-values (forward back can-execute-nondet-with-canonicalization?) (match (hash-ref samples-txt-expected t-name text-form) [(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations [v (values v v #t)])) (check-equal? text-form back loc) ;; expectation 1 (check-equal? (d-strip (encode text-form)) back loc) ;; expectation 2 (check-equal? (d-strip (encode forward)) back loc) ;; expectation 3 (check-equal? (d-strip binary-form) back loc) ;; expectation 4 (check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5 (check-equal? (d (encode annotated-text-form)) annotated-text-form loc) ;; expectation 6 (check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7 (check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8 (check-equal? (string->preserve-syntax (preserve->string annotated-text-form)) ;; similar to 8 annotated-text-form loc) (when (and (not (memq variety '(decode))) (or (not (memq variety '(nondeterministic))) (and can-execute-nondet-with-canonicalization?))) ;; expectations 9 and 10 (parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f))) (check-equal? (encode forward) binary-form loc))) (unless (memq variety '(decode nondeterministic streaming)) ;; expectation 11 (check-equal? (encode annotated-text-form) binary-form loc))) (define-runtime-path tests-path "../../../../tests") (let* ((path (build-path tests-path "samples.txt")) (testfile (call-with-input-file path (lambda (p) (port-count-lines! p) (read-preserve-syntax p #:source path))))) (match-define (peel-annotations `#s(TestCases ,(strip-annotations `#s(ExpectedPlaceholderMapping ,placeholder->value-map)) ,tests)) testfile) (define value->placeholder-map (for/hash [((k v) (in-hash placeholder->value-map))] (values v k))) (parameterize ((current-value->placeholder (lambda (v) (hash-ref value->placeholder-map v #f))) (current-placeholder->value (lambda (p) (hash-ref placeholder->value-map p void)))) (for [((t-name* t*) (in-hash (annotated-item tests)))] (define t-name (strip-annotations t-name*)) (define loc (format "Test case '~a' (~a)" t-name (source-location->string (annotated-srcloc t*)))) (define (fail-test fmt . args) (fail (format "~a: ~a" loc (apply format fmt args)))) (displayln loc) (match (peel-annotations t*) [`#s(Test ,(strip-annotations binary-form) ,annotated-text-form) (run-test-case 'normal t-name loc binary-form annotated-text-form)] [`#s(NondeterministicTest ,(strip-annotations binary-form) ,annotated-text-form) (run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)] [`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form) (run-test-case 'streaming t-name loc binary-form annotated-text-form)] [`#s(DecodeTest ,(strip-annotations binary-form) ,annotated-text-form) (run-test-case 'decode t-name loc binary-form annotated-text-form)] [`#s(ParseError ,(strip-annotations str)) (with-handlers [(exn:fail:read:eof? (lambda (e) (fail-test "Unexpected EOF: ~e" e))) (exn:fail:read? (lambda (e) 'ok)) ((lambda (e) #t) (lambda (e) (fail-test "Unexpected exception: ~e" e)))] (string->preserve str) (fail-test "Unexpected success"))] [`#s(ParseShort ,(strip-annotations str)) (with-handlers [(exn:fail:read:eof? (lambda (e) 'ok)) ((lambda (e) #t) (lambda (e) (fail-test "Unexpected exception: ~e" e)))] (string->preserve str) (fail-test "Unexpected success"))] [`#s(DecodeShort ,(strip-annotations bs)) (check-eq? (d bs) 'short loc)] [`#s(DecodeError ,(strip-annotations bs)) (check-true (void? (d bs)) loc)] [_ (write-preserve t* #:indent #f) (newline)]))) ) )