958 lines
36 KiB
Racket
958 lines
36 KiB
Racket
#lang racket/base
|
|
;; Preserve, as in Fruit Preserve, as in a remarkably weak pun on pickling/dehydration etc
|
|
|
|
(provide (struct-out stream-of)
|
|
(struct-out record)
|
|
(struct-out annotated)
|
|
strip-annotations
|
|
peel-annotations
|
|
read-preserve
|
|
read-preserve-syntax
|
|
string->preserve
|
|
string->preserve-syntax
|
|
write-preserve
|
|
preserve->string
|
|
current-value->placeholder
|
|
current-placeholder->value
|
|
encode
|
|
decode
|
|
decode-syntax
|
|
wire-value)
|
|
|
|
(require racket/bytes)
|
|
(require racket/dict)
|
|
(require (only-in racket/format ~a))
|
|
(require racket/generator)
|
|
(require racket/match)
|
|
(require (only-in racket/port with-output-to-string))
|
|
(require racket/set)
|
|
(require bitsyntax)
|
|
(require "struct.rkt")
|
|
(require "varint.rkt")
|
|
(require (only-in syntax/readerr raise-read-error))
|
|
|
|
(struct stream-of (kind generator) #:transparent)
|
|
|
|
;; Syntax properties and syntax objects would be almost perfect for
|
|
;; representing annotations, plus position/source tracking as
|
|
;; lagniappe, but unfortunately they don't play nicely with data much
|
|
;; outside of ordinary S-expressions as found in Racket source.
|
|
;;
|
|
;; So we do our own thing, for now.
|
|
;;
|
|
(struct annotated (annotations srcloc item) #:transparent
|
|
#:methods gen:equal+hash
|
|
[(define (equal-proc a b =?) (=? (annotated-item a) (annotated-item b)))
|
|
(define (hash-proc a h) (h (annotated-item a)))
|
|
(define (hash2-proc a h) (h (annotated-item a)))])
|
|
|
|
(struct record (label fields) #:transparent)
|
|
|
|
(define (build-record label fields)
|
|
(with-handlers [(exn:fail:contract? (lambda (e) (record label fields)))]
|
|
(apply make-prefab-struct label fields)))
|
|
|
|
(define (strip-annotations v #:depth [depth +inf.0])
|
|
(let walk* ((v v) (depth depth))
|
|
(define next-depth (- depth 1))
|
|
(define (walk v) (walk* v next-depth))
|
|
(if (zero? depth)
|
|
v
|
|
(match v
|
|
[(annotated _ _ item)
|
|
(match item
|
|
[(record label fields) (build-record (walk* label depth) (map walk fields))]
|
|
[(? non-object-struct?)
|
|
(error 'strip-annotations "Cannot strip-annotations from struct: ~v" v)]
|
|
[(? list?) (map walk item)]
|
|
[(? set?) (for/set [(i (in-set item))] (walk i))]
|
|
[(? dict?) (for/hash [((k v) (in-dict item))] (values (walk k) (walk v)))]
|
|
[(? annotated?)
|
|
(error 'strip-annotations "Improper annotation structure: ~v" v)]
|
|
[_ item])]
|
|
[_ v]))))
|
|
|
|
(define (peel-annotations v)
|
|
(strip-annotations v #:depth 1))
|
|
|
|
(define current-value->placeholder (make-parameter (lambda (v) #f)))
|
|
(define current-placeholder->value (make-parameter (lambda (v) (void))))
|
|
|
|
(define (encode v)
|
|
(bit-string->bytes (bit-string (v :: (wire-value)))))
|
|
|
|
(define ((default-on-short bs)) (error 'decode "Short encoding: ~v" bs))
|
|
(define ((default-on-fail bs)) (error 'decode "Invalid encoding: ~v" bs))
|
|
|
|
(define (decode bs
|
|
#:read-syntax? [read-syntax? #f]
|
|
#:on-short [on-short (default-on-short bs)]
|
|
[on-fail (default-on-fail bs)])
|
|
(bit-string-case bs
|
|
#:on-short (lambda (fail) (on-short))
|
|
([ (v :: (wire-value #:read-syntax? read-syntax?)) ] v)
|
|
(else (on-fail))))
|
|
|
|
(define (decode-syntax bs
|
|
#:on-short [on-short (default-on-short bs)]
|
|
[on-fail (default-on-fail bs)])
|
|
(decode #:read-syntax? #t #:on-short on-short #:on-fail on-fail))
|
|
|
|
(define-syntax wire-value
|
|
(syntax-rules ()
|
|
[(_ #t input ks kf) (decode-value input ks kf #:read-syntax? #f)]
|
|
[(_ #t input ks kf #:read-syntax? rs) (decode-value input ks kf #:read-syntax? rs)]
|
|
[(_ #f v) (encode-value v)]))
|
|
|
|
(define-syntax wire-length
|
|
(syntax-rules ()
|
|
[(_ #t input ks kf) (decode-wire-length input ks kf)]
|
|
[(_ #f v) (encode-wire-length v)]))
|
|
|
|
(define (encode-wire-length v)
|
|
(when (negative? v) (error 'encode-wire-length "Cannot encode negative wire-length ~v" v))
|
|
(if (< v #b1111)
|
|
(bit-string (v :: bits 4))
|
|
(bit-string (#b1111 :: bits 4) ((encode-varint v) :: binary))))
|
|
|
|
(define (encode-array-like minor fields)
|
|
(bit-string (2 :: bits 2)
|
|
(minor :: bits 2)
|
|
((length fields) :: (wire-length))
|
|
((apply bit-string-append (map encode-value fields)) :: binary)))
|
|
|
|
(define (encode-binary-like minor bs)
|
|
(bit-string (1 :: bits 2)
|
|
(minor :: bits 2)
|
|
((bytes-length bs) :: (wire-length))
|
|
(bs :: binary)))
|
|
|
|
(define (encode-stream major minor chunk-ok? generator)
|
|
(bit-string-append (bit-string (#b0010 :: bits 4) (major :: bits 2) (minor :: bits 2))
|
|
(let loop ()
|
|
(match (generator)
|
|
[(? void?) #""]
|
|
[(? chunk-ok? v) (bit-string-append (encode-value v) (loop))]
|
|
[bad (error 'encode-stream "Cannot encode chunk: ~v" bad)]))
|
|
(bit-string #b00000100)))
|
|
|
|
(define (dict-keys-and-values d)
|
|
(reverse (for/fold [(acc '())] [((k v) (in-dict d))] (cons v (cons k acc)))))
|
|
|
|
(define (encode-record key fields)
|
|
(encode-array-like 0 (cons key fields)))
|
|
|
|
(define (encode-value v)
|
|
(match v
|
|
[#f (bytes #b00000000)]
|
|
[#t (bytes #b00000001)]
|
|
[(? single-flonum?) (bit-string #b00000010 (v :: float bits 32))]
|
|
[(? double-flonum?) (bit-string #b00000011 (v :: float bits 64))]
|
|
[(annotated annotations _ item)
|
|
(apply bit-string-append
|
|
(map (lambda (a) (bit-string #b00000101 ((encode-value a) :: binary))) annotations)
|
|
(encode-value item))]
|
|
[(stream-of 'string p) (encode-stream 1 1 bytes? p)]
|
|
[(stream-of 'byte-string p) (encode-stream 1 2 bytes? p)]
|
|
[(stream-of 'symbol p) (encode-stream 1 3 bytes? p)]
|
|
[(stream-of 'sequence p) (encode-stream 2 1 (lambda (x) #t) p)]
|
|
[(stream-of 'set p) (encode-stream 2 2 (lambda (x) #t) p)]
|
|
[(stream-of 'dictionary p) (encode-stream 2 3 (lambda (x) #t) p)]
|
|
|
|
[(? integer? x) #:when (<= -3 x 12) (bit-string (#b0011 :: bits 4) (x :: bits 4))]
|
|
;; [0 (bytes #b10000000)]
|
|
[(? integer?)
|
|
(define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit
|
|
(define byte-count (quotient (+ raw-bit-count 7) 8))
|
|
(bit-string (#b0100 :: bits 4) (byte-count :: (wire-length)) (v :: integer bytes byte-count))]
|
|
[(? string?) (encode-binary-like 1 (string->bytes/utf-8 v))]
|
|
[(? bytes?) (encode-binary-like 2 v)]
|
|
[(? symbol?) (encode-binary-like 3 (string->bytes/utf-8 (symbol->string v)))]
|
|
|
|
[(record label fields) (encode-record label fields)]
|
|
[(? non-object-struct?)
|
|
(define key (prefab-struct-key v))
|
|
(when (not key) (error 'encode-value "Cannot encode non-prefab struct ~v" v))
|
|
(encode-record key (cdr (vector->list (struct->vector v))))]
|
|
|
|
[(? list?) (encode-array-like 1 v)]
|
|
[(? set?) (encode-array-like 2 (set->list v))]
|
|
[(? dict?) (encode-array-like 3 (dict-keys-and-values v))]
|
|
|
|
[_ (error 'encode-value "Cannot encode value ~v" v)]))
|
|
|
|
;;---------------------------------------------------------------------------
|
|
|
|
(define (decode-wire-length bs ks kf)
|
|
(bit-string-case bs
|
|
#:on-short (lambda (fail) (kf #t))
|
|
([ (= #b1111 :: bits 4) (rest :: binary) ]
|
|
(decode-varint rest
|
|
(lambda (v tail)
|
|
(if (< v #b1111)
|
|
(kf)
|
|
(ks v tail)))
|
|
kf))
|
|
([ (v :: bits 4) (rest :: binary) ] (ks v rest))
|
|
(else (kf))))
|
|
|
|
(define (decode-binary minor bs rest ks kf)
|
|
(match minor
|
|
[0 (if (positive? (bit-string-length bs))
|
|
(ks (bit-string->signed-integer bs #t) rest)
|
|
(ks 0 rest))]
|
|
[2 (ks bs rest)]
|
|
[(or 1 3)
|
|
((with-handlers [(exn:fail:contract? (lambda (e) kf))]
|
|
(define s (bytes->string/utf-8 bs))
|
|
(lambda () (ks (if (= minor 3) (string->symbol s) s) rest))))]))
|
|
|
|
(define (decode-compound minor vs rest ks kf)
|
|
(match* (minor vs)
|
|
[(0 (list* label fields)) (ks (build-record label fields) rest)]
|
|
[(0 '()) (kf)]
|
|
[(1 _) (ks vs rest)]
|
|
[(2 _) (ks (list->set vs) rest)]
|
|
[(3 _) (if (even? (length vs))
|
|
(ks (apply hash vs) rest)
|
|
(kf))]))
|
|
|
|
(define (decode-value input ks kf #:read-syntax? read-syntax?)
|
|
|
|
(define (position rest)
|
|
(- (bytes-length input) (arithmetic-shift (bit-string-length rest) -3)))
|
|
|
|
(define nil-annotation
|
|
(if read-syntax?
|
|
(lambda (ks bs)
|
|
(lambda (result rest)
|
|
(define pos0 (position bs))
|
|
(define pos1 (position rest))
|
|
(ks (annotated '()
|
|
(srcloc #f #f #f (+ pos0 1) (- pos1 pos0))
|
|
result)
|
|
rest)))
|
|
(lambda (ks bs) ks)))
|
|
|
|
(define cons-annotation
|
|
(if read-syntax?
|
|
(lambda (ks a v rest)
|
|
(match-define (annotated annotations srcloc item) v)
|
|
(ks (annotated (cons a annotations) srcloc item) rest))
|
|
(lambda (ks a v rest)
|
|
(ks v rest))))
|
|
|
|
(define (decode-values n bs ks kf)
|
|
(let loop ((n n) (acc-rev '()) (bs bs))
|
|
(if (zero? n)
|
|
(ks (reverse acc-rev) bs)
|
|
(decode-one bs (lambda (v rest) (loop (- n 1) (cons v acc-rev) rest)) kf))))
|
|
|
|
(define (decode-stream minor annotations-ok chunk-ok? join-chunks decode bs ks kf)
|
|
(let loop ((acc-rev '()) (rest bs))
|
|
(define (accept-one chunk rest)
|
|
(if (chunk-ok? chunk)
|
|
(loop (cons chunk acc-rev) rest)
|
|
(kf)))
|
|
(bit-string-case rest
|
|
#:on-short (lambda (fail) (kf #t))
|
|
([ (= #b00000100 :: bits 8) (rest :: binary) ]
|
|
(decode minor
|
|
(join-chunks (reverse acc-rev))
|
|
rest
|
|
(nil-annotation ks bs)
|
|
kf))
|
|
([ (= #b00000101 :: bits 8) (rest1 :: binary) ]
|
|
(if annotations-ok
|
|
(decode-one rest accept-one kf)
|
|
(kf)))
|
|
(else
|
|
(decode-one rest accept-one kf)))))
|
|
|
|
(define bytes-chunk?
|
|
(if read-syntax?
|
|
(lambda (v) (bytes? (annotated-item v)))
|
|
bytes?))
|
|
|
|
(define bytes-chunk-append*
|
|
(if read-syntax?
|
|
(lambda (vs) (bytes-append* (map annotated-item vs)))
|
|
bytes-append*))
|
|
|
|
(define (decode-one bs ks kf)
|
|
(bit-string-case bs
|
|
#:on-short (lambda (fail) (kf #t))
|
|
([ (= #b00000000 :: bits 8) (rest :: binary) ]
|
|
((nil-annotation ks bs) #f rest))
|
|
([ (= #b00000001 :: bits 8) (rest :: binary) ]
|
|
((nil-annotation ks bs) #t rest))
|
|
|
|
([ (= #b00000010 :: bits 8) (v :: float bits 32) (rest :: binary) ]
|
|
((nil-annotation ks bs) (real->single-flonum v) rest))
|
|
([ (= #b00000011 :: bits 8) (v :: float bits 64) (rest :: binary) ]
|
|
((nil-annotation ks bs) v rest))
|
|
|
|
([ (= #b00000101 :: bits 8) (rest :: binary) ]
|
|
(decode-one rest
|
|
(lambda (a rest)
|
|
(decode-one rest
|
|
(lambda (v rest)
|
|
(cons-annotation ks a v rest))
|
|
kf))
|
|
kf))
|
|
|
|
([ (= #b0001 :: bits 4) (placeholder :: (wire-length)) (rest :: binary) ]
|
|
(match ((current-placeholder->value) placeholder)
|
|
[(? void?) (error 'decode "Invalid Preserves placeholder: ~v" placeholder)]
|
|
[v ((nil-annotation ks bs) v rest)]))
|
|
|
|
([ (= #b001001 :: bits 6) (minor :: bits 2) (rest :: binary) ]
|
|
(decode-stream minor #f bytes-chunk? bytes-chunk-append* decode-binary rest ks kf))
|
|
([ (= #b001010 :: bits 6) (minor :: bits 2) (rest :: binary) ]
|
|
(decode-stream minor #t (lambda (x) #t) values decode-compound rest ks kf))
|
|
|
|
([ (= #b0011 :: bits 4) (x :: bits 4) (rest :: binary) ]
|
|
((nil-annotation ks bs) (if (> x 12) (- x 16) x) rest))
|
|
|
|
([ (= #b01 :: bits 2) (minor :: bits 2) (byte-count :: (wire-length))
|
|
(bits :: binary bytes byte-count)
|
|
(rest :: binary) ]
|
|
(decode-binary minor (bit-string->bytes bits) rest (nil-annotation ks bs) kf))
|
|
|
|
([ (= #b10 :: bits 2) (minor :: bits 2) (field-count :: (wire-length)) (rest :: binary) ]
|
|
(decode-values field-count
|
|
rest
|
|
(lambda (fields rest)
|
|
(decode-compound minor fields rest (nil-annotation ks bs) kf))
|
|
kf))
|
|
|
|
(else (kf))))
|
|
|
|
(decode-one input ks kf))
|
|
|
|
;;---------------------------------------------------------------------------
|
|
|
|
(define PIPE #\|)
|
|
|
|
(define (skip-whitespace* i)
|
|
(regexp-match? #px#"^(\\s|,)*" i)) ;; side effect: consumes matched portion of input
|
|
|
|
(define (parse-error* i fmt . args)
|
|
(define-values [line column pos] (port-next-location i))
|
|
(raise-read-error (format "read-preserve: ~a" (apply format fmt args))
|
|
(object-name i)
|
|
line
|
|
column
|
|
pos
|
|
#f))
|
|
|
|
(define (read-preserve [i (current-input-port)]
|
|
#:read-syntax? [read-syntax? #f]
|
|
#:source [source #f])
|
|
(local-require net/base64)
|
|
(local-require file/sha1)
|
|
|
|
(define-match-expander px
|
|
(syntax-rules ()
|
|
[(_ re pat) (app (lambda (v) (regexp-try-match re v)) pat)]))
|
|
|
|
(define (parse-error fmt . args)
|
|
(apply parse-error* i fmt args))
|
|
|
|
(define (eof-guard v)
|
|
(match v
|
|
[(? eof-object?) (parse-error "Unexpected end of input")]
|
|
[v v]))
|
|
|
|
(define (peek/no-eof) (eof-guard (peek-char i)))
|
|
(define (read/no-eof) (eof-guard (read-char i)))
|
|
|
|
(define (skip-whitespace) (skip-whitespace* i))
|
|
|
|
(define (read-sequence terminator)
|
|
(sequence-fold '() (lambda (acc v) (cons v acc)) reverse terminator))
|
|
|
|
(define (read-dictionary-or-set seed)
|
|
(sequence-fold seed
|
|
(lambda (acc k)
|
|
(skip-whitespace)
|
|
(match (peek-char i)
|
|
[#\: (read-char i)
|
|
(when (set? acc) (parse-error "Unexpected key/value separator in set"))
|
|
(hash-set (or acc (hash)) k (read-value))]
|
|
[_ (when (hash? acc) (parse-error "Missing expected key/value separator"))
|
|
(set-add (or acc (set)) k)]))
|
|
(lambda (acc) (or acc (hash)))
|
|
#\}))
|
|
|
|
(define (read-raw-symbol acc)
|
|
(match (peek-char i)
|
|
[(or (? eof-object?)
|
|
(? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\>
|
|
#\" #\; #\, #\# #\: (== PIPE)
|
|
(? char-whitespace?))))
|
|
(if (null? acc)
|
|
(parse-error "Invalid character ~v at start of value; skipping" (read-char i))
|
|
(string->symbol (list->string (reverse acc))))]
|
|
[_ (read-raw-symbol (cons (read-char i) acc))]))
|
|
|
|
(define (read-base64-binary acc)
|
|
(skip-whitespace)
|
|
(define ch (read/no-eof))
|
|
(cond [(eqv? ch #\})
|
|
(base64-decode (string->bytes/latin-1 (list->string (reverse acc))))]
|
|
[(or (and (char>=? ch #\A) (char<=? ch #\Z))
|
|
(and (char>=? ch #\a) (char<=? ch #\z))
|
|
(and (char>=? ch #\0) (char<=? ch #\9))
|
|
(memv ch '(#\+ #\/ #\- #\_ #\=)))
|
|
(read-base64-binary (cons ch acc))]
|
|
[else
|
|
(parse-error "Invalid base64 character")]))
|
|
|
|
(define (hexdigit? ch)
|
|
(or (and (char>=? ch #\A) (char<=? ch #\F))
|
|
(and (char>=? ch #\a) (char<=? ch #\f))
|
|
(and (char>=? ch #\0) (char<=? ch #\9))))
|
|
|
|
(define (read-hex-binary acc)
|
|
(skip-whitespace)
|
|
(define ch (read/no-eof))
|
|
(cond [(eqv? ch #\})
|
|
(hex-string->bytes (list->string (reverse acc)))]
|
|
[(hexdigit? ch)
|
|
(define ch2 (read/no-eof))
|
|
(when (not (hexdigit? ch2))
|
|
(parse-error "Hex-encoded binary digits must come in pairs"))
|
|
(read-hex-binary (cons ch2 (cons ch acc)))]
|
|
[else
|
|
(parse-error "Invalid hex character")]))
|
|
|
|
(define (read-stringlike xform-item finish terminator-char hexescape-char hexescape-proc)
|
|
(let loop ((acc '()))
|
|
(match (read/no-eof)
|
|
[(== terminator-char) (finish (reverse acc))]
|
|
[#\\ (match (read/no-eof)
|
|
[(== hexescape-char) (loop (cons (hexescape-proc) acc))]
|
|
[(and c (or (== terminator-char) #\\ #\/)) (loop (cons (xform-item c) acc))]
|
|
[#\b (loop (cons (xform-item #\u08) acc))]
|
|
[#\f (loop (cons (xform-item #\u0C) acc))]
|
|
[#\n (loop (cons (xform-item #\u0A) acc))]
|
|
[#\r (loop (cons (xform-item #\u0D) acc))]
|
|
[#\t (loop (cons (xform-item #\u09) acc))]
|
|
[c (parse-error "Invalid escape code \\~a" c)])]
|
|
[c (loop (cons (xform-item c) acc))])))
|
|
|
|
(define (read-string terminator-char)
|
|
(read-stringlike values
|
|
list->string
|
|
terminator-char
|
|
#\u
|
|
(lambda ()
|
|
(integer->char
|
|
(match i
|
|
[(px #px#"^[a-fA-F0-9]{4}" (list hexdigits))
|
|
(define n1 (string->number (bytes->string/utf-8 hexdigits) 16))
|
|
(if (<= #xd800 n1 #xdfff) ;; surrogate pair first half
|
|
(match i
|
|
[(px #px#"^\\\\u([a-fA-F0-9]{4})" (list _ hexdigits2))
|
|
(define n2 (string->number (bytes->string/utf-8 hexdigits2) 16))
|
|
(if (<= #xdc00 n2 #xdfff)
|
|
(+ (arithmetic-shift (- n1 #xd800) 10)
|
|
(- n2 #xdc00)
|
|
#x10000)
|
|
(parse-error "Bad second half of surrogate pair"))]
|
|
[_ (parse-error "Missing second half of surrogate pair")])
|
|
n1)]
|
|
[_ (parse-error "Bad string \\u escape")])))))
|
|
|
|
(define (read-literal-binary)
|
|
(read-stringlike (lambda (c)
|
|
(define b (char->integer c))
|
|
(when (>= b 256)
|
|
(parse-error "Invalid code point ~a (~v) in literal binary" b c))
|
|
b)
|
|
list->bytes
|
|
#\"
|
|
#\x
|
|
(lambda ()
|
|
(match i
|
|
[(px #px#"^[a-fA-F0-9]{2}" (list hexdigits))
|
|
(string->number (bytes->string/utf-8 hexdigits) 16)]
|
|
[_ (parse-error "Bad binary \\x escape")]))))
|
|
|
|
(define (read-intpart acc-rev)
|
|
(match (peek-char i)
|
|
[#\0 (read-fracexp (cons (read-char i) acc-rev))]
|
|
[_ (read-digit+ acc-rev read-fracexp)]))
|
|
|
|
(define (read-digit* acc-rev k)
|
|
(match (peek-char i)
|
|
[(? char? (? char-numeric?)) (read-digit* (cons (read-char i) acc-rev) k)]
|
|
[_ (k acc-rev)]))
|
|
|
|
(define (read-digit+ acc-rev k)
|
|
(match (peek-char i)
|
|
[(? char? (? char-numeric?)) (read-digit* (cons (read-char i) acc-rev) k)]
|
|
[_ (parse-error "Incomplete number")]))
|
|
|
|
(define (read-fracexp acc-rev)
|
|
(match (peek-char i)
|
|
[#\. (read-digit+ (cons (read-char i) acc-rev) read-exp)]
|
|
[_ (read-exp acc-rev)]))
|
|
|
|
(define (read-exp acc-rev)
|
|
(match (peek-char i)
|
|
[(or #\e #\E) (read-sign-and-exp (cons (read-char i) acc-rev))]
|
|
[_ (finish-number acc-rev)]))
|
|
|
|
(define (read-sign-and-exp acc-rev)
|
|
(match (peek-char i)
|
|
[(or #\+ #\-) (read-digit+ (cons (read-char i) acc-rev) finish-number)]
|
|
[_ (read-digit+ acc-rev finish-number)]))
|
|
|
|
(define (finish-number acc-rev)
|
|
(define s (list->string (reverse acc-rev)))
|
|
(define n (string->number s))
|
|
(when (not n) (parse-error "Invalid number: ~v" s))
|
|
(if (flonum? n)
|
|
(match (peek-char i)
|
|
[(or #\f #\F) (read-char i) (real->single-flonum n)]
|
|
[_ n])
|
|
n))
|
|
|
|
(define (read-number)
|
|
(match (peek/no-eof)
|
|
[#\- (read-intpart (list (read-char i)))]
|
|
[_ (read-intpart (list))]))
|
|
|
|
(define (sequence-fold acc accumulate-one finish terminator-char)
|
|
(let loop ((acc acc))
|
|
(skip-whitespace)
|
|
(match (peek/no-eof)
|
|
[(== terminator-char) (read-char i) (finish acc)]
|
|
[_ (loop (accumulate-one acc (read-value)))])))
|
|
|
|
(define nil-annotation
|
|
(if read-syntax?
|
|
(lambda (thunk)
|
|
(define-values (line0 col0 pos0) (port-next-location i))
|
|
(define v (thunk))
|
|
(define-values (line1 col1 pos1) (port-next-location i))
|
|
(define loc (and line0 col0 pos0 pos1 (srcloc source line0 col0 pos0 (- pos1 pos0))))
|
|
(match v
|
|
[(annotated annotations _ item) (annotated annotations loc item)]
|
|
[item (annotated '() loc item)]))
|
|
(lambda (thunk) (thunk))))
|
|
|
|
(define cons-annotation
|
|
(if read-syntax?
|
|
(lambda (a v)
|
|
(match-define (annotated annotations srcloc item) v)
|
|
(annotated (cons a annotations) srcloc item))
|
|
(lambda (a v) v)))
|
|
|
|
(define (read-value)
|
|
(skip-whitespace)
|
|
(define sigil (peek-char i))
|
|
(match sigil
|
|
[(? eof-object? o) o]
|
|
[#\@ (read-char i)
|
|
(define a (eof-guard (read-value)))
|
|
(define v (eof-guard (read-value)))
|
|
(cons-annotation a v)]
|
|
[_
|
|
(nil-annotation
|
|
(lambda ()
|
|
(match sigil
|
|
[#\{ (read-char i) (read-dictionary-or-set #f)]
|
|
[#\[ (read-char i) (read-sequence #\])]
|
|
[#\< (read-char i)
|
|
(match (read-sequence #\>)
|
|
['() (parse-error "Missing record label")]
|
|
[(cons head fields) (build-record head fields)])]
|
|
[(or #\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (read-number)]
|
|
[#\" (read-char i) (read-string #\")]
|
|
[(== PIPE) (read-char i) (string->symbol (read-string PIPE))]
|
|
[#\# (match i
|
|
[(px #px#"^#set\\{" (list _))
|
|
(sequence-fold (set) set-add values #\})]
|
|
[(px #px#"^#value" (list _))
|
|
(define bs (read-preserve i #:read-syntax? #t))
|
|
(when (not (bytes? (annotated-item bs)))
|
|
(parse-error "ByteString must follow #value"))
|
|
(when (not (null? (annotated-annotations bs)))
|
|
(parse-error "Annotations not permitted after #value"))
|
|
(decode (annotated-item bs) #:read-syntax? read-syntax?)]
|
|
[(px #px#"^#true" (list _))
|
|
#t]
|
|
[(px #px#"^#false" (list _))
|
|
#f]
|
|
[(px #px#"^#\"" (list _))
|
|
(read-literal-binary)]
|
|
[(px #px#"^#hex\\{" (list _))
|
|
(read-hex-binary '())]
|
|
[(px #px#"^#base64\\{" (list _))
|
|
(read-base64-binary '())]
|
|
[_
|
|
(read-char i)
|
|
(parse-error "Invalid preserve value")])]
|
|
[#\: (read-char i) (parse-error "Unexpected key/value separator between items")]
|
|
[_ (read-raw-symbol '())])))]))
|
|
|
|
(read-value))
|
|
|
|
(define (read-preserve-syntax [i (current-input-port)]
|
|
#:source [source #f])
|
|
(read-preserve i #:read-syntax? #t #:source source))
|
|
|
|
(define (string->preserve s #:read-syntax? [read-syntax? #f] #:track-position? [track-position? #t])
|
|
(define p (open-input-string s))
|
|
(when track-position? (port-count-lines! p))
|
|
(define v (read-preserve p #:read-syntax? read-syntax? #:source "<string>"))
|
|
(when (eof-object? v)
|
|
(parse-error* p "Unexpected end of input"))
|
|
(skip-whitespace* p)
|
|
(when (not (eof-object? (peek-char p)))
|
|
(parse-error* p "Unexpected text following preserve"))
|
|
v)
|
|
|
|
(define (string->preserve-syntax s)
|
|
(string->preserve s #:read-syntax? #t))
|
|
|
|
;;---------------------------------------------------------------------------
|
|
|
|
(define (write-preserve v0 [o (current-output-port)] #:indent [indent-amount0 #f])
|
|
(define indent-amount (match indent-amount0
|
|
[#f 0]
|
|
[#t 2] ;; a default
|
|
[other other]))
|
|
(define indenting? (and indent-amount0 #t))
|
|
|
|
(define-syntax-rule (! fmt arg ...) (fprintf o fmt arg ...))
|
|
|
|
(define (!indent distance)
|
|
(when indenting?
|
|
(! "\n~a" (make-string distance #\space))))
|
|
|
|
(define (!indent* distance)
|
|
(if indenting?
|
|
(!indent distance)
|
|
(! " ")))
|
|
|
|
(define (write-stringlike-char c [default (lambda (c) (! "~a" c))])
|
|
(match c
|
|
[#\\ (! "\\\\")]
|
|
[#\u08 (! "\\b")]
|
|
[#\u0C (! "\\f")]
|
|
[#\u0A (! "\\n")]
|
|
[#\u0D (! "\\r")]
|
|
[#\u09 (! "\\t")]
|
|
[_ (default c)]))
|
|
|
|
(define (write-sequence outer-distance opener comma closer item-writer vs)
|
|
(define inner-distance (+ outer-distance indent-amount))
|
|
(! "~a" opener)
|
|
(match vs
|
|
['() (void)]
|
|
[(cons v0 vs)
|
|
(!indent inner-distance)
|
|
(item-writer inner-distance v0)
|
|
(for [(v (in-list vs))]
|
|
(! "~a" comma)
|
|
(!indent* inner-distance)
|
|
(item-writer inner-distance v))
|
|
(!indent outer-distance)])
|
|
(! "~a" closer))
|
|
|
|
(define (write-record outer-distance label fields)
|
|
(! "<")
|
|
(write-value outer-distance label)
|
|
(for ([f (in-list fields)])
|
|
(! " ")
|
|
(write-value outer-distance f))
|
|
(! ">"))
|
|
|
|
;; (define (write-record outer-distance label fields)
|
|
;; (define simple-label? (or (boolean? label) (number? label) (string? label)
|
|
;; (bytes? label) (symbol? label)))
|
|
;; (define inner-distance (+ outer-distance
|
|
;; (if simple-label?
|
|
;; (+ 2 (string-length (preserve->string label #:indent #f)))
|
|
;; indent-amount)))
|
|
;; (define (write-fields fields)
|
|
;; (for ([f (in-list fields)])
|
|
;; (!indent* inner-distance)
|
|
;; (write-value inner-distance f)))
|
|
;;
|
|
;; (! "<")
|
|
;; (write-value inner-distance label)
|
|
;; (if simple-label?
|
|
;; (match fields
|
|
;; ['() (void)]
|
|
;; [(cons field0 fields)
|
|
;; (! " ")
|
|
;; (write-value inner-distance field0)
|
|
;; (write-fields fields)])
|
|
;; (write-fields fields))
|
|
;; (! ">"))
|
|
|
|
(define (write-key-value distance kv)
|
|
(match-define (cons k v) kv)
|
|
(write-value distance k)
|
|
(! ": ")
|
|
(write-value distance v))
|
|
|
|
(define (binunescaped? b)
|
|
(or (<= #x20 b #x21)
|
|
(<= #x23 b #x5b)
|
|
(<= #x5d b #x7e)))
|
|
|
|
(define (write-value distance v)
|
|
(match v
|
|
[(annotated annotations _ item)
|
|
(for [(a (in-list annotations))]
|
|
(! "@")
|
|
(write-value (+ distance 1) a)
|
|
(!indent* distance))
|
|
(write-value distance item)]
|
|
[#f (! "#false")]
|
|
[#t (! "#true")]
|
|
[(? single-flonum?) (! "~vf" v)]
|
|
[(? double-flonum?) (! "~v" v)]
|
|
[(? integer? x) (! "~v" v)]
|
|
[(? string?)
|
|
(! "\"")
|
|
(for [(c (in-string v))]
|
|
(match c
|
|
[#\" (! "\\\"")]
|
|
[_ (write-stringlike-char c)]))
|
|
(! "\"")]
|
|
[(? bytes?)
|
|
(! "#\"")
|
|
(for [(b (in-bytes v))]
|
|
(match b
|
|
[#x22 (! "\\\"")]
|
|
[(? binunescaped?) (write-stringlike-char (integer->char b))]
|
|
[_ (write-stringlike-char (integer->char b)
|
|
(lambda (c) (! "\\x~a" (~a #:min-width 2
|
|
#:align 'right
|
|
#:left-pad-string "0"
|
|
(number->string b 16)))))]))
|
|
(! "\"")]
|
|
[(? symbol?)
|
|
(define s (symbol->string v))
|
|
;; FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
|
|
(if (regexp-match #px"[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*" s)
|
|
(! "~a" s)
|
|
(begin (! "|")
|
|
(for [(c (in-string s))]
|
|
(match c
|
|
[(== PIPE) (! "\\|")]
|
|
[_ (write-stringlike-char c)]))
|
|
(! "|")))]
|
|
[(record label fields) (write-record distance label fields)]
|
|
[(? non-object-struct?)
|
|
(define key (prefab-struct-key v))
|
|
(when (not key) (error 'encode-value "Cannot encode non-prefab struct ~v" v))
|
|
(write-record distance key (cdr (vector->list (struct->vector v))))]
|
|
|
|
[(? list?) (write-sequence distance "[" "," "]" write-value v)]
|
|
[(? set?) (write-sequence distance "#set{" "," "}" write-value (set->list v))]
|
|
[(? dict?) (write-sequence distance "{" "," "}" write-key-value (dict->list v))]
|
|
|
|
[_ (error 'write-preserve "Cannot encode value ~v" v)]))
|
|
|
|
(write-value 0 v0))
|
|
|
|
(define (preserve->string v0 #:indent [indent-amount #f])
|
|
(with-output-to-string (lambda () (write-preserve v0 #:indent indent-amount))))
|
|
|
|
;;---------------------------------------------------------------------------
|
|
|
|
(module+ test
|
|
(require rackunit)
|
|
(require racket/runtime-path)
|
|
(require (for-syntax racket syntax/srcloc))
|
|
|
|
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (0 :: (wire-length)))) (bytes 0))
|
|
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (3 :: (wire-length)))) (bytes 3))
|
|
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (14 :: (wire-length)))) (bytes 14))
|
|
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (15 :: (wire-length)))) (bytes 15 15))
|
|
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (100 :: (wire-length))))
|
|
(bytes 15 100))
|
|
(check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (300 :: (wire-length))))
|
|
(bytes 15 #b10101100 #b00000010))
|
|
|
|
(define (dwl bs)
|
|
(bit-string-case bs
|
|
#:on-short (lambda (fail) 'short)
|
|
([ (= 0 :: bits 4) (w :: (wire-length)) ] w)
|
|
(else (void))))
|
|
|
|
(check-equal? (dwl (bytes 0)) 0)
|
|
(check-equal? (dwl (bytes 3)) 3)
|
|
(check-equal? (dwl (bytes 14)) 14)
|
|
(check-equal? (dwl (bytes 15)) 'short)
|
|
(check-equal? (dwl (bytes 15 9)) (void)) ;; not canonical
|
|
(check-equal? (dwl (bytes 15 15)) 15)
|
|
(check-equal? (dwl (bytes 15 100)) 100)
|
|
(check-equal? (dwl (bytes 15 #b10101100 #b00000010)) 300)
|
|
(check-equal? (dwl (bytes 15 #b10101100)) 'short)
|
|
|
|
(define (d bs)
|
|
(for [(i (in-range 0 (- (bytes-length bs) 1)))]
|
|
(when (not (eq? (decode (subbytes bs 0 i)
|
|
#:on-short (lambda () 'short)
|
|
void)
|
|
'short))
|
|
(error 'd "~a-byte prefix of ~v does not read as short" i bs)))
|
|
(decode bs
|
|
#:on-short (lambda () 'short)
|
|
void))
|
|
|
|
(define-syntax (cross-check stx)
|
|
(syntax-case stx ()
|
|
((_ text v (b ...))
|
|
#'(let ((val v)) (cross-check text v v (b ...))))
|
|
((_ text forward back (b ...))
|
|
#`(let ((loc #,(source-location->string #'forward)))
|
|
(check-equal? (string->preserve text) back loc)
|
|
(check-equal? (d (encode forward)) back loc)
|
|
(check-equal? (d (encode back)) back loc)
|
|
(check-equal? (d (expected b ...)) back loc)
|
|
(check-equal? (encode forward) (expected b ...) loc)
|
|
))))
|
|
|
|
(define-syntax (cross-check/nondeterministic stx)
|
|
(syntax-case stx ()
|
|
((_ text v (b ...))
|
|
#'(let ((val v)) (cross-check/nondeterministic text v v (b ...))))
|
|
((_ text forward back (b ...))
|
|
#`(let ((loc #,(source-location->string #'forward)))
|
|
(check-equal? (string->preserve text) back loc)
|
|
(check-equal? (d (encode forward)) back loc)
|
|
(check-equal? (d (encode back)) back loc)
|
|
(check-equal? (d (expected b ...)) back loc)
|
|
))))
|
|
|
|
(struct discard () #:prefab)
|
|
(struct capture (detail) #:prefab)
|
|
(struct observe (specification) #:prefab)
|
|
|
|
(struct speak (who what) #:prefab)
|
|
|
|
(struct date (year month day) #:prefab)
|
|
(struct thing (id) #:prefab)
|
|
(struct person thing (name date-of-birth) #:prefab)
|
|
(struct titled person (title) #:prefab)
|
|
|
|
(struct asymmetric (forward back))
|
|
(struct nondeterministic (value))
|
|
|
|
(define samples-txt-expected
|
|
(hash 'record1 (capture (discard))
|
|
'record2 (observe (speak (discard) (capture (discard))))
|
|
'list4a '(1 2 3 4)
|
|
'list1 (asymmetric (stream-of 'sequence (sequence->generator '(1 2 3 4)))
|
|
'(1 2 3 4))
|
|
'list5 '(-2 -1 0 1)
|
|
'string3 "hello"
|
|
'string2 (asymmetric (stream-of 'string (sequence->generator '(#"he" #"llo")))
|
|
"hello")
|
|
'string1 (asymmetric (stream-of 'string (sequence->generator '(#"he" #"ll" #"" #"" #"o")))
|
|
"hello")
|
|
'bytes1 (asymmetric (stream-of 'byte-string (sequence->generator '(#"he" #"ll" #"" #"" #"o")))
|
|
#"hello")
|
|
'symbol1 (asymmetric (stream-of 'symbol (sequence->generator '(#"he" #"ll" #"" #"" #"o")))
|
|
'hello)
|
|
'list6 `("hello" there #"world" () ,(set) #t #f)
|
|
'bytes2 #"hello"
|
|
'bytes3 #"ABC"
|
|
'bytes4 #"ABC"
|
|
'bytes5 #"AJN"
|
|
'bytes7 #"corymb"
|
|
'bytes8 #"corymb"
|
|
'bytes9 #"Hi"
|
|
'bytes10 #"Hi"
|
|
'bytes11 #"Hi"
|
|
'value1 #"corymb"
|
|
'value2 #t
|
|
'value3 #t
|
|
'value4 #t
|
|
'value5 #t
|
|
'value6 (list 1 2 3)
|
|
'list0 '()
|
|
'dict0 (hash)
|
|
'string0 ""
|
|
'string0a (asymmetric (stream-of 'string (sequence->generator '()))
|
|
"")
|
|
'symbol0 '||
|
|
'set0 (set)
|
|
'set1 (set 1 2 3)
|
|
'set1a (set 1 2 3)
|
|
'string4 "abc\u6c34\u6C34\\/\"\b\f\n\r\txyz"
|
|
'bytes13 #"abc\x6c\x34\xf0\\/\"\b\f\n\r\txyz"
|
|
'string5 "\U0001D11E"
|
|
'list2 '("abc" "def")
|
|
'record1 (capture (discard))
|
|
'record2 (observe (speak (discard) (capture (discard))))
|
|
'record3 (titled 101 "Blackwell" (date 1821 2 3) "Dr")
|
|
'record4 (asymmetric (record 'discard '()) (discard))
|
|
'record5 (record 7 '(()))
|
|
'record6 (asymmetric (record 'discard '(surprise))
|
|
'#s(discard surprise))
|
|
'record7 (record "aString" '(3 4))
|
|
'record8 (record (discard) '(3 4))
|
|
'list7 (list 'abc '|...| 'def)
|
|
'dict1 (hash 'a 1
|
|
"b" #t
|
|
'(1 2 3) #"c"
|
|
(hash 'first-name "Elizabeth") (hash 'surname "Blackwell"))
|
|
'rfc8259-example1 (nondeterministic
|
|
(hash "Image"
|
|
(hash "Width" 800
|
|
"Height" 600
|
|
"Title" "View from 15th Floor"
|
|
"Thumbnail" (hash "Url" "http://www.example.com/image/481989943"
|
|
"Height" 125
|
|
"Width" 100)
|
|
"Animated" #f
|
|
"IDs" (list 116 943 234 38793))))
|
|
'rfc8259-example2 (nondeterministic
|
|
(list (hash
|
|
"precision" "zip"
|
|
"Latitude" 37.7668
|
|
"Longitude" -122.3959
|
|
"Address" ""
|
|
"City" "SAN FRANCISCO"
|
|
"State" "CA"
|
|
"Zip" "94107"
|
|
"Country" "US")
|
|
(hash
|
|
"precision" "zip"
|
|
"Latitude" 37.371991
|
|
"Longitude" -122.026020
|
|
"Address" ""
|
|
"City" "SUNNYVALE"
|
|
"State" "CA"
|
|
"Zip" "94085"
|
|
"Country" "US")))
|
|
))
|
|
|
|
(define-runtime-path tests-path "../../../tests")
|
|
(let* ((path (build-path tests-path "samples.txt"))
|
|
(tests (call-with-input-file path
|
|
(lambda (p)
|
|
(port-count-lines! p)
|
|
(read-preserve-syntax p #:source path)))))
|
|
(local-require racket/pretty)
|
|
(for [((t-name t) (in-hash (annotated-item tests)))]
|
|
(newline)
|
|
(newline)
|
|
(write-preserve t #:indent #f)
|
|
(newline)
|
|
(newline)
|
|
(pretty-print (list (peel-annotations t-name)
|
|
(peel-annotations t)))))
|
|
)
|