From 85fe7b3b07a78a2a561b3cedff0adda96752bdb6 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Wed, 30 Dec 2020 16:43:18 +0100 Subject: [PATCH] Update Racket implementation --- implementations/racket/preserves/info.rkt | 1 - .../racket/preserves/preserves/annotation.rkt | 72 + .../racket/preserves/preserves/float.rkt | 8 + .../racket/preserves/preserves/jelly.rkt | 138 ++ .../racket/preserves/preserves/main.rkt | 1203 +---------------- .../racket/preserves/preserves/order.rkt | 98 ++ .../preserves/preserves/read-binary.rkt | 120 ++ .../racket/preserves/preserves/read-text.rkt | 346 +++++ .../racket/preserves/preserves/record.rkt | 5 +- .../preserves/preserves/tests/test-main.rkt | 201 +++ .../racket/preserves/preserves/tool.rkt | 19 +- .../racket/preserves/preserves/varint.rkt | 57 +- .../preserves/preserves/write-binary.rkt | 120 ++ .../racket/preserves/preserves/write-text.rkt | 178 +++ preserves.el | 10 +- preserves.md | 16 +- tests/samples.bin | Bin 8642 -> 7902 bytes tests/samples.txt | 456 +++---- 18 files changed, 1596 insertions(+), 1452 deletions(-) create mode 100644 implementations/racket/preserves/preserves/annotation.rkt create mode 100644 implementations/racket/preserves/preserves/float.rkt create mode 100644 implementations/racket/preserves/preserves/jelly.rkt create mode 100644 implementations/racket/preserves/preserves/order.rkt create mode 100644 implementations/racket/preserves/preserves/read-binary.rkt create mode 100644 implementations/racket/preserves/preserves/read-text.rkt create mode 100644 implementations/racket/preserves/preserves/tests/test-main.rkt create mode 100644 implementations/racket/preserves/preserves/write-binary.rkt create mode 100644 implementations/racket/preserves/preserves/write-text.rkt diff --git a/implementations/racket/preserves/info.rkt b/implementations/racket/preserves/info.rkt index a7e6f87..c075258 100644 --- a/implementations/racket/preserves/info.rkt +++ b/implementations/racket/preserves/info.rkt @@ -1,6 +1,5 @@ #lang setup/infotab (define collection 'multi) (define deps '("base" - "bitsyntax" "rackunit-lib" "data-lib")) diff --git a/implementations/racket/preserves/preserves/annotation.rkt b/implementations/racket/preserves/preserves/annotation.rkt new file mode 100644 index 0000000..122f4b1 --- /dev/null +++ b/implementations/racket/preserves/preserves/annotation.rkt @@ -0,0 +1,72 @@ +#lang racket/base + +(provide (struct-out annotated) + annotate + strip-annotations + strip-annotations-proc + peel-annotations + peel-annotations-proc) + +(require racket/match) +(require (for-syntax racket/base)) +(require "record.rkt") +(require racket/dict) +(require racket/set) + +;; Syntax properties and syntax objects would be almost perfect for +;; representing annotations, plus position/source tracking as +;; lagniappe, but unfortunately they don't play nicely with data much +;; outside of ordinary S-expressions as found in Racket source. +;; +;; So we do our own thing, for now. +;; +;; See also https://gitlab.com/preserves/preserves/-/issues/16, +;; 'Consider Racket "correlated objects" for annotations +;; representation'. +;; +(struct annotated (annotations srcloc item) #:transparent + #:methods gen:equal+hash + [(define (equal-proc a b =?) (=? (annotated-item a) (annotated-item b))) + (define (hash-proc a h) (h (annotated-item a))) + (define (hash2-proc a h) (h (annotated-item a)))]) + +(define (annotate v . as) + (match v + [(annotated annotations srcloc item) + (annotated (append as annotations) srcloc item)] + [item + (annotated as #f item)])) + +(define (strip-annotations-proc v #:depth [depth +inf.0]) + (let walk* ((v v) (depth depth)) + (define next-depth (- depth 1)) + (define (walk v) (walk* v next-depth)) + (if (zero? depth) + v + (match v + [(annotated _ _ item) + (match item + [(record label fields) (record (walk* label depth) (map walk fields))] + [(? list?) (map walk item)] + [(? set?) (for/set [(i (in-set item))] (walk i))] + [(? dict?) (for/hash [((k v) (in-dict item))] (values (walk* k depth) (walk v)))] + [(? annotated?) (error 'strip-annotations "Improper annotation structure: ~v" v)] + [_ item])] + [_ v])))) + +(define (peel-annotations-proc v) + (strip-annotations-proc v #:depth 1)) + +(define-match-expander strip-annotations + (syntax-rules () [(_ pat extra ...) (app (lambda (v) (strip-annotations-proc v extra ...)) pat)]) + (lambda (stx) + (syntax-case stx () + [(_ args ...) #'(strip-annotations-proc args ...)] + [_ #'strip-annotations-proc]))) + +(define-match-expander peel-annotations + (syntax-rules () [(_ pat extra ...) (app (lambda (v) (peel-annotations-proc v extra ...)) pat)]) + (lambda (stx) + (syntax-case stx () + [(_ args ...) #'(peel-annotations-proc args ...)] + [_ #'peel-annotations-proc]))) diff --git a/implementations/racket/preserves/preserves/float.rkt b/implementations/racket/preserves/preserves/float.rkt new file mode 100644 index 0000000..66183c2 --- /dev/null +++ b/implementations/racket/preserves/preserves/float.rkt @@ -0,0 +1,8 @@ +#lang racket/base +;; Wrapper struct to mark a need for 32-bit IEEE floating-point +;; precision (de)serialization. In many circumstances, Racket lacks +;; 32-bit floating point support, and single-flonum? always yields #f. + +(provide (struct-out float)) + +(struct float (value) #:transparent) diff --git a/implementations/racket/preserves/preserves/jelly.rkt b/implementations/racket/preserves/preserves/jelly.rkt new file mode 100644 index 0000000..54383e4 --- /dev/null +++ b/implementations/racket/preserves/preserves/jelly.rkt @@ -0,0 +1,138 @@ +#lang racket +;; Jelly, a very shaky implementation of Preserves - intended to +;; demonstrate a minimal implementation of Preserves binary I/O, +;; without error-checking or configurability etc. + +(provide (all-defined-out)) + +;;--------------------------------------------------------------------------- +;; Representing values + +(struct record (label fields) #:transparent) +(struct float (value) #:transparent) ;; a marker for single-precision I/O +(struct annotated (annotations item) #:transparent) + +;;--------------------------------------------------------------------------- +;; Reader + +(define (read-preserve/binary [in-port (current-input-port)]) + (let/ec return + + (define (next) + (match (next-byte) + [#x80 #f] + [#x81 #t] + [#x82 (float (floating-point-bytes->real (next-bytes 4) #t 0 4))] + [#x83 (floating-point-bytes->real (next-bytes 8) #t 0 8)] + [#x84 '#:end] + [#x85 (let ((a (next))) + (match (next) + [(annotated as i) (annotated (cons a as) i)] + [i (annotated (list a) i)]))] + [(? (between #x90 #x9C) v) (- v #x90)] + [(? (between #x9D #x9F) v) (- v #xA0)] + [(? (between #xA0 #xAF) v) (next-integer (- v #xA0 -1))] + [#xB0 (next-integer (next-varint))] + [#xB1 (bytes->string/utf-8 (next-bytes (next-varint)))] + [#xB2 (next-bytes (next-varint))] + [#xB3 (string->symbol (bytes->string/utf-8 (next-bytes (next-varint))))] + [#xB4 (apply (lambda (label . fields) (record label fields)) (next-items))] + [#xB5 (next-items)] + [#xB6 (list->set (next-items))] + [#xB7 (apply hash (next-items))])) + + (define (next-items) (match (next) ['#:end '()] [v (cons v (next-items))])) + + (define (eof-guard v) (if (eof-object? v) (return eof) v)) + + (define (next-byte) (eof-guard (read-byte in-port))) + + (define (next-bytes n) + (define bs (eof-guard (read-bytes n in-port))) + (if (< (bytes-length bs) n) (return eof) bs)) + + (define (next-varint) (eof-guard (read-varint in-port))) + + (define (next-integer n) + (define acc0 (next-byte)) + (define acc (if (< acc0 128) acc0 (- acc0 256))) + (for/fold [(acc acc)] [(n (in-range (- n 1)))] (+ (* acc 256) (next-byte)))) + + (next))) + +(define ((between lo hi) v) (<= lo v hi)) + +(define (read-varint in-port) + (let/ec return + (let loop () + (define b (read-byte in-port)) + (cond [(eof-object? b) (return b)] + [(< b 128) b] + [else (+ (* (loop) 128) (- b 128))])))) + +;;--------------------------------------------------------------------------- +;; Writer + +(define (write-preserve/binary v [out-port (current-output-port)]) + (define (output v) + (match v + [#f (write-byte #x80 out-port)] + [#t (write-byte #x81 out-port)] + [(float v) (write-byte #x82 out-port) (output-bytes (real->floating-point-bytes v 4 #t))] + [(? flonum?) (write-byte #x83 out-port) (output-bytes (real->floating-point-bytes v 8 #t))] + + [(annotated as v) + (for [(a (in-list as))] (write-byte #x85 out-port) (output a)) + (output v)] + + [(? integer?) + (cond [(<= -3 v -1) (write-byte (+ v #xA0) out-port)] + [(<= 0 v 12) (write-byte (+ v #x90) out-port)] + [else (define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit + (define byte-count (quotient (+ raw-bit-count 7) 8)) + (if (<= byte-count 16) + (write-byte (+ byte-count #xA0 -1) out-port) + (begin (write-byte #xB0 out-port) + (write-varint byte-count out-port))) + (for [(shift (in-range (* byte-count 8) 0 -8))] + (write-byte (bitwise-bit-field v (- shift 8) shift) out-port))])] + + [(? string?) (count-bytes 1 (string->bytes/utf-8 v))] + [(? bytes?) (count-bytes 2 v)] + [(? symbol?) (count-bytes 3 (string->bytes/utf-8 (symbol->string v)))] + + [(record label fields) (with-seq 4 (output label) (for-each output fields))] + [(? list?) (with-seq 5 (for-each output v))] + [(? set?) (with-seq 6 (output-set v))] + [(? hash?) (with-seq 7 (output-hash v))] + + [_ (error 'write-preserve/binary "Invalid value: ~v" v)])) + + (define (output-bytes bs) (write-bytes bs out-port)) + + (define-syntax-rule (with-seq tag body ...) + (begin (write-byte (+ tag #xB0) out-port) + body ... + (write-byte #x84 out-port))) + + (define (count-bytes tag bs) + (write-byte (+ tag #xB0) out-port) + (write-varint (bytes-length bs) out-port) + (output-bytes bs)) + + (define (encode v) (call-with-output-bytes (lambda (p) (write-preserve/binary v p)))) + + (define (output-set v) + (for-each output-bytes (sort (for/list [(e (in-set v))] (encode e)) bytespreserve - (all-from-out "record.rkt") - (struct-out annotated) - annotate - strip-annotations - strip-annotations-proc - peel-annotations - peel-annotations-proc - read-preserve - read-preserve-syntax - string->preserve - string->preserve-syntax - (struct-out binary-display-heuristics) - current-binary-display-heuristics - write-preserve - preserve->string - prepend-noop - encode - decode - decode-syntax - wire-value - preserve-order - preservepreserve s) - (match-define (stream-of kind generator-thunk) s) - (define g (generator-thunk)) - (define pieces (for/list [(p (in-producer g (void)))] p)) - (match kind - ['string (bytes->string/utf-8 (bytes-append* pieces))] - ['byte-string (bytes-append* pieces)] - ['symbol (string->symbol (bytes->string/utf-8 (bytes-append* pieces)))] - ['sequence pieces] - ['set (list->set pieces)] - ['dictionary (apply hash pieces)])) - -;; Syntax properties and syntax objects would be almost perfect for -;; representing annotations, plus position/source tracking as -;; lagniappe, but unfortunately they don't play nicely with data much -;; outside of ordinary S-expressions as found in Racket source. -;; -;; So we do our own thing, for now. -;; -(struct annotated (annotations srcloc item) #:transparent - #:methods gen:equal+hash - [(define (equal-proc a b =?) (=? (annotated-item a) (annotated-item b))) - (define (hash-proc a h) (h (annotated-item a))) - (define (hash2-proc a h) (h (annotated-item a)))]) - -(define (annotate v . as) - (match v - [(annotated annotations srcloc item) - (annotated (append as annotations) srcloc item)] - [item - (annotated as #f item)])) - -(define (strip-annotations-proc v #:depth [depth +inf.0]) - (let walk* ((v v) (depth depth)) - (define next-depth (- depth 1)) - (define (walk v) (walk* v next-depth)) - (if (zero? depth) - v - (match v - [(annotated _ _ item) - (match item - [(record label fields) (record (walk* label depth) (map walk fields))] - [(? list?) (map walk item)] - [(? set?) (for/set [(i (in-set item))] (walk i))] - [(? dict?) (for/hash [((k v) (in-dict item))] (values (walk* k depth) (walk v)))] - [(? annotated?) (error 'strip-annotations "Improper annotation structure: ~v" v)] - [_ item])] - [_ v])))) - -(define (peel-annotations-proc v) - (strip-annotations-proc v #:depth 1)) - -(define-match-expander strip-annotations - (syntax-rules () [(_ pat extra ...) (app (lambda (v) (strip-annotations-proc v extra ...)) pat)]) - (lambda (stx) - (syntax-case stx () - [(_ args ...) #'(strip-annotations-proc args ...)] - [_ #'strip-annotations-proc]))) - -(define-match-expander peel-annotations - (syntax-rules () [(_ pat extra ...) (app (lambda (v) (peel-annotations-proc v extra ...)) pat)]) - (lambda (stx) - (syntax-case stx () - [(_ args ...) #'(peel-annotations-proc args ...)] - [_ #'peel-annotations-proc]))) - -(define (prepend-noop encoded-value) - (bit-string-append #"\xff" encoded-value)) - -(define (encode v) - (bit-string->bytes (bit-string (v :: (wire-value))))) - -(define ((default-on-short bs)) (error 'decode "Short encoding: ~v" bs)) -(define ((default-on-fail bs)) (error 'decode "Invalid encoding: ~v" bs)) - -(define (decode bs - #:read-syntax? [read-syntax? #f] - #:on-short [on-short (default-on-short bs)] - [on-fail (default-on-fail bs)]) - (bit-string-case bs - #:on-short (lambda (fail) (on-short)) - ([ (v :: (wire-value #:read-syntax? read-syntax?)) ] v) - (else (on-fail)))) - -(define (decode-syntax bs - #:on-short [on-short (default-on-short bs)] - [on-fail (default-on-fail bs)]) - (decode bs on-fail #:read-syntax? #t #:on-short on-short)) - -(define-syntax wire-value - (syntax-rules () - [(_ #t input ks kf) (decode-value input ks kf #:read-syntax? #f)] - [(_ #t input ks kf #:read-syntax? rs) (decode-value input ks kf #:read-syntax? rs)] - [(_ #f v) (encode-value v)])) - -(define-syntax wire-length - (syntax-rules () - [(_ #t input ks kf) (decode-wire-length input ks kf)] - [(_ #f v) (encode-wire-length v)])) - -(define (encode-wire-length v) - (when (negative? v) (error 'encode-wire-length "Cannot encode negative wire-length ~v" v)) - (if (< v #b1111) - (bit-string (v :: bits 4)) - (bit-string (#b1111 :: bits 4) ((encode-varint v) :: binary)))) - -(define (encode-array-like minor fields) - (bit-string (2 :: bits 2) - (minor :: bits 2) - ((length fields) :: (wire-length)) - ((apply bit-string-append (map encode-value fields)) :: binary))) - -(define (encode-binary-like minor bs) - (bit-string (1 :: bits 2) - (minor :: bits 2) - ((bytes-length bs) :: (wire-length)) - (bs :: binary))) - -(define (encode-stream major minor chunk-ok? generator) - (bit-string-append (bit-string (#b0010 :: bits 4) (major :: bits 2) (minor :: bits 2)) - (let loop () - (match (generator) - [(? void?) #""] - [(? chunk-ok? v) (bit-string-append (encode-value v) (loop))] - [bad (error 'encode-stream "Cannot encode chunk: ~v" bad)])) - (bit-string #b00000100))) - -(define (dict-keys-and-values d) - (reverse (for/fold [(acc '())] [((k v) (in-dict d))] (cons v (cons k acc))))) - -(define (encode-value v) - (define canonicalizing? (canonicalize-preserves?)) - (let restart ((v v)) - (match v - [#f (bytes #b00000000)] - [#t (bytes #b00000001)] - [(? single-flonum?) (bit-string #b00000010 (v :: float bits 32))] - [(? double-flonum?) (bit-string #b00000011 (v :: float bits 64))] - [(annotated annotations _ item) - (if canonicalizing? - (restart item) - (bit-string ((apply bit-string-append - (map (lambda (a) (bit-string #b00000101 ((encode-value a) :: binary))) - annotations)) :: binary) - ((encode-value item) :: binary)))] - [(? stream-of?) #:when canonicalizing? - (restart (stream-of->preserve v))] - [(stream-of 'string p) (encode-stream 1 1 bytes? (p))] - [(stream-of 'byte-string p) (encode-stream 1 2 bytes? (p))] - [(stream-of 'symbol p) (encode-stream 1 3 bytes? (p))] - [(stream-of 'sequence p) (encode-stream 2 1 (lambda (x) #t) (p))] - [(stream-of 'set p) (encode-stream 2 2 (lambda (x) #t) (p))] - [(stream-of 'dictionary p) (encode-stream 2 3 (lambda (x) #t) (p))] - - [(? integer? x) #:when (<= -3 x 12) (bit-string (#b0011 :: bits 4) (x :: bits 4))] - ;; [0 (bytes #b10000000)] - [(? integer?) - (define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit - (define byte-count (quotient (+ raw-bit-count 7) 8)) - (bit-string (#b0100 :: bits 4) (byte-count :: (wire-length)) (v :: integer bytes byte-count))] - [(? string?) (encode-binary-like 1 (string->bytes/utf-8 v))] - [(? bytes?) (encode-binary-like 2 v)] - [(? symbol?) (encode-binary-like 3 (string->bytes/utf-8 (symbol->string v)))] - - [(record label fields) (encode-array-like 0 (cons label fields))] - [(? list?) (encode-array-like 1 v)] - [(? set?) (encode-array-like 2 (if canonicalizing? - (canonical-set-elements v) - (set->list v)))] - [(? dict?) (encode-array-like 3 (if canonicalizing? - (canonical-dict-keys-and-values v) - (dict-keys-and-values v)))] - - [_ (error 'encode-value "Cannot encode value ~v" v)]))) - -;;--------------------------------------------------------------------------- - -(define (decode-wire-length bs ks kf) - (bit-string-case bs - #:on-short (lambda (fail) (kf #t)) - ([ (= #b1111 :: bits 4) (rest :: binary) ] - (decode-varint rest - (lambda (v tail) - (if (< v #b1111) - (kf) - (ks v tail))) - kf)) - ([ (v :: bits 4) (rest :: binary) ] (ks v rest)) - (else (kf)))) - -(define (decode-binary minor bs rest ks kf) - (match minor - [0 (if (positive? (bit-string-length bs)) - (ks (bit-string->signed-integer bs #t) rest) - (ks 0 rest))] - [2 (ks bs rest)] - [(or 1 3) - ((with-handlers [(exn:fail:contract? (lambda (e) kf))] - (define s (bytes->string/utf-8 bs)) - (lambda () (ks (if (= minor 3) (string->symbol s) s) rest))))])) - -(define (decode-compound minor vs rest ks kf) - (match* (minor vs) - [(0 (list* label fields)) (ks (record label fields) rest)] - [(0 '()) (kf)] - [(1 _) (ks vs rest)] - [(2 _) (ks (list->set vs) rest)] - [(3 _) (if (even? (length vs)) - (ks (apply hash vs) rest) - (kf))])) - -(define (decode-value input ks kf #:read-syntax? read-syntax?) - - (define (position rest) - (- (bytes-length input) (arithmetic-shift (bit-string-length rest) -3))) - - (define nil-annotation - (if read-syntax? - (lambda (ks bs) - (lambda (result rest) - (define pos0 (position bs)) - (define pos1 (position rest)) - (ks (annotated '() - (srcloc #f #f #f (+ pos0 1) (- pos1 pos0)) - result) - rest))) - (lambda (ks bs) ks))) - - (define cons-annotation - (if read-syntax? - (lambda (ks a v rest) - (match-define (annotated annotations srcloc item) v) - (ks (annotated (cons a annotations) srcloc item) rest)) - (lambda (ks a v rest) - (ks v rest)))) - - (define (decode-values n bs ks kf) - (let loop ((n n) (acc-rev '()) (bs bs)) - (if (zero? n) - (ks (reverse acc-rev) bs) - (decode-one bs (lambda (v rest) (loop (- n 1) (cons v acc-rev) rest)) kf)))) - - (define (decode-stream minor annotations-ok chunk-ok? join-chunks decode bs ks kf) - (let loop ((acc-rev '()) (rest bs)) - (define (accept-one chunk rest) - (if (chunk-ok? chunk) - (loop (cons chunk acc-rev) rest) - (kf))) - (bit-string-case rest - #:on-short (lambda (fail) (kf #t)) - ([ (= #b00000100 :: bits 8) (rest :: binary) ] - (decode minor - (join-chunks (reverse acc-rev)) - rest - (nil-annotation ks bs) - kf)) - ([ (= #b00000101 :: bits 8) (rest1 :: binary) ] - (if annotations-ok - (decode-one rest accept-one kf) - (kf))) - (else - (decode-one rest accept-one kf))))) - - (define (valid-unwrapped-chunk? c) - (and (bytes? c) (positive? (bytes-length c)))) - - (define valid-chunk? - (if read-syntax? - (lambda (v) (valid-unwrapped-chunk? (annotated-item v))) - valid-unwrapped-chunk?)) - - (define bytes-chunk-append* - (if read-syntax? - (lambda (vs) (bytes-append* (map annotated-item vs))) - bytes-append*)) - - (define (decode-one bs ks kf) - (bit-string-case bs - #:on-short (lambda (fail) (kf #t)) - ([ (= #b00000000 :: bits 8) (rest :: binary) ] - ((nil-annotation ks bs) #f rest)) - ([ (= #b00000001 :: bits 8) (rest :: binary) ] - ((nil-annotation ks bs) #t rest)) - - ([ (= #b00000010 :: bits 8) (v :: float bits 32) (rest :: binary) ] - ((nil-annotation ks bs) (real->single-flonum v) rest)) - ([ (= #b00000011 :: bits 8) (v :: float bits 64) (rest :: binary) ] - ((nil-annotation ks bs) v rest)) - - ([ (= #b00000101 :: bits 8) (rest :: binary) ] - (decode-one rest - (lambda (a rest) - (decode-one rest - (lambda (v rest) - (cons-annotation ks a v rest)) - kf)) - kf)) - - ([ (= #b0001 :: bits 4) (rest :: binary) ] - (kf)) - - ([ (= #b001001 :: bits 6) (minor :: bits 2) (rest :: binary) ] - (decode-stream minor #f valid-chunk? bytes-chunk-append* decode-binary rest ks kf)) - ([ (= #b001010 :: bits 6) (minor :: bits 2) (rest :: binary) ] - (decode-stream minor #t (lambda (x) #t) values decode-compound rest ks kf)) - - ([ (= #b0011 :: bits 4) (x :: bits 4) (rest :: binary) ] - ((nil-annotation ks bs) (if (> x 12) (- x 16) x) rest)) - - ([ (= #b01 :: bits 2) (minor :: bits 2) (byte-count :: (wire-length)) - (bits :: binary bytes byte-count) - (rest :: binary) ] - (decode-binary minor (bit-string->bytes bits) rest (nil-annotation ks bs) kf)) - - ([ (= #b10 :: bits 2) (minor :: bits 2) (field-count :: (wire-length)) (rest :: binary) ] - (decode-values field-count - rest - (lambda (fields rest) - (decode-compound minor fields rest (nil-annotation ks bs) kf)) - kf)) - - ([ (= #b11111111 :: bits 8) (rest :: binary) ] - (decode-one rest ks kf)) - - (else (kf)))) - - (decode-one input ks kf)) - -;;--------------------------------------------------------------------------- - -(define PIPE #\|) - -(define (skip-whitespace* i) - (regexp-match? #px#"^(\\s|,)*" i)) ;; side effect: consumes matched portion of input - -(define (parse-error* #:raise-proc [raise-proc raise-read-error] i source fmt . args) - (define-values [line column pos] (port-next-location i)) - (raise-proc (format "read-preserve: ~a" (apply format fmt args)) - source - line - column - pos - #f)) - -(define (read-preserve [i (current-input-port)] +(define (read-preserve [in-port (current-input-port)] #:read-syntax? [read-syntax? #f] - #:source [source (object-name i)]) - (local-require file/sha1) - - (define-match-expander px - (syntax-rules () - [(_ re pat) (app (lambda (v) (regexp-try-match re v)) pat)])) - - (define (parse-error fmt . args) - (apply parse-error* i source fmt args)) - - (define (eof-guard v) - (match v - [(? eof-object?) - (parse-error* #:raise-proc raise-read-eof-error i source "Unexpected end of input")] - [v v])) - - (define (peek/no-eof) (eof-guard (peek-char i))) - (define (read/no-eof) (eof-guard (read-char i))) - - (define (skip-whitespace) (skip-whitespace* i)) - - (define (read-sequence terminator) - (sequence-fold '() (lambda (acc v) (cons v acc)) reverse terminator)) - - (define (set-add* s e) - (when (set-member? s e) (parse-error "Duplicate set element: ~v" e)) - (set-add s e)) - - (define (read-dictionary-or-set seed) - (sequence-fold seed - (lambda (acc k) - (skip-whitespace) - (match (peek-char i) - [#\: (read-char i) - (when (set? acc) (parse-error "Unexpected key/value separator in set")) - (let ((acc (or acc (hash)))) - (when (hash-has-key? acc k) (parse-error "Duplicate key: ~v" k)) - (hash-set acc k (read-value)))] - [_ (when (hash? acc) (parse-error "Missing expected key/value separator")) - (set-add* (or acc (set)) k)])) - (lambda (acc) (or acc (hash))) - #\})) - - (define (read-raw-symbol acc) - (match (peek-char i) - [(or (? eof-object?) - (? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\> - #\" #\; #\, #\@ #\# #\: (== PIPE) - (? char-whitespace?)))) - (if (null? acc) - (parse-error "Invalid character ~v at start of value" (peek-char i)) - (string->symbol (list->string (reverse acc))))] - [_ (read-raw-symbol (cons (read-char i) acc))])) - - (define (read-base64-binary acc) - (skip-whitespace) - (define ch (read/no-eof)) - (cond [(eqv? ch #\}) - (base64-decode (string->bytes/latin-1 (list->string (reverse acc))))] - [(or (and (char>=? ch #\A) (char<=? ch #\Z)) - (and (char>=? ch #\a) (char<=? ch #\z)) - (and (char>=? ch #\0) (char<=? ch #\9)) - (memv ch '(#\+ #\/ #\- #\_ #\=))) - (read-base64-binary (cons ch acc))] - [else - (parse-error "Invalid base64 character")])) - - (define (hexdigit? ch) - (or (and (char>=? ch #\A) (char<=? ch #\F)) - (and (char>=? ch #\a) (char<=? ch #\f)) - (and (char>=? ch #\0) (char<=? ch #\9)))) - - (define (read-hex-binary acc) - (skip-whitespace) - (define ch (read/no-eof)) - (cond [(eqv? ch #\}) - (hex-string->bytes (list->string (reverse acc)))] - [(hexdigit? ch) - (define ch2 (read/no-eof)) - (when (not (hexdigit? ch2)) - (parse-error "Hex-encoded binary digits must come in pairs")) - (read-hex-binary (cons ch2 (cons ch acc)))] - [else - (parse-error "Invalid hex character")])) - - (define (read-stringlike xform-item finish terminator-char hexescape-char hexescape-proc) - (let loop ((acc '())) - (match (read/no-eof) - [(== terminator-char) (finish (reverse acc))] - [#\\ (match (read/no-eof) - [(== hexescape-char) (loop (cons (hexescape-proc) acc))] - [(and c (or (== terminator-char) #\\ #\/)) (loop (cons (xform-item c) acc))] - [#\b (loop (cons (xform-item #\u08) acc))] - [#\f (loop (cons (xform-item #\u0C) acc))] - [#\n (loop (cons (xform-item #\u0A) acc))] - [#\r (loop (cons (xform-item #\u0D) acc))] - [#\t (loop (cons (xform-item #\u09) acc))] - [c (parse-error "Invalid escape code \\~a" c)])] - [c (loop (cons (xform-item c) acc))]))) - - (define (read-string terminator-char) - (read-stringlike values - list->string - terminator-char - #\u - (lambda () - (integer->char - (match i - [(px #px#"^[a-fA-F0-9]{4}" (list hexdigits)) - (define n1 (string->number (bytes->string/utf-8 hexdigits) 16)) - (if (<= #xd800 n1 #xdfff) ;; surrogate pair first half - (match i - [(px #px#"^\\\\u([a-fA-F0-9]{4})" (list _ hexdigits2)) - (define n2 (string->number (bytes->string/utf-8 hexdigits2) 16)) - (if (<= #xdc00 n2 #xdfff) - (+ (arithmetic-shift (- n1 #xd800) 10) - (- n2 #xdc00) - #x10000) - (parse-error "Bad second half of surrogate pair"))] - [_ (parse-error "Missing second half of surrogate pair")]) - n1)] - [_ (parse-error "Bad string \\u escape")]))))) - - (define (read-literal-binary) - (read-stringlike (lambda (c) - (define b (char->integer c)) - (when (>= b 256) - (parse-error "Invalid code point ~a (~v) in literal binary" b c)) - b) - list->bytes - #\" - #\x - (lambda () - (match i - [(px #px#"^[a-fA-F0-9]{2}" (list hexdigits)) - (string->number (bytes->string/utf-8 hexdigits) 16)] - [_ (parse-error "Bad binary \\x escape")])))) - - (define (read-intpart acc-rev) - (match (peek-char i) - [#\0 (read-fracexp (cons (read-char i) acc-rev))] - [_ (read-digit+ acc-rev read-fracexp)])) - - (define (read-digit* acc-rev k) - (match (peek-char i) - [(? char? (? char-numeric?)) (read-digit* (cons (read-char i) acc-rev) k)] - [_ (k acc-rev)])) - - (define (read-digit+ acc-rev k) - (match (peek-char i) - [(? char? (? char-numeric?)) (read-digit* (cons (read-char i) acc-rev) k)] - [_ (parse-error "Incomplete number")])) - - (define (read-fracexp acc-rev) - (match (peek-char i) - [#\. (read-digit+ (cons (read-char i) acc-rev) read-exp)] - [_ (read-exp acc-rev)])) - - (define (read-exp acc-rev) - (match (peek-char i) - [(or #\e #\E) (read-sign-and-exp (cons (read-char i) acc-rev))] - [_ (finish-number acc-rev)])) - - (define (read-sign-and-exp acc-rev) - (match (peek-char i) - [(or #\+ #\-) (read-digit+ (cons (read-char i) acc-rev) finish-number)] - [_ (read-digit+ acc-rev finish-number)])) - - (define (finish-number acc-rev) - (define s (list->string (reverse acc-rev))) - (define n (string->number s)) - (when (not n) (parse-error "Invalid number: ~v" s)) - (if (flonum? n) - (match (peek-char i) - [(or #\f #\F) (read-char i) (real->single-flonum n)] - [_ n]) - n)) - - (define (read-number) - (match (peek/no-eof) - [#\- (read-intpart (list (read-char i)))] - [_ (read-intpart (list))])) - - (define (sequence-fold acc accumulate-one finish terminator-char) - (let loop ((acc acc)) - (skip-whitespace) - (match (peek/no-eof) - [(== terminator-char) (read-char i) (finish acc)] - [_ (loop (accumulate-one acc (read-value)))]))) - - (define nil-annotation - (if read-syntax? - (lambda (thunk) - (define-values (line0 col0 pos0) (port-next-location i)) - (define v (thunk)) - (define-values (line1 col1 pos1) (port-next-location i)) - (define loc (and line0 col0 pos0 pos1 (srcloc source line0 col0 pos0 (- pos1 pos0)))) - (match v - [(annotated annotations _ item) (annotated annotations loc item)] - [item (annotated '() loc item)])) - (lambda (thunk) (thunk)))) - - (define cons-annotation - (if read-syntax? - (lambda (a v) - (match-define (annotated annotations srcloc item) v) - (annotated (cons a annotations) srcloc item)) - (lambda (a v) v))) - - (define (read-value) - (skip-whitespace) - (define sigil (peek-char i)) - (match sigil - [(? eof-object? o) o] - [#\@ (read-char i) - (define a (eof-guard (read-value))) - (define v (eof-guard (read-value))) - (cons-annotation a v)] - [_ - (nil-annotation - (lambda () - (match sigil - [#\{ (read-char i) (read-dictionary-or-set #f)] - [#\[ (read-char i) (read-sequence #\])] - [#\< (read-char i) - (match (read-sequence #\>) - ['() (parse-error "Missing record label")] - [(cons head fields) (record head fields)])] - [(or #\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (read-number)] - [#\" (read-char i) (read-string #\")] - [(== PIPE) (read-char i) (string->symbol (read-string PIPE))] - [#\# (match i - [(px #px#"^#set\\{" (list _)) - (sequence-fold (set) set-add* values #\})] - [(px #px#"^#value" (list _)) - (define bs (read-preserve i #:read-syntax? #t)) - (when (not (bytes? (annotated-item bs))) - (parse-error "ByteString must follow #value")) - (when (not (null? (annotated-annotations bs))) - (parse-error "Annotations not permitted after #value")) - (decode (annotated-item bs) #:read-syntax? read-syntax?)] - [(px #px#"^#true" (list _)) - #t] - [(px #px#"^#false" (list _)) - #f] - [(px #px#"^#\"" (list _)) - (read-literal-binary)] - [(px #px#"^#hex\\{" (list _)) - (read-hex-binary '())] - [(px #px#"^#base64\\{" (list _)) - (read-base64-binary '())] - [_ - (read-char i) - (parse-error "Invalid preserve value")])] - [#\: (read-char i) (parse-error "Unexpected key/value separator between items")] - [_ (read-raw-symbol '())])))])) - - (read-value)) - -(define (read-preserve-syntax [i (current-input-port)] - #:source [source #f]) - (read-preserve i #:read-syntax? #t #:source source)) - -(define (string->preserve s #:read-syntax? [read-syntax? #f] #:track-position? [track-position? #t]) - (define p (open-input-string s)) - (define source "") - (when track-position? (port-count-lines! p)) - (define v (read-preserve p #:read-syntax? read-syntax? #:source source)) - (when (eof-object? v) - (parse-error* #:raise-proc raise-read-eof-error p source "Unexpected end of input")) - (skip-whitespace* p) - (when (not (eof-object? (peek-char p))) - (parse-error* p source "Unexpected text following preserve")) - v) - -(define (string->preserve-syntax s) - (string->preserve s #:read-syntax? #t)) - -;;--------------------------------------------------------------------------- - -(struct binary-display-heuristics (printable-ascii-proportion max-length) #:transparent) - -(define current-binary-display-heuristics (make-parameter (binary-display-heuristics 3/4 1024))) - -(define (write-preserve v0 [o (current-output-port)] #:indent [indent-amount0 #f]) - (define indent-amount (match indent-amount0 - [#f 0] - [#t 2] ;; a default - [other other])) - (define indenting? (and indent-amount0 #t)) - (define canonicalizing? (canonicalize-preserves?)) - - (define-syntax-rule (! fmt arg ...) (fprintf o fmt arg ...)) - - (define (!indent distance) - (when indenting? - (! "\n~a" (make-string distance #\space)))) - - (define (!indent* distance) - (if indenting? - (!indent distance) - (! " "))) - - (define (write-stringlike-char c [default (lambda (c) (! "~a" c))]) - (match c - [#\\ (! "\\\\")] - [#\u08 (! "\\b")] - [#\u0C (! "\\f")] - [#\u0A (! "\\n")] - [#\u0D (! "\\r")] - [#\u09 (! "\\t")] - [_ (default c)])) - - (define (write-sequence outer-distance opener comma closer item-writer vs) - (define inner-distance (+ outer-distance indent-amount)) - (! "~a" opener) - (match vs - ['() (void)] - [(cons v0 vs) - (!indent inner-distance) - (item-writer inner-distance v0) - (for [(v (in-list vs))] - (! "~a" comma) - (!indent* inner-distance) - (item-writer inner-distance v)) - (!indent outer-distance)]) - (! "~a" closer)) - - (define (write-record outer-distance label fields) - (! "<") - (write-value outer-distance label) - (for ([f (in-list fields)]) - (! " ") - (write-value outer-distance f)) - (! ">")) - - ;; (define (write-record outer-distance label fields) - ;; (define simple-label? (or (boolean? label) (number? label) (string? label) - ;; (bytes? label) (symbol? label))) - ;; (define inner-distance (+ outer-distance - ;; (if simple-label? - ;; (+ 2 (string-length (preserve->string label #:indent #f))) - ;; indent-amount))) - ;; (define (write-fields fields) - ;; (for ([f (in-list fields)]) - ;; (!indent* inner-distance) - ;; (write-value inner-distance f))) - ;; - ;; (! "<") - ;; (write-value inner-distance label) - ;; (if simple-label? - ;; (match fields - ;; ['() (void)] - ;; [(cons field0 fields) - ;; (! " ") - ;; (write-value inner-distance field0) - ;; (write-fields fields)]) - ;; (write-fields fields)) - ;; (! ">")) - - (define (write-key-value distance kv) - (match-define (cons k v) kv) - (write-value distance k) - (! ": ") - (write-value distance v)) - - (define (binunescaped? b) - (or (<= #x20 b #x21) - (<= #x23 b #x5b) - (<= #x5d b #x7e))) - - (define (write-binary-stringlike v) - (! "#\"") - (for [(b (in-bytes v))] - (match b - [#x22 (! "\\\"")] - [(? binunescaped?) (write-stringlike-char (integer->char b))] - [_ (write-stringlike-char (integer->char b) - (lambda (c) (! "\\x~a" (~a #:min-width 2 - #:align 'right - #:left-pad-string "0" - (number->string b 16)))))])) - (! "\"")) - - (define (write-binary-base64 outer-distance v) - ;; Racket's encoder breaks lines after 72 characters. - ;; That corresponds to 54 bytes of input binary. - (! "#base64{") - (if (and indenting? (> (bytes-length v) 54)) - (let* ((inner-distance (+ outer-distance indent-amount)) - (line-separator (bytes-append #"\n" (make-bytes inner-distance 32))) - (encoded (base64-encode v line-separator))) - (write-bytes line-separator o) - (write-bytes encoded o 0 (- (bytes-length encoded) indent-amount))) - (write-bytes (base64-encode v #"") o)) - (! "}")) - - (define (write-binary outer-distance v) - (match-define (binary-display-heuristics proportion maxlen) (current-binary-display-heuristics)) - (define vlen (bytes-length v)) - (if (>= vlen maxlen) - (write-binary-base64 outer-distance v) - (let* ((sample-length (min vlen maxlen)) - (printable-ascii-count (for/sum [(i (in-range 0 sample-length)) - (b (in-bytes v))] - (if (or (<= 32 b 126) (= b 9) (= b 10) (= b 13)) 1 0)))) - (if (or (zero? vlen) (>= (/ printable-ascii-count sample-length) proportion)) - (write-binary-stringlike v) - (write-binary-base64 outer-distance v))))) - - (define (write-value distance v) - (match v - [(annotated annotations _ item) - (when (not canonicalizing?) - (for [(a (in-list annotations))] - (! "@") - (write-value (+ distance 1) a) - (!indent* distance))) - (write-value distance item)] - [(? stream-of?) (write-value distance (stream-of->preserve v))] - [#f (! "#false")] - [#t (! "#true")] - [(? single-flonum?) (! "~vf" (real->double-flonum v))] - [(? double-flonum?) (! "~v" v)] - [(? integer? x) (! "~v" v)] - [(? string?) - (! "\"") - (for [(c (in-string v))] - (match c - [#\" (! "\\\"")] - [_ (write-stringlike-char c)])) - (! "\"")] - [(? bytes?) (write-binary distance v)] - [(? symbol?) - (define s (symbol->string v)) - ;; FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. - (if (regexp-match #px"[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*" s) - (! "~a" s) - (begin (! "|") - (for [(c (in-string s))] - (match c - [(== PIPE) (! "\\|")] - [_ (write-stringlike-char c)])) - (! "|")))] - [(record label fields) (write-record distance label fields)] - [(? list?) (write-sequence distance "[" "," "]" write-value v)] - [(? set?) (write-sequence distance "#set{" "," "}" write-value (if canonicalizing? - (canonical-set-elements v) - (set->list v)))] - [(? dict?) (write-sequence distance "{" "," "}" write-key-value (if canonicalizing? - (canonical-dict-entries v) - (dict->list v)))] - - [_ (error 'write-preserve "Cannot encode value ~v" v)])) - - (write-value 0 v0)) - -(define (preserve->string v0 #:indent [indent-amount #f]) - (with-output-to-string (lambda () (write-preserve v0 #:indent indent-amount)))) - -;;--------------------------------------------------------------------------- - -(define (typecode v) - (match v - [(? boolean?) 0] - [(? single-flonum?) 1] - [(? double-flonum?) 2] - [(? integer? x) 3] - [(? string?) 4] - [(? bytes?) 5] - [(? symbol?) 6] - [(record _ _) 7] - [(? list?) 8] - [(? set?) 9] - [(? dict?) 10] - [_ (error 'preserve-order "Cannot compare value ~v" v)])) - -(define-syntax chain-order - (syntax-rules () - [(_ o) o] - [(_ o more ...) (match o - ['= (chain-order more ...)] - [other other])])) - -(define (prepare-for-order v) - (match v - [(annotated _ _ item) (prepare-for-order item)] - [(? stream-of?) (stream-of->preserve v)] - [_ v])) - -(define preserve-order - (order 'preserve-order - any/c - (lambda (a* b*) - (define a (prepare-for-order a*)) - (define b (prepare-for-order b*)) - (define ta (typecode a)) - (define tb (typecode b)) - (cond [(< ta tb) '<] - [(> ta tb) '>] - [else (match ta ;; == tb - [7 (chain-order - (preserve-order (record-label a) (record-label b)) - (preserve-order (record-fields a)) (preserve-order (record-fields b)))] - [8 (match* (a b) - [('() '()) '=] - [('() _) '<] - [(_ '()) '>] - [((cons a0 a1) (cons b0 b1)) - (chain-order (preserve-order a0 b0) (preserve-order a1 b1))])] - [9 (preserve-order (canonical-set-elements a) (canonical-set-elements b))] - [10 (preserve-order (canonical-dict-keys a) (canonical-dict-keys b))] - [_ (datum-order a b)])])))) - -(define preservelist v) preservelist v) preservebytes (bit-string (0 :: bits 4) (0 :: (wire-length)))) (bytes 0)) - (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (3 :: (wire-length)))) (bytes 3)) - (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (14 :: (wire-length)))) (bytes 14)) - (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (15 :: (wire-length)))) (bytes 15 15)) - (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (100 :: (wire-length)))) - (bytes 15 100)) - (check-equal? (bit-string->bytes (bit-string (0 :: bits 4) (300 :: (wire-length)))) - (bytes 15 #b10101100 #b00000010)) - - (define (dwl bs) - (bit-string-case bs - #:on-short (lambda (fail) 'short) - ([ (= 0 :: bits 4) (w :: (wire-length)) ] w) - (else (void)))) - - (check-equal? (dwl (bytes 0)) 0) - (check-equal? (dwl (bytes 3)) 3) - (check-equal? (dwl (bytes 14)) 14) - (check-equal? (dwl (bytes 15)) 'short) - (check-equal? (dwl (bytes 15 9)) (void)) ;; not canonical - (check-equal? (dwl (bytes 15 15)) 15) - (check-equal? (dwl (bytes 15 100)) 100) - (check-equal? (dwl (bytes 15 #b10101100 #b00000010)) 300) - (check-equal? (dwl (bytes 15 #b10101100)) 'short) - - (define (d bs #:allow-invalid-prefix? [allow-invalid-prefix? #f]) - (for [(i (in-range 0 (- (bytes-length bs) 1)))] - (define result (decode (subbytes bs 0 i) #:on-short (lambda () 'short) void)) - (when (and (not (eq? result 'short)) - (not (and allow-invalid-prefix? (void? result)))) - (error 'd "~a-byte prefix of ~v does not read as short; result: ~v" i bs result))) - (decode-syntax bs - #:on-short (lambda () 'short) - void)) - - (define (d-strip bs) - (strip-annotations (d bs))) - - (struct discard () #:prefab) - (struct capture (detail) #:prefab) - (struct observe (specification) #:prefab) - - (struct speak (who what) #:prefab) - - (struct date (year month day) #:prefab) - (struct thing (id) #:prefab) - (struct person thing (name date-of-birth) #:prefab) - (struct titled person (title) #:prefab) - - (struct asymmetric (forward back)) - - (define (stream-of* kind . items) - (stream-of kind (lambda () (sequence->generator items)))) - - (define samples-txt-expected - (hash 'record1 (capture (discard)) - 'record2 (observe (speak (discard) (capture (discard)))) - 'list4a '(1 2 3 4) - 'list1 (asymmetric (stream-of* 'sequence 1 2 3 4) - '(1 2 3 4)) - 'list5 '(-2 -1 0 1) - 'string3 "hello" - 'string2 (asymmetric (stream-of* 'string #"he" #"llo") - "hello") - 'string1 (asymmetric (stream-of* 'string #"he" #"ll" #"o") - "hello") - 'bytes1 (asymmetric (stream-of* 'byte-string #"he" #"ll" #"o") - #"hello") - 'symbol1 (asymmetric (stream-of* 'symbol #"he" #"ll" #"o") - 'hello) - 'list6 `("hello" there #"world" () ,(set) #t #f) - 'bytes2 #"hello" - 'bytes3 #"ABC" - 'bytes4 #"ABC" - 'bytes5 #"AJN" - 'bytes7 #"corymb" - 'bytes8 #"corymb" - 'bytes9 #"Hi" - 'bytes10 #"Hi" - 'bytes11 #"Hi" - 'value1 #"corymb" - 'value2 #t - 'value3 #t - 'value4 #t - 'value5 #t - 'value6 (list 1 2 3) - 'list0 '() - 'dict0 (hash) - 'string0 "" - 'string0a (asymmetric (stream-of* 'string) - "") - 'symbol0 '|| - 'set0 (set) - 'set1 (set 1 2 3) - 'set1a (set 1 2 3) - 'string4 "abc\u6c34\u6C34\\/\"\b\f\n\r\txyz" - 'bytes13 #"abc\x6c\x34\xf0\\/\"\b\f\n\r\txyz" - 'string5 "\U0001D11E" - 'list2 (asymmetric (stream-of* 'sequence - (stream-of* 'string #"abc") - (stream-of* 'string #"def")) - '("abc" "def")) - 'list3 (asymmetric (stream-of* 'sequence '("a" 1) '("b" 2) '("c" 3)) - '(("a" 1) ("b" 2) ("c" 3))) - 'record1 (capture (discard)) - 'record2 (observe (speak (discard) (capture (discard)))) - 'record3 (titled 101 "Blackwell" (date 1821 2 3) "Dr") - 'record4 (asymmetric (record 'discard '()) (discard)) - 'record5 (record 7 '(())) - 'record6 (asymmetric (record 'discard '(surprise)) - '#s(discard surprise)) - 'record7 (record "aString" '(3 4)) - 'record8 (record (discard) '(3 4)) - 'list7 (list 'abc '|...| 'def) - 'dict1 (hash 'a 1 - "b" #t - '(1 2 3) #"c" - (hash 'first-name "Elizabeth") (hash 'surname "Blackwell")) - 'rfc8259-example1 (hash "Image" - (hash "Width" 800 - "Height" 600 - "Title" "View from 15th Floor" - "Thumbnail" (hash "Url" "http://www.example.com/image/481989943" - "Height" 125 - "Width" 100) - "Animated" 'false - "IDs" (list 116 943 234 38793))) - 'rfc8259-example2 (list (hash - "precision" "zip" - "Latitude" 37.7668 - "Longitude" -122.3959 - "Address" "" - "City" "SAN FRANCISCO" - "State" "CA" - "Zip" "94107" - "Country" "US") - (hash - "precision" "zip" - "Latitude" 37.371991 - "Longitude" -122.026020 - "Address" "" - "City" "SUNNYVALE" - "State" "CA" - "Zip" "94085" - "Country" "US")) - 'annotation1 (asymmetric (annotate 9 "abc") 9) - 'annotation2 (asymmetric (annotate (list '() (annotate '() "x")) "abc" "def") '(() ())) - 'annotation3 (asymmetric (annotate 5 (annotate 2 1) (annotate 4 3)) 5) - 'annotation4 (asymmetric (hash (annotate 'a 'ak) (annotate 1 'av) - (annotate 'b 'bk) (annotate 2 'bv)) - (hash 'a 1 'b 2)) - 'annotation5 (asymmetric (annotate `#s(R ,(annotate 'f 'af)) 'ar) `#s(R f)) - 'annotation6 (asymmetric (record (annotate 'R 'ar) (list (annotate 'f 'af))) `#s(R f)) - 'annotation7 (asymmetric (annotate '() 'a 'b 'c) '()) - )) - - (define (run-test-case variety t-name loc binary-form annotated-text-form) - (define text-form (strip-annotations annotated-text-form)) - (define-values (forward back can-execute-nondet-with-canonicalization?) - (match (hash-ref samples-txt-expected t-name text-form) - [(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations - [v (values v v #t)])) - (check-equal? text-form back loc) ;; expectation 1 - (check-equal? (d-strip (encode text-form)) back loc) ;; expectation 2 - (check-equal? (d-strip (encode forward)) back loc) ;; expectation 3 - (check-equal? (d-strip binary-form) back loc) ;; expectation 4 - (check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5 - (check-equal? (d (encode annotated-text-form)) annotated-text-form loc) ;; expectation 6 - (check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7 - (check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8 - (check-equal? (string->preserve-syntax (preserve->string annotated-text-form)) ;; similar to 8 - annotated-text-form - loc) - (when (and (not (memq variety '(decode))) - (or (not (memq variety '(nondeterministic))) - (and can-execute-nondet-with-canonicalization?))) - ;; expectations 9 and 10 - (parameterize ((canonicalize-preserves? (if (memq variety '(nondeterministic)) #t #f))) - (check-equal? (encode forward) binary-form loc))) - (unless (memq variety '(decode nondeterministic streaming)) - ;; expectation 11 - (check-equal? (encode annotated-text-form) binary-form loc))) - - (define-runtime-path tests-path "../../../../tests") - (let* ((path (build-path tests-path "samples.txt")) - (testfile (call-with-input-file path - (lambda (p) - (port-count-lines! p) - (read-preserve-syntax p #:source path))))) - (match-define (peel-annotations `#s(TestCases ,tests)) testfile) - (for [((t-name* t*) (in-hash (annotated-item tests)))] - (define t-name (strip-annotations t-name*)) - (define loc (format "Test case '~a' (~a)" t-name (source-location->string (annotated-srcloc t*)))) - (define (fail-test fmt . args) - (fail (format "~a: ~a" loc (apply format fmt args)))) - (displayln loc) - (match (peel-annotations t*) - [`#s(Test ,(strip-annotations binary-form) ,annotated-text-form) - (run-test-case 'normal t-name loc binary-form annotated-text-form)] - [`#s(NondeterministicTest ,(strip-annotations binary-form) ,annotated-text-form) - (run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)] - [`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form) - (run-test-case 'streaming t-name loc binary-form annotated-text-form)] - [`#s(DecodeTest ,(strip-annotations binary-form) ,annotated-text-form) - (run-test-case 'decode t-name loc binary-form annotated-text-form)] - [`#s(ParseError ,(strip-annotations str)) - (with-handlers [(exn:fail:read:eof? - (lambda (e) (fail-test "Unexpected EOF: ~e" e))) - (exn:fail:read? - (lambda (e) 'ok)) - ((lambda (e) #t) - (lambda (e) (fail-test "Unexpected exception: ~e" e)))] - (string->preserve str) - (fail-test "Unexpected success"))] - [(or `#s(ParseShort ,(strip-annotations str)) - `#s(ParseEOF ,(strip-annotations str))) - (with-handlers [(exn:fail:read:eof? (lambda (e) 'ok)) - ((lambda (e) #t) - (lambda (e) (fail-test "Unexpected exception: ~e" e)))] - (string->preserve str) - (fail-test "Unexpected success"))] - [(or `#s(DecodeShort ,(strip-annotations bs)) - `#s(DecodeEOF ,(strip-annotations bs))) - (check-eq? (d bs) 'short loc)] - [`#s(DecodeError ,(strip-annotations bs)) - (check-true (void? (d bs #:allow-invalid-prefix? #t)) loc)] - [_ - (write-preserve t* #:indent #f) - (newline)])) - ) - ) + #:read-annotations? [read-annotations? read-syntax?] + #:source [source (object-name in-port)]) + (define b (peek-byte in-port)) + (cond [(eof-object? b) b] + [(<= #x80 b #xBF) + (read-preserve/binary in-port + #:read-syntax? read-syntax? + #:read-annotations? read-annotations?)] + [else + (read-preserve/text in-port + #:read-syntax? read-syntax? + #:read-annotations? read-annotations? + #:source source)])) diff --git a/implementations/racket/preserves/preserves/order.rkt b/implementations/racket/preserves/preserves/order.rkt new file mode 100644 index 0000000..fd1b19d --- /dev/null +++ b/implementations/racket/preserves/preserves/order.rkt @@ -0,0 +1,98 @@ +#lang racket/base + +(provide preserve-order + preserve ta tb) '>] + [else (match ta ;; == tb + [7 (chain-order + (preserve-order (record-label a) (record-label b)) + (preserve-order (record-fields a)) (preserve-order (record-fields b)))] + [8 (match* (a b) + [('() '()) '=] + [('() _) '<] + [(_ '()) '>] + [((cons a0 a1) (cons b0 b1)) + (chain-order (preserve-order a0 b0) (preserve-order a1 b1))])] + [9 (preserve-order (sorted-set-elements a) (sorted-set-elements b))] + [10 (preserve-order (sorted-dict-keys a) (sorted-dict-keys b))] + [_ (datum-order a b)])])))) + +(define preservelist v) preservelist v) preservepreserve) + +(require racket/match) +(require "record.rkt") +(require "float.rkt") +(require "annotation.rkt") +(require "varint.rkt") +(require racket/set) +(require (only-in racket/port call-with-input-bytes)) + +(define (default-on-short) (error 'read-preserve/binary "Short Preserves binary")) +(define (default-on-fail message . args) (error 'read-preserve/binary (apply format message args))) + +(define (bytes->preserve bs + #:read-syntax? [read-syntax? #f] + #:read-annotations? [read-annotations? read-syntax?] + #:on-short [on-short default-on-short] + [on-fail default-on-fail]) + (call-with-input-bytes + bs + (lambda (p) + (match (read-preserve/binary p + #:read-syntax? read-syntax? + #:read-annotations? read-annotations? + #:on-short on-short + on-fail) + [(? eof-object?) (on-short)] + [v v])))) + +(define ((between lo hi) v) (<= lo v hi)) + +(define (read-preserve/binary [in-port (current-input-port)] + #:read-syntax? [read-syntax? #f] + #:read-annotations? [read-annotations? read-syntax?] + #:on-short [on-short default-on-short] + [on-fail default-on-fail]) + (let/ec return + + (define (next) (wrap (pos) (next* (next-byte)))) + + (define (next* lead-byte) + (match (next** lead-byte) + ['#:end (return (on-fail "Unexpected sequence end marker"))] + [v v])) + + (define pos + (if read-syntax? + (lambda () + (define-values (_line _column position) (port-next-location in-port)) + position) + (lambda () #f))) + + (define wrap + (if read-syntax? + (lambda (pos0 v) + (if (annotated? v) + v + (annotated '() (srcloc #f #f #f pos0 (- (pos) pos0)) v))) + (lambda (pos0 v) v))) + + (define (next** lead-byte) + (match lead-byte + [#x80 #f] + [#x81 #t] + [#x82 (float (floating-point-bytes->real (next-bytes 4) #t 0 4))] + [#x83 (floating-point-bytes->real (next-bytes 8) #t 0 8)] + [#x84 '#:end] + [#x85 (let ((a (next))) + (if read-annotations? + (annotate (next) a) + (next)))] + [(? (between #x90 #x9C) v) (- v #x90)] + [(? (between #x9D #x9F) v) (- v #xA0)] + [(? (between #xA0 #xAF) v) (next-integer (- v #xA0 -1))] + [#xB0 (next-integer (next-varint))] + [#xB1 (bytes->string/utf-8 (next-bytes (next-varint)))] + [#xB2 (next-bytes (next-varint))] + [#xB3 (string->symbol (bytes->string/utf-8 (next-bytes (next-varint))))] + [#xB4 (apply (lambda (label . fields) (record label fields)) (next-items))] + [#xB5 (next-items)] + [#xB6 (list->set (next-items))] + [#xB7 (build-dictionary (next-items))] + [_ (return (on-fail "Invalid Preserves binary tag: ~v" lead-byte))])) + + (define (eof-guard v) + (if (eof-object? v) + (return (on-short)) + v)) + + (define (next-byte) (eof-guard (read-byte in-port))) + + (define (next-bytes n) + (define bs (eof-guard (read-bytes n in-port))) + (if (< (bytes-length bs) n) (return (on-short)) bs)) + + (define (next-varint) (eof-guard (read-varint in-port))) + + (define (next-integer n) + (when (zero? n) (return (on-fail "Zero-length integer not permitted"))) + (define acc0 (next-byte)) + (define acc (if (< acc0 128) acc0 (- acc0 256))) + (for/fold [(acc acc)] [(n (in-range (- n 1)))] (+ (* acc 256) (next-byte)))) + + (define (next-items) + (define pos0 (pos)) + (match (next** (next-byte)) + ['#:end '()] + [v (cons (wrap pos0 v) (next-items))])) + + (define (build-dictionary items) + (when (not (even? (length items))) (return (on-fail "Odd number of items in dictionary"))) + (apply hash items)) + + (let ((pos0 (pos))) + (match (read-byte in-port) + [(? eof-object?) eof] + [lead-byte (wrap pos0 (next* lead-byte))])))) diff --git a/implementations/racket/preserves/preserves/read-text.rkt b/implementations/racket/preserves/preserves/read-text.rkt new file mode 100644 index 0000000..4b86219 --- /dev/null +++ b/implementations/racket/preserves/preserves/read-text.rkt @@ -0,0 +1,346 @@ +#lang racket/base + +(provide read-preserve/text + string->preserve) + +(require racket/match) +(require racket/set) +(require "annotation.rkt") +(require "read-binary.rkt") +(require "record.rkt") +(require "float.rkt") +(require syntax/readerr) +(require (only-in file/sha1 hex-string->bytes)) +(require (only-in net/base64 base64-decode)) + +(define PIPE #\|) + +(define (parse-error* #:raise-proc [raise-proc raise-read-error] i source fmt . args) + (define-values [line column pos] (port-next-location i)) + (raise-proc (format "read-preserve: ~a" (apply format fmt args)) + source + line + column + pos + #f)) + +(define (string->preserve s + #:read-syntax? [read-syntax? #f] + #:read-annotations? [read-annotations? read-syntax?] + #:source [source ""]) + (define p (open-input-string s)) + (when read-syntax? (port-count-lines! p)) + (define v (read-preserve/text p + #:read-syntax? read-syntax? + #:read-annotations? read-annotations? + #:source source)) + (when (eof-object? v) + (parse-error* #:raise-proc raise-read-eof-error p source "Unexpected end of input")) + (skip-whitespace* p) + (when (not (eof-object? (peek-char p))) + (parse-error* p source "Unexpected text following preserve")) + v) + +(define (skip-whitespace* i) + (regexp-match? #px#"^(\\s|,)*" i)) ;; side effect: consumes matched portion of input + +(define-match-expander px + (syntax-rules () + [(_ re pat) (app (lambda (v) (regexp-try-match re v)) pat)])) + +(define (read-preserve/text [in-port (current-input-port)] + #:read-syntax? [read-syntax? #f] + #:read-annotations? [read-annotations? read-syntax?] + #:source [source (object-name in-port)]) + + ;;--------------------------------------------------------------------------- + ;; Core of parser + + (define (next) (wrap (pos) (next*))) + + (define (next*) + (skip-whitespace) + (match (next-char) + [#\- (read-intpart (list #\-) (next-char))] + [(and c (or #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) (read-intpart '() c)] + [#\" (read-string #\")] + [(== PIPE) (string->symbol (read-string PIPE))] + + [#\; (annotate-next-with (read-comment-line))] + [#\@ (annotate-next-with (next))] + + [#\: (parse-error "Unexpected key/value separator between items")] + + [#\# (match (next-char) + [#\f #f] + [#\t #t] + [#\{ (sequence-fold (set) set-add* values #\})] + [#\" (read-literal-binary)] + [#\x (if (eqv? (next-char) #\") + (read-hex-binary '()) + (parse-error "Expected open-quote at start of hex ByteString"))] + [#\[ (read-base64-binary '())] + [#\= (define bs (read-preserve/text in-port #:read-syntax? #t #:source source)) + (when (not (bytes? (annotated-item bs))) + (parse-error "ByteString must follow #=")) + (when (not (null? (annotated-annotations bs))) + (parse-error "Annotations not permitted after #=")) + (bytes->preserve + (annotated-item bs) + (lambda (message . args) + (apply parse-error (string-append "Embedded binary value: " message) args)) + #:read-syntax? read-syntax? + #:read-annotations? read-annotations? + #:on-short (lambda () (parse-error "Incomplete embedded binary value")))] + [c (parse-error "Invalid # syntax: ~v" c)])] + + [#\< (match (read-sequence #\>) + ['() (parse-error "Missing record label")] + [(cons label fields) (record label fields)])] + [#\[ (read-sequence #\])] + [#\{ (read-dictionary)] + + [#\> (parse-error "Unexpected >")] + [#\] (parse-error "Unexpected ]")] + [#\} (parse-error "Unexpected }")] + + [c (read-raw-symbol (list c))])) + + (define (set-add* s e) + (when (set-member? s e) (parse-error "Duplicate set element: ~v" e)) + (set-add s e)) + + (define (annotate-next-with a) + (if read-annotations? + (annotate (next) a) + (next))) + + ;;--------------------------------------------------------------------------- + ;; Basic I/O + + (define (parse-error fmt . args) + (apply parse-error* in-port source fmt args)) + + (define (eof-guard v) + (when (eof-object? v) + (parse-error* #:raise-proc raise-read-eof-error in-port source "Unexpected end of input")) + v) + + (define (next-char) (eof-guard (read-char in-port))) + + (define (skip-whitespace) (skip-whitespace* in-port)) + + ;;--------------------------------------------------------------------------- + ;; Source location tracking + + (define pos + (if read-syntax? + (lambda () + (define-values (line column position) (port-next-location in-port)) + (list line column position)) + (lambda () #f))) + + (define wrap + (if read-syntax? + (lambda (pos0 v) + (if (annotated? v) + v + (let () + (match-define (list line0 column0 position0) pos0) + (match-define (list line1 column1 position1) (pos)) + (define loc (and line0 column0 position0 position1 + (srcloc source line0 column0 position0 (- position1 position0)))) + (annotated '() loc v)))) + (lambda (pos0 v) v))) + + ;;--------------------------------------------------------------------------- + ;; Numbers + + (define (read-intpart acc-rev ch) + (match ch + [#\0 (read-fracexp (cons ch acc-rev))] + [_ (read-digit+ acc-rev read-fracexp ch)])) + + (define (read-digit* acc-rev k) + (match (peek-char in-port) + [(? char? (? char-numeric?)) (read-digit* (cons (read-char in-port) acc-rev) k)] + [_ (k acc-rev)])) + + (define (read-digit+ acc-rev k [ch (read-char in-port)]) + (match ch + [(? char? (? char-numeric?)) (read-digit* (cons ch acc-rev) k)] + [_ (parse-error "Incomplete number")])) + + (define (read-fracexp acc-rev) + (match (peek-char in-port) + [#\. (read-digit+ (cons (read-char in-port) acc-rev) read-exp)] + [_ (read-exp acc-rev)])) + + (define (read-exp acc-rev) + (match (peek-char in-port) + [(or #\e #\E) (read-sign-and-exp (cons (read-char in-port) acc-rev))] + [_ (finish-number acc-rev)])) + + (define (read-sign-and-exp acc-rev) + (match (peek-char in-port) + [(or #\+ #\-) (read-digit+ (cons (read-char in-port) acc-rev) finish-number)] + [_ (read-digit+ acc-rev finish-number)])) + + (define (finish-number acc-rev) + (define s (list->string (reverse acc-rev))) + (define n (string->number s 10)) + (when (not n) (parse-error "Invalid number: ~v" s)) + (if (flonum? n) + (match (peek-char in-port) + [(or #\f #\F) (read-char in-port) (float n)] + [_ n]) + n)) + + ;;--------------------------------------------------------------------------- + ;; String-like things + + (define (read-stringlike xform-item finish terminator-char hexescape-char hexescape-proc) + (let loop ((acc '())) + (match (next-char) + [(== terminator-char) (finish (reverse acc))] + [#\\ (match (next-char) + [(== hexescape-char) (loop (cons (hexescape-proc) acc))] + [(and c (or (== terminator-char) #\\ #\/)) (loop (cons (xform-item c) acc))] + [#\b (loop (cons (xform-item #\u08) acc))] + [#\f (loop (cons (xform-item #\u0C) acc))] + [#\n (loop (cons (xform-item #\u0A) acc))] + [#\r (loop (cons (xform-item #\u0D) acc))] + [#\t (loop (cons (xform-item #\u09) acc))] + [c (parse-error "Invalid escape code \\~a" c)])] + [c (loop (cons (xform-item c) acc))]))) + + (define (read-string terminator-char) + (read-stringlike values + list->string + terminator-char + #\u + (lambda () + (integer->char + (match in-port + [(px #px#"^[a-fA-F0-9]{4}" (list hexdigits)) + (define n1 (string->number (bytes->string/utf-8 hexdigits) 16)) + (if (<= #xd800 n1 #xdfff) ;; surrogate pair first half + (match in-port + [(px #px#"^\\\\u([a-fA-F0-9]{4})" (list _ hexdigits2)) + (define n2 (string->number (bytes->string/utf-8 hexdigits2) 16)) + (if (<= #xdc00 n2 #xdfff) + (+ (arithmetic-shift (- n1 #xd800) 10) + (- n2 #xdc00) + #x10000) + (parse-error "Bad second half of surrogate pair"))] + [_ (parse-error "Missing second half of surrogate pair")]) + n1)] + [_ (parse-error "Bad string \\u escape")]))))) + + (define (read-literal-binary) + (read-stringlike (lambda (c) + (define b (char->integer c)) + (when (>= b 256) + (parse-error "Invalid code point ~a (~v) in literal binary" b c)) + b) + list->bytes + #\" + #\x + (lambda () + (match in-port + [(px #px#"^[a-fA-F0-9]{2}" (list hexdigits)) + (string->number (bytes->string/utf-8 hexdigits) 16)] + [_ (parse-error "Bad binary \\x escape")])))) + + ;;--------------------------------------------------------------------------- + ;; Hex-encoded ByteStrings + + (define (hexdigit? ch) + (or (and (char>=? ch #\A) (char<=? ch #\F)) + (and (char>=? ch #\a) (char<=? ch #\f)) + (and (char>=? ch #\0) (char<=? ch #\9)))) + + (define (read-hex-binary acc) + (skip-whitespace) + (define ch (next-char)) + (cond [(eqv? ch #\") + (hex-string->bytes (list->string (reverse acc)))] + [(hexdigit? ch) + (define ch2 (next-char)) + (when (not (hexdigit? ch2)) + (parse-error "Hex-encoded binary digits must come in pairs")) + (read-hex-binary (cons ch2 (cons ch acc)))] + [else + (parse-error "Invalid hex character")])) + + ;;--------------------------------------------------------------------------- + ;; Base64-encoded ByteStrings + + (define (read-base64-binary acc) + (skip-whitespace) + (define ch (next-char)) + (cond [(eqv? ch #\]) + (base64-decode (string->bytes/latin-1 (list->string (reverse acc))))] + [(or (and (char>=? ch #\A) (char<=? ch #\Z)) + (and (char>=? ch #\a) (char<=? ch #\z)) + (and (char>=? ch #\0) (char<=? ch #\9)) + (memv ch '(#\+ #\/ #\- #\_ #\=))) + (read-base64-binary (cons ch acc))] + [else + (parse-error "Invalid base64 character")])) + + ;;--------------------------------------------------------------------------- + ;; Comments + + (define (read-comment-line) + (define pos0 (pos)) + (let loop ((acc '())) + (match (next-char) + [(or #\newline #\return) + (wrap pos0 (list->string (reverse acc)))] + [c (loop (cons c acc))]))) + + ;;--------------------------------------------------------------------------- + ;; Collections + + (define (sequence-fold acc accumulate-one finish terminator-char) + (let loop ((acc acc)) + (skip-whitespace) + (match (eof-guard (peek-char in-port)) + [(== terminator-char) (read-char in-port) (finish acc)] + [_ (loop (accumulate-one acc (next)))]))) + + (define (read-sequence terminator) + (sequence-fold '() (lambda (acc v) (cons v acc)) reverse terminator)) + + (define (read-dictionary) + (sequence-fold (hash) + (lambda (acc k) + (skip-whitespace) + (match (peek-char in-port) + [#\: (read-char in-port) + (when (hash-has-key? acc k) (parse-error "Duplicate key: ~v" k)) + (hash-set acc k (next))] + [_ (parse-error "Missing expected key/value separator")])) + values + #\})) + + ;;--------------------------------------------------------------------------- + ;; "Raw" symbols + + (define (read-raw-symbol acc) + (match (peek-char in-port) + [(or (? eof-object?) + (? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\> + #\" #\; #\, #\@ #\# #\: (== PIPE) + (? char-whitespace?)))) + (string->symbol (list->string (reverse acc)))] + [_ (read-raw-symbol (cons (read-char in-port) acc))])) + + ;;--------------------------------------------------------------------------- + ;; Main entry point to parser + + (skip-whitespace) + (match (peek-char in-port) + [(? eof-object?) eof] + [_ (next)])) diff --git a/implementations/racket/preserves/preserves/record.rkt b/implementations/racket/preserves/preserves/record.rkt index 1712d2b..e34d82f 100644 --- a/implementations/racket/preserves/preserves/record.rkt +++ b/implementations/racket/preserves/preserves/record.rkt @@ -20,8 +20,9 @@ [(record label fields) (values #t label fields)] [(? non-object-struct?) (define key (prefab-struct-key r)) - (when (not key) (error 'preserves "Cannot process non-prefab struct ~v" r)) - (values #t key (cdr (vector->list (struct->vector r))))] + (if key + (values #t key (cdr (vector->list (struct->vector r)))) + (values #f #f #f))] [_ (values #f #f #f)])) (define-match-expander record-expander diff --git a/implementations/racket/preserves/preserves/tests/test-main.rkt b/implementations/racket/preserves/preserves/tests/test-main.rkt new file mode 100644 index 0000000..75953ad --- /dev/null +++ b/implementations/racket/preserves/preserves/tests/test-main.rkt @@ -0,0 +1,201 @@ +#lang racket/base + +(require "../main.rkt") + +(require racket/match) +(require racket/set) + +(require rackunit) +(require racket/runtime-path) +(require syntax/srcloc) + +(define (d bs #:allow-invalid-prefix? [allow-invalid-prefix? #f]) + (for [(i (in-range 1 (- (bytes-length bs) 1)))] + (define result (bytes->preserve (subbytes bs 0 i) #:on-short (lambda () 'short) void)) + (when (and (not (eq? result 'short)) + (not (and allow-invalid-prefix? (void? result)))) + (error 'd "~a-byte prefix of ~v does not read as short; result: ~v" i bs result))) + (bytes->preserve bs + #:read-syntax? #t + #:on-short (lambda () 'short) + void)) + +(define (d-strip bs) (strip-annotations (d bs))) + +(struct discard () #:prefab) +(struct capture (detail) #:prefab) +(struct observe (specification) #:prefab) + +(struct speak (who what) #:prefab) + +(struct date (year month day) #:prefab) +(struct thing (id) #:prefab) +(struct person thing (name date-of-birth) #:prefab) +(struct titled person (title) #:prefab) + +(struct asymmetric (forward back)) + +(define samples-txt-expected + (hash 'record1 (capture (discard)) + 'record2 (observe (speak (discard) (capture (discard)))) + 'list4a '(1 2 3 4) + 'list5 '(-2 -1 0 1) + 'string3 "hello" + 'list6 `("hello" there #"world" () ,(set) #t #f) + 'bytes2 #"hello" + 'bytes3 #"ABC" + 'bytes4 #"ABC" + 'bytes5 #"AJN" + 'bytes7 #"corymb" + 'bytes8 #"corymb" + 'bytes9 #"Hi" + 'bytes10 #"Hi" + 'bytes11 #"Hi" + 'value1 #"corymb" + 'value2 #t + 'value3 #t + 'value4 #t + 'value5 #t + 'value6 (list 1 2 3) + 'list0 '() + 'dict0 (hash) + 'string0 "" + 'symbol0 '|| + 'set0 (set) + 'set1 (set 1 2 3) + 'set1a (set 1 2 3) + 'string4 "abc\u6c34\u6C34\\/\"\b\f\n\r\txyz" + 'bytes13 #"abc\x6c\x34\xf0\\/\"\b\f\n\r\txyz" + 'string5 "\U0001D11E" + 'record1 (capture (discard)) + 'record2 (observe (speak (discard) (capture (discard)))) + 'record3 (titled 101 "Blackwell" (date 1821 2 3) "Dr") + 'record4 (asymmetric (record 'discard '()) (discard)) + 'record5 (record 7 '(())) + 'record6 (asymmetric (record 'discard '(surprise)) + '#s(discard surprise)) + 'record7 (record "aString" '(3 4)) + 'record8 (record (discard) '(3 4)) + 'list7 (list 'abc '|...| 'def) + 'dict1 (hash 'a 1 + "b" #t + '(1 2 3) #"c" + (hash 'first-name "Elizabeth") (hash 'surname "Blackwell")) + 'rfc8259-example1 (hash "Image" + (hash "Width" 800 + "Height" 600 + "Title" "View from 15th Floor" + "Thumbnail" (hash "Url" "http://www.example.com/image/481989943" + "Height" 125 + "Width" 100) + "Animated" 'false + "IDs" (list 116 943 234 38793))) + 'rfc8259-example2 (list (hash + "precision" "zip" + "Latitude" 37.7668 + "Longitude" -122.3959 + "Address" "" + "City" "SAN FRANCISCO" + "State" "CA" + "Zip" "94107" + "Country" "US") + (hash + "precision" "zip" + "Latitude" 37.371991 + "Longitude" -122.026020 + "Address" "" + "City" "SUNNYVALE" + "State" "CA" + "Zip" "94085" + "Country" "US")) + 'annotation1 (asymmetric (annotate 9 "abc") 9) + 'annotation2 (asymmetric (annotate (list '() (annotate '() "x")) "abc" "def") '(() ())) + 'annotation3 (asymmetric (annotate 5 (annotate 2 1) (annotate 4 3)) 5) + 'annotation4 (asymmetric (hash (annotate 'a 'ak) (annotate 1 'av) + (annotate 'b 'bk) (annotate 2 'bv)) + (hash 'a 1 'b 2)) + 'annotation5 (asymmetric (annotate `#s(R ,(annotate 'f 'af)) 'ar) `#s(R f)) + 'annotation6 (asymmetric (record (annotate 'R 'ar) (list (annotate 'f 'af))) `#s(R f)) + 'annotation7 (asymmetric (annotate '() 'a 'b 'c) '()) + )) + +(define (run-test-case variety t-name loc binary-form annotated-text-form) + (define text-form (strip-annotations annotated-text-form)) + (define-values (forward back can-execute-nondet-with-canonicalization?) + (match (hash-ref samples-txt-expected t-name text-form) + [(asymmetric f b) (values f b #f)] ;; #f because e.g. annotation4 includes annotations + [v (values v v #t)])) + (check-equal? text-form back loc) ;; expectation 1 + (check-equal? (d-strip (preserve->bytes text-form)) back loc) ;; expectation 2 + (check-equal? (d-strip (preserve->bytes forward)) back loc) ;; expectation 3 + (check-equal? (d-strip binary-form) back loc) ;; expectation 4 + (check-equal? (d binary-form) annotated-text-form loc) ;; expectation 5 + (check-equal? (d (preserve->bytes annotated-text-form)) annotated-text-form loc) ;; expectation 6 + (check-equal? (string->preserve (preserve->string text-form)) back loc) ;; expectation 7 + (check-equal? (string->preserve (preserve->string forward)) back loc) ;; expectation 8 + ;; similar to 8: + (check-equal? (string->preserve (preserve->string annotated-text-form) #:read-syntax? #t) + annotated-text-form + loc) + (when (and (not (memq variety '(decode))) + (or (not (memq variety '(nondeterministic))) + (and can-execute-nondet-with-canonicalization?))) + ;; expectations 9 and 10 + (check-equal? (preserve->bytes forward + #:canonicalizing? #t + #:write-annotations? #t) + binary-form + loc)) + (unless (memq variety '(decode nondeterministic)) + ;; expectation 11 + (check-equal? (preserve->bytes annotated-text-form #:write-annotations? #t) + binary-form + loc))) + +(define-runtime-path tests-path "../../../../../tests") +(let* ((path (build-path tests-path "samples.txt")) + (testfile (call-with-input-file path + (lambda (p) + (port-count-lines! p) + (read-preserve p #:read-syntax? #t #:source path))))) + (match-define (peel-annotations `#s(TestCases ,tests)) testfile) + (for [((t-name* t*) (in-hash (annotated-item tests)))] + (define t-name (strip-annotations t-name*)) + (define loc (format "Test case '~a' (~a)" t-name (source-location->string (annotated-srcloc t*)))) + (define (fail-test fmt . args) + (fail (format "~a: ~a" loc (apply format fmt args)))) + (displayln loc) + (match (peel-annotations t*) + [`#s(Test ,(strip-annotations binary-form) ,annotated-text-form) + (run-test-case 'normal t-name loc binary-form annotated-text-form)] + [`#s(NondeterministicTest ,(strip-annotations binary-form) ,annotated-text-form) + (run-test-case 'nondeterministic t-name loc binary-form annotated-text-form)] + [`#s(StreamingTest ,(strip-annotations binary-form) ,annotated-text-form) + (run-test-case 'streaming t-name loc binary-form annotated-text-form)] + [`#s(DecodeTest ,(strip-annotations binary-form) ,annotated-text-form) + (run-test-case 'decode t-name loc binary-form annotated-text-form)] + [`#s(ParseError ,(strip-annotations str)) + (with-handlers [(exn:fail:read:eof? + (lambda (e) (fail-test "Unexpected EOF: ~e" e))) + (exn:fail:read? + (lambda (e) 'ok)) + ((lambda (e) #t) + (lambda (e) (fail-test "Unexpected exception: ~e" e)))] + (string->preserve str) + (fail-test "Unexpected success"))] + [(or `#s(ParseShort ,(strip-annotations str)) + `#s(ParseEOF ,(strip-annotations str))) + (with-handlers [(exn:fail:read:eof? (lambda (e) 'ok)) + ((lambda (e) #t) + (lambda (e) (fail-test "Unexpected exception: ~e" e)))] + (string->preserve str) + (fail-test "Unexpected success"))] + [(or `#s(DecodeShort ,(strip-annotations bs)) + `#s(DecodeEOF ,(strip-annotations bs))) + (check-eq? (d bs) 'short loc)] + [`#s(DecodeError ,(strip-annotations bs)) + (check-true (void? (d bs #:allow-invalid-prefix? #t)) loc)] + [_ + (write-preserve/text t* #:indent #f) + (newline)])) + ) diff --git a/implementations/racket/preserves/preserves/tool.rkt b/implementations/racket/preserves/preserves/tool.rkt index 42d5893..e845001 100644 --- a/implementations/racket/preserves/preserves/tool.rkt +++ b/implementations/racket/preserves/preserves/tool.rkt @@ -6,7 +6,7 @@ (module+ main (require racket/cmdline) - (define input-format 'text) + (define input-format 'any) (define output-format 'binary) (define indent? #t) (define annotations? #t) @@ -17,6 +17,8 @@ ["--btoa" "Binary to text" (begin (set! input-format 'binary) (set! output-format 'text))] + [("--ia" "--input-any") "Autodetect input mode (default)" + (set! input-format 'any)] [("--ib" "--input-binary") "Set binary input mode" (set! input-format 'binary)] [("--it" "--input-text") "Set text input mode" @@ -25,9 +27,11 @@ (set! output-format 'binary)] [("--ot" "--output-text") "Set text output mode" (set! output-format 'text)] - ["--indent" "Enable indent for text output" + ["--indent" "Enable indent and set text output mode" + (set! output-format 'text) (set! indent? #t)] - ["--no-indent" "Disable indent for text output" + ["--no-indent" "Disable indent and set text output mode" + (set! output-format 'text) (set! indent? #f)] ["--annotations" "Output annotations" (set! annotations? #t)] @@ -36,9 +40,10 @@ (define v ((if annotations? values strip-annotations) (match input-format - ['text (read-preserve-syntax #:source "")] - ['binary (decode-syntax (port->bytes))]))) + ['any (read-preserve #:read-syntax? #t #:source "")] + ['text (read-preserve/text #:read-syntax? #t #:source "")] + ['binary (read-preserve/binary #:read-syntax? #t)]))) (void (match output-format - ['text (write-preserve v #:indent indent?)] - ['binary (write-bytes (encode v))])) + ['text (write-preserve/text v #:indent indent?)] + ['binary (write-preserve/binary v #:write-annotations? #t)])) (flush-output)) diff --git a/implementations/racket/preserves/preserves/varint.rkt b/implementations/racket/preserves/preserves/varint.rkt index 6d1403a..2e84492 100644 --- a/implementations/racket/preserves/preserves/varint.rkt +++ b/implementations/racket/preserves/preserves/varint.rkt @@ -8,42 +8,53 @@ ;; two's complement representation of the number in groups of 7 bits, ;; least significant group first." -(provide encode-varint +(provide write-varint + read-varint + encode-varint decode-varint) -(require bitsyntax) +(require racket/port) + +(define (write-varint v out-port) + (if (< v 128) + (write-byte v out-port) + (begin (write-byte (+ 128 (modulo v 128)) out-port) + (write-varint (quotient v 128) out-port)))) + +(define (read-varint in-port) + (let/ec return + (let loop () + (define b (read-byte in-port)) + (cond [(eof-object? b) (return b)] + [(< b 128) b] + [else (+ (* (loop) 128) (- b 128))])))) (define (encode-varint v) - (if (< v 128) - (bytes v) - (bit-string ((+ (modulo v 128) 128) :: bits 8) - ((encode-varint (quotient v 128)) :: binary)))) + (call-with-output-bytes (lambda (p) (write-varint v p)))) (define (decode-varint bs ks kf) - (bit-string-case bs - #:on-short (lambda (fail) (kf #t)) - ([ (= 1 :: bits 1) (v :: bits 7) (rest :: binary) ] - (decode-varint rest (lambda (acc tail) (ks (+ (* acc 128) v) tail)) kf)) - ([ (= 0 :: bits 1) (v :: bits 7) (rest :: binary) ] - (ks v rest)) - (else - (kf)))) + ((call-with-input-bytes bs (lambda (p) + (define v (read-varint p)) + (cond [(eof-object? v) (lambda () (kf #t))] + [else (define rest (port->bytes p)) + (lambda () (ks v rest))]))))) (module+ test (require rackunit) - (check-equal? (bit-string->bytes (encode-varint 0)) (bytes 0)) - (check-equal? (bit-string->bytes (encode-varint 1)) (bytes 1)) - (check-equal? (bit-string->bytes (encode-varint 127)) (bytes 127)) - (check-equal? (bit-string->bytes (encode-varint 128)) (bytes 128 1)) - (check-equal? (bit-string->bytes (encode-varint 255)) (bytes 255 1)) - (check-equal? (bit-string->bytes (encode-varint 256)) (bytes 128 2)) - (check-equal? (bit-string->bytes (encode-varint 300)) (bytes #b10101100 #b00000010)) - (check-equal? (bit-string->bytes (encode-varint 1000000000)) (bytes 128 148 235 220 3)) + (check-equal? (encode-varint 0) (bytes 0)) + (check-equal? (encode-varint 1) (bytes 1)) + (check-equal? (encode-varint 127) (bytes 127)) + (check-equal? (encode-varint 128) (bytes 128 1)) + (check-equal? (encode-varint 255) (bytes 255 1)) + (check-equal? (encode-varint 256) (bytes 128 2)) + (check-equal? (encode-varint 300) (bytes #b10101100 #b00000010)) + (check-equal? (encode-varint 1000000000) (bytes 128 148 235 220 3)) - (define (ks* v rest) (list v (bit-string->bytes rest))) + (define (ks* v rest) (list v rest)) (define (kf* [short? #f]) (if short? 'short (void))) + (check-equal? (decode-varint (bytes) ks* kf*) 'short) (check-equal? (decode-varint (bytes 0) ks* kf*) (list 0 (bytes))) (check-equal? (decode-varint (bytes 0 99) ks* kf*) (list 0 (bytes 99))) (check-equal? (decode-varint (bytes 1) ks* kf*) (list 1 (bytes))) diff --git a/implementations/racket/preserves/preserves/write-binary.rkt b/implementations/racket/preserves/preserves/write-binary.rkt new file mode 100644 index 0000000..f4a1f80 --- /dev/null +++ b/implementations/racket/preserves/preserves/write-binary.rkt @@ -0,0 +1,120 @@ +#lang racket/base + +(provide write-preserve/binary + preserve->bytes) + +(require racket/match) +(require (only-in racket/port call-with-output-bytes)) +(require "record.rkt") +(require "float.rkt") +(require "annotation.rkt") +(require "varint.rkt") +(require racket/set) +(require racket/dict) +(require (only-in racket/list flatten)) + +(define (preserve->bytes v + #:canonicalizing? [canonicalizing? #t] + #:write-annotations? [write-annotations? (not canonicalizing?)]) + (call-with-output-bytes + (lambda (p) (write-preserve/binary v p + #:canonicalizing? canonicalizing? + #:write-annotations? write-annotations?)))) + +(define (write-preserve/binary v [out-port (current-output-port)] + #:canonicalizing? [canonicalizing? #t] + #:write-annotations? [write-annotations? (not canonicalizing?)]) + + (define (output-byte b) + (write-byte b out-port)) + + (define (output-bytes bs) + (write-bytes bs out-port)) + + (define (output-varint v) + (write-varint v out-port)) + + (define-syntax-rule (with-seq tag body ...) + (begin (output-byte (+ tag #xB0)) + body ... + (output-byte #x84))) + + (define (count-bytes tag bs) + (output-byte (+ tag #xB0)) + (output-varint (bytes-length bs)) + (output-bytes bs)) + + (define (prepare v) (preserve->bytes v #:canonicalizing? #t)) + + (define (output-all vs) + (for [(v (in-list vs))] (output v))) + + (define output-set + (match* [canonicalizing? write-annotations?] + [[#t #f] (lambda (v) + (for-each output-bytes + (sort (for/list [(e (in-set v))] (prepare e)) bytesfloating-point-bytes v 4 #t))] + [(? flonum?) + (output-byte #x83) + (output-bytes (real->floating-point-bytes v 8 #t))] + + [(annotated as _ v) + (when write-annotations? + (for [(a (in-list as))] + (output-byte #x85) + (output a))) + (output v)] + + [(? integer?) + (cond [(<= -3 v -1) (output-byte (+ v #xA0))] + [(<= 0 v 12) (output-byte (+ v #x90))] + [else (define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit + (define byte-count (quotient (+ raw-bit-count 7) 8)) + (if (<= byte-count 16) + (output-byte (+ byte-count #xA0 -1)) + (begin (output-byte #xB0) + (output-varint byte-count))) + (for [(shift (in-range (* byte-count 8) 0 -8))] + (output-byte (bitwise-bit-field v (- shift 8) shift)))])] + + [(? string?) (count-bytes 1 (string->bytes/utf-8 v))] + [(? bytes?) (count-bytes 2 v)] + [(? symbol?) (count-bytes 3 (string->bytes/utf-8 (symbol->string v)))] + + [(record label fields) (with-seq 4 (output label) (output-all fields))] + [(? list?) (with-seq 5 (output-all v))] + [(? set?) (with-seq 6 (output-set v))] + [(? dict?) (with-seq 7 (output-dict v))] + + [_ (error 'write-preserve/binary "Invalid value: ~v" v)])) + + (output v)) diff --git a/implementations/racket/preserves/preserves/write-text.rkt b/implementations/racket/preserves/preserves/write-text.rkt new file mode 100644 index 0000000..6e0a999 --- /dev/null +++ b/implementations/racket/preserves/preserves/write-text.rkt @@ -0,0 +1,178 @@ +#lang racket/base + +(provide write-preserve/text + preserve->string + + (struct-out binary-display-heuristics) + current-binary-display-heuristics) + +(require racket/match) +(require racket/format) +(require net/base64) +(require "annotation.rkt") +(require "float.rkt") +(require "record.rkt") +(require racket/dict) +(require racket/set) +(require (only-in racket/port with-output-to-string)) + +(define PIPE #\|) + +(struct binary-display-heuristics (printable-ascii-proportion max-length) #:transparent) + +(define current-binary-display-heuristics (make-parameter (binary-display-heuristics 3/4 1024))) + +(define (write-preserve/text v0 [o (current-output-port)] + #:indent [indent-amount0 #f] + #:write-annotations? [write-annotations? #t]) + (define indent-amount (match indent-amount0 + [#f 0] + [#t 2] ;; a default + [other other])) + (define indenting? (and indent-amount0 #t)) + + (define-syntax-rule (! fmt arg ...) (fprintf o fmt arg ...)) + + (define (!indent distance) + (when indenting? + (! "\n~a" (make-string distance #\space)))) + + (define (!indent* distance) + (if indenting? + (!indent distance) + (! " "))) + + (define (write-stringlike-char c [default (lambda (c) (! "~a" c))]) + (match c + [#\\ (! "\\\\")] + [#\u08 (! "\\b")] + [#\u0C (! "\\f")] + [#\u0A (! "\\n")] + [#\u0D (! "\\r")] + [#\u09 (! "\\t")] + [_ (default c)])) + + (define (write-sequence outer-distance opener comma closer item-writer vs) + (define inner-distance (+ outer-distance indent-amount)) + (! "~a" opener) + (match vs + ['() (void)] + [(list v0) + (item-writer outer-distance v0)] + [(cons v0 vs) + (!indent inner-distance) + (item-writer inner-distance v0) + (for [(v (in-list vs))] + (! "~a" comma) + (!indent* inner-distance) + (item-writer inner-distance v)) + (!indent outer-distance)]) + (! "~a" closer)) + + (define (write-record outer-distance label fields) + (! "<") + (write-value outer-distance label) + (for ([f (in-list fields)]) + (! " ") + (write-value outer-distance f)) + (! ">")) + + (define (write-key-value distance kv) + (match-define (cons k v) kv) + (write-value distance k) + (! ": ") + (write-value distance v)) + + (define (binunescaped? b) + (or (<= #x20 b #x21) + (<= #x23 b #x5b) + (<= #x5d b #x7e))) + + (define (write-binary-stringlike v) + (! "#\"") + (for [(b (in-bytes v))] + (match b + [#x22 (! "\\\"")] + [(? binunescaped?) (write-stringlike-char (integer->char b))] + [_ (write-stringlike-char (integer->char b) + (lambda (c) (! "\\x~a" (~a #:min-width 2 + #:align 'right + #:left-pad-string "0" + (number->string b 16)))))])) + (! "\"")) + + (define (write-binary-base64 outer-distance v) + ;; Racket's encoder breaks lines after 72 characters. + ;; That corresponds to 54 bytes of input binary. + (! "#[") + (if (and indenting? (> (bytes-length v) 54)) + (let* ((inner-distance (+ outer-distance indent-amount)) + (line-separator (bytes-append #"\n" (make-bytes inner-distance 32))) + (encoded (base64-encode v line-separator))) + (write-bytes line-separator o) + (write-bytes encoded o 0 (- (bytes-length encoded) indent-amount))) + (write-bytes (base64-encode v #"") o)) + (! "]")) + + (define (write-binary outer-distance v) + (match-define (binary-display-heuristics proportion maxlen) (current-binary-display-heuristics)) + (define vlen (bytes-length v)) + (if (>= vlen maxlen) + (write-binary-base64 outer-distance v) + (let* ((sample-length (min vlen maxlen)) + (printable-ascii-count (for/sum [(i (in-range 0 sample-length)) + (b (in-bytes v))] + (if (or (<= 32 b 126) (= b 9) (= b 10) (= b 13)) 1 0)))) + (if (or (zero? vlen) (>= (/ printable-ascii-count sample-length) proportion)) + (write-binary-stringlike v) + (write-binary-base64 outer-distance v))))) + + (define (write-value distance v) + (match v + [(annotated annotations _ item) + (when write-annotations? + (for [(a (in-list annotations))] + (! "@") + (write-value (+ distance 1) a) + (!indent* distance))) + (write-value distance item)] + [#f (! "#f")] + [#t (! "#t")] + [(float v) (! "~vf" v)] + [(? flonum?) (! "~v" v)] + [(? integer? x) (! "~v" v)] + [(? string?) + (! "\"") + (for [(c (in-string v))] + (match c + [#\" (! "\\\"")] + [_ (write-stringlike-char c)])) + (! "\"")] + [(? bytes?) (write-binary distance v)] + [(? symbol?) + (define s (symbol->string v)) + ;; FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. + (if (regexp-match #px"[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*" s) + (! "~a" s) + (begin (! "|") + (for [(c (in-string s))] + (match c + [(== PIPE) (! "\\|")] + [_ (write-stringlike-char c)])) + (! "|")))] + [(record label fields) (write-record distance label fields)] + [(? list?) (write-sequence distance "[" "," "]" write-value v)] + [(? set?) (write-sequence distance "#{" "," "}" write-value (set->list v))] + [(? dict?) (write-sequence distance "{" "," "}" write-key-value (dict->list v))] + + [_ (error 'write-preserve/text "Cannot encode value ~v" v)])) + + (write-value 0 v0)) + +(define (preserve->string v0 + #:indent [indent-amount #f] + #:write-annotations? [write-annotations? #t]) + (with-output-to-string + (lambda () (write-preserve/text v0 + #:indent indent-amount + #:write-annotations? write-annotations?)))) diff --git a/preserves.el b/preserves.el index 292da20..ae99f9c 100644 --- a/preserves.el +++ b/preserves.el @@ -33,9 +33,9 @@ "Syntax table in use in preserves-mode buffers.") ;; (modify-syntax-entry ?' "\"" preserves-mode-syntax-table) -(modify-syntax-entry ?\n "> b" preserves-mode-syntax-table) -(modify-syntax-entry ?\r "> b" preserves-mode-syntax-table) -(modify-syntax-entry ?/ "_ 12b" preserves-mode-syntax-table) +(modify-syntax-entry ?\n ">" preserves-mode-syntax-table) +(modify-syntax-entry ?\r ">" preserves-mode-syntax-table) +(modify-syntax-entry ?\; "<" preserves-mode-syntax-table) (modify-syntax-entry ?< "(>" preserves-mode-syntax-table) (modify-syntax-entry ?> ")<" preserves-mode-syntax-table) (mapcar #'(lambda (x) (modify-syntax-entry x "_" preserves-mode-syntax-table)) @@ -55,9 +55,9 @@ (make-local-variable 'comment-end) (make-local-variable 'comment-start-skip) (setq comment-use-syntax t) - (setq comment-start "//") + (setq comment-start ";") (setq comment-end "") - (setq comment-start-skip "// *") + (setq comment-start-skip "; *") (make-local-variable 'font-lock-defaults) (setq font-lock-defaults '(preserves-font-lock-keywords nil nil ())) (make-local-variable 'indent-line-function) diff --git a/preserves.md b/preserves.md index aa1b11b..feec74d 100644 --- a/preserves.md +++ b/preserves.md @@ -367,10 +367,10 @@ double quote mark. Finally, any `Value` may be represented by escaping from the textual syntax to the [compact binary syntax](#compact-binary-syntax) by prefixing a `ByteString` containing the binary representation of the -`Value` with `#`.[^rationale-switch-to-binary] +`Value` with `#=`.[^rationale-switch-to-binary] [^no-literal-binary-in-text] [^compact-value-annotations] - Compact = "#" ws ByteString + Compact = "#=" ws ByteString [^rationale-switch-to-binary]: **Rationale.** The textual syntax cannot express every `Value`: specifically, it cannot express the @@ -686,6 +686,12 @@ encodes to binary as follows: B7 B1 05 "Image" B7 + B1 03 "IDs" B5 + A0 74 + A1 03 AF + A1 00 EA + A2 00 97 89 + 84 B1 05 "Title" B1 14 "View from 15th Floor" B1 05 "Width" A1 03 20 B1 06 "Height" A1 02 58 @@ -693,12 +699,6 @@ encodes to binary as follows: B1 09 "Thumbnail" B7 B1 03 "Url" B1 26 "http://www.example.com/image/481989943" - B1 03 "IDs" B5 - A0 74 - A1 03 AF - A1 00 EA - A2 00 97 89 - 84 B1 05 "Width" A0 64 B1 06 "Height" A0 7D 84 diff --git a/tests/samples.bin b/tests/samples.bin index cb5fca3ec1444a7352d5094bfe32604a58ef0531..7fadda12b884f3d230a55ce809d01c1f3ca9ae23 100644 GIT binary patch literal 7902 zcmd^E%WoUU8RtV3%{cNSaf|{*l8N0IQ6WkuCE1qqa3onqR5)^B*>2Oig_pZSapC1I zvAYz-7=h(xdFU;29*z^+p?oaRro%^gY|r^Kdg!629D3*>i2Iv;47ntg*l-e{sIXe@ ze)E0beDl3$w$^HH-IJnj`$Z(;UsKR8y>Wb2O*wl38u;k#ya_v{bnyX=Um(pQIZlDtm%hUu@lc|AP-d zy7lqxPaUVZ>z9&g(WP@|diadkbi?kas_y}I>NF%$`p^@(D%LZAzaRKdc=(SeC+Fiy zX_|)lU2@(L=&J?8>{ZY3WsL!Ux4*1D{yQveZM8u%U0#b zQ_3eWp5w5i%Q~TwTyQ;Vf@stx>)ht#RbttuT6!t$3gZ*(5E=N(8)ij4r>8HmMY-N!_{A+?&%D+)T}@vd75c9(~2Lsdk*Y{ss^zrd=sDx9z z&obXd;j5ub^8q_FgvJYD!T3Y=Vn_&o7}JIg`w@FNgclMvjOy|ZgKn{(dDIsW`2IrO zM}L|ADnDjNhNFqK`G~jKb0GnOaX#!S^Ak2MOS++}vZQI%rC!Yr;C{+p3^5Cf?pna* zd54|qk&dUZagvu*%_0@mE)a=WRpjCgLV1*vmsS=giB8w4=?x-e?k0PpbBl-7K>C-k;L6;`o-;MxtA?? zEEhaybyM;quYVuQ@66?URl$CC{-I`iBrPBxo?kAOG%BjDS6Y4AbXzoTcDs8p7o*GfP z(|jVYnwFi`r6OgiB~5)>f;SYLPBUqhO%J-DA=g)^ra8`IGw?-xD7%bZBhpEdn;`N@ zqQExp;$WPyu5qr?YS=V;T$&6D=t)=!FpF6NzwsTQw_9o? zI$VL^4lho3o%XZOd{9>S@x6OKXj*{o-t|E<0Vr1gy8g8X%ydcb-0?wzG*%z0j{&Hm zl@h@dYddW&3I_^~qujzDINseKoDHTMZQuLsy)DPP`-8`V!DrhnQGl_HW&-hT(ng|U zn3~c?^xfEWzID)Nbv$6Tzuk;WIk_31oSbaN6`FSj06xiy8ha+tch@>ssU29?i`3|E zef+r^`^iv)_K^4{{T)bq8FiN5kpI=yXZ^4)_ z!;mpy1wQtR755g<9jmct7x?6)SjJWf8>DPnQP))4LSC^|QzR~LZMzR|9GXFn&dy99 z3Rno`?`n*qC#)88!EbG+ma0PJTw(;#~1Q*~u4l>Bf`vQmJ zDVdv8G8u9ifXYYoM6bRZEH=}Owi)9Scn2RBtC3@oqc)Bp*AU!bQ7(Fntv>upP zR>IBH?#*c?UB}Fb$nP&Yj^QXAE`hkydYY@p>D%Pi|?AkkZ5 zFQvlYODM`jwFsI`qe7Jl^ewVhc6$xkC>AB_g~mV_CKaQMO^#S{>fKXcMchB}fSElc zVV~5y-@MZ4y!E${BjN!_ZWizL$QyN-(FY5cjMX>m@E9K~f+W8E;5sgUAUi8;jDZ*~ zF(3?$fhaC9Agqmn2re-oVkEx(%>!H*@D4}Bq7Sy3Nm(lK#vD(IYRTBr@?L8gRFDgG zK%}IRb5C$uNRuThUF~7B0~gNy(TT+5!hK$D;qb;*ZH+3;(GoQ+L%+$34w|(sr;c_+ zG#-df1-;mI@>x_}7CUV^k3lK~5r*0wUbE0*plxBeMC%qxt$Pz(gz=u~a%t{D^Hefb zi*jB0{Ca?bSQvP$#gfu0&Mx#dli!kno9$1(x>GuZyk3bNrJjf0pCH(WREnGcWVK`Kf3k8p|k+1GL z_q;nFM*T9hu2juP)0N5v3olMKgO0^*TyVUHfS%4CKPJ>0-@g6;S9e6{a^!wwGs6G9 zv17Bd(?S}IJ$S(XyJN!IBln|`2>-{JZm4>xY@_DvT``cl@Jz55LNddi=X7v(*x;ys z_yYB(fo0r&CoSn~8cp-vNly#ocRjCk39*5AuD|gaPV2Wnux*Oh+v&{F<3eF0_hS$q z|2SjR49v_+rm*2Y6vo~^r~kc|N+@f89~OGWUM6@(le*hK#=w?Pq_AX-@h9WvkWMDv$uhb5Ma(i-is`0{D1UAMR6pKnM0bw6_Jj}A** z;Jp$%a7m>Vk~fVa$;{%2_l#!XOe*m!Rj~`(al%I5pz2z|-j4m6?V8tBoZ6zlO5`QY zLQ8d_fbR!7&WAC>MN?x(3btK3IW<+OR3?4#Ced+EfozRV&78>0otT@OnfBS&m4JKw z9ejuR-;#FF5U4K04e_678~XW;T4IU085B-7vZ&fsHnuvyLe89@Us+sUT|CDUtNd#t zi!IKxnuKl+GT~l`})E^j%?mtKF?BTQE)xj-+zDm zlJ@WCUtlSmuFI;0s>tGRt0gElnEdF8S%31>>cy3nw=T_}U3%1sKfjy*b1NR6_{N{k ReYx=Yz{Jk~>$>61{{_Eth|d53 literal 8642 zcmds7+fN+V8DDHxy{L|H<0e(BBqvTFre%TIU9Q04h9zN(ApW!%$#8-%+6Wn zvKI)&#>qq9T%0QEL*hhvX(LsMQme^BCHJvv|AfBvp$~n^Lm#7TzjK*kW&sfWZX(DPOzf6!xZJX9DXBD|br>=qgW|es3J6nseYYH|k zvip0dH!i@}24PR{zDXKYq6=Q$Z?i)qxnX5AUo$OY8{TeBvmER;e}Nr`!Wv>+#&$rO zV*m7|7n9pRES!=Xe-&s=5Ore?yB;Z(soTcr!@`MhrSFr5zbPrYE_K^!o(xwCnsVuZ zM7xqp8=U8)mO2LUiCeK8%kwS$AYzQx z_=EA^D)}zKKxlP8k>7ztM-fOJoN*ArdYQXaAF_#$YRtIk9P)hEs-8{Sythw1gZBNH z)U9K1v~yx_pZe6kB}l{i+TMa$($GoONA)ss3Fn<*?ws+!3oX;q1tE{Q^opdu0zPgG zP#3K*Qg8Uru&{h>f$v5zz{l5|LoHi8kH*zr9Xr&qbkMIk%>j~o zLx>oZ9NfYq>ZL#jQ zbdq{=2N7DGREr4!dBS~In+&PlNg|x~e!R!jClgYI+UZql3g^NJRgZxd z_qv$r{%haPssj%)r!-O+pz15rEOoXe5eOXHh8TiO0V$!3Ypgo6#AB1Fp6?~PD7~pK zdqK5oTWnhy`$!<|g=rK3ks}~A3#I^&yLyg$Y zzBcZnYtYM-Ld1iN-kMd7XEmfKEIX>XHqE>~#7Yj&uaFxn$hOu9TDcJQ5i4H*oVqZR zP*pIiLI1!+gl(29s4y_Xp2e_C6LKp{%Np_Pgg{k_B}{cz$qguZRsoPQJfMv9A%j#2 zG(?cpsDsM1&L}qldJjq&Bw{8eKxkyBnE|UpokTj6UW+wTUjI%rOv~QAuR8+6(*41k zys+a>KjpW|&4X(-9sXio>sZIiZ6%w-`ez%a<$8X`!4BZD_cu7i%e5k!-!He5An^qrv)$%;% zJi5(dKy$H9c4B?%wM~S_k=BRwab%!6%L~qGUJCV?dK!fK=cjk!6_e}Wr*d!TUUPFy z*6h$wBx|dfYC*T|XJUrFQMgg#xkfI-9UYsp1c3iaaIAD%$)?k}{7`h!5FNjwq}3;v z91%f~Pq>m9430xGOw~2wOJ#YxJAb~$QO^pQOi~7irQ$Qjb|kG%mwZ}$kJ^O_Zn zLW2R3_5#XeAObB&S8OyXW4I{0gykpbD_A|<6WidgF>0nu5LaX3ldB=gk$28d4l4Yk zZqNYwYq`TZKKZ5K6I}@TWXf!ZhJ~4o9(?XxT{y6EGdW4U@tjsBnr&m+8E{|W(HRTb zR!6bbAv{CM(Z)x77cxr3uJpso4p`G&Uw}_@-AvpF1%}h=6GhfMMfS*M3~SZ$J*aBF z<%Zdc``EQ-%A~QGhXR?+=eEKmFql?<9LCDw8GujF^Apkyaz}B49&m$zK`Db2pFhuT zmGE|ALgq8+pjzR2;X1dcG)v{S2Zr|g`};kEn2DRZ$Y65e+RZDt#B*;hAG?%`B)~NL z_OENc<=e!lRf+3SCkxRH`C2K&3h-OL0coiCttYa%g$wS(?~pvDO7_hgb;I&>n0i67 z*mYE&vr&EiTgRQhbhJgo|LpDmkt*9=uBne^hsH9bfmzFXXwP@2iEDwya*d2&Qf|by{ z##*jC7F_{(2WF!@|BzV|+L6;?DUyfMHcU!(ybE2JINyLOGCVYtlV#tSO0`<-?R&(B z%5?HV0Q0fVx=G#~$9FG318X)h!dr|;E;}+TGc^#oo?`FKdf9?{T7^y@Dx-2SY+~61 zPmJj>IKs)7DYTfr$>PNqjsg!TBZ=Dr_=8_#kj7tq$IdovB-`utJ2t z`5Gm@Mr$bwqjOm1F+Kt$Kg#A5zI6y7n58g9R|B6l-5}-8-XZ)-L$shU6R3|(SE_y! zPBEdO^Vle_S%yKJL^rqk9L6gc+S5H;pb6Z^AUZvtd$S6|EGbjlATA*I?A+ch%m8qv z=upHOK{z^OCmC?eN{j?Y+{JV~!t`~O8xtugi*ksoo{ghswpy)XD+RBbX;Td7@zI?e zo4lS%@ee-c4W+MzJ}2BI{^teb4}h!~#bisS7duk$l4GN^tS%Z{IPr*VJvhjEC2)vr zTiBKb0^pI#_Md2vr(%1IFYK=7GLG?PHf|Wit3un=m zga!=QEm~7~G)x_c7HM)1U2XJ%C-~@U5GS?#%$cjfu%64Gk*bHS{oa zBTeFl>m~(=6^FgCQGQ2HzMpK2+iv-kA^k%uoG6@-YCvK2Mxwz zBKkRB+aBXiZ?_JkOJ_G6NSmFVyKG=B!KJW;RKw5<#VPidV^BCO7hBu#l2A(bGy`!Q z!9T^^DD#oQP663F2DRKoyZDTpW}J6#rRN*B+V6!t5~`#;*`X-mi5$w4V>r7-M;s7s z55}|yT!Z73Ys+y?@9sQScIUP1?!2DsydA#_Si!@GbbwLClbwTv)QU!|=t3@#-ly-3 z+=5rUMnN@u_!_0%=nA`#&<;~Dys@`rC=&<${!U%TRrbfUNTmHwn66n2Q0{lQFl%vl zTjCW@atN*b)15li`2Pgl4B+@5uib~yLoAhCq@A?~AE~$K z7QF?p-MM(}UTdXWENEB(=S~a1`6}?YXJ=`KR`*Uk#6L2?`6c{ucdrdgoc2K{P-Ca} cN5Oc0Gi(i`RKMLSZnr|+z+aA}TaVD~Kb-}xMF0Q* diff --git a/tests/samples.txt b/tests/samples.txt index 082e79a..5a434e1 100644 --- a/tests/samples.txt +++ b/tests/samples.txt @@ -2,23 +2,22 @@ @ "In each test, let value = strip(annotatedValue),", " forward = value,", " back = value," - "except where test-case-specific values of `forward` and/or `back` are provided" - "by the executing harness (of particular importance for `StreamingTest`s)," - "and check the following numbered expectations according to the table above:" + "except where test-case-specific values of `forward` and/or `back`", + "are provided by the executing harness, and check the following" + "numbered expectations according to the table above:" - "Each `StreamingTest` will need to have an implementation-specific `forward`" - "supplied that encodes to the specific format C byte sequences in `binary`." - "Alternatively, implementations may choose to skip expectation 11 for" - "`StreamingTest`s, treating them like `DecodeTest`s." - "" "Implementations may vary in their treatment of the difference between expectations" "13/14 and 16/17, depending on how they wish to treat end-of-stream conditions." ]> - annotation2: - annotation3: - annotation4: + annotation2: + annotation3: + annotation4: - annotation5: > - annotation6: > + annotation5: > + annotation6: > annotation7: - @"Stop reading symbols at @ -- this test has three separate annotations" - - bytes1: - bytes2: - bytes2a: - bytes3: - bytes4: - bytes5: - bytes6: @"Bytes must be 2-digits entire" - bytes7: - bytes8: - bytes9: - bytes10: - bytes11: + ;Stop reading symbols at @ -- this test has three separate annotations + + bytes2: + bytes2a: + bytes3: + bytes4: + bytes5: + bytes6: @"Bytes must be 2-digits entire" + bytes7: + bytes8: + bytes9: + bytes10: + bytes11: bytes12: @"Bytes syntax only supports \\x, not \\u" - bytes13: + bytes13: - dict0: - dict1: + dict0: + dict1: dict2: @"Missing close brace" dict2a: @"Missing close brace" dict3: @"Duplicate key" dict4: @"Unexpected close brace" - dict5: @"Missing value" - double1: - double2: - float1: - int-257: - int-256: - int-255: - int-254: - int-129: - int-128: - int-127: - int-4: - int-3: - int-2: - int-1: - int0: - int1: - int12: - int13: - int127: - int128: - int255: - int256: - int32767: - int32768: - int65535: - int65536: - int131072: - list0: - list1: - list2: - list3: - list4: - list4a: - list5: - list6: - list7: + dict5: @"Missing value" + double1: + double2: + float1: + int-257: + int-256: + int-255: + int-254: + int-129: + int-128: + int-127: + int-4: + int-3: + int-2: + int-1: + int0: + int1: + int12: + int13: + int127: + int128: + int255: + int256: + int32767: + int32768: + int65535: + int65536: + int131072: + list0: + list4: + list4a: + list5: + list6: + list7: list8: @"Missing close bracket" list9: @"Unexpected close bracket" - noop0: - noop1: - noop2: - noop3: - noop4: @"No-ops must be followed by something" - noop5: @"No input at all" - placeholder0: @"Placeholders are no longer supported" - placeholder1: @"Placeholders are no longer supported" - record1: >> - record2: , >>>> - record3: "Dr">> - record4: > - record5: > - record6: > - record7: > - record8: 3 4>> + list10: @"Missing end byte" + noinput0: @"No input at all" + record1: >> + record2: , >>>> + record3: "Dr">> + record4: > + record5: > + record6: > + record7: > + record8: 3 4>> record9: @"Missing record label" "> record10: @"Missing close-angle-bracket" record11: @"Unexpected close-angle-bracket" "> - set0: - set1: - set1a: - set2: @"Missing close brace" - set2a: @"Missing close brace" - set3: @"Duplicate value" - stream1: @"Chunk must be bytes" - stream2: @"Chunk must be bytes" - stream3: @"Chunk must be bytes" - stream4: @"Chunk must be bytes" - stream5: @"Chunk must be bytes" - stream6: @"Chunk must be bytes" - stream7: @"Missing end byte" - stream8: @"Missing element" - stream9: @"Unexpected end stream byte" - stream10: @"Empty chunks forbidden" - stream11: @"Empty chunks forbidden" - stream12: @"Empty chunks forbidden" - string0: - string0a: - string1: - string2: - string3: - string4: - string5: - symbol0: - symbol1: - symbol2: + set0: + set1: + set2: @"Missing close brace" + set2a: @"Missing close brace" + set3: @"Duplicate value" + string0: + string3: + string4: + string5: + symbol0: + symbol2: + tag0: @"Unexpected end tag" + tag1: @"Invalid tag" + tag2: @"Invalid tag" whitespace0: @"Leading spaces have to eventually yield something" whitespace1: @"No input at all" - value1: - value2: - value3: - value4: - value5: - value6: + value1: + value2: + value3: + value4: + value5: + value6: - longlist14: - longlist15: + longlist14: + longlist15: longlist100: - + longlist200: - + rfc8259-example1: rfc8259-example2: