Bug fixes to text reader, and more tests

This commit is contained in:
Tony Garnock-Jones 2018-09-27 21:35:03 +01:00
parent b807b38a44
commit 47519680d5
1 changed files with 139 additions and 100 deletions

View File

@ -17,6 +17,7 @@
(require racket/set)
(require bitsyntax)
(require syndicate/support/struct)
(require (only-in syntax/readerr raise-read-error))
(require imperative-syndicate/assertions)
(require imperative-syndicate/pattern)
@ -246,8 +247,22 @@
;;---------------------------------------------------------------------------
(define (skip-whitespace* i)
(regexp-match? #px#"^(\\s|,)*" i)
(match (peek-char i)
[#\; (regexp-match? #px#"[^\r\n]*[\r\n]" i) (skip-whitespace* i)]
[_ #t]))
(define (parse-error* i fmt . args)
(define-values [line column pos] (port-next-location i))
(raise-read-error (format "read-preserve: ~a" (apply format fmt args))
(object-name i)
line
column
pos
#f))
(define (read-preserve [i (current-input-port)])
(local-require (only-in syntax/readerr raise-read-error))
(local-require net/base64)
(local-require file/sha1)
@ -255,20 +270,8 @@
(syntax-rules ()
[(_ re pat) (app (lambda (v) (regexp-try-match re v)) pat)]))
(define (skip-whitespace)
(regexp-match? #px#"^(\\s|,)*" i)
(match (peek-char i)
[#\; (regexp-match? #px#"[^\r\n]*[\r\n]" i) (skip-whitespace)]
[_ #t]))
(define (parse-error fmt . args)
(define-values [line column pos] (port-next-location i))
(raise-read-error (format "read-preserve: ~a" (apply format fmt args))
(object-name i)
line
column
pos
#f))
(apply parse-error* i fmt args))
(define (eof-guard ch)
(match ch
@ -278,6 +281,8 @@
(define (peek/no-eof) (eof-guard (peek-char i)))
(define (read/no-eof) (eof-guard (read-char i)))
(define (skip-whitespace) (skip-whitespace* i))
(define (read-sequence terminator)
(sequence-fold '() accumulate-value reverse terminator))
@ -301,12 +306,14 @@
(define PIPE #\|)
(define (read-raw-symbol)
(let loop ((acc '()))
(match (peek-char i)
[(or #\{ #\} #\[ #\] #\" #\; #\, #\# #\: (? char-whitespace?) (? eof-object?) (== PIPE))
(string->symbol (list->string (reverse acc)))]
[_ (loop (cons (read-char i) acc))])))
(define (read-raw-symbol acc)
(match (peek-char i)
[(or (? eof-object?)
(? char? (or #\( #\) #\{ #\} #\[ #\]
#\" #\; #\, #\# #\: (== PIPE)
(? char-whitespace?))))
(string->symbol (list->string (reverse acc)))]
[_ (read-raw-symbol (cons (read-char i) acc))]))
(define (read-encoded-binary kind valid-char? finish)
(let loop ((acc '()))
@ -397,12 +404,12 @@
(define (read-digit* acc-rev k)
(match (peek-char i)
[(? char-numeric?) (read-digit* (cons (read-char i) acc-rev) k)]
[(? char? (? char-numeric?)) (read-digit* (cons (read-char i) acc-rev) k)]
[_ (k acc-rev)]))
(define (read-digit+ acc-rev k)
(match (peek-char i)
[(? char-numeric?) (read-digit* (cons (read-char i) acc-rev) k)]
[(? char? (? char-numeric?)) (read-digit* (cons (read-char i) acc-rev) k)]
[_ (parse-error "Incomplete number")]))
(define (read-fracexp acc-rev)
@ -424,7 +431,11 @@
(define s (list->string (reverse acc-rev)))
(define n (string->number s))
(when (not n) (parse-error "Invalid number: ~v" s))
n)
(if (flonum? n)
(match (peek-char i)
[(or #\f #\F) (read-char i) (real->single-flonum n)]
[_ n])
n))
(define (read-number)
(match (peek/no-eof)
@ -441,8 +452,11 @@
(define (collect-fields head)
(skip-whitespace)
(match (peek-char i)
[#\( (collect-fields (build-record head (read-sequence #\))))]
[_ head]))
[#\(
(read-char i)
(collect-fields (build-record head (read-sequence #\))))]
[_
head]))
(define (read-value)
(skip-whitespace)
@ -472,12 +486,17 @@
[_
(parse-error "Invalid preserve value")])]
[#\: (parse-error "Unexpected key/value separator between items")]
[_ (read-raw-symbol)])))
[_ (read-raw-symbol '())])))
(read-value))
(define (string->preserve s)
(read-preserve (open-input-string s)))
(define p (open-input-string s))
(define v (read-preserve p))
(skip-whitespace* p)
(when (not (eof-object? (peek-char p)))
(parse-error* p "Unexpected text following preserve"))
v)
;;---------------------------------------------------------------------------
@ -545,81 +564,90 @@
(define (d bs) (decode bs void))
(define-syntax (check-both-directions stx)
(define-syntax (cross-check stx)
(syntax-case stx ()
((_ v (b ...))
#'(let ((val v)) (check-both-directions v v (b ...))))
((_ forward back (b ...))
((_ text v (b ...))
#'(let ((val v)) (cross-check text v v (b ...))))
((_ text forward back (b ...))
#`(let ((loc #,(source-location->string #'forward)))
(check-equal? (string->preserve text) back)
(check-equal? (d (encode forward)) back loc)
(check-equal? (d (encode back)) back loc)
(check-equal? (d (expected b ...)) back loc)
(check-equal? (encode forward) (expected b ...) loc)
))))
(define-syntax (check-both-directions/nondeterministic stx)
(define-syntax (cross-check/nondeterministic stx)
(syntax-case stx ()
((_ v (b ...))
#'(let ((val v)) (check-both-directions/nondeterministic v v (b ...))))
((_ forward back (b ...))
((_ text v (b ...))
#'(let ((val v)) (cross-check/nondeterministic text v v (b ...))))
((_ text forward back (b ...))
#`(let ((loc #,(source-location->string #'forward)))
(check-equal? (string->preserve text) back)
(check-equal? (d (encode forward)) back loc)
(check-equal? (d (encode back)) back loc)
(check-equal? (d (expected b ...)) back loc)
))))
(check-both-directions (capture (discard)) (#x91 #x80))
(check-both-directions (observe (speak (discard) (capture (discard))))
(#xA1 #xB3 #x75 "speak" #x80 #x91 #x80))
(check-both-directions '(1 2 3 4) (#xC4 #x11 #x12 #x13 #x14))
(check-both-directions (stream-of 'sequence (sequence->generator '(1 2 3 4)))
'(1 2 3 4)
(#x2C #x11 #x12 #x13 #x14 #x3C))
(check-both-directions '(-2 -1 0 1) (#xC4 #x1E #x1F #x10 #x11))
(check-both-directions "hello" (#x55 "hello"))
(check-both-directions (stream-of 'string (sequence->generator '(#"he" #"llo")))
"hello"
(#x25 #x62 "he" #x63 "llo" #x35))
(check-both-directions (stream-of 'string (sequence->generator '(#"he" #"ll" #"" #"" #"o")))
"hello"
(#x25 #x62 "he" #x62 "ll" #x60 #x60 #x61 "o" #x35))
(check-both-directions (stream-of 'byte-string (sequence->generator '(#"he" #"ll" #"" #"" #"o")))
#"hello"
(#x26 #x62 "he" #x62 "ll" #x60 #x60 #x61 "o" #x36))
(check-both-directions (stream-of 'symbol (sequence->generator '(#"he" #"ll" #"" #"" #"o")))
'hello
(#x27 #x62 "he" #x62 "ll" #x60 #x60 #x61 "o" #x37))
(check-both-directions `("hello" there #"world" () ,(set) #t #f)
(#xC7 #x55 "hello" #x75 "there" #x65 "world" #xC0 #xD0 #x01 #x00))
(cross-check "capture(discard())" (capture (discard)) (#x91 #x80))
(cross-check "observe(speak(discard(), capture(discard())))"
(observe (speak (discard) (capture (discard))))
(#xA1 #xB3 #x75 "speak" #x80 #x91 #x80))
(cross-check "[1, 2, 3, 4]" '(1 2 3 4) (#xC4 #x11 #x12 #x13 #x14))
(cross-check "[1 2 3 4]"
(stream-of 'sequence (sequence->generator '(1 2 3 4)))
'(1 2 3 4)
(#x2C #x11 #x12 #x13 #x14 #x3C))
(cross-check " [ -2 -1 0 1 ] " '(-2 -1 0 1) (#xC4 #x1E #x1F #x10 #x11))
(cross-check "\"hello\"" "hello" (#x55 "hello"))
(cross-check "\"hello\""
(stream-of 'string (sequence->generator '(#"he" #"llo")))
"hello"
(#x25 #x62 "he" #x63 "llo" #x35))
(cross-check "\"hello\""
(stream-of 'string (sequence->generator '(#"he" #"ll" #"" #"" #"o")))
"hello"
(#x25 #x62 "he" #x62 "ll" #x60 #x60 #x61 "o" #x35))
(cross-check "#\"hello\""
(stream-of 'byte-string (sequence->generator '(#"he" #"ll" #"" #"" #"o")))
#"hello"
(#x26 #x62 "he" #x62 "ll" #x60 #x60 #x61 "o" #x36))
(cross-check "hello"
(stream-of 'symbol (sequence->generator '(#"he" #"ll" #"" #"" #"o")))
'hello
(#x27 #x62 "he" #x62 "ll" #x60 #x60 #x61 "o" #x37))
(cross-check "[\"hello\" there #\"world\" [] #set{} #true #false]"
`("hello" there #"world" () ,(set) #t #f)
(#xC7 #x55 "hello" #x75 "there" #x65 "world" #xC0 #xD0 #x01 #x00))
(check-both-directions -257 (#x42 #xFE #xFF))
(check-both-directions -256 (#x42 #xFF #x00))
(check-both-directions -255 (#x42 #xFF #x01))
(check-both-directions -254 (#x42 #xFF #x02))
(check-both-directions -129 (#x42 #xFF #x7F))
(check-both-directions -128 (#x41 #x80))
(check-both-directions -127 (#x41 #x81))
(check-both-directions -4 (#x41 #xFC))
(check-both-directions -3 (#x1D))
(check-both-directions -2 (#x1E))
(check-both-directions -1 (#x1F))
(check-both-directions 0 (#x10))
(check-both-directions 1 (#x11))
(check-both-directions 12 (#x1C))
(check-both-directions 13 (#x41 #x0D))
(check-both-directions 127 (#x41 #x7F))
(check-both-directions 128 (#x42 #x00 #x80))
(check-both-directions 255 (#x42 #x00 #xFF))
(check-both-directions 256 (#x42 #x01 #x00))
(check-both-directions 32767 (#x42 #x7F #xFF))
(check-both-directions 32768 (#x43 #x00 #x80 #x00))
(check-both-directions 65535 (#x43 #x00 #xFF #xFF))
(check-both-directions 65536 (#x43 #x01 #x00 #x00))
(check-both-directions 131072 (#x43 #x02 #x00 #x00))
(cross-check "-257" -257 (#x42 #xFE #xFF))
(cross-check "-256" -256 (#x42 #xFF #x00))
(cross-check "-255" -255 (#x42 #xFF #x01))
(cross-check "-254" -254 (#x42 #xFF #x02))
(cross-check "-129" -129 (#x42 #xFF #x7F))
(cross-check "-128" -128 (#x41 #x80))
(cross-check "-127" -127 (#x41 #x81))
(cross-check "-4" -4 (#x41 #xFC))
(cross-check "-3" -3 (#x1D))
(cross-check "-2" -2 (#x1E))
(cross-check "-1" -1 (#x1F))
(cross-check "0" 0 (#x10))
(cross-check "1" 1 (#x11))
(cross-check "12" 12 (#x1C))
(cross-check "13" 13 (#x41 #x0D))
(cross-check "127" 127 (#x41 #x7F))
(cross-check "128" 128 (#x42 #x00 #x80))
(cross-check "255" 255 (#x42 #x00 #xFF))
(cross-check "256" 256 (#x42 #x01 #x00))
(cross-check "32767" 32767 (#x42 #x7F #xFF))
(cross-check "32768" 32768 (#x43 #x00 #x80 #x00))
(cross-check "65535" 65535 (#x43 #x00 #xFF #xFF))
(cross-check "65536" 65536 (#x43 #x01 #x00 #x00))
(cross-check "131072" 131072 (#x43 #x02 #x00 #x00))
(check-both-directions 1.0f0 (#b00000010 #b00111111 #b10000000 0 0))
(check-both-directions 1.0 (#b00000011 #b00111111 #b11110000 0 0 0 0 0 0))
(check-both-directions -1.202e300 (#x03 #xFE #x3C #xB7 #xB7 #x59 #xBF #x04 #x26))
(cross-check "1.0f" 1.0f0 (#b00000010 #b00111111 #b10000000 0 0))
(cross-check "1.0" 1.0 (#b00000011 #b00111111 #b11110000 0 0 0 0 0 0))
(cross-check "-1.202e300" -1.202e300 (#x03 #xFE #x3C #xB7 #xB7 #x59 #xBF #x04 #x26))
(check-equal? (d (expected #x25 #x51 "a" #x35)) (void)) ;; Bad chunk type: must be bytes
(check-equal? (d (expected #x25 #x71 "a" #x35)) (void)) ;; Bad chunk type: must be bytes
@ -631,13 +659,13 @@
(check-equal? (d (expected #x26 #x61 "a" #x36)) #"a")
(check-equal? (d (expected #x27 #x61 "a" #x37)) 'a)
(struct date (year month day) #:prefab)
(struct thing (id) #:prefab)
(struct person thing (name date-of-birth) #:prefab)
(struct titled person (title) #:prefab)
(check-both-directions
(cross-check
"[titled person 2 thing 1](101, \"Blackwell\", date(1821 2 3), \"Dr\")"
(titled 101 "Blackwell" (date 1821 2 3) "Dr")
(#xB5 ;; Record, generic, 4+1
#xC5 ;; Sequence, 5
@ -656,21 +684,30 @@
#x52 #x44 #x72 ;; String, "Dr"
))
(check-both-directions (record 'discard '()) (discard) (#x80))
(check-both-directions (record 'discard '(surprise)) '#s(discard surprise) (#x81 #x78 "surprise"))
(check-both-directions (record 'capture '(x)) (capture 'x) (#x91 #x71 "x"))
(check-both-directions (record 'observe '(x)) (observe 'x) (#xA1 #x71 "x"))
(check-both-directions (record 'observe '(x y)) '#s(observe x y) (#xA2 #x71 "x" #x71 "y"))
(check-both-directions (record 'other '(x y))
'#s(other x y)
(#xB3 #x75 "other" #x71 "x" #x71 "y"))
(check-both-directions (record "aString" '(3 4)) (#xB3 #x57 "aString" #x13 #x14))
(check-both-directions (record (discard) '(3 4)) (#xB3 #x80 #x13 #x14))
(cross-check "discard()" (record 'discard '()) (discard) (#x80))
(cross-check "discard(surprise)"
(record 'discard '(surprise))
'#s(discard surprise)
(#x81 #x78 "surprise"))
(cross-check "capture(x)" (record 'capture '(x)) (capture 'x) (#x91 #x71 "x"))
(cross-check "observe(x)" (record 'observe '(x)) (observe 'x) (#xA1 #x71 "x"))
(cross-check "observe(x y)" (record 'observe '(x y)) '#s(observe x y) (#xA2 #x71 "x" #x71 "y"))
(cross-check "other(x y)"
(record 'other '(x y))
'#s(other x y)
(#xB3 #x75 "other" #x71 "x" #x71 "y"))
(cross-check "\"aString\"(3 4)"
(record "aString" '(3 4))
(#xB3 #x57 "aString" #x13 #x14))
(cross-check "discard()(3, 4)"
(record (discard) '(3 4))
(#xB3 #x80 #x13 #x14))
(check-equal? (d (expected #x2C #x00 #x00)) (void)) ;; missing end byte
(check-equal? (d (expected #xC3 #x00 #x00)) (void)) ;; missing element
(check-both-directions/nondeterministic
(cross-check/nondeterministic
"{a: 1, \"b\": #true, [1 2 3]: #\"c\", {first-name:\"Elizabeth\"}:{surname:\"Blackwell\"}}"
(hash 'a 1
"b" #t
'(1 2 3) #"c"
@ -727,7 +764,8 @@ EOF
EOF
))
(check-both-directions/nondeterministic
(cross-check/nondeterministic
"{\"Image\": {\"Width\": 800,\"Height\": 600,\"Title\": \"View from 15th Floor\",\"Thumbnail\": {\"Url\": \"http://www.example.com/image/481989943\",\"Height\": 125,\"Width\": 100},\"Animated\" : false,\"IDs\": [116, 943, 234, 38793]}}"
rfc8259-example1
(#xe2
#x55 "Image"
@ -748,7 +786,8 @@ EOF
#x43 #x00 #x97 #x89
))
(check-both-directions/nondeterministic
(cross-check/nondeterministic
"[{\"precision\": \"zip\",\"Latitude\": 37.7668,\"Longitude\": -122.3959,\"Address\": \"\",\"City\": \"SAN FRANCISCO\",\"State\": \"CA\",\"Zip\": \"94107\",\"Country\": \"US\"},{\"precision\": \"zip\",\"Latitude\": 37.371991,\"Longitude\": -122.026020,\"Address\": \"\",\"City\": \"SUNNYVALE\",\"State\": \"CA\",\"Zip\": \"94085\",\"Country\": \"US\"}]"
rfc8259-example2
(#xc2
#xef #x10