More tests and fixes

This commit is contained in:
Tony Garnock-Jones 2018-09-27 22:13:46 +01:00
parent 47519680d5
commit 5f9f2175f0
1 changed files with 54 additions and 33 deletions

View File

@ -315,34 +315,36 @@
(string->symbol (list->string (reverse acc)))]
[_ (read-raw-symbol (cons (read-char i) acc))]))
(define (read-encoded-binary kind valid-char? finish)
(let loop ((acc '()))
(skip-whitespace)
(let ((ch (read/no-eof)))
(cond [(valid-char? ch)
(loop (cons ch acc))]
[(eqv? ch #\})
(finish (list->string (reverse acc)))]
[else (parse-error "Invalid ~a character" kind)]))))
(define (read-base64-binary acc)
(skip-whitespace)
(define ch (read/no-eof))
(cond [(eqv? ch #\})
(base64-decode (string->bytes/latin-1 (list->string (reverse acc))))]
[(or (and (char>=? ch #\A) (char<=? ch #\Z))
(and (char>=? ch #\a) (char<=? ch #\z))
(and (char>=? ch #\0) (char<=? ch #\9))
(memv ch '(#\+ #\/ #\- #\_ #\=)))
(read-base64-binary (cons ch acc))]
[else
(parse-error "Invalid base64 character")]))
(define (read-base64-binary)
(read-encoded-binary "base64"
(lambda (ch)
(or (and (char>=? ch #\A) (char<=? ch #\Z))
(and (char>=? ch #\a) (char<=? ch #\z))
(and (char>=? ch #\0) (char<=? ch #\9))
(memv ch '(#\+ #\/ #\- #\_ #\=))))
(lambda (s)
(base64-decode (string->bytes/latin-1 s)))))
(define (hexdigit? ch)
(or (and (char>=? ch #\A) (char<=? ch #\F))
(and (char>=? ch #\a) (char<=? ch #\f))
(and (char>=? ch #\0) (char<=? ch #\9))))
(define (read-hex-binary)
(read-encoded-binary "hex"
(lambda (ch)
(or (and (char>=? ch #\A) (char<=? ch #\F))
(and (char>=? ch #\a) (char<=? ch #\f))
(and (char>=? ch #\0) (char<=? ch #\9))))
(lambda (s)
(hex-string->bytes s))))
(define (read-hex-binary acc)
(skip-whitespace)
(define ch (read/no-eof))
(cond [(eqv? ch #\})
(hex-string->bytes (list->string (reverse acc)))]
[(hexdigit? ch)
(define ch2 (read/no-eof))
(when (not (hexdigit? ch2))
(parse-error "Hex-encoded binary digits must come in pairs"))
(read-hex-binary (cons ch2 (cons ch acc)))]
[else
(parse-error "Invalid hex character")]))
(define (read-stringlike xform-item finish terminator-char hexescape-char hexescape-proc)
(let loop ((acc '()))
@ -368,11 +370,11 @@
(integer->char
(match i
[(px #px#"^[a-fA-F0-9]{4}" (list hexdigits))
(define n1 (string->number (bytes->string/utf-8 hexdigits)))
(define n1 (string->number (bytes->string/utf-8 hexdigits) 16))
(if (<= #xd800 n1 #xdfff) ;; surrogate pair first half
(match i
[(px #px#"^\\\\u([a-fA-F0-9]{4})" (list hexdigits2))
(define n2 (string->number (bytes->string/utf-8 hexdigits2)))
(define n2 (string->number (bytes->string/utf-8 hexdigits2) 16))
(if (<= #xdc00 n2 #xdfff)
(+ (arithmetic-shift (- n1 #xd800) 10)
(- n2 #xdc00)
@ -472,7 +474,7 @@
[(px #px#"^#set\\{" (list _))
(sequence-fold (set) accumulate-value values #\})]
[(px #px#"^#hexvalue\\{" (list _))
(decode (read-hex-binary) (lambda () (parse-error "Invalid #hexvalue encoding")))]
(decode (read-hex-binary '()) (lambda () (parse-error "Invalid #hexvalue encoding")))]
[(px #px#"^#true" (list _))
#t]
[(px #px#"^#false" (list _))
@ -480,9 +482,9 @@
[(px #px#"^#\"" (list _))
(read-literal-binary)]
[(px #px#"^#hex\\{" (list _))
(read-hex-binary)]
(read-hex-binary '())]
[(px #px#"^#base64\\{" (list _))
(read-base64-binary)]
(read-base64-binary '())]
[_
(parse-error "Invalid preserve value")])]
[#\: (parse-error "Unexpected key/value separator between items")]
@ -570,7 +572,7 @@
#'(let ((val v)) (cross-check text v v (b ...))))
((_ text forward back (b ...))
#`(let ((loc #,(source-location->string #'forward)))
(check-equal? (string->preserve text) back)
(check-equal? (string->preserve text) back loc)
(check-equal? (d (encode forward)) back loc)
(check-equal? (d (encode back)) back loc)
(check-equal? (d (expected b ...)) back loc)
@ -583,7 +585,7 @@
#'(let ((val v)) (cross-check/nondeterministic text v v (b ...))))
((_ text forward back (b ...))
#`(let ((loc #,(source-location->string #'forward)))
(check-equal? (string->preserve text) back)
(check-equal? (string->preserve text) back loc)
(check-equal? (d (encode forward)) back loc)
(check-equal? (d (encode back)) back loc)
(check-equal? (d (expected b ...)) back loc)
@ -620,6 +622,25 @@
`("hello" there #"world" () ,(set) #t #f)
(#xC7 #x55 "hello" #x75 "there" #x65 "world" #xC0 #xD0 #x01 #x00))
(cross-check "#\"ABC\"" #"ABC" (#x63 #x41 #x42 #x43))
(cross-check "#hex{414243}" #"ABC" (#x63 #x41 #x42 #x43))
(cross-check "#hex{ 41 4A 4e }" #"AJN" (#x63 #x41 #x4A #x4E))
(cross-check "#hex{ 41;re\n 42 43 }" #"ABC" (#x63 #x41 #x42 #x43))
(check-exn exn? (lambda () (string->preserve "#hex{414 243}"))) ;; bytes must be 2-digits entire
(cross-check "#base64{Y29yeW1i}" #"corymb" (#x66 "corymb"))
(cross-check "#base64{Y29 yeW 1i}" #"corymb" (#x66 "corymb"))
(cross-check ";; a comment\n#base64{\n;x\nY29 yeW 1i}" #"corymb" (#x66 "corymb"))
(cross-check "#base64{SGk=}" #"Hi" (#x62 "Hi"))
(cross-check "#base64{SGk}" #"Hi" (#x62 "Hi"))
(cross-check "#base64{ S G k }" #"Hi" (#x62 "Hi"))
(cross-check "\"abc\\u6c34\\u6C34\\\\\\/\\\"\\b\\f\\n\\r\\txyz\""
"abc\u6c34\u6c34\\/\"\b\f\n\r\txyz"
(#x5f #x14
#x61 #x62 #x63 #xe6 #xb0 #xb4 #xe6 #xb0
#xb4 #x5c #x2f #x22 #x08 #x0c #x0a #x0d
#x09 #x78 #x79 #x7a))
(cross-check "-257" -257 (#x42 #xFE #xFF))
(cross-check "-256" -256 (#x42 #xFF #x00))
(cross-check "-255" -255 (#x42 #xFF #x01))