diff --git a/syndicate/mc/preserve.rkt b/syndicate/mc/preserve.rkt index 1ccd553..d0f08e8 100644 --- a/syndicate/mc/preserve.rkt +++ b/syndicate/mc/preserve.rkt @@ -315,34 +315,36 @@ (string->symbol (list->string (reverse acc)))] [_ (read-raw-symbol (cons (read-char i) acc))])) - (define (read-encoded-binary kind valid-char? finish) - (let loop ((acc '())) - (skip-whitespace) - (let ((ch (read/no-eof))) - (cond [(valid-char? ch) - (loop (cons ch acc))] - [(eqv? ch #\}) - (finish (list->string (reverse acc)))] - [else (parse-error "Invalid ~a character" kind)])))) + (define (read-base64-binary acc) + (skip-whitespace) + (define ch (read/no-eof)) + (cond [(eqv? ch #\}) + (base64-decode (string->bytes/latin-1 (list->string (reverse acc))))] + [(or (and (char>=? ch #\A) (char<=? ch #\Z)) + (and (char>=? ch #\a) (char<=? ch #\z)) + (and (char>=? ch #\0) (char<=? ch #\9)) + (memv ch '(#\+ #\/ #\- #\_ #\=))) + (read-base64-binary (cons ch acc))] + [else + (parse-error "Invalid base64 character")])) - (define (read-base64-binary) - (read-encoded-binary "base64" - (lambda (ch) - (or (and (char>=? ch #\A) (char<=? ch #\Z)) - (and (char>=? ch #\a) (char<=? ch #\z)) - (and (char>=? ch #\0) (char<=? ch #\9)) - (memv ch '(#\+ #\/ #\- #\_ #\=)))) - (lambda (s) - (base64-decode (string->bytes/latin-1 s))))) + (define (hexdigit? ch) + (or (and (char>=? ch #\A) (char<=? ch #\F)) + (and (char>=? ch #\a) (char<=? ch #\f)) + (and (char>=? ch #\0) (char<=? ch #\9)))) - (define (read-hex-binary) - (read-encoded-binary "hex" - (lambda (ch) - (or (and (char>=? ch #\A) (char<=? ch #\F)) - (and (char>=? ch #\a) (char<=? ch #\f)) - (and (char>=? ch #\0) (char<=? ch #\9)))) - (lambda (s) - (hex-string->bytes s)))) + (define (read-hex-binary acc) + (skip-whitespace) + (define ch (read/no-eof)) + (cond [(eqv? ch #\}) + (hex-string->bytes (list->string (reverse acc)))] + [(hexdigit? ch) + (define ch2 (read/no-eof)) + (when (not (hexdigit? ch2)) + (parse-error "Hex-encoded binary digits must come in pairs")) + (read-hex-binary (cons ch2 (cons ch acc)))] + [else + (parse-error "Invalid hex character")])) (define (read-stringlike xform-item finish terminator-char hexescape-char hexescape-proc) (let loop ((acc '())) @@ -368,11 +370,11 @@ (integer->char (match i [(px #px#"^[a-fA-F0-9]{4}" (list hexdigits)) - (define n1 (string->number (bytes->string/utf-8 hexdigits))) + (define n1 (string->number (bytes->string/utf-8 hexdigits) 16)) (if (<= #xd800 n1 #xdfff) ;; surrogate pair first half (match i [(px #px#"^\\\\u([a-fA-F0-9]{4})" (list hexdigits2)) - (define n2 (string->number (bytes->string/utf-8 hexdigits2))) + (define n2 (string->number (bytes->string/utf-8 hexdigits2) 16)) (if (<= #xdc00 n2 #xdfff) (+ (arithmetic-shift (- n1 #xd800) 10) (- n2 #xdc00) @@ -472,7 +474,7 @@ [(px #px#"^#set\\{" (list _)) (sequence-fold (set) accumulate-value values #\})] [(px #px#"^#hexvalue\\{" (list _)) - (decode (read-hex-binary) (lambda () (parse-error "Invalid #hexvalue encoding")))] + (decode (read-hex-binary '()) (lambda () (parse-error "Invalid #hexvalue encoding")))] [(px #px#"^#true" (list _)) #t] [(px #px#"^#false" (list _)) @@ -480,9 +482,9 @@ [(px #px#"^#\"" (list _)) (read-literal-binary)] [(px #px#"^#hex\\{" (list _)) - (read-hex-binary)] + (read-hex-binary '())] [(px #px#"^#base64\\{" (list _)) - (read-base64-binary)] + (read-base64-binary '())] [_ (parse-error "Invalid preserve value")])] [#\: (parse-error "Unexpected key/value separator between items")] @@ -570,7 +572,7 @@ #'(let ((val v)) (cross-check text v v (b ...)))) ((_ text forward back (b ...)) #`(let ((loc #,(source-location->string #'forward))) - (check-equal? (string->preserve text) back) + (check-equal? (string->preserve text) back loc) (check-equal? (d (encode forward)) back loc) (check-equal? (d (encode back)) back loc) (check-equal? (d (expected b ...)) back loc) @@ -583,7 +585,7 @@ #'(let ((val v)) (cross-check/nondeterministic text v v (b ...)))) ((_ text forward back (b ...)) #`(let ((loc #,(source-location->string #'forward))) - (check-equal? (string->preserve text) back) + (check-equal? (string->preserve text) back loc) (check-equal? (d (encode forward)) back loc) (check-equal? (d (encode back)) back loc) (check-equal? (d (expected b ...)) back loc) @@ -620,6 +622,25 @@ `("hello" there #"world" () ,(set) #t #f) (#xC7 #x55 "hello" #x75 "there" #x65 "world" #xC0 #xD0 #x01 #x00)) + (cross-check "#\"ABC\"" #"ABC" (#x63 #x41 #x42 #x43)) + (cross-check "#hex{414243}" #"ABC" (#x63 #x41 #x42 #x43)) + (cross-check "#hex{ 41 4A 4e }" #"AJN" (#x63 #x41 #x4A #x4E)) + (cross-check "#hex{ 41;re\n 42 43 }" #"ABC" (#x63 #x41 #x42 #x43)) + (check-exn exn? (lambda () (string->preserve "#hex{414 243}"))) ;; bytes must be 2-digits entire + (cross-check "#base64{Y29yeW1i}" #"corymb" (#x66 "corymb")) + (cross-check "#base64{Y29 yeW 1i}" #"corymb" (#x66 "corymb")) + (cross-check ";; a comment\n#base64{\n;x\nY29 yeW 1i}" #"corymb" (#x66 "corymb")) + (cross-check "#base64{SGk=}" #"Hi" (#x62 "Hi")) + (cross-check "#base64{SGk}" #"Hi" (#x62 "Hi")) + (cross-check "#base64{ S G k }" #"Hi" (#x62 "Hi")) + + (cross-check "\"abc\\u6c34\\u6C34\\\\\\/\\\"\\b\\f\\n\\r\\txyz\"" + "abc\u6c34\u6c34\\/\"\b\f\n\r\txyz" + (#x5f #x14 + #x61 #x62 #x63 #xe6 #xb0 #xb4 #xe6 #xb0 + #xb4 #x5c #x2f #x22 #x08 #x0c #x0a #x0d + #x09 #x78 #x79 #x7a)) + (cross-check "-257" -257 (#x42 #xFE #xFF)) (cross-check "-256" -256 (#x42 #xFF #x00)) (cross-check "-255" -255 (#x42 #xFF #x01))