diff --git a/implementations/python/preserves/text.py b/implementations/python/preserves/text.py index decaeee..57f61c8 100644 --- a/implementations/python/preserves/text.py +++ b/implementations/python/preserves/text.py @@ -221,7 +221,9 @@ class Parser(TextCodec): def read_string(self, delimiter): def u16_escape(acc): n1 = self.hexnum(4) - if n1 >= 0xd800 and n1 <= 0xdbff: + if n1 >= 0xd800 and n1 <= 0xdfff: + if n1 >= 0xdc00: + raise DecodeError('Bad first half of surrogate pair') ok = True ok = ok and self.nextchar() == '\\' ok = ok and self.nextchar() == 'u' diff --git a/implementations/python/tests/samples.bin b/implementations/python/tests/samples.bin index 8ea5e28..3ef5ecf 100644 Binary files a/implementations/python/tests/samples.bin and b/implementations/python/tests/samples.bin differ diff --git a/implementations/python/tests/samples.pr b/implementations/python/tests/samples.pr index f969455..4ebe8e6 100644 --- a/implementations/python/tests/samples.pr +++ b/implementations/python/tests/samples.pr @@ -184,6 +184,22 @@ string3: string4: string5: + string6: @"Short unicode escape" + string7: @"Short unicode escape" + surrogatepair0str: @"Unmatched high surrogate" + surrogatepair1str: @"Unmatched low surrogate" + surrogatepair2str: @"Unmatched high surrogate" + surrogatepair3str: @"Unmatched low surrogate" + surrogatepair4str: @"Swapped surrogates" + surrogatepair5str: @"Two high surrogates" + surrogatepair6str: @"Two low surrogates" + surrogatepair0sym: @"Unmatched high surrogate" + surrogatepair1sym: @"Unmatched low surrogate" + surrogatepair2sym: @"Unmatched high surrogate" + surrogatepair3sym: @"Unmatched low surrogate" + surrogatepair4sym: @"Swapped surrogates" + surrogatepair5sym: @"Two high surrogates" + surrogatepair6sym: @"Two low surrogates" symbol0: symbol2: symbol3: @@ -197,6 +213,7 @@ symbol11: symbol12: symbol13: + symbol14: tag0: @"Unexpected end tag" tag1: @"Invalid tag" tag2: @"Invalid tag" diff --git a/implementations/racket/preserves/preserves/read-text.rkt b/implementations/racket/preserves/preserves/read-text.rkt index bbd0cf5..762cefa 100644 --- a/implementations/racket/preserves/preserves/read-text.rkt +++ b/implementations/racket/preserves/preserves/read-text.rkt @@ -177,16 +177,18 @@ (match in-port [(px #px#"^[a-fA-F0-9]{4}" (list hexdigits)) (define n1 (string->number (bytes->string/utf-8 hexdigits) 16)) - (if (<= #xd800 n1 #xdfff) ;; surrogate pair first half - (match in-port - [(px #px#"^\\\\u([a-fA-F0-9]{4})" (list _ hexdigits2)) - (define n2 (string->number (bytes->string/utf-8 hexdigits2) 16)) - (if (<= #xdc00 n2 #xdfff) - (+ (arithmetic-shift (- n1 #xd800) 10) - (- n2 #xdc00) - #x10000) - (parse-error "Bad second half of surrogate pair"))] - [_ (parse-error "Missing second half of surrogate pair")]) + (if (<= #xd800 n1 #xdfff) ;; surrogate pair + (if (>= n1 #xdc00) + (parse-error "Bad first half of surrogate pair") + (match in-port + [(px #px#"^\\\\u([a-fA-F0-9]{4})" (list _ hexdigits2)) + (define n2 (string->number (bytes->string/utf-8 hexdigits2) 16)) + (if (<= #xdc00 n2 #xdfff) + (+ (arithmetic-shift (- n1 #xd800) 10) + (- n2 #xdc00) + #x10000) + (parse-error "Bad second half of surrogate pair"))] + [_ (parse-error "Missing second half of surrogate pair")])) n1)] [_ (parse-error "Bad string \\u escape")]))))) diff --git a/implementations/racket/preserves/preserves/tests/Makefile b/implementations/racket/preserves/preserves/tests/Makefile new file mode 100644 index 0000000..9eca976 --- /dev/null +++ b/implementations/racket/preserves/preserves/tests/Makefile @@ -0,0 +1,3 @@ +test: + $(MAKE) -C ../.. update-test-data + raco test test-main.rkt diff --git a/implementations/racket/preserves/preserves/tests/samples.pr b/implementations/racket/preserves/preserves/tests/samples.pr index f969455..4ebe8e6 100644 --- a/implementations/racket/preserves/preserves/tests/samples.pr +++ b/implementations/racket/preserves/preserves/tests/samples.pr @@ -184,6 +184,22 @@ string3: string4: string5: + string6: @"Short unicode escape" + string7: @"Short unicode escape" + surrogatepair0str: @"Unmatched high surrogate" + surrogatepair1str: @"Unmatched low surrogate" + surrogatepair2str: @"Unmatched high surrogate" + surrogatepair3str: @"Unmatched low surrogate" + surrogatepair4str: @"Swapped surrogates" + surrogatepair5str: @"Two high surrogates" + surrogatepair6str: @"Two low surrogates" + surrogatepair0sym: @"Unmatched high surrogate" + surrogatepair1sym: @"Unmatched low surrogate" + surrogatepair2sym: @"Unmatched high surrogate" + surrogatepair3sym: @"Unmatched low surrogate" + surrogatepair4sym: @"Swapped surrogates" + surrogatepair5sym: @"Two high surrogates" + surrogatepair6sym: @"Two low surrogates" symbol0: symbol2: symbol3: @@ -197,6 +213,7 @@ symbol11: symbol12: symbol13: + symbol14: tag0: @"Unexpected end tag" tag1: @"Invalid tag" tag2: @"Invalid tag" diff --git a/tests/samples.bin b/tests/samples.bin index 8ea5e28..3ef5ecf 100644 Binary files a/tests/samples.bin and b/tests/samples.bin differ diff --git a/tests/samples.pr b/tests/samples.pr index f969455..4ebe8e6 100644 --- a/tests/samples.pr +++ b/tests/samples.pr @@ -184,6 +184,22 @@ string3: string4: string5: + string6: @"Short unicode escape" + string7: @"Short unicode escape" + surrogatepair0str: @"Unmatched high surrogate" + surrogatepair1str: @"Unmatched low surrogate" + surrogatepair2str: @"Unmatched high surrogate" + surrogatepair3str: @"Unmatched low surrogate" + surrogatepair4str: @"Swapped surrogates" + surrogatepair5str: @"Two high surrogates" + surrogatepair6str: @"Two low surrogates" + surrogatepair0sym: @"Unmatched high surrogate" + surrogatepair1sym: @"Unmatched low surrogate" + surrogatepair2sym: @"Unmatched high surrogate" + surrogatepair3sym: @"Unmatched low surrogate" + surrogatepair4sym: @"Swapped surrogates" + surrogatepair5sym: @"Two high surrogates" + surrogatepair6sym: @"Two low surrogates" symbol0: symbol2: symbol3: @@ -197,6 +213,7 @@ symbol11: symbol12: symbol13: + symbol14: tag0: @"Unexpected end tag" tag1: @"Invalid tag" tag2: @"Invalid tag"