New tests and fixes for bad surrogate pair situations. Closes #43
This commit is contained in:
parent
9595872177
commit
b8fb7abab1
|
@ -221,7 +221,9 @@ class Parser(TextCodec):
|
|||
def read_string(self, delimiter):
|
||||
def u16_escape(acc):
|
||||
n1 = self.hexnum(4)
|
||||
if n1 >= 0xd800 and n1 <= 0xdbff:
|
||||
if n1 >= 0xd800 and n1 <= 0xdfff:
|
||||
if n1 >= 0xdc00:
|
||||
raise DecodeError('Bad first half of surrogate pair')
|
||||
ok = True
|
||||
ok = ok and self.nextchar() == '\\'
|
||||
ok = ok and self.nextchar() == 'u'
|
||||
|
|
Binary file not shown.
|
@ -184,6 +184,22 @@
|
|||
string3: <Test #x"b10568656c6c6f" "hello">
|
||||
string4: <Test #x"b1 14 616263e6b0b4e6b0b45c2f22080c0a0d0978797a" "abc\u6c34\u6C34\\/\"\b\f\n\r\txyz">
|
||||
string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
|
||||
string6: @"Short unicode escape" <ParseError "\"\\u6c\"">
|
||||
string7: @"Short unicode escape" <ParseError "\"\\u6c3\"">
|
||||
surrogatepair0str: @"Unmatched high surrogate" <ParseError "\"blah\\uD834\"">
|
||||
surrogatepair1str: @"Unmatched low surrogate" <ParseError "\"\\uDD1Eblah\"">
|
||||
surrogatepair2str: @"Unmatched high surrogate" <ParseError "\"blah\\uD834blah\"">
|
||||
surrogatepair3str: @"Unmatched low surrogate" <ParseError "\"blah\\uDD1Eblah\"">
|
||||
surrogatepair4str: @"Swapped surrogates" <ParseError "\"blah\\uDD1E\\uD834blah\"">
|
||||
surrogatepair5str: @"Two high surrogates" <ParseError "\"blah\\uD834\\uD834blah\"">
|
||||
surrogatepair6str: @"Two low surrogates" <ParseError "\"blah\\uDD1E\\uDD1Eblah\"">
|
||||
surrogatepair0sym: @"Unmatched high surrogate" <ParseError "|blah\\uD834|">
|
||||
surrogatepair1sym: @"Unmatched low surrogate" <ParseError "|\\uDD1Eblah|">
|
||||
surrogatepair2sym: @"Unmatched high surrogate" <ParseError "|blah\\uD834blah|">
|
||||
surrogatepair3sym: @"Unmatched low surrogate" <ParseError "|blah\\uDD1Eblah|">
|
||||
surrogatepair4sym: @"Swapped surrogates" <ParseError "|blah\\uDD1E\\uD834blah|">
|
||||
surrogatepair5sym: @"Two high surrogates" <ParseError "|blah\\uD834\\uD834blah|">
|
||||
surrogatepair6sym: @"Two low surrogates" <ParseError "|blah\\uDD1E\\uDD1Eblah|">
|
||||
symbol0: <Test #x"b300" ||>
|
||||
symbol2: <Test #x"b30568656c6c6f" hello>
|
||||
symbol3: <Test #x"b305312d322d33" 1-2-3>
|
||||
|
@ -197,6 +213,7 @@
|
|||
symbol11: <Test #x"b3042d2d2d61" ---a>
|
||||
symbol12: <Test #x"b3042d2d2d31" ---1>
|
||||
symbol13: <Test #x"b3042b312e78" +1.x>
|
||||
symbol14: <Test #x"b304f09d849e" |\uD834\uDD1E|>
|
||||
tag0: @"Unexpected end tag" <DecodeError #x"84">
|
||||
tag1: @"Invalid tag" <DecodeError #x"10">
|
||||
tag2: @"Invalid tag" <DecodeError #x"61b10110">
|
||||
|
|
|
@ -177,16 +177,18 @@
|
|||
(match in-port
|
||||
[(px #px#"^[a-fA-F0-9]{4}" (list hexdigits))
|
||||
(define n1 (string->number (bytes->string/utf-8 hexdigits) 16))
|
||||
(if (<= #xd800 n1 #xdfff) ;; surrogate pair first half
|
||||
(match in-port
|
||||
[(px #px#"^\\\\u([a-fA-F0-9]{4})" (list _ hexdigits2))
|
||||
(define n2 (string->number (bytes->string/utf-8 hexdigits2) 16))
|
||||
(if (<= #xdc00 n2 #xdfff)
|
||||
(+ (arithmetic-shift (- n1 #xd800) 10)
|
||||
(- n2 #xdc00)
|
||||
#x10000)
|
||||
(parse-error "Bad second half of surrogate pair"))]
|
||||
[_ (parse-error "Missing second half of surrogate pair")])
|
||||
(if (<= #xd800 n1 #xdfff) ;; surrogate pair
|
||||
(if (>= n1 #xdc00)
|
||||
(parse-error "Bad first half of surrogate pair")
|
||||
(match in-port
|
||||
[(px #px#"^\\\\u([a-fA-F0-9]{4})" (list _ hexdigits2))
|
||||
(define n2 (string->number (bytes->string/utf-8 hexdigits2) 16))
|
||||
(if (<= #xdc00 n2 #xdfff)
|
||||
(+ (arithmetic-shift (- n1 #xd800) 10)
|
||||
(- n2 #xdc00)
|
||||
#x10000)
|
||||
(parse-error "Bad second half of surrogate pair"))]
|
||||
[_ (parse-error "Missing second half of surrogate pair")]))
|
||||
n1)]
|
||||
[_ (parse-error "Bad string \\u escape")])))))
|
||||
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
test:
|
||||
$(MAKE) -C ../.. update-test-data
|
||||
raco test test-main.rkt
|
|
@ -184,6 +184,22 @@
|
|||
string3: <Test #x"b10568656c6c6f" "hello">
|
||||
string4: <Test #x"b1 14 616263e6b0b4e6b0b45c2f22080c0a0d0978797a" "abc\u6c34\u6C34\\/\"\b\f\n\r\txyz">
|
||||
string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
|
||||
string6: @"Short unicode escape" <ParseError "\"\\u6c\"">
|
||||
string7: @"Short unicode escape" <ParseError "\"\\u6c3\"">
|
||||
surrogatepair0str: @"Unmatched high surrogate" <ParseError "\"blah\\uD834\"">
|
||||
surrogatepair1str: @"Unmatched low surrogate" <ParseError "\"\\uDD1Eblah\"">
|
||||
surrogatepair2str: @"Unmatched high surrogate" <ParseError "\"blah\\uD834blah\"">
|
||||
surrogatepair3str: @"Unmatched low surrogate" <ParseError "\"blah\\uDD1Eblah\"">
|
||||
surrogatepair4str: @"Swapped surrogates" <ParseError "\"blah\\uDD1E\\uD834blah\"">
|
||||
surrogatepair5str: @"Two high surrogates" <ParseError "\"blah\\uD834\\uD834blah\"">
|
||||
surrogatepair6str: @"Two low surrogates" <ParseError "\"blah\\uDD1E\\uDD1Eblah\"">
|
||||
surrogatepair0sym: @"Unmatched high surrogate" <ParseError "|blah\\uD834|">
|
||||
surrogatepair1sym: @"Unmatched low surrogate" <ParseError "|\\uDD1Eblah|">
|
||||
surrogatepair2sym: @"Unmatched high surrogate" <ParseError "|blah\\uD834blah|">
|
||||
surrogatepair3sym: @"Unmatched low surrogate" <ParseError "|blah\\uDD1Eblah|">
|
||||
surrogatepair4sym: @"Swapped surrogates" <ParseError "|blah\\uDD1E\\uD834blah|">
|
||||
surrogatepair5sym: @"Two high surrogates" <ParseError "|blah\\uD834\\uD834blah|">
|
||||
surrogatepair6sym: @"Two low surrogates" <ParseError "|blah\\uDD1E\\uDD1Eblah|">
|
||||
symbol0: <Test #x"b300" ||>
|
||||
symbol2: <Test #x"b30568656c6c6f" hello>
|
||||
symbol3: <Test #x"b305312d322d33" 1-2-3>
|
||||
|
@ -197,6 +213,7 @@
|
|||
symbol11: <Test #x"b3042d2d2d61" ---a>
|
||||
symbol12: <Test #x"b3042d2d2d31" ---1>
|
||||
symbol13: <Test #x"b3042b312e78" +1.x>
|
||||
symbol14: <Test #x"b304f09d849e" |\uD834\uDD1E|>
|
||||
tag0: @"Unexpected end tag" <DecodeError #x"84">
|
||||
tag1: @"Invalid tag" <DecodeError #x"10">
|
||||
tag2: @"Invalid tag" <DecodeError #x"61b10110">
|
||||
|
|
Binary file not shown.
|
@ -184,6 +184,22 @@
|
|||
string3: <Test #x"b10568656c6c6f" "hello">
|
||||
string4: <Test #x"b1 14 616263e6b0b4e6b0b45c2f22080c0a0d0978797a" "abc\u6c34\u6C34\\/\"\b\f\n\r\txyz">
|
||||
string5: <Test #x"b104f09d849e" "\uD834\uDD1E">
|
||||
string6: @"Short unicode escape" <ParseError "\"\\u6c\"">
|
||||
string7: @"Short unicode escape" <ParseError "\"\\u6c3\"">
|
||||
surrogatepair0str: @"Unmatched high surrogate" <ParseError "\"blah\\uD834\"">
|
||||
surrogatepair1str: @"Unmatched low surrogate" <ParseError "\"\\uDD1Eblah\"">
|
||||
surrogatepair2str: @"Unmatched high surrogate" <ParseError "\"blah\\uD834blah\"">
|
||||
surrogatepair3str: @"Unmatched low surrogate" <ParseError "\"blah\\uDD1Eblah\"">
|
||||
surrogatepair4str: @"Swapped surrogates" <ParseError "\"blah\\uDD1E\\uD834blah\"">
|
||||
surrogatepair5str: @"Two high surrogates" <ParseError "\"blah\\uD834\\uD834blah\"">
|
||||
surrogatepair6str: @"Two low surrogates" <ParseError "\"blah\\uDD1E\\uDD1Eblah\"">
|
||||
surrogatepair0sym: @"Unmatched high surrogate" <ParseError "|blah\\uD834|">
|
||||
surrogatepair1sym: @"Unmatched low surrogate" <ParseError "|\\uDD1Eblah|">
|
||||
surrogatepair2sym: @"Unmatched high surrogate" <ParseError "|blah\\uD834blah|">
|
||||
surrogatepair3sym: @"Unmatched low surrogate" <ParseError "|blah\\uDD1Eblah|">
|
||||
surrogatepair4sym: @"Swapped surrogates" <ParseError "|blah\\uDD1E\\uD834blah|">
|
||||
surrogatepair5sym: @"Two high surrogates" <ParseError "|blah\\uD834\\uD834blah|">
|
||||
surrogatepair6sym: @"Two low surrogates" <ParseError "|blah\\uDD1E\\uDD1Eblah|">
|
||||
symbol0: <Test #x"b300" ||>
|
||||
symbol2: <Test #x"b30568656c6c6f" hello>
|
||||
symbol3: <Test #x"b305312d322d33" 1-2-3>
|
||||
|
@ -197,6 +213,7 @@
|
|||
symbol11: <Test #x"b3042d2d2d61" ---a>
|
||||
symbol12: <Test #x"b3042d2d2d31" ---1>
|
||||
symbol13: <Test #x"b3042b312e78" +1.x>
|
||||
symbol14: <Test #x"b304f09d849e" |\uD834\uDD1E|>
|
||||
tag0: @"Unexpected end tag" <DecodeError #x"84">
|
||||
tag1: @"Invalid tag" <DecodeError #x"10">
|
||||
tag2: @"Invalid tag" <DecodeError #x"61b10110">
|
||||
|
|
Loading…
Reference in New Issue