From 5a18b192bab161da67dc00635d769f647093211d Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Sun, 6 Nov 2022 23:55:33 +0100 Subject: [PATCH] Racket impl; update test data; tweak spec --- implementations/python/tests/samples.bin | Bin 8146 -> 9602 bytes implementations/python/tests/samples.pr | 48 ++++++- .../racket/preserves/preserves/read-text.rkt | 125 +++++++++--------- .../preserves/preserves/tests/samples.pr | 48 ++++++- .../racket/preserves/preserves/write-text.rkt | 13 +- preserves-text.md | 10 +- tests/samples.bin | Bin 8146 -> 9602 bytes tests/samples.pr | 48 ++++++- 8 files changed, 204 insertions(+), 88 deletions(-) diff --git a/implementations/python/tests/samples.bin b/implementations/python/tests/samples.bin index 5d5732a6cb96e095f414352381500b339b60cae5..3d595499fde0ec03d89756b410d7d69bed69395b 100644 GIT binary patch delta 1648 zcmaJ>J#W)M816N3Q&aWB{bE62wF9cGL%DosLJUPhs8t8V?lewoDI`*2O4{YB#Mz6o zAk?8N8|K=LAHWPde}V1{<-N1-?3A>Y^WN#+=eak&=Xvkv^Zx6vc+Rx4mn{9p>3ZML z*_qEYx8>pF5_>v#oLW^YT7L>25k{XDp3p|KZF@Pnh1Op#yzPnLvl)@g4}+DL&IhMs zdWUw)teAE8!1cP9?7^Ph={nn;PP-G#ZT1=)B$qD~OB<3{7zWEb?T&LFZ;{5ta;0i3 za&Z{UziJ)XO}B3DA3~w=dh;ReY*Y(^Z$8=i_x8-`r@u$&K_;4QN zKP(ReYV0}(_8o*9PT)ZjD#IXc?b^Fzdm_Fkl91{I2r>YI41j<*Bv+R7=?gmgv2eOY zh+Iisee~f4WYg^eG+saVug?9^i$#6LGVOI zv{2lliSCohxLLLSDX>J4Ns_%)M&o`&A}9=*fO!m_yAhttfak1n(_K)uZTw2nO!xR* zt=&{6$uN!J9L`4r6bJzY=^_BwxQ6$mK>>7t5hiAgMa%??m?;)9Gl&>Z$tKpV6DLh>0TG*R1SJ?JR|>goz9n*j Pd9#lcALHh0(rY*Y!^TVE diff --git a/implementations/python/tests/samples.pr b/implementations/python/tests/samples.pr index 9450d49..a99aae9 100644 --- a/implementations/python/tests/samples.pr +++ b/implementations/python/tests/samples.pr @@ -74,9 +74,35 @@ dict3: @"Duplicate key" dict4: @"Unexpected close brace" dict5: @"Missing value" + double0: + double+0: + double-0: double1: double2: + double3: + double4: @"Fewer than 16 digits" + double5: @"More than 16 digits" + double6: @"Invalid chars" + double7: @"Positive infinity" + double8: @"Negative infinity" + double9: @"-NaN" + double10: @"-NaN" + double11: @"+NaN" + double12: @"+NaN" + float0: + float+0: + float-0: float1: + float2: + float3: @"Fewer than 8 digits" + float4: @"More than 8 digits" + float5: @"Invalid chars" + float6: @"Positive infinity" + float7: @"Negative infinity" + float8: @"+NaN" + float9: @"+NaN" + float10: @"-NaN" + float11: @"-NaN" int-257: int-256: int-255: @@ -89,10 +115,13 @@ int-2: int-1: int0: + int+0: + int-0: int1: int12: int13: int127: + int+127: int128: int255: int256: @@ -112,6 +141,8 @@ list8: @"Missing close bracket" list9: @"Unexpected close bracket" list10: @"Missing end byte" + list11: + list12: noinput0: @"No input at all" embed0: embed1: @@ -138,17 +169,22 @@ string5: symbol0: symbol2: + symbol3: + symbol4: + symbol5: + symbol6: + symbol7: + symbol8: + symbol9: + symbol10: + symbol11: + symbol12: + symbol13: tag0: @"Unexpected end tag" tag1: @"Invalid tag" tag2: @"Invalid tag" whitespace0: @"Leading spaces have to eventually yield something" whitespace1: @"No input at all" - value1: - value2: - value3: - value4: - value5: - value6: longlist14: symbol (read-string PIPE))] @@ -82,21 +80,12 @@ [#\t #t] [#\{ (sequence-fold (set) set-add* values #\})] [#\" (read-literal-binary)] - [#\x (if (eqv? (next-char) #\") - (read-hex-binary '()) - (parse-error "Expected open-quote at start of hex ByteString"))] + [#\x (match (next-char) + [#\" (read-hex-binary '())] + [#\f (float (read-hex-float 4))] + [#\d (read-hex-float 8)] + [c (parse-error "Invalid #x syntax: ~v" c)])] [#\[ (read-base64-binary '())] - [#\= (define bs (read-preserve/text in-port #:read-syntax? #t #:source source)) - (when (not (bytes? (annotated-item bs))) - (parse-error "ByteString must follow #=")) - (when (not (null? (annotated-annotations bs))) - (parse-error "Annotations not permitted after #=")) - (bytes->preserve - (annotated-item bs) - (lambda (message . args) - (apply parse-error (string-append "Inline binary value: " message) args)) - #:read-syntax? read-syntax? - #:on-short (lambda () (parse-error "Incomplete inline binary value")))] [#\! (embedded (decode-embedded (next)))] [c (parse-error "Invalid # syntax: ~v" c)])] @@ -110,7 +99,7 @@ [#\] (parse-error "Unexpected ]")] [#\} (parse-error "Unexpected }")] - [c (read-raw-symbol (list c))])) + [c (read-raw-symbol-or-number (list c))])) (define (set-add* s e) (when (set-member? s e) (parse-error "Duplicate set element: ~v" e)) @@ -159,49 +148,6 @@ (annotated '() loc v)))) (lambda (pos0 v) v))) - ;;--------------------------------------------------------------------------- - ;; Numbers - - (define (read-intpart acc-rev ch) - (match ch - [#\0 (read-fracexp (cons ch acc-rev))] - [_ (read-digit+ acc-rev read-fracexp ch)])) - - (define (read-digit* acc-rev k) - (match (peek-char in-port) - [(? char? (? char-numeric?)) (read-digit* (cons (read-char in-port) acc-rev) k)] - [_ (k acc-rev)])) - - (define (read-digit+ acc-rev k [ch (read-char in-port)]) - (match ch - [(? char? (? char-numeric?)) (read-digit* (cons ch acc-rev) k)] - [_ (parse-error "Incomplete number")])) - - (define (read-fracexp acc-rev) - (match (peek-char in-port) - [#\. (read-digit+ (cons (read-char in-port) acc-rev) read-exp)] - [_ (read-exp acc-rev)])) - - (define (read-exp acc-rev) - (match (peek-char in-port) - [(or #\e #\E) (read-sign-and-exp (cons (read-char in-port) acc-rev))] - [_ (finish-number acc-rev)])) - - (define (read-sign-and-exp acc-rev) - (match (peek-char in-port) - [(or #\+ #\-) (read-digit+ (cons (read-char in-port) acc-rev) finish-number)] - [_ (read-digit+ acc-rev finish-number)])) - - (define (finish-number acc-rev) - (define s (list->string (reverse acc-rev))) - (define n (string->number s 10)) - (when (not n) (parse-error "Invalid number: ~v" s)) - (if (flonum? n) - (match (peek-char in-port) - [(or #\f #\F) (read-char in-port) (float n)] - [_ n]) - n)) - ;;--------------------------------------------------------------------------- ;; String-like things @@ -279,6 +225,17 @@ [else (parse-error "Invalid hex character")])) + ;;--------------------------------------------------------------------------- + ;; Hex-encoded floating point numbers + + (define (read-hex-float byte-count) + (unless (eqv? (next-char) #\") + (parse-error "Missing open-double-quote in hex-encoded floating-point number")) + (define bs (read-hex-binary '())) + (unless (= (bytes-length bs) byte-count) + (parse-error "Incorrect number of bytes in hex-encoded floating-point number")) + (floating-point-bytes->real bs #t 0 byte-count)) + ;;--------------------------------------------------------------------------- ;; Base64-encoded ByteStrings @@ -334,16 +291,56 @@ #\})) ;;--------------------------------------------------------------------------- - ;; "Raw" symbols + ;; "Raw" symbols and numbers - (define (read-raw-symbol acc) + (define (read-raw-symbol-or-number acc) (match (peek-char in-port) [(or (? eof-object?) (? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\> #\" #\; #\, #\@ #\# #\: (== PIPE) (? char-whitespace?)))) - (string->symbol (list->string (reverse acc)))] - [_ (read-raw-symbol (cons (read-char in-port) acc))])) + (let ((input (reverse acc))) + (or (analyze-number input) + (string->symbol (list->string input))))] + [_ (read-raw-symbol-or-number (cons (read-char in-port) acc))])) + + (define (analyze-number input) + (match input + [(cons (and sign (or #\+ #\-)) input) (read-digit+ (list sign) read-fracexp input)] + [_ (read-digit+ (list) read-fracexp input)])) + + (define (read-digit* acc-rev k input) + (match input + [(cons (? char? (? char-numeric? d)) input) (read-digit* (cons d acc-rev) k input)] + [_ (k acc-rev input)])) + + (define (read-digit+ acc-rev k input) + (match input + [(cons (? char? (? char-numeric? d)) input) (read-digit* (cons d acc-rev) k input)] + [_ #f])) + + (define (read-fracexp acc-rev input) + (match input + [(cons #\. input) (read-digit+ (cons #\. acc-rev) read-exp input)] + [_ (read-exp acc-rev input)])) + + (define (read-exp acc-rev input) + (match input + [(cons (and e (or #\e #\E)) input) (read-sign-and-exp (cons e acc-rev) input)] + [_ (finish-number acc-rev input)])) + + (define (read-sign-and-exp acc-rev input) + (match input + [(cons (and sign (or #\+ #\-)) input) (read-digit+ (cons sign acc-rev) finish-number input)] + [_ (read-digit+ acc-rev finish-number input)])) + + (define (finish-number acc-rev input) + (define s (list->string (reverse acc-rev))) + (define n (string->number s 10)) + (cond [(not n) #f] + [(and (flonum? n) (member input '((#\f) (#\F)))) (float n)] + [(equal? input '()) n] + [else #f])) ;;--------------------------------------------------------------------------- ;; Main entry point to parser diff --git a/implementations/racket/preserves/preserves/tests/samples.pr b/implementations/racket/preserves/preserves/tests/samples.pr index 9450d49..a99aae9 100644 --- a/implementations/racket/preserves/preserves/tests/samples.pr +++ b/implementations/racket/preserves/preserves/tests/samples.pr @@ -74,9 +74,35 @@ dict3: @"Duplicate key" dict4: @"Unexpected close brace" dict5: @"Missing value" + double0: + double+0: + double-0: double1: double2: + double3: + double4: @"Fewer than 16 digits" + double5: @"More than 16 digits" + double6: @"Invalid chars" + double7: @"Positive infinity" + double8: @"Negative infinity" + double9: @"-NaN" + double10: @"-NaN" + double11: @"+NaN" + double12: @"+NaN" + float0: + float+0: + float-0: float1: + float2: + float3: @"Fewer than 8 digits" + float4: @"More than 8 digits" + float5: @"Invalid chars" + float6: @"Positive infinity" + float7: @"Negative infinity" + float8: @"+NaN" + float9: @"+NaN" + float10: @"-NaN" + float11: @"-NaN" int-257: int-256: int-255: @@ -89,10 +115,13 @@ int-2: int-1: int0: + int+0: + int-0: int1: int12: int13: int127: + int+127: int128: int255: int256: @@ -112,6 +141,8 @@ list8: @"Missing close bracket" list9: @"Unexpected close bracket" list10: @"Missing end byte" + list11: + list12: noinput0: @"No input at all" embed0: embed1: @@ -138,17 +169,22 @@ string5: symbol0: symbol2: + symbol3: + symbol4: + symbol5: + symbol6: + symbol7: + symbol8: + symbol9: + symbol10: + symbol11: + symbol12: + symbol13: tag0: @"Unexpected end tag" tag1: @"Invalid tag" tag2: @"Invalid tag" whitespace0: @"Leading spaces have to eventually yield something" whitespace1: @"No input at all" - value1: - value2: - value3: - value4: - value5: - value6: longlist14: hex-string)) (define PIPE #\|) @@ -132,6 +134,13 @@ (write-binary-stringlike v) (write-binary-base64 outer-distance v))))) + (define (write-float v byte-count hextype suffix) + (if (or (nan? v) (infinite? v)) + (! "#x~a\"~a\"" + hextype + (bytes->hex-string (real->floating-point-bytes v byte-count #t))) + (! "~v~a" v suffix))) + (define (write-value distance v) (match v [(annotated annotations _ item) @@ -143,8 +152,8 @@ (write-value distance item)] [#f (! "#f")] [#t (! "#t")] - [(float v) (! "~vf" v)] - [(? flonum?) (! "~v" v)] + [(float v) (write-float v 4 "f" "f")] + [(? flonum?) (write-float v 8 "d" "")] [(? integer? x) (! "~v" v)] [(? string?) (! "\"") diff --git a/preserves-text.md b/preserves-text.md index 1fe6fc7..6af4dae 100644 --- a/preserves-text.md +++ b/preserves-text.md @@ -42,7 +42,8 @@ Any `Value` may be preceded by whitespace. Value = ws (Record / Collection / Atom / Embedded) Collection = Sequence / Dictionary / Set - Atom = Boolean / String / ByteString / QuotedSymbol / SymbolOrNumber + Atom = Boolean / String / ByteString / + QuotedSymbol / SymbolOrNumber Each `Record` is an angle-bracket enclosed grouping of its label-`Value` followed by its field-`Value`s. @@ -211,13 +212,14 @@ represented as raw hexadecimal strings similar to hexadecimal syntax whereever convenient, even for values representable using the grammar above.[^rationale-no-general-machine-syntax] - Float =/ "#xf" %x22 8HEXDIG %x22 - Double =/ "#xd" %x22 16HEXDIG %x22 + Value =/ HexFloat / HexDouble + HexFloat = "#xf" %x22 4(ws 2HEXDIG) ws %x22 + HexDouble = "#xd" %x22 8(ws 2HEXDIG) ws %x22 [^rationale-no-general-machine-syntax]: **Rationale.** Previous versions of this specification included an escape to the [machine-oriented binary syntax](preserves-binary.html) by prefixing a `ByteString` - containing the binary representation of the `Value` with `#=`. The only + containing the binary representation of a `Value` with `#=`. The only true need for this feature was to represent otherwise-unrepresentable floating-point values. Instead, this specification allows such floating-point values to be written directly. Removing the `#=` syntax diff --git a/tests/samples.bin b/tests/samples.bin index 5d5732a6cb96e095f414352381500b339b60cae5..3d595499fde0ec03d89756b410d7d69bed69395b 100644 GIT binary patch delta 1648 zcmaJ>J#W)M816N3Q&aWB{bE62wF9cGL%DosLJUPhs8t8V?lewoDI`*2O4{YB#Mz6o zAk?8N8|K=LAHWPde}V1{<-N1-?3A>Y^WN#+=eak&=Xvkv^Zx6vc+Rx4mn{9p>3ZML z*_qEYx8>pF5_>v#oLW^YT7L>25k{XDp3p|KZF@Pnh1Op#yzPnLvl)@g4}+DL&IhMs zdWUw)teAE8!1cP9?7^Ph={nn;PP-G#ZT1=)B$qD~OB<3{7zWEb?T&LFZ;{5ta;0i3 za&Z{UziJ)XO}B3DA3~w=dh;ReY*Y(^Z$8=i_x8-`r@u$&K_;4QN zKP(ReYV0}(_8o*9PT)ZjD#IXc?b^Fzdm_Fkl91{I2r>YI41j<*Bv+R7=?gmgv2eOY zh+Iisee~f4WYg^eG+saVug?9^i$#6LGVOI zv{2lliSCohxLLLSDX>J4Ns_%)M&o`&A}9=*fO!m_yAhttfak1n(_K)uZTw2nO!xR* zt=&{6$uN!J9L`4r6bJzY=^_BwxQ6$mK>>7t5hiAgMa%??m?;)9Gl&>Z$tKpV6DLh>0TG*R1SJ?JR|>goz9n*j Pd9#lcALHh0(rY*Y!^TVE diff --git a/tests/samples.pr b/tests/samples.pr index 9450d49..a99aae9 100644 --- a/tests/samples.pr +++ b/tests/samples.pr @@ -74,9 +74,35 @@ dict3: @"Duplicate key" dict4: @"Unexpected close brace" dict5: @"Missing value" + double0: + double+0: + double-0: double1: double2: + double3: + double4: @"Fewer than 16 digits" + double5: @"More than 16 digits" + double6: @"Invalid chars" + double7: @"Positive infinity" + double8: @"Negative infinity" + double9: @"-NaN" + double10: @"-NaN" + double11: @"+NaN" + double12: @"+NaN" + float0: + float+0: + float-0: float1: + float2: + float3: @"Fewer than 8 digits" + float4: @"More than 8 digits" + float5: @"Invalid chars" + float6: @"Positive infinity" + float7: @"Negative infinity" + float8: @"+NaN" + float9: @"+NaN" + float10: @"-NaN" + float11: @"-NaN" int-257: int-256: int-255: @@ -89,10 +115,13 @@ int-2: int-1: int0: + int+0: + int-0: int1: int12: int13: int127: + int+127: int128: int255: int256: @@ -112,6 +141,8 @@ list8: @"Missing close bracket" list9: @"Unexpected close bracket" list10: @"Missing end byte" + list11: + list12: noinput0: @"No input at all" embed0: embed1: @@ -138,17 +169,22 @@ string5: symbol0: symbol2: + symbol3: + symbol4: + symbol5: + symbol6: + symbol7: + symbol8: + symbol9: + symbol10: + symbol11: + symbol12: + symbol13: tag0: @"Unexpected end tag" tag1: @"Invalid tag" tag2: @"Invalid tag" whitespace0: @"Leading spaces have to eventually yield something" whitespace1: @"No input at all" - value1: - value2: - value3: - value4: - value5: - value6: longlist14: