From 95958721774890bb64b32261afad3a248d90419d Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Sun, 29 Oct 2023 21:04:52 +0100 Subject: [PATCH] Test cases and implementation updates for delimiters following Boolean and SymbolOrNumber. Closes #47 --- .../javascript/packages/core/src/reader.ts | 23 ++-- .../packages/core/test/codec.test.ts | 8 ++ implementations/python/preserves/text.py | 21 ++-- implementations/python/tests/samples.bin | Bin 10467 -> 10848 bytes implementations/python/tests/samples.pr | 8 +- .../python/tests/test_preserves.py | 2 + .../racket/preserves/preserves/read-text.rkt | 26 +++-- .../preserves/preserves/tests/samples.pr | 8 +- .../preserves/preserves/tests/test-main.rkt | 2 + .../rust/preserves/src/value/text/reader.rs | 104 ++++++++++-------- tests/samples.bin | Bin 10467 -> 10848 bytes tests/samples.pr | 8 +- 12 files changed, 131 insertions(+), 79 deletions(-) diff --git a/implementations/javascript/packages/core/src/reader.ts b/implementations/javascript/packages/core/src/reader.ts index 3eaa823..50bec7b 100644 --- a/implementations/javascript/packages/core/src/reader.ts +++ b/implementations/javascript/packages/core/src/reader.ts @@ -158,14 +158,19 @@ export class ReaderState { return decodeBase64(acc); } + requireDelimiter(prefix: string): void { + if (this.delimiterFollows()) return; + this.error(`Delimiter must follow ${prefix}`, this.pos); + } + + delimiterFollows(): boolean { + if (this.atEnd()) return true; + const ch = this.peek(); + return ('(){}[]<>";,@#:|'.indexOf(ch) !== -1) || isSpace(ch); + } + readRawSymbolOrNumber(acc: string): Value { - while (true) { - if (this.atEnd()) break; - const ch = this.peek(); - if (('(){}[]<>";,@#:|'.indexOf(ch) !== -1) || isSpace(ch)) break; - this.advance(); - acc = acc + ch; - } + while (!this.delimiterFollows()) acc = acc + this.nextchar(); const m = NUMBER_RE.exec(acc); if (m) { if (m[2] === void 0) { @@ -344,8 +349,8 @@ export class Reader { case '#': { const c = this.state.nextchar(); switch (c) { - case 'f': return false; - case 't': return true; + case 'f': this.state.requireDelimiter('#f'); return false; + case 't': this.state.requireDelimiter('#t'); return true; case '{': return this.readSet(); case '"': return this.state.readLiteralBinary(); case 'x': switch (this.state.nextchar()) { diff --git a/implementations/javascript/packages/core/test/codec.test.ts b/implementations/javascript/packages/core/test/codec.test.ts index 046eced..3b21b76 100644 --- a/implementations/javascript/packages/core/test/codec.test.ts +++ b/implementations/javascript/packages/core/test/codec.test.ts @@ -242,6 +242,14 @@ describe('common test suite', () => { forward: annotate([], Symbol.for('a'), Symbol.for('b'), Symbol.for('c')), back: [] }, + delimiters4: { + forward: [false, annotate(true, "a line comment")], + back: [false, true], + }, + delimiters5: { + forward: [false, annotate(true, Symbol.for('ann'))], + back: [false, true], + }, list1: { forward: [1, 2, 3, 4], back: [1, 2, 3, 4] diff --git a/implementations/python/preserves/text.py b/implementations/python/preserves/text.py index 6caf9a4..decaeee 100644 --- a/implementations/python/preserves/text.py +++ b/implementations/python/preserves/text.py @@ -303,13 +303,18 @@ class Parser(TextCodec): raise DecodeError('Missing expected key/value separator') acc.append(self.next()) + def require_delimiter(self, prefix): + if not self.delimiter_follows(): + raise DecodeError('Delimiter must follow ' + prefix) + + def delimiter_follows(self): + if self._atend(): return True + c = self.peek() + return c.isspace() or c in '(){}[]<>";,@#:|' + def read_raw_symbol_or_number(self, acc): - while not self._atend(): - c = self.peek() - if c.isspace() or c in '(){}[]<>";,@#:|': - break - self.skip() - acc.append(c) + while not self.delimiter_follows(): + acc.append(self.nextchar()) acc = u''.join(acc) m = NUMBER_RE.match(acc) if m: @@ -350,8 +355,8 @@ class Parser(TextCodec): if c == '#': self.skip() c = self.nextchar() - if c == 'f': return self.wrap(False) - if c == 't': return self.wrap(True) + if c == 'f': self.require_delimiter('#f'); return self.wrap(False) + if c == 't': self.require_delimiter('#t'); return self.wrap(True) if c == '{': return self.wrap(self.read_set()) if c == '"': return self.wrap(self.read_literal_binary()) if c == 'x': diff --git a/implementations/python/tests/samples.bin b/implementations/python/tests/samples.bin index 6dcac25923b83295a8072ad5946b5994df948fd1..8ea5e2832e908b0639c81d46d22dc9dc8d61f87c 100644 GIT binary patch delta 396 zcmaLSy9&ZE6b9fyL8;*8;&e)J_xcE~qM(yF#Ck#tZ6Zm)u@&s>Vk-gf)*Sl~xx{8x7@WGaNQC;6VN JV~*~kb_ZS7po#zh delta 12 TcmaD5@;GoqpW bytes12: @"Bytes syntax only supports \\x, not \\u" bytes13: - + delimiters0: + delimiters1: @"Note no space between the falses here" + delimiters2: + delimiters3: @"No space between the #f and the foo" + delimiters4: @"Note no space after the #f" + delimiters5: @"Note no space after the #f" dict0: dict1: dict2: @"Missing close brace" diff --git a/implementations/python/tests/test_preserves.py b/implementations/python/tests/test_preserves.py index 0a335e2..39c34ff 100644 --- a/implementations/python/tests/test_preserves.py +++ b/implementations/python/tests/test_preserves.py @@ -238,6 +238,8 @@ expected_values = { "back": _R('R', Symbol('f')) }, "annotation7": { "forward": annotate([], Symbol('a'), Symbol('b'), Symbol('c')), "back": () }, + "delimiters4": { "forward": [False, annotate(True, 'a line comment')], "back": [False, True] }, + "delimiters5": { "forward": [False, annotate(True, Symbol('ann'))], "back": [False, True] }, "record2": { "value": _R('observe', _R('speak', _R('discard'), _R('capture', _R('discard')))) }, } diff --git a/implementations/racket/preserves/preserves/read-text.rkt b/implementations/racket/preserves/preserves/read-text.rkt index b3e20c2..bbd0cf5 100644 --- a/implementations/racket/preserves/preserves/read-text.rkt +++ b/implementations/racket/preserves/preserves/read-text.rkt @@ -77,8 +77,8 @@ [#\: (parse-error "Unexpected key/value separator between items")] [#\# (match (next-char) - [#\f #f] - [#\t #t] + [#\f (unless (delimiter-follows?) (parse-error "Delimiter must follow #f")) #f] + [#\t (unless (delimiter-follows?) (parse-error "Delimiter must follow #t")) #t] [#\{ (sequence-fold (set) set-add* values #\})] [#\" (read-literal-binary)] [#\x (match (next-char) @@ -296,16 +296,20 @@ ;;--------------------------------------------------------------------------- ;; "Raw" symbols and numbers + (define (delimiter-follows?) + (define c (peek-char in-port)) + (or (eof-object? c) + (char-whitespace? c) + (eqv? c PIPE) + (memv c '(#\< #\> #\[ #\] #\{ #\} #\( #\) + #\# #\: #\" #\@ #\; #\,)))) + (define (read-raw-symbol-or-number acc) - (match (peek-char in-port) - [(or (? eof-object?) - (? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\> - #\" #\; #\, #\@ #\# #\: (== PIPE) - (? char-whitespace?)))) - (let ((input (reverse acc))) - (or (analyze-number input) - (string->symbol (list->string input))))] - [_ (read-raw-symbol-or-number (cons (read-char in-port) acc))])) + (if (delimiter-follows?) + (let ((input (reverse acc))) + (or (analyze-number input) + (string->symbol (list->string input)))) + (read-raw-symbol-or-number (cons (read-char in-port) acc)))) (define (analyze-number input) (match input diff --git a/implementations/racket/preserves/preserves/tests/samples.pr b/implementations/racket/preserves/preserves/tests/samples.pr index e53b902..f969455 100644 --- a/implementations/racket/preserves/preserves/tests/samples.pr +++ b/implementations/racket/preserves/preserves/tests/samples.pr @@ -67,7 +67,13 @@ bytes11: bytes12: @"Bytes syntax only supports \\x, not \\u" bytes13: - + delimiters0: + delimiters1: @"Note no space between the falses here" + delimiters2: + delimiters3: @"No space between the #f and the foo" + delimiters4: @"Note no space after the #f" + delimiters5: @"Note no space after the #f" dict0: dict1: dict2: @"Missing close brace" diff --git a/implementations/racket/preserves/preserves/tests/test-main.rkt b/implementations/racket/preserves/preserves/tests/test-main.rkt index 5f35914..c3681ec 100644 --- a/implementations/racket/preserves/preserves/tests/test-main.rkt +++ b/implementations/racket/preserves/preserves/tests/test-main.rkt @@ -120,6 +120,8 @@ 'annotation5 (asymmetric (annotate `#s(R ,(annotate 'f 'af)) 'ar) `#s(R f)) 'annotation6 (asymmetric (record (annotate 'R 'ar) (list (annotate 'f 'af))) `#s(R f)) 'annotation7 (asymmetric (annotate '() 'a 'b 'c) '()) + 'delimiters4 (asymmetric (list #f (annotate #t "a line comment")) (list #f #t)) + 'delimiters5 (asymmetric (list #f (annotate #t 'ann)) (list #f #t)) )) (define (run-test-case variety t-name loc binary-form annotated-text-form) diff --git a/implementations/rust/preserves/src/value/text/reader.rs b/implementations/rust/preserves/src/value/text/reader.rs index 9feda60..e2a104f 100644 --- a/implementations/rust/preserves/src/value/text/reader.rs +++ b/implementations/rust/preserves/src/value/text/reader.rs @@ -346,59 +346,67 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' } } + fn require_delimiter(&mut self, msg: &'static str) -> io::Result<()> { + if self.delimiter_follows()? { + Ok(()) + } else { + Err(io_syntax_error(msg)) + } + } + + fn delimiter_follows(&mut self) -> io::Result { + let c = match self.peek() { + Err(e) if is_eof_io_error(&e) => return Ok(true), + Err(e) => return Err(e)?, + Ok(c) if (c as char).is_whitespace() => return Ok(true), + Ok(c) => c, + }; + Ok(match c { + b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | b'"' | b';' | b',' + | b'@' | b'#' | b':' | b'|' | b' ' => true, + _ => false, + }) + } + fn read_raw_symbol_or_number(&mut self, mut bs: Vec) -> io::Result { lazy_static! { static ref NUMBER_RE: regex::Regex = regex::Regex::new(r"^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$") .unwrap(); } - loop { - let c = match self.peek() { - Err(e) if is_eof_io_error(&e) => b' ', - Err(e) => return Err(e)?, - Ok(c) if (c as char).is_whitespace() => b' ', - Ok(c) => c, - }; - match c { - b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | b'"' | b';' | b',' - | b'@' | b'#' | b':' | b'|' | b' ' => { - let s = decode_utf8(bs)?; - return match NUMBER_RE.captures(&s) { - None => Ok(N::symbol(&s)), - Some(m) => match m.get(2) { - None => Ok(N::new(s.parse::().map_err(|_| { - io_syntax_error(&format!("Invalid signed-integer number: {:?}", s)) - })?)), - Some(_) => { - if let Some(maybe_f) = m.get(7) { - let s = m[1].to_owned() + &m[3]; - if maybe_f.range().is_empty() { - Ok(N::new(s.parse::().map_err(|_| { - io_syntax_error(&format!( - "Invalid double-precision number: {:?}", - s - )) - })?)) - } else { - Ok(N::new(s.parse::().map_err(|_| { - io_syntax_error(&format!( - "Invalid single-precision number: {:?}", - s - )) - })?)) - } - } else { - panic!("Internal error: cannot analyze number {:?}", s) - } - } - }, - }; + while !self.delimiter_follows()? { + bs.push(self.next_byte()?); + } + let s = decode_utf8(bs)?; + match NUMBER_RE.captures(&s) { + None => Ok(N::symbol(&s)), + Some(m) => match m.get(2) { + None => Ok(N::new(s.parse::().map_err(|_| { + io_syntax_error(&format!("Invalid signed-integer number: {:?}", s)) + })?)), + Some(_) => { + if let Some(maybe_f) = m.get(7) { + let s = m[1].to_owned() + &m[3]; + if maybe_f.range().is_empty() { + Ok(N::new(s.parse::().map_err(|_| { + io_syntax_error(&format!( + "Invalid double-precision number: {:?}", + s + )) + })?)) + } else { + Ok(N::new(s.parse::().map_err(|_| { + io_syntax_error(&format!( + "Invalid single-precision number: {:?}", + s + )) + })?)) + } + } else { + panic!("Internal error: cannot analyze number {:?}", s) + } } - c => { - self.skip()?; - bs.push(c) - } - } + }, } } } @@ -441,8 +449,8 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' b'#' => { self.skip()?; match self.next_byte()? { - b'f' => N::new(false), - b't' => N::new(true), + b'f' => { self.require_delimiter("Delimiter must follow #f")?; N::new(false) } + b't' => { self.require_delimiter("Delimiter must follow #t")?; N::new(true) } b'{' => self.read_set(read_annotations)?, b'"' => self.read_literal_binary()?, b'x' => match self.next_byte()? { diff --git a/tests/samples.bin b/tests/samples.bin index 6dcac25923b83295a8072ad5946b5994df948fd1..8ea5e2832e908b0639c81d46d22dc9dc8d61f87c 100644 GIT binary patch delta 396 zcmaLSy9&ZE6b9fyL8;*8;&e)J_xcE~qM(yF#Ck#tZ6Zm)u@&s>Vk-gf)*Sl~xx{8x7@WGaNQC;6VN JV~*~kb_ZS7po#zh delta 12 TcmaD5@;GoqpW bytes12: @"Bytes syntax only supports \\x, not \\u" bytes13: - + delimiters0: + delimiters1: @"Note no space between the falses here" + delimiters2: + delimiters3: @"No space between the #f and the foo" + delimiters4: @"Note no space after the #f" + delimiters5: @"Note no space after the #f" dict0: dict1: dict2: @"Missing close brace"