Test cases and implementation updates for delimiters following Boolean and SymbolOrNumber. Closes #47

2023-10-29 21:04:52 +01:00 · 2023-10-29 21:04:52 +01:00 · 9595872177
parent e8c0a2565e
commit 9595872177
12 changed files with 131 additions and 79 deletions
--- a/implementations/javascript/packages/core/src/reader.ts
+++ b/implementations/javascript/packages/core/src/reader.ts
@ -158,14 +158,19 @@ export class ReaderState {
        return decodeBase64(acc);
    }

+    /**
+     * Calls `this.error` with the current position unless
+     * `delimiterFollows()` holds.
+     *
+     * @param prefix - Text of the token just read (e.g. `#f`); interpolated
+     *                 into the error message.
+     */
+    requireDelimiter(prefix: string): void {
+        if (!this.delimiterFollows()) {
+            this.error(`Delimiter must follow ${prefix}`, this.pos);
+        }
+    }
+
+    /**
+     * Tests whether the current token is finished, looking at the next
+     * character via `peek()`.
+     *
+     * @returns `true` at end of input, or when the peeked character is one
+     *          of `(){}[]<>";,@#:|` or satisfies `isSpace`; `false` otherwise.
+     */
+    delimiterFollows(): boolean {
+        if (this.atEnd()) return true;
+        const next = this.peek();
+        const isPunctuation = '(){}[]<>";,@#:|'.indexOf(next) !== -1;
+        return isPunctuation || isSpace(next);
+    }
+
    readRawSymbolOrNumber<T>(acc: string): Value<T> {
-        while (true) {
-            if (this.atEnd()) break;
-            const ch = this.peek();
-            if (('(){}[]<>";,@#:|'.indexOf(ch) !== -1) || isSpace(ch)) break;
-            this.advance();
-            acc = acc + ch;
-        }
+        while (!this.delimiterFollows()) acc = acc + this.nextchar();
        const m = NUMBER_RE.exec(acc);
        if (m) {
            if (m[2] === void 0) {
@ -344,8 +349,8 @@ export class Reader<T> {
                case '#': {
                    const c = this.state.nextchar();
                    switch (c) {
-                        case 'f': return false;
-                        case 't': return true;
+                        case 'f': this.state.requireDelimiter('#f'); return false;
+                        case 't': this.state.requireDelimiter('#t'); return true;
                        case '{': return this.readSet();
                        case '"': return this.state.readLiteralBinary();
                        case 'x': switch (this.state.nextchar()) {
--- a/implementations/javascript/packages/core/test/codec.test.ts
+++ b/implementations/javascript/packages/core/test/codec.test.ts
@ -242,6 +242,14 @@ describe('common test suite', () => {
            forward: annotate<GenericEmbedded>([], Symbol.for('a'), Symbol.for('b'), Symbol.for('c')),
            back: []
        },
+        delimiters4: {
+            forward: [false, annotate<GenericEmbedded>(true, "a line comment")],
+            back: [false, true],
+        },
+        delimiters5: {
+            forward: [false, annotate<GenericEmbedded>(true, Symbol.for('ann'))],
+            back: [false, true],
+        },
        list1: {
            forward: [1, 2, 3, 4],
            back: [1, 2, 3, 4]
--- a/implementations/python/preserves/text.py
+++ b/implementations/python/preserves/text.py
@ -303,13 +303,18 @@ class Parser(TextCodec):
                raise DecodeError('Missing expected key/value separator')
            acc.append(self.next())

+    def require_delimiter(self, prefix):
+        if not self.delimiter_follows():
+            raise DecodeError('Delimiter must follow ' + prefix)
+
+    def delimiter_follows(self):
+        if self._atend(): return True
+        c = self.peek()
+        return c.isspace() or c in '(){}[]<>";,@#:|'
+
    def read_raw_symbol_or_number(self, acc):
-        while not self._atend():
-            c = self.peek()
-            if c.isspace() or c in '(){}[]<>";,@#:|':
-                break
-            self.skip()
-            acc.append(c)
+        while not self.delimiter_follows():
+            acc.append(self.nextchar())
        acc = u''.join(acc)
        m = NUMBER_RE.match(acc)
        if m:
@ -350,8 +355,8 @@ class Parser(TextCodec):
        if c == '#':
            self.skip()
            c = self.nextchar()
-            if c == 'f': return self.wrap(False)
-            if c == 't': return self.wrap(True)
+            if c == 'f': self.require_delimiter('#f'); return self.wrap(False)
+            if c == 't': self.require_delimiter('#t'); return self.wrap(True)
            if c == '{': return self.wrap(self.read_set())
            if c == '"': return self.wrap(self.read_literal_binary())
            if c == 'x':
--- a/implementations/python/tests/samples.bin
+++ b/implementations/python/tests/samples.bin
--- a/implementations/python/tests/samples.pr
+++ b/implementations/python/tests/samples.pr
@ -67,7 +67,13 @@
  bytes11: <Test #"\xB2\x02Hi" #[S G k]>
  bytes12: @"Bytes syntax only supports \\x, not \\u" <ParseError "#\"\\u6c34\"">
  bytes13: <Test #x"B2 11 61 62 63 6c 34 f0 5c 2f 22 08 0c 0a 0d 09 78 79 7a" #"abc\x6c\x34\xf0\\/\"\b\f\n\r\txyz">
-
+  delimiters0: <Test #x"B5808084" [#f #f]>
+  delimiters1: @"Note no space between the falses here" <Test #x"B5808084" [#f#f]>
+  delimiters2: <Test #x"B580B303666f6f84" [#f foo]>
+  delimiters3: @"No space between the #f and the foo" <ParseError "[#ffoo]">
+  delimiters4: @"Note no space after the #f" <Test #"\xB5\x80\x85\xB1\x0Ea line comment\x81\x84" [#f;a line comment
+#t]>
+  delimiters5:  @"Note no space after the #f" <Test #"\xB5\x80\x85\xB3\x03ann\x81\x84" [#f@ann #t]>
  dict0: <Test #x"B784" {}>
  dict1: <NondeterministicTest #x"b7 b10162 81 b30161 b00101 b5b00101b00102b0010384 b20163 b7 b30a66697273742d6e616d65 b109456c697a6162657468 84 b7 b3077375726e616d65 b109426c61636b77656c6c 84 84" { a: 1 "b": #t [1 2 3]: #"c" { first-name: "Elizabeth" }: { surname: "Blackwell" } }>
  dict2: @"Missing close brace" <ParseShort "{ a: b, c: d ">
--- a/implementations/python/tests/test_preserves.py
+++ b/implementations/python/tests/test_preserves.py
@ -238,6 +238,8 @@ expected_values = {
                     "back": _R('R', Symbol('f')) },
    "annotation7": { "forward": annotate([], Symbol('a'), Symbol('b'), Symbol('c')),
                     "back": () },
+    "delimiters4": { "forward": [False, annotate(True, 'a line comment')], "back": [False, True] },
+    "delimiters5": { "forward": [False, annotate(True, Symbol('ann'))], "back": [False, True] },
    "record2": { "value": _R('observe', _R('speak', _R('discard'), _R('capture', _R('discard')))) },
 }

--- a/implementations/racket/preserves/preserves/read-text.rkt
+++ b/implementations/racket/preserves/preserves/read-text.rkt
@ -77,8 +77,8 @@
      [#\: (parse-error "Unexpected key/value separator between items")]

      [#\# (match (next-char)
-             [#\f #f]
-             [#\t #t]
+             [#\f (unless (delimiter-follows?) (parse-error "Delimiter must follow #f")) #f]
+             [#\t (unless (delimiter-follows?) (parse-error "Delimiter must follow #t")) #t]
             [#\{ (sequence-fold (set) set-add* values #\})]
             [#\" (read-literal-binary)]
             [#\x (match (next-char)
@ -296,16 +296,20 @@
  ;;---------------------------------------------------------------------------
  ;; "Raw" symbols and numbers

+  ;; Non-#f when in-port is at end of input, or when its next character
+  ;; (peeked, not read) is whitespace, PIPE, or one of the punctuation
+  ;; characters listed below; #f otherwise.
+  (define (delimiter-follows?)
+    (let ([next (peek-char in-port)])
+      (cond
+        [(eof-object? next) #t]
+        [(char-whitespace? next) #t]
+        [(eqv? next PIPE) #t]
+        [else (memv next '(#\< #\> #\[ #\] #\{ #\} #\( #\)
+                           #\# #\: #\" #\@ #\; #\,))])))
+
  (define (read-raw-symbol-or-number acc)
-    (match (peek-char in-port)
-      [(or (? eof-object?)
-           (? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\>
-                        #\" #\; #\, #\@ #\# #\: (== PIPE)
-                        (? char-whitespace?))))
-       (let ((input (reverse acc)))
-         (or (analyze-number input)
-             (string->symbol (list->string input))))]
-      [_ (read-raw-symbol-or-number (cons (read-char in-port) acc))]))
+    (if (delimiter-follows?)
+        (let ((input (reverse acc)))
+          (or (analyze-number input)
+              (string->symbol (list->string input))))
+        (read-raw-symbol-or-number (cons (read-char in-port) acc))))

  (define (analyze-number input)
    (match input
--- a/implementations/racket/preserves/preserves/tests/samples.pr
+++ b/implementations/racket/preserves/preserves/tests/samples.pr
@ -67,7 +67,13 @@
  bytes11: <Test #"\xB2\x02Hi" #[S G k]>
  bytes12: @"Bytes syntax only supports \\x, not \\u" <ParseError "#\"\\u6c34\"">
  bytes13: <Test #x"B2 11 61 62 63 6c 34 f0 5c 2f 22 08 0c 0a 0d 09 78 79 7a" #"abc\x6c\x34\xf0\\/\"\b\f\n\r\txyz">
-
+  delimiters0: <Test #x"B5808084" [#f #f]>
+  delimiters1: @"Note no space between the falses here" <Test #x"B5808084" [#f#f]>
+  delimiters2: <Test #x"B580B303666f6f84" [#f foo]>
+  delimiters3: @"No space between the #f and the foo" <ParseError "[#ffoo]">
+  delimiters4: @"Note no space after the #f" <Test #"\xB5\x80\x85\xB1\x0Ea line comment\x81\x84" [#f;a line comment
+#t]>
+  delimiters5:  @"Note no space after the #f" <Test #"\xB5\x80\x85\xB3\x03ann\x81\x84" [#f@ann #t]>
  dict0: <Test #x"B784" {}>
  dict1: <NondeterministicTest #x"b7 b10162 81 b30161 b00101 b5b00101b00102b0010384 b20163 b7 b30a66697273742d6e616d65 b109456c697a6162657468 84 b7 b3077375726e616d65 b109426c61636b77656c6c 84 84" { a: 1 "b": #t [1 2 3]: #"c" { first-name: "Elizabeth" }: { surname: "Blackwell" } }>
  dict2: @"Missing close brace" <ParseShort "{ a: b, c: d ">
--- a/implementations/racket/preserves/preserves/tests/test-main.rkt
+++ b/implementations/racket/preserves/preserves/tests/test-main.rkt
@ -120,6 +120,8 @@
        'annotation5 (asymmetric (annotate `#s(R ,(annotate 'f 'af)) 'ar) `#s(R f))
        'annotation6 (asymmetric (record (annotate 'R 'ar) (list (annotate 'f 'af))) `#s(R f))
        'annotation7 (asymmetric (annotate '() 'a 'b 'c) '())
+        'delimiters4 (asymmetric (list #f (annotate #t "a line comment")) (list #f #t))
+        'delimiters5 (asymmetric (list #f (annotate #t 'ann)) (list #f #t))
        ))

 (define (run-test-case variety t-name loc binary-form annotated-text-form)
--- a/implementations/rust/preserves/src/value/text/reader.rs
+++ b/implementations/rust/preserves/src/value/text/reader.rs
@ -346,59 +346,67 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
        }
    }

+    /// Succeeds when `delimiter_follows` reports `true`; otherwise fails with
+    /// a syntax error built from `msg`.
+    ///
+    /// An error returned by `delimiter_follows` itself is propagated.
+    fn require_delimiter(&mut self, msg: &'static str) -> io::Result<()> {
+        if !self.delimiter_follows()? {
+            return Err(io_syntax_error(msg));
+        }
+        Ok(())
+    }
+
+    /// Reports whether the next input byte terminates the token being read.
+    ///
+    /// Returns `Ok(true)` at end of input, or when the peeked byte is ASCII
+    /// whitespace (0x09..=0x0D, 0x20) or one of `(){}[]<>";,@#:|`, and
+    /// `Ok(false)` for any other byte. A non-EOF error from `peek` is
+    /// returned to the caller.
+    fn delimiter_follows(&mut self) -> io::Result<bool> {
+        let c = match self.peek() {
+            Err(e) if is_eof_io_error(&e) => return Ok(true),
+            Err(e) => return Err(e)?,
+            Ok(c) => c,
+        };
+        // Classify the raw byte directly. Casting it to `char` and calling
+        // `char::is_whitespace` would also accept 0x85 and 0xA0 (U+0085 NEL,
+        // U+00A0 NBSP); in UTF-8 input those are continuation bytes of
+        // characters such as "à" (C3 A0), so a symbol containing one would be
+        // cut mid-character and then fail UTF-8 decoding.
+        Ok(matches!(
+            c,
+            b'\t'..=b'\r' | b' '
+                | b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | b'"' | b';' | b','
+                | b'@' | b'#' | b':' | b'|'
+        ))
+    }
+
    fn read_raw_symbol_or_number(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
        lazy_static! {
            static ref NUMBER_RE: regex::Regex =
                regex::Regex::new(r"^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$")
                    .unwrap();
        }
-        loop {
-            let c = match self.peek() {
-                Err(e) if is_eof_io_error(&e) => b' ',
-                Err(e) => return Err(e)?,
-                Ok(c) if (c as char).is_whitespace() => b' ',
-                Ok(c) => c,
-            };
-            match c {
-                b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | b'"' | b';' | b','
-                | b'@' | b'#' | b':' | b'|' | b' ' => {
-                    let s = decode_utf8(bs)?;
-                    return match NUMBER_RE.captures(&s) {
-                        None => Ok(N::symbol(&s)),
-                        Some(m) => match m.get(2) {
-                            None => Ok(N::new(s.parse::<BigInt>().map_err(|_| {
-                                io_syntax_error(&format!("Invalid signed-integer number: {:?}", s))
-                            })?)),
-                            Some(_) => {
-                                if let Some(maybe_f) = m.get(7) {
-                                    let s = m[1].to_owned() + &m[3];
-                                    if maybe_f.range().is_empty() {
-                                        Ok(N::new(s.parse::<f64>().map_err(|_| {
-                                            io_syntax_error(&format!(
-                                                "Invalid double-precision number: {:?}",
-                                                s
-                                            ))
-                                        })?))
-                                    } else {
-                                        Ok(N::new(s.parse::<f32>().map_err(|_| {
-                                            io_syntax_error(&format!(
-                                                "Invalid single-precision number: {:?}",
-                                                s
-                                            ))
-                                        })?))
-                                    }
-                                } else {
-                                    panic!("Internal error: cannot analyze number {:?}", s)
-                                }
-                            }
-                        },
-                    };
+        while !self.delimiter_follows()? {
+            bs.push(self.next_byte()?);
+        }
+        let s = decode_utf8(bs)?;
+        match NUMBER_RE.captures(&s) {
+            None => Ok(N::symbol(&s)),
+            Some(m) => match m.get(2) {
+                None => Ok(N::new(s.parse::<BigInt>().map_err(|_| {
+                    io_syntax_error(&format!("Invalid signed-integer number: {:?}", s))
+                })?)),
+                Some(_) => {
+                    if let Some(maybe_f) = m.get(7) {
+                        let s = m[1].to_owned() + &m[3];
+                        if maybe_f.range().is_empty() {
+                            Ok(N::new(s.parse::<f64>().map_err(|_| {
+                                io_syntax_error(&format!(
+                                    "Invalid double-precision number: {:?}",
+                                    s
+                                ))
+                            })?))
+                        } else {
+                            Ok(N::new(s.parse::<f32>().map_err(|_| {
+                                io_syntax_error(&format!(
+                                    "Invalid single-precision number: {:?}",
+                                    s
+                                ))
+                            })?))
+                        }
+                    } else {
+                        panic!("Internal error: cannot analyze number {:?}", s)
+                    }
                }
-                c => {
-                    self.skip()?;
-                    bs.push(c)
-                }
-            }
+            },
        }
    }
 }
@ -441,8 +449,8 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
            b'#' => {
                self.skip()?;
                match self.next_byte()? {
-                    b'f' => N::new(false),
-                    b't' => N::new(true),
+                    b'f' => { self.require_delimiter("Delimiter must follow #f")?; N::new(false) }
+                    b't' => { self.require_delimiter("Delimiter must follow #t")?; N::new(true) }
                    b'{' => self.read_set(read_annotations)?,
                    b'"' => self.read_literal_binary()?,
                    b'x' => match self.next_byte()? {
--- a/tests/samples.bin
+++ b/tests/samples.bin
--- a/tests/samples.pr
+++ b/tests/samples.pr
@ -67,7 +67,13 @@
  bytes11: <Test #"\xB2\x02Hi" #[S G k]>
  bytes12: @"Bytes syntax only supports \\x, not \\u" <ParseError "#\"\\u6c34\"">
  bytes13: <Test #x"B2 11 61 62 63 6c 34 f0 5c 2f 22 08 0c 0a 0d 09 78 79 7a" #"abc\x6c\x34\xf0\\/\"\b\f\n\r\txyz">
-
+  delimiters0: <Test #x"B5808084" [#f #f]>
+  delimiters1: @"Note no space between the falses here" <Test #x"B5808084" [#f#f]>
+  delimiters2: <Test #x"B580B303666f6f84" [#f foo]>
+  delimiters3: @"No space between the #f and the foo" <ParseError "[#ffoo]">
+  delimiters4: @"Note no space after the #f" <Test #"\xB5\x80\x85\xB1\x0Ea line comment\x81\x84" [#f;a line comment
+#t]>
+  delimiters5:  @"Note no space after the #f" <Test #"\xB5\x80\x85\xB3\x03ann\x81\x84" [#f@ann #t]>
  dict0: <Test #x"B784" {}>
  dict1: <NondeterministicTest #x"b7 b10162 81 b30161 b00101 b5b00101b00102b0010384 b20163 b7 b30a66697273742d6e616d65 b109456c697a6162657468 84 b7 b3077375726e616d65 b109426c61636b77656c6c 84 84" { a: 1 "b": #t [1 2 3]: #"c" { first-name: "Elizabeth" }: { surname: "Blackwell" } }>
  dict2: @"Missing close brace" <ParseShort "{ a: b, c: d ">