Test cases and implementation updates for delimiters following Boolean and SymbolOrNumber. Closes #47

This commit is contained in:
Tony Garnock-Jones 2023-10-29 21:04:52 +01:00
parent e8c0a2565e
commit 9595872177
12 changed files with 131 additions and 79 deletions

View File

@ -158,14 +158,19 @@ export class ReaderState {
return decodeBase64(acc);
}
/**
 * Insists that the token just read is properly terminated.
 *
 * If `delimiterFollows()` reports no delimiter at the current position,
 * an error is reported through `this.error` at `this.pos`, with a message
 * naming the offending token.
 *
 * @param prefix Text of the token that was just read (for example `#f`);
 *               used only to build the error message.
 */
requireDelimiter(prefix: string): void {
    if (!this.delimiterFollows()) {
        this.error(`Delimiter must follow ${prefix}`, this.pos);
    }
}
/**
 * Tells whether the current position marks the end of a token.
 *
 * The answer is `true` when the input is exhausted, or when the character
 * reported by `this.peek()` is one of the punctuation characters
 * `( ) { } [ ] < > " ; , @ # : |` or satisfies `isSpace`.
 */
delimiterFollows(): boolean {
    // Running out of input terminates whatever token came before.
    if (this.atEnd()) {
        return true;
    }
    const next = this.peek();
    const isPunctuation = '(){}[]<>";,@#:|'.indexOf(next) >= 0;
    return isPunctuation || isSpace(next);
}
readRawSymbolOrNumber<T>(acc: string): Value<T> {
while (true) {
if (this.atEnd()) break;
const ch = this.peek();
if (('(){}[]<>";,@#:|'.indexOf(ch) !== -1) || isSpace(ch)) break;
this.advance();
acc = acc + ch;
}
while (!this.delimiterFollows()) acc = acc + this.nextchar();
const m = NUMBER_RE.exec(acc);
if (m) {
if (m[2] === void 0) {
@ -344,8 +349,8 @@ export class Reader<T> {
case '#': {
const c = this.state.nextchar();
switch (c) {
case 'f': return false;
case 't': return true;
case 'f': this.state.requireDelimiter('#f'); return false;
case 't': this.state.requireDelimiter('#t'); return true;
case '{': return this.readSet();
case '"': return this.state.readLiteralBinary();
case 'x': switch (this.state.nextchar()) {

View File

@ -242,6 +242,14 @@ describe('common test suite', () => {
forward: annotate<GenericEmbedded>([], Symbol.for('a'), Symbol.for('b'), Symbol.for('c')),
back: []
},
delimiters4: {
forward: [false, annotate<GenericEmbedded>(true, "a line comment")],
back: [false, true],
},
delimiters5: {
forward: [false, annotate<GenericEmbedded>(true, Symbol.for('ann'))],
back: [false, true],
},
list1: {
forward: [1, 2, 3, 4],
back: [1, 2, 3, 4]

View File

@ -303,13 +303,18 @@ class Parser(TextCodec):
raise DecodeError('Missing expected key/value separator')
acc.append(self.next())
def require_delimiter(self, prefix):
    """Raise DecodeError unless a delimiter follows the token just read.

    `prefix` is the text of that token (for example '#f'); it is used only
    to build the error message.
    """
    if self.delimiter_follows():
        return
    raise DecodeError('Delimiter must follow ' + prefix)
def delimiter_follows(self):
    """Return True when the current position ends a token.

    That is the case at end of input, or when the character returned by
    `self.peek()` is whitespace or one of the punctuation characters
    ( ) { } [ ] < > " ; , @ # : |
    """
    if not self._atend():
        upcoming = self.peek()
        return upcoming.isspace() or upcoming in '(){}[]<>";,@#:|'
    # Nothing left to read: end of input terminates any token.
    return True
def read_raw_symbol_or_number(self, acc):
while not self._atend():
c = self.peek()
if c.isspace() or c in '(){}[]<>";,@#:|':
break
self.skip()
acc.append(c)
while not self.delimiter_follows():
acc.append(self.nextchar())
acc = u''.join(acc)
m = NUMBER_RE.match(acc)
if m:
@ -350,8 +355,8 @@ class Parser(TextCodec):
if c == '#':
self.skip()
c = self.nextchar()
if c == 'f': return self.wrap(False)
if c == 't': return self.wrap(True)
if c == 'f': self.require_delimiter('#f'); return self.wrap(False)
if c == 't': self.require_delimiter('#t'); return self.wrap(True)
if c == '{': return self.wrap(self.read_set())
if c == '"': return self.wrap(self.read_literal_binary())
if c == 'x':

View File

@ -67,7 +67,13 @@
bytes11: <Test #"\xB2\x02Hi" #[S G k]>
bytes12: @"Bytes syntax only supports \\x, not \\u" <ParseError "#\"\\u6c34\"">
bytes13: <Test #x"B2 11 61 62 63 6c 34 f0 5c 2f 22 08 0c 0a 0d 09 78 79 7a" #"abc\x6c\x34\xf0\\/\"\b\f\n\r\txyz">
delimiters0: <Test #x"B5808084" [#f #f]>
delimiters1: @"Note no space between the falses here" <Test #x"B5808084" [#f#f]>
delimiters2: <Test #x"B580B303666f6f84" [#f foo]>
delimiters3: @"No space between the #f and the foo" <ParseError "[#ffoo]">
delimiters4: @"Note no space after the #f" <Test #"\xB5\x80\x85\xB1\x0Ea line comment\x81\x84" [#f;a line comment
#t]>
delimiters5: @"Note no space after the #f" <Test #"\xB5\x80\x85\xB3\x03ann\x81\x84" [#f@ann #t]>
dict0: <Test #x"B784" {}>
dict1: <NondeterministicTest #x"b7 b10162 81 b30161 b00101 b5b00101b00102b0010384 b20163 b7 b30a66697273742d6e616d65 b109456c697a6162657468 84 b7 b3077375726e616d65 b109426c61636b77656c6c 84 84" { a: 1 "b": #t [1 2 3]: #"c" { first-name: "Elizabeth" }: { surname: "Blackwell" } }>
dict2: @"Missing close brace" <ParseShort "{ a: b, c: d ">

View File

@ -238,6 +238,8 @@ expected_values = {
"back": _R('R', Symbol('f')) },
"annotation7": { "forward": annotate([], Symbol('a'), Symbol('b'), Symbol('c')),
"back": () },
"delimiters4": { "forward": [False, annotate(True, 'a line comment')], "back": [False, True] },
"delimiters5": { "forward": [False, annotate(True, Symbol('ann'))], "back": [False, True] },
"record2": { "value": _R('observe', _R('speak', _R('discard'), _R('capture', _R('discard')))) },
}

View File

@ -77,8 +77,8 @@
[#\: (parse-error "Unexpected key/value separator between items")]
[#\# (match (next-char)
[#\f #f]
[#\t #t]
[#\f (unless (delimiter-follows?) (parse-error "Delimiter must follow #f")) #f]
[#\t (unless (delimiter-follows?) (parse-error "Delimiter must follow #t")) #t]
[#\{ (sequence-fold (set) set-add* values #\})]
[#\" (read-literal-binary)]
[#\x (match (next-char)
@ -296,16 +296,20 @@
;;---------------------------------------------------------------------------
;; "Raw" symbols and numbers
(define (delimiter-follows?)
  ;; Produces a true value when the next character on `in-port` ends the
  ;; current token: end of input, whitespace, PIPE, or one of the listed
  ;; punctuation characters. The character is only peeked at, not consumed.
  ;; Note that the punctuation case yields the `memv` list tail rather than
  ;; #t, so callers should rely on truthiness only.
  (let ([next (peek-char in-port)])
    (or (eof-object? next)
        (char-whitespace? next)
        (eqv? next PIPE)
        (memv next '(#\< #\> #\[ #\] #\{ #\} #\( #\) #\# #\: #\" #\@ #\; #\,)))))
(define (read-raw-symbol-or-number acc)
(match (peek-char in-port)
[(or (? eof-object?)
(? char? (or #\( #\) #\{ #\} #\[ #\] #\< #\>
#\" #\; #\, #\@ #\# #\: (== PIPE)
(? char-whitespace?))))
(let ((input (reverse acc)))
(or (analyze-number input)
(string->symbol (list->string input))))]
[_ (read-raw-symbol-or-number (cons (read-char in-port) acc))]))
(if (delimiter-follows?)
(let ((input (reverse acc)))
(or (analyze-number input)
(string->symbol (list->string input))))
(read-raw-symbol-or-number (cons (read-char in-port) acc))))
(define (analyze-number input)
(match input

View File

@ -67,7 +67,13 @@
bytes11: <Test #"\xB2\x02Hi" #[S G k]>
bytes12: @"Bytes syntax only supports \\x, not \\u" <ParseError "#\"\\u6c34\"">
bytes13: <Test #x"B2 11 61 62 63 6c 34 f0 5c 2f 22 08 0c 0a 0d 09 78 79 7a" #"abc\x6c\x34\xf0\\/\"\b\f\n\r\txyz">
delimiters0: <Test #x"B5808084" [#f #f]>
delimiters1: @"Note no space between the falses here" <Test #x"B5808084" [#f#f]>
delimiters2: <Test #x"B580B303666f6f84" [#f foo]>
delimiters3: @"No space between the #f and the foo" <ParseError "[#ffoo]">
delimiters4: @"Note no space after the #f" <Test #"\xB5\x80\x85\xB1\x0Ea line comment\x81\x84" [#f;a line comment
#t]>
delimiters5: @"Note no space after the #f" <Test #"\xB5\x80\x85\xB3\x03ann\x81\x84" [#f@ann #t]>
dict0: <Test #x"B784" {}>
dict1: <NondeterministicTest #x"b7 b10162 81 b30161 b00101 b5b00101b00102b0010384 b20163 b7 b30a66697273742d6e616d65 b109456c697a6162657468 84 b7 b3077375726e616d65 b109426c61636b77656c6c 84 84" { a: 1 "b": #t [1 2 3]: #"c" { first-name: "Elizabeth" }: { surname: "Blackwell" } }>
dict2: @"Missing close brace" <ParseShort "{ a: b, c: d ">

View File

@ -120,6 +120,8 @@
'annotation5 (asymmetric (annotate `#s(R ,(annotate 'f 'af)) 'ar) `#s(R f))
'annotation6 (asymmetric (record (annotate 'R 'ar) (list (annotate 'f 'af))) `#s(R f))
'annotation7 (asymmetric (annotate '() 'a 'b 'c) '())
'delimiters4 (asymmetric (list #f (annotate #t "a line comment")) (list #f #t))
'delimiters5 (asymmetric (list #f (annotate #t 'ann)) (list #f #t))
))
(define (run-test-case variety t-name loc binary-form annotated-text-form)

View File

@ -346,59 +346,67 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
}
}
/// Fails with a syntax error carrying `msg` unless a delimiter (as judged by
/// `delimiter_follows`) comes next in the input.
///
/// Errors from `delimiter_follows` itself are propagated unchanged.
fn require_delimiter(&mut self, msg: &'static str) -> io::Result<()> {
    if !self.delimiter_follows()? {
        return Err(io_syntax_error(msg));
    }
    Ok(())
}
/// Reports whether the next input byte terminates the current token.
///
/// Returns `Ok(true)` at end of input, on an ASCII whitespace byte, or on one
/// of the punctuation bytes `( ) { } [ ] < > " ; , @ # : |`; `Ok(false)` for
/// any other byte. Errors from `peek` other than end-of-input are propagated.
fn delimiter_follows(&mut self) -> io::Result<bool> {
    let c = match self.peek() {
        Err(e) if is_eof_io_error(&e) => return Ok(true),
        Err(e) => return Err(e)?,
        Ok(c) => c,
    };
    // Only ASCII bytes may be classified as whitespace here. Widening a raw
    // byte with `as char` maps 0x85 and 0xA0 to U+0085 (NEL) and U+00A0
    // (NBSP), which both satisfy `char::is_whitespace`; but those bytes also
    // appear as UTF-8 continuation bytes (e.g. "à" is C3 A0), so treating
    // them as delimiters would split a multi-byte character in half and make
    // the subsequent UTF-8 decode of the token fail.
    if c.is_ascii() && (c as char).is_whitespace() {
        return Ok(true);
    }
    Ok(match c {
        b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | b'"' | b';' | b','
        | b'@' | b'#' | b':' | b'|' => true,
        _ => false,
    })
}
fn read_raw_symbol_or_number(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
lazy_static! {
static ref NUMBER_RE: regex::Regex =
regex::Regex::new(r"^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$")
.unwrap();
}
loop {
let c = match self.peek() {
Err(e) if is_eof_io_error(&e) => b' ',
Err(e) => return Err(e)?,
Ok(c) if (c as char).is_whitespace() => b' ',
Ok(c) => c,
};
match c {
b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | b'"' | b';' | b','
| b'@' | b'#' | b':' | b'|' | b' ' => {
let s = decode_utf8(bs)?;
return match NUMBER_RE.captures(&s) {
None => Ok(N::symbol(&s)),
Some(m) => match m.get(2) {
None => Ok(N::new(s.parse::<BigInt>().map_err(|_| {
io_syntax_error(&format!("Invalid signed-integer number: {:?}", s))
})?)),
Some(_) => {
if let Some(maybe_f) = m.get(7) {
let s = m[1].to_owned() + &m[3];
if maybe_f.range().is_empty() {
Ok(N::new(s.parse::<f64>().map_err(|_| {
io_syntax_error(&format!(
"Invalid double-precision number: {:?}",
s
))
})?))
} else {
Ok(N::new(s.parse::<f32>().map_err(|_| {
io_syntax_error(&format!(
"Invalid single-precision number: {:?}",
s
))
})?))
}
} else {
panic!("Internal error: cannot analyze number {:?}", s)
}
}
},
};
while !self.delimiter_follows()? {
bs.push(self.next_byte()?);
}
let s = decode_utf8(bs)?;
match NUMBER_RE.captures(&s) {
None => Ok(N::symbol(&s)),
Some(m) => match m.get(2) {
None => Ok(N::new(s.parse::<BigInt>().map_err(|_| {
io_syntax_error(&format!("Invalid signed-integer number: {:?}", s))
})?)),
Some(_) => {
if let Some(maybe_f) = m.get(7) {
let s = m[1].to_owned() + &m[3];
if maybe_f.range().is_empty() {
Ok(N::new(s.parse::<f64>().map_err(|_| {
io_syntax_error(&format!(
"Invalid double-precision number: {:?}",
s
))
})?))
} else {
Ok(N::new(s.parse::<f32>().map_err(|_| {
io_syntax_error(&format!(
"Invalid single-precision number: {:?}",
s
))
})?))
}
} else {
panic!("Internal error: cannot analyze number {:?}", s)
}
}
c => {
self.skip()?;
bs.push(c)
}
}
},
}
}
}
@ -441,8 +449,8 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
b'#' => {
self.skip()?;
match self.next_byte()? {
b'f' => N::new(false),
b't' => N::new(true),
b'f' => { self.require_delimiter("Delimiter must follow #f")?; N::new(false) }
b't' => { self.require_delimiter("Delimiter must follow #t")?; N::new(true) }
b'{' => self.read_set(read_annotations)?,
b'"' => self.read_literal_binary()?,
b'x' => match self.next_byte()? {

Binary file not shown.

View File

@ -67,7 +67,13 @@
bytes11: <Test #"\xB2\x02Hi" #[S G k]>
bytes12: @"Bytes syntax only supports \\x, not \\u" <ParseError "#\"\\u6c34\"">
bytes13: <Test #x"B2 11 61 62 63 6c 34 f0 5c 2f 22 08 0c 0a 0d 09 78 79 7a" #"abc\x6c\x34\xf0\\/\"\b\f\n\r\txyz">
delimiters0: <Test #x"B5808084" [#f #f]>
delimiters1: @"Note no space between the falses here" <Test #x"B5808084" [#f#f]>
delimiters2: <Test #x"B580B303666f6f84" [#f foo]>
delimiters3: @"No space between the #f and the foo" <ParseError "[#ffoo]">
delimiters4: @"Note no space after the #f" <Test #"\xB5\x80\x85\xB1\x0Ea line comment\x81\x84" [#f;a line comment
#t]>
delimiters5: @"Note no space after the #f" <Test #"\xB5\x80\x85\xB3\x03ann\x81\x84" [#f@ann #t]>
dict0: <Test #x"B784" {}>
dict1: <NondeterministicTest #x"b7 b10162 81 b30161 b00101 b5b00101b00102b0010384 b20163 b7 b30a66697273742d6e616d65 b109456c697a6162657468 84 b7 b3077375726e616d65 b109426c61636b77656c6c 84 84" { a: 1 "b": #t [1 2 3]: #"c" { first-name: "Elizabeth" }: { surname: "Blackwell" } }>
dict2: @"Missing close brace" <ParseShort "{ a: b, c: d ">