diff --git a/implementations/javascript/packages/core/src/reader.ts b/implementations/javascript/packages/core/src/reader.ts index 78de135..7561013 100644 --- a/implementations/javascript/packages/core/src/reader.ts +++ b/implementations/javascript/packages/core/src/reader.ts @@ -96,10 +96,11 @@ export class ReaderState { return this.buffer.charCodeAt(this.advance()); } - skipws() { + skipws(skipCommas = false) { while (true) { if (this.atEnd()) break; - if (!isSpace(this.peek())) break; + const c = this.peek(); + if (!(isSpace(c) || (skipCommas && c === ','))) break; this.advance(); } } @@ -373,14 +374,15 @@ export class Reader { } case '<': { const label = this.next(); - const fields = this.readSequence('>'); + const fields = this.readSequence('>', false); return Record(label, fields); } - case '[': return this.readSequence(']'); + case '[': return this.readSequence(']', true); case '{': return this.readDictionary(); case '>': this.state.error('Unexpected >', startPos); case ']': this.state.error('Unexpected ]', startPos); case '}': this.state.error('Unexpected }', startPos); + case ',': this.state.error('Unexpected ,', startPos); default: return this.state.readRawSymbolOrNumber(c); } @@ -388,9 +390,9 @@ export class Reader { return this.wrap(unwrapped, startPos); } - seq(acc: S, update: (v: Value, acc: S) => void, ch: string): S { + seq(skipCommas: boolean, acc: S, update: (v: Value, acc: S) => void, ch: string): S { while (true) { - this.state.skipws(); + this.state.skipws(skipCommas); if (this.state.peek() === ch) { this.state.advance(); return acc; @@ -399,12 +401,13 @@ export class Reader { } } - readSequence(ch: string): Array> { - return this.seq([] as Array>, (v, acc) => acc.push(v), ch); + readSequence(ch: string, skipCommas: boolean): Array> { + return this.seq(skipCommas, [] as Array>, (v, acc) => acc.push(v), ch); } readDictionary(): Dictionary { - return this.seq(new Dictionary(), + return this.seq(true, + new Dictionary(), (k, acc) => { this.state.skipws(); switch (this.state.peek()) { @@ -422,7 +425,8 @@ export class Reader { } readSet(): Set { - return this.seq(new Set(), + return this.seq(true, + new Set(), (v, acc) => { if (acc.has(v)) this.state.error( `Duplicate value in set: ${stringify(v)}`, this.state.pos); @@ -458,5 +462,5 @@ export function decodeBase64(s: string): Bytes { } function isSpace(s: string): boolean { - return ' \t\n\r,'.indexOf(s) !== -1; + return ' \t\n\r'.indexOf(s) !== -1; } diff --git a/implementations/python/preserves/text.py b/implementations/python/preserves/text.py index 394ce89..a8ed9bf 100644 --- a/implementations/python/preserves/text.py +++ b/implementations/python/preserves/text.py @@ -158,10 +158,10 @@ class Parser(TextCodec): self.skip() return c - def skip_whitespace(self): + def skip_whitespace(self, skip_commas = False): while not self._atend(): c = self.peek() - if not (c.isspace() or c == ','): + if not (c.isspace() or (skip_commas and c == ',')): break self.skip() @@ -261,17 +261,17 @@ class Parser(TextCodec): if bytecount == 8: return struct.unpack('>d', bs)[0] raise DecodeError('Unsupported byte count in hex-encoded floating-point number') - def upto(self, delimiter): + def upto(self, delimiter, skip_commas): vs = [] while True: - self.skip_whitespace() + self.skip_whitespace(skip_commas) if self.peek() == delimiter: self.skip() return tuple(vs) vs.append(self.next()) def read_set(self): - items = self.upto('}') + items = self.upto('}', True) s = set() for i in items: if i in s: raise DecodeError('Duplicate value in set: ' + repr(i)) @@ -281,7 +281,7 @@ class Parser(TextCodec): def read_dictionary(self): acc = [] while True: - self.skip_whitespace() + self.skip_whitespace(True) if self.peek() == '}': self.skip() return ImmutableDict.from_kvs(acc) @@ -368,17 +368,17 @@ class Parser(TextCodec): raise DecodeError('Invalid # syntax') if c == '<': self.skip() - vs = self.upto('>') + vs = self.upto('>', False) if len(vs) == 0: raise DecodeError('Missing record label') return self.wrap(Record(vs[0], vs[1:])) if c == '[': self.skip() - return self.wrap(self.upto(']')) + return self.wrap(self.upto(']', True)) if c == '{': self.skip() return self.wrap(self.read_dictionary()) - if c in '>]}': + if c in '>]},': raise DecodeError('Unexpected ' + c) self.skip() return self.wrap(self.read_raw_symbol_or_number([c])) diff --git a/implementations/python/tests/samples.bin b/implementations/python/tests/samples.bin index d6008aa..3a62462 100644 Binary files a/implementations/python/tests/samples.bin and b/implementations/python/tests/samples.bin differ diff --git a/implementations/python/tests/samples.pr b/implementations/python/tests/samples.pr index 98e075b..7dff78b 100644 --- a/implementations/python/tests/samples.pr +++ b/implementations/python/tests/samples.pr @@ -52,8 +52,13 @@ annotation7: # Stop reading symbols at @ -- this test has three separate annotations + annotation8: @"Commas forbidden between @ and annotation" + annotation8a: @"Commas forbidden between @ and annotation in a collection" + annotation9: @"Commas forbidden between annotation and underlying" + annotation9a: @"Commas forbidden between annotation and underlying in a collection" bytes2: - bytes2a: + bytes2a: + bytes2b: @"Commas forbidden in internal whitespace" bytes3: bytes4: bytes5: @@ -79,6 +84,9 @@ dict3: @"Duplicate key" dict4: @"Unexpected close brace" dict5: @"Missing value" + dict6: @"Comma not allowed between key and colon" + dict7: @"Comma not allowed between colon and value" + dict8: double0: double+0: double-0: @@ -156,6 +164,7 @@ list0: list4: list4a: + list4b: list5: list6: list7: @@ -169,7 +178,8 @@ embed1: embed2: record1: >> - record2: , >>>> + record2: >>>> + record2a: @"Commas not allowed in records" , >>>"> record3: "Dr">> record4: > record5: > diff --git a/implementations/racket/preserves/preserves/read-text.rkt b/implementations/racket/preserves/preserves/read-text.rkt index 9467411..4e8ad72 100644 --- a/implementations/racket/preserves/preserves/read-text.rkt +++ b/implementations/racket/preserves/preserves/read-text.rkt @@ -47,7 +47,7 @@ v) (define (skip-whitespace* i) - (regexp-match? #px#"^(\\s|,)*" i)) ;; side effect: consumes matched portion of input + (regexp-match? #px#"^\\s*" i)) ;; side effect: consumes matched portion of input (define-match-expander px (syntax-rules () @@ -81,7 +81,7 @@ [(or #\newline #\return) (annotate-next-with "")] [#\f (unless (delimiter-follows?) (parse-error "Delimiter must follow #f")) #f] [#\t (unless (delimiter-follows?) (parse-error "Delimiter must follow #t")) #t] - [#\{ (sequence-fold (set) set-add* values #\})] + [#\{ (sequence-fold #t (set) set-add* values #\})] [#\" (read-literal-binary)] [#\x (match (next-char) [#\" (read-hex-binary '())] @@ -92,15 +92,16 @@ [#\! (embedded (decode-embedded (next)))] [c (parse-error "Invalid # syntax: ~v" c)])] - [#\< (match (read-sequence #\>) + [#\< (match (read-sequence #\> #f) ['() (parse-error "Missing record label")] [(cons label fields) (record label fields)])] - [#\[ (read-sequence #\])] + [#\[ (read-sequence #\] #t)] [#\{ (read-dictionary)] [#\> (parse-error "Unexpected >")] [#\] (parse-error "Unexpected ]")] [#\} (parse-error "Unexpected }")] + [#\, (parse-error "Unexpected ,")] [c (read-raw-symbol-or-number (list c))])) @@ -128,6 +129,9 @@ (define (skip-whitespace) (skip-whitespace* in-port)) + (define (skip-whitespace/commas) + (regexp-match? #px#"^(\\s|,)*" in-port)) ;; side effect: consumes matched portion of input + ;;--------------------------------------------------------------------------- ;; Source location tracking @@ -275,18 +279,21 @@ ;;--------------------------------------------------------------------------- ;; Collections - (define (sequence-fold acc accumulate-one finish terminator-char) + (define (sequence-fold commas-allowed? acc accumulate-one finish terminator-char) (let loop ((acc acc)) - (skip-whitespace) + (if commas-allowed? + (skip-whitespace/commas) + (skip-whitespace)) (match (eof-guard (peek-char in-port)) [(== terminator-char) (read-char in-port) (finish acc)] [_ (loop (accumulate-one acc (next)))]))) - (define (read-sequence terminator) - (sequence-fold '() (lambda (acc v) (cons v acc)) reverse terminator)) + (define (read-sequence terminator commas-allowed?) + (sequence-fold commas-allowed? '() (lambda (acc v) (cons v acc)) reverse terminator)) (define (read-dictionary) - (sequence-fold (hash) + (sequence-fold #t + (hash) (lambda (acc k) (skip-whitespace) (match (peek-char in-port) diff --git a/implementations/racket/preserves/preserves/tests/samples.pr b/implementations/racket/preserves/preserves/tests/samples.pr index 98e075b..7dff78b 100644 --- a/implementations/racket/preserves/preserves/tests/samples.pr +++ b/implementations/racket/preserves/preserves/tests/samples.pr @@ -52,8 +52,13 @@ annotation7: # Stop reading symbols at @ -- this test has three separate annotations + annotation8: @"Commas forbidden between @ and annotation" + annotation8a: @"Commas forbidden between @ and annotation in a collection" + annotation9: @"Commas forbidden between annotation and underlying" + annotation9a: @"Commas forbidden between annotation and underlying in a collection" bytes2: - bytes2a: + bytes2a: + bytes2b: @"Commas forbidden in internal whitespace" bytes3: bytes4: bytes5: @@ -79,6 +84,9 @@ dict3: @"Duplicate key" dict4: @"Unexpected close brace" dict5: @"Missing value" + dict6: @"Comma not allowed between key and colon" + dict7: @"Comma not allowed between colon and value" + dict8: double0: double+0: double-0: @@ -156,6 +164,7 @@ list0: list4: list4a: + list4b: list5: list6: list7: @@ -169,7 +178,8 @@ embed1: embed2: record1: >> - record2: , >>>> + record2: >>>> + record2a: @"Commas not allowed in records" , >>>"> record3: "Dr">> record4: > record5: > diff --git a/implementations/rust/preserves/src/value/text/reader.rs b/implementations/rust/preserves/src/value/text/reader.rs index de4fc04..11fa0cb 100644 --- a/implementations/rust/preserves/src/value/text/reader.rs +++ b/implementations/rust/preserves/src/value/text/reader.rs @@ -103,10 +103,18 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' } fn skip_whitespace(&mut self) { + self.skip_whitespace_and_maybe_commas(false) + } + + fn skip_whitespace_and_maybe_commas(&mut self, skip_commas: bool) { // Deliberately swallows errors. while let Ok(c) = self.peek() { match c { - b' ' | b'\t' | b'\r' | b'\n' | b',' => { + b' ' | b'\t' | b'\r' | b'\n' => { + let _ = self.skip(); + () + } + b',' if skip_commas => { let _ = self.skip(); () } @@ -343,10 +351,10 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' } } - fn upto(&mut self, delimiter: u8, read_annotations: bool) -> io::Result> { + fn upto(&mut self, delimiter: u8, read_annotations: bool, skip_commas: bool) -> io::Result> { let mut vs = Vec::new(); loop { - self.skip_whitespace(); + self.skip_whitespace_and_maybe_commas(skip_commas); if self.peek()? == delimiter { self.skip()?; return Ok(vs); @@ -356,7 +364,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' } fn read_set(&mut self, read_annotations: bool) -> io::Result { - let items = self.upto(b'}', read_annotations)?; + let items = self.upto(b'}', read_annotations, true)?; let mut s = Set::::new(); for i in items { if s.contains(&i) { @@ -370,7 +378,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' fn read_dictionary(&mut self, read_annotations: bool) -> io::Result { let mut d = Map::new(); loop { - self.skip_whitespace(); + self.skip_whitespace_and_maybe_commas(true); if self.peek()? == b'}' { self.skip()?; return Ok(N::new(d)); @@ -534,7 +542,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' } b'<' => { self.skip()?; - let vs = self.upto(b'>', read_annotations)?; + let vs = self.upto(b'>', read_annotations, false)?; if vs.is_empty() { return Err(io_syntax_error("Missing record label")); } @@ -542,7 +550,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' } b'[' => { self.skip()?; - N::new(self.upto(b']', read_annotations)?) + N::new(self.upto(b']', read_annotations, true)?) } b'{' => { self.skip()?; @@ -551,6 +559,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' b'>' => return Err(io_syntax_error("Unexpected >")), b']' => return Err(io_syntax_error("Unexpected ]")), b'}' => return Err(io_syntax_error("Unexpected }")), + b',' => return Err(io_syntax_error("Unexpected ,")), other => { self.skip()?; self.read_raw_symbol_or_number(vec![other])? @@ -629,6 +638,18 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' return Err(syntax_error("Missing expected key/value separator")); } } + B::Type { + closing: Some(B::Item::DictionaryValue), + opening: Some(B::Item::DictionaryKey), + } => self.skip_whitespace_and_maybe_commas(true), + B::Type { + closing: Some(B::Item::SetValue), + opening: Some(B::Item::SetValue), + } => self.skip_whitespace_and_maybe_commas(true), + B::Type { + closing: Some(B::Item::SequenceValue), + opening: Some(B::Item::SequenceValue), + } => self.skip_whitespace_and_maybe_commas(true), _ => (), } Ok(()) diff --git a/tests/samples.bin b/tests/samples.bin index d6008aa..3a62462 100644 Binary files a/tests/samples.bin and b/tests/samples.bin differ diff --git a/tests/samples.pr b/tests/samples.pr index 98e075b..7dff78b 100644 --- a/tests/samples.pr +++ b/tests/samples.pr @@ -52,8 +52,13 @@ annotation7: # Stop reading symbols at @ -- this test has three separate annotations + annotation8: @"Commas forbidden between @ and annotation" + annotation8a: @"Commas forbidden between @ and annotation in a collection" + annotation9: @"Commas forbidden between annotation and underlying" + annotation9a: @"Commas forbidden between annotation and underlying in a collection" bytes2: - bytes2a: + bytes2a: + bytes2b: @"Commas forbidden in internal whitespace" bytes3: bytes4: bytes5: @@ -79,6 +84,9 @@ dict3: @"Duplicate key" dict4: @"Unexpected close brace" dict5: @"Missing value" + dict6: @"Comma not allowed between key and colon" + dict7: @"Comma not allowed between colon and value" + dict8: double0: double+0: double-0: @@ -156,6 +164,7 @@ list0: list4: list4a: + list4b: list5: list6: list7: @@ -169,7 +178,8 @@ embed1: embed2: record1: >> - record2: , >>>> + record2: >>>> + record2a: @"Commas not allowed in records" , >>>"> record3: "Dr">> record4: > record5: >