Restrict positions where commas may exist

This commit is contained in:
Tony Garnock-Jones 2023-11-01 14:45:58 +01:00
parent c89147dd6a
commit d11ec61714
9 changed files with 104 additions and 42 deletions

View File

@ -96,10 +96,11 @@ export class ReaderState {
return this.buffer.charCodeAt(this.advance());
}
skipws() {
skipws(skipCommas = false) {
while (true) {
if (this.atEnd()) break;
if (!isSpace(this.peek())) break;
const c = this.peek();
if (!(isSpace(c) || (skipCommas && c === ','))) break;
this.advance();
}
}
@ -373,14 +374,15 @@ export class Reader<T> {
}
case '<': {
const label = this.next();
const fields = this.readSequence('>');
const fields = this.readSequence('>', false);
return Record(label, fields);
}
case '[': return this.readSequence(']');
case '[': return this.readSequence(']', true);
case '{': return this.readDictionary();
case '>': this.state.error('Unexpected >', startPos);
case ']': this.state.error('Unexpected ]', startPos);
case '}': this.state.error('Unexpected }', startPos);
case ',': this.state.error('Unexpected ,', startPos);
default:
return this.state.readRawSymbolOrNumber(c);
}
@ -388,9 +390,9 @@ export class Reader<T> {
return this.wrap(unwrapped, startPos);
}
seq<S>(acc: S, update: (v: Value<T>, acc: S) => void, ch: string): S {
seq<S>(skipCommas: boolean, acc: S, update: (v: Value<T>, acc: S) => void, ch: string): S {
while (true) {
this.state.skipws();
this.state.skipws(skipCommas);
if (this.state.peek() === ch) {
this.state.advance();
return acc;
@ -399,12 +401,13 @@ export class Reader<T> {
}
}
readSequence(ch: string): Array<Value<T>> {
return this.seq([] as Array<Value<T>>, (v, acc) => acc.push(v), ch);
readSequence(ch: string, skipCommas: boolean): Array<Value<T>> {
return this.seq(skipCommas, [] as Array<Value<T>>, (v, acc) => acc.push(v), ch);
}
readDictionary(): Dictionary<T> {
return this.seq(new Dictionary<T>(),
return this.seq(true,
new Dictionary<T>(),
(k, acc) => {
this.state.skipws();
switch (this.state.peek()) {
@ -422,7 +425,8 @@ export class Reader<T> {
}
readSet(): Set<T> {
return this.seq(new Set<T>(),
return this.seq(true,
new Set<T>(),
(v, acc) => {
if (acc.has(v)) this.state.error(
`Duplicate value in set: ${stringify(v)}`, this.state.pos);
@ -458,5 +462,5 @@ export function decodeBase64(s: string): Bytes {
}
function isSpace(s: string): boolean {
return ' \t\n\r,'.indexOf(s) !== -1;
return ' \t\n\r'.indexOf(s) !== -1;
}

View File

@ -158,10 +158,10 @@ class Parser(TextCodec):
self.skip()
return c
def skip_whitespace(self):
def skip_whitespace(self, skip_commas = False):
while not self._atend():
c = self.peek()
if not (c.isspace() or c == ','):
if not (c.isspace() or (skip_commas and c == ',')):
break
self.skip()
@ -261,17 +261,17 @@ class Parser(TextCodec):
if bytecount == 8: return struct.unpack('>d', bs)[0]
raise DecodeError('Unsupported byte count in hex-encoded floating-point number')
def upto(self, delimiter):
def upto(self, delimiter, skip_commas):
vs = []
while True:
self.skip_whitespace()
self.skip_whitespace(skip_commas)
if self.peek() == delimiter:
self.skip()
return tuple(vs)
vs.append(self.next())
def read_set(self):
items = self.upto('}')
items = self.upto('}', True)
s = set()
for i in items:
if i in s: raise DecodeError('Duplicate value in set: ' + repr(i))
@ -281,7 +281,7 @@ class Parser(TextCodec):
def read_dictionary(self):
acc = []
while True:
self.skip_whitespace()
self.skip_whitespace(True)
if self.peek() == '}':
self.skip()
return ImmutableDict.from_kvs(acc)
@ -368,17 +368,17 @@ class Parser(TextCodec):
raise DecodeError('Invalid # syntax')
if c == '<':
self.skip()
vs = self.upto('>')
vs = self.upto('>', False)
if len(vs) == 0:
raise DecodeError('Missing record label')
return self.wrap(Record(vs[0], vs[1:]))
if c == '[':
self.skip()
return self.wrap(self.upto(']'))
return self.wrap(self.upto(']', True))
if c == '{':
self.skip()
return self.wrap(self.read_dictionary())
if c in '>]}':
if c in '>]},':
raise DecodeError('Unexpected ' + c)
self.skip()
return self.wrap(self.read_raw_symbol_or_number([c]))

View File

@ -52,8 +52,13 @@
annotation7:
# Stop reading symbols at @ -- this test has three separate annotations
<Test #x"85 B30161 85 B30162 85 B30163 B584" @a@b@c[]>
annotation8: @"Commas forbidden between @ and annotation" <ParseError "@,a b">
annotation8a: @"Commas forbidden between @ and annotation in a collection" <ParseError "[@,a b]">
annotation9: @"Commas forbidden between annotation and underlying" <ParseError "@a, b">
annotation9a: @"Commas forbidden between annotation and underlying in a collection" <ParseError "[@a, b]">
bytes2: <Test #x"B20568656c6c6f" #"hello">
bytes2a: <Test @"Internal whitespace is allowed, including commas!" #x"B2, 05, 68, 65, 6c, 6c, 6f" #"hello">
bytes2a: <Test @"Internal whitespace is allowed, not including commas" #x"B2 05 68 65 6c 6c 6f" #"hello">
bytes2b: @"Commas forbidden in internal whitespace" <ParseError "#x\"B2, 05, 68, 65, 6c, 6c, 6f\"">
bytes3: <Test #x"B203414243" #"ABC">
bytes4: <Test #x"B203414243" #x"414243">
bytes5: <Test #x"B203414a4e" #x" 41 4A 4e ">
@ -79,6 +84,9 @@
dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
dict4: @"Unexpected close brace" <ParseError "}">
dict5: @"Missing value" <DecodeError #x"b7 b00101 b00102 b00103 84">
dict6: @"Comma not allowed between key and colon" <ParseError "{ a,: 1, b: 2 }">
dict7: @"Comma not allowed between colon and value" <ParseError "{ a:, 1, b: 2 }">
dict8: <NondeterministicTest #x"b7b30161b00101b30162b0010284" {,, a: 1,, b: 2,,}>
double0: <Test #x"87080000000000000000" 0.0>
double+0: <Test #x"87080000000000000000" +0.0>
double-0: <Test #x"87088000000000000000" -0.0>
@ -156,6 +164,7 @@
list0: <Test #x"b584" []>
list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
list4b: <Test #x"b5b00101b00102b00103b0010484" [,, 1,, 2,, 3,, 4,,]>
list5: <Test #x"b5b001feb001ffb000b0010184" [-2 -1 0 1]>
list6: <Test #x"b5 b10568656c6c6f b3057468657265 b205776f726c64 b584 b684 81 80 84" ["hello" there #"world" [] #{} #t #f]>
list7: <Test #x"b5 b303616263 b3032e2e2e b303646566 84" [abc ... def]>
@ -169,7 +178,8 @@
embed1: <Test #x"8686b000" #!#!0>
embed2: <Test #x"b586b00086b10568656c6c6f84" [#!0 #!"hello"]>
record1: <Test #x"b4 b30763617074757265 b4 b30764697363617264 84 84" <capture <discard>>>
record2: <Test #x"b4 b3076f627365727665 b4 b305737065616b b4 b30764697363617264 84 b4 b30763617074757265 b4 b30764697363617264 84 84 84 84" <observe <speak <discard>, <capture <discard>>>>>
record2: <Test #x"b4 b3076f627365727665 b4 b305737065616b b4 b30764697363617264 84 b4 b30763617074757265 b4 b30764697363617264 84 84 84 84" <observe <speak <discard> <capture <discard>>>>>
record2a: @"Commas not allowed in records" <ParseError "<observe <speak <discard>, <capture <discard>>>>">
record3: <Test #x"b4 b5 b3067469746c6564 b306706572736f6e b00102 b3057468696e67 b00101 84 b00165 b109426c61636b77656c6c b4 b30464617465 b002071d b00102 b00103 84 b1024472 84" <[titled person 2 thing 1] 101 "Blackwell" <date 1821 2 3> "Dr">>
record4: <Test #x"b4 b30764697363617264 84" <discard>>
record5: <Test #x"b4b00107b58484" <7[]>>

View File

@ -47,7 +47,7 @@
v)
(define (skip-whitespace* i)
(regexp-match? #px#"^(\\s|,)*" i)) ;; side effect: consumes matched portion of input
(regexp-match? #px#"^\\s*" i)) ;; side effect: consumes matched portion of input
(define-match-expander px
(syntax-rules ()
@ -81,7 +81,7 @@
[(or #\newline #\return) (annotate-next-with "")]
[#\f (unless (delimiter-follows?) (parse-error "Delimiter must follow #f")) #f]
[#\t (unless (delimiter-follows?) (parse-error "Delimiter must follow #t")) #t]
[#\{ (sequence-fold (set) set-add* values #\})]
[#\{ (sequence-fold #t (set) set-add* values #\})]
[#\" (read-literal-binary)]
[#\x (match (next-char)
[#\" (read-hex-binary '())]
@ -92,15 +92,16 @@
[#\! (embedded (decode-embedded (next)))]
[c (parse-error "Invalid # syntax: ~v" c)])]
[#\< (match (read-sequence #\>)
[#\< (match (read-sequence #\> #f)
['() (parse-error "Missing record label")]
[(cons label fields) (record label fields)])]
[#\[ (read-sequence #\])]
[#\[ (read-sequence #\] #t)]
[#\{ (read-dictionary)]
[#\> (parse-error "Unexpected >")]
[#\] (parse-error "Unexpected ]")]
[#\} (parse-error "Unexpected }")]
[#\, (parse-error "Unexpected ,")]
[c (read-raw-symbol-or-number (list c))]))
@ -128,6 +129,9 @@
(define (skip-whitespace) (skip-whitespace* in-port))
(define (skip-whitespace/commas)
(regexp-match? #px#"^(\\s|,)*" in-port)) ;; side effect: consumes matched portion of input
;;---------------------------------------------------------------------------
;; Source location tracking
@ -275,18 +279,21 @@
;;---------------------------------------------------------------------------
;; Collections
(define (sequence-fold acc accumulate-one finish terminator-char)
(define (sequence-fold commas-allowed? acc accumulate-one finish terminator-char)
(let loop ((acc acc))
(skip-whitespace)
(if commas-allowed?
(skip-whitespace/commas)
(skip-whitespace))
(match (eof-guard (peek-char in-port))
[(== terminator-char) (read-char in-port) (finish acc)]
[_ (loop (accumulate-one acc (next)))])))
(define (read-sequence terminator)
(sequence-fold '() (lambda (acc v) (cons v acc)) reverse terminator))
(define (read-sequence terminator commas-allowed?)
(sequence-fold commas-allowed? '() (lambda (acc v) (cons v acc)) reverse terminator))
(define (read-dictionary)
(sequence-fold (hash)
(sequence-fold #t
(hash)
(lambda (acc k)
(skip-whitespace)
(match (peek-char in-port)

View File

@ -52,8 +52,13 @@
annotation7:
# Stop reading symbols at @ -- this test has three separate annotations
<Test #x"85 B30161 85 B30162 85 B30163 B584" @a@b@c[]>
annotation8: @"Commas forbidden between @ and annotation" <ParseError "@,a b">
annotation8a: @"Commas forbidden between @ and annotation in a collection" <ParseError "[@,a b]">
annotation9: @"Commas forbidden between annotation and underlying" <ParseError "@a, b">
annotation9a: @"Commas forbidden between annotation and underlying in a collection" <ParseError "[@a, b]">
bytes2: <Test #x"B20568656c6c6f" #"hello">
bytes2a: <Test @"Internal whitespace is allowed, including commas!" #x"B2, 05, 68, 65, 6c, 6c, 6f" #"hello">
bytes2a: <Test @"Internal whitespace is allowed, not including commas" #x"B2 05 68 65 6c 6c 6f" #"hello">
bytes2b: @"Commas forbidden in internal whitespace" <ParseError "#x\"B2, 05, 68, 65, 6c, 6c, 6f\"">
bytes3: <Test #x"B203414243" #"ABC">
bytes4: <Test #x"B203414243" #x"414243">
bytes5: <Test #x"B203414a4e" #x" 41 4A 4e ">
@ -79,6 +84,9 @@
dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
dict4: @"Unexpected close brace" <ParseError "}">
dict5: @"Missing value" <DecodeError #x"b7 b00101 b00102 b00103 84">
dict6: @"Comma not allowed between key and colon" <ParseError "{ a,: 1, b: 2 }">
dict7: @"Comma not allowed between colon and value" <ParseError "{ a:, 1, b: 2 }">
dict8: <NondeterministicTest #x"b7b30161b00101b30162b0010284" {,, a: 1,, b: 2,,}>
double0: <Test #x"87080000000000000000" 0.0>
double+0: <Test #x"87080000000000000000" +0.0>
double-0: <Test #x"87088000000000000000" -0.0>
@ -156,6 +164,7 @@
list0: <Test #x"b584" []>
list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
list4b: <Test #x"b5b00101b00102b00103b0010484" [,, 1,, 2,, 3,, 4,,]>
list5: <Test #x"b5b001feb001ffb000b0010184" [-2 -1 0 1]>
list6: <Test #x"b5 b10568656c6c6f b3057468657265 b205776f726c64 b584 b684 81 80 84" ["hello" there #"world" [] #{} #t #f]>
list7: <Test #x"b5 b303616263 b3032e2e2e b303646566 84" [abc ... def]>
@ -169,7 +178,8 @@
embed1: <Test #x"8686b000" #!#!0>
embed2: <Test #x"b586b00086b10568656c6c6f84" [#!0 #!"hello"]>
record1: <Test #x"b4 b30763617074757265 b4 b30764697363617264 84 84" <capture <discard>>>
record2: <Test #x"b4 b3076f627365727665 b4 b305737065616b b4 b30764697363617264 84 b4 b30763617074757265 b4 b30764697363617264 84 84 84 84" <observe <speak <discard>, <capture <discard>>>>>
record2: <Test #x"b4 b3076f627365727665 b4 b305737065616b b4 b30764697363617264 84 b4 b30763617074757265 b4 b30764697363617264 84 84 84 84" <observe <speak <discard> <capture <discard>>>>>
record2a: @"Commas not allowed in records" <ParseError "<observe <speak <discard>, <capture <discard>>>>">
record3: <Test #x"b4 b5 b3067469746c6564 b306706572736f6e b00102 b3057468696e67 b00101 84 b00165 b109426c61636b77656c6c b4 b30464617465 b002071d b00102 b00103 84 b1024472 84" <[titled person 2 thing 1] 101 "Blackwell" <date 1821 2 3> "Dr">>
record4: <Test #x"b4 b30764697363617264 84" <discard>>
record5: <Test #x"b4b00107b58484" <7[]>>

View File

@ -103,10 +103,18 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
}
fn skip_whitespace(&mut self) {
self.skip_whitespace_and_maybe_commas(false)
}
fn skip_whitespace_and_maybe_commas(&mut self, skip_commas: bool) {
// Deliberately swallows errors.
while let Ok(c) = self.peek() {
match c {
b' ' | b'\t' | b'\r' | b'\n' | b',' => {
b' ' | b'\t' | b'\r' | b'\n' => {
let _ = self.skip();
()
}
b',' if skip_commas => {
let _ = self.skip();
()
}
@ -343,10 +351,10 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
}
}
fn upto(&mut self, delimiter: u8, read_annotations: bool) -> io::Result<Vec<N>> {
fn upto(&mut self, delimiter: u8, read_annotations: bool, skip_commas: bool) -> io::Result<Vec<N>> {
let mut vs = Vec::new();
loop {
self.skip_whitespace();
self.skip_whitespace_and_maybe_commas(skip_commas);
if self.peek()? == delimiter {
self.skip()?;
return Ok(vs);
@ -356,7 +364,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
}
fn read_set(&mut self, read_annotations: bool) -> io::Result<N> {
let items = self.upto(b'}', read_annotations)?;
let items = self.upto(b'}', read_annotations, true)?;
let mut s = Set::<N>::new();
for i in items {
if s.contains(&i) {
@ -370,7 +378,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
fn read_dictionary(&mut self, read_annotations: bool) -> io::Result<N> {
let mut d = Map::new();
loop {
self.skip_whitespace();
self.skip_whitespace_and_maybe_commas(true);
if self.peek()? == b'}' {
self.skip()?;
return Ok(N::new(d));
@ -534,7 +542,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
}
b'<' => {
self.skip()?;
let vs = self.upto(b'>', read_annotations)?;
let vs = self.upto(b'>', read_annotations, false)?;
if vs.is_empty() {
return Err(io_syntax_error("Missing record label"));
}
@ -542,7 +550,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
}
b'[' => {
self.skip()?;
N::new(self.upto(b']', read_annotations)?)
N::new(self.upto(b']', read_annotations, true)?)
}
b'{' => {
self.skip()?;
@ -551,6 +559,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
b'>' => return Err(io_syntax_error("Unexpected >")),
b']' => return Err(io_syntax_error("Unexpected ]")),
b'}' => return Err(io_syntax_error("Unexpected }")),
b',' => return Err(io_syntax_error("Unexpected ,")),
other => {
self.skip()?;
self.read_raw_symbol_or_number(vec![other])?
@ -629,6 +638,18 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
return Err(syntax_error("Missing expected key/value separator"));
}
}
B::Type {
closing: Some(B::Item::DictionaryValue),
opening: Some(B::Item::DictionaryKey),
} => self.skip_whitespace_and_maybe_commas(true),
B::Type {
closing: Some(B::Item::SetValue),
opening: Some(B::Item::SetValue),
} => self.skip_whitespace_and_maybe_commas(true),
B::Type {
closing: Some(B::Item::SequenceValue),
opening: Some(B::Item::SequenceValue),
} => self.skip_whitespace_and_maybe_commas(true),
_ => (),
}
Ok(())

Binary file not shown.

View File

@ -52,8 +52,13 @@
annotation7:
# Stop reading symbols at @ -- this test has three separate annotations
<Test #x"85 B30161 85 B30162 85 B30163 B584" @a@b@c[]>
annotation8: @"Commas forbidden between @ and annotation" <ParseError "@,a b">
annotation8a: @"Commas forbidden between @ and annotation in a collection" <ParseError "[@,a b]">
annotation9: @"Commas forbidden between annotation and underlying" <ParseError "@a, b">
annotation9a: @"Commas forbidden between annotation and underlying in a collection" <ParseError "[@a, b]">
bytes2: <Test #x"B20568656c6c6f" #"hello">
bytes2a: <Test @"Internal whitespace is allowed, including commas!" #x"B2, 05, 68, 65, 6c, 6c, 6f" #"hello">
bytes2a: <Test @"Internal whitespace is allowed, not including commas" #x"B2 05 68 65 6c 6c 6f" #"hello">
bytes2b: @"Commas forbidden in internal whitespace" <ParseError "#x\"B2, 05, 68, 65, 6c, 6c, 6f\"">
bytes3: <Test #x"B203414243" #"ABC">
bytes4: <Test #x"B203414243" #x"414243">
bytes5: <Test #x"B203414a4e" #x" 41 4A 4e ">
@ -79,6 +84,9 @@
dict3: @"Duplicate key" <ParseError "{ a: 1, a: 2 }">
dict4: @"Unexpected close brace" <ParseError "}">
dict5: @"Missing value" <DecodeError #x"b7 b00101 b00102 b00103 84">
dict6: @"Comma not allowed between key and colon" <ParseError "{ a,: 1, b: 2 }">
dict7: @"Comma not allowed between colon and value" <ParseError "{ a:, 1, b: 2 }">
dict8: <NondeterministicTest #x"b7b30161b00101b30162b0010284" {,, a: 1,, b: 2,,}>
double0: <Test #x"87080000000000000000" 0.0>
double+0: <Test #x"87080000000000000000" +0.0>
double-0: <Test #x"87088000000000000000" -0.0>
@ -156,6 +164,7 @@
list0: <Test #x"b584" []>
list4: <Test #x"b5b00101b00102b00103b0010484" [1 2 3 4]>
list4a: <Test #x"b5b00101b00102b00103b0010484" [1, 2, 3, 4]>
list4b: <Test #x"b5b00101b00102b00103b0010484" [,, 1,, 2,, 3,, 4,,]>
list5: <Test #x"b5b001feb001ffb000b0010184" [-2 -1 0 1]>
list6: <Test #x"b5 b10568656c6c6f b3057468657265 b205776f726c64 b584 b684 81 80 84" ["hello" there #"world" [] #{} #t #f]>
list7: <Test #x"b5 b303616263 b3032e2e2e b303646566 84" [abc ... def]>
@ -169,7 +178,8 @@
embed1: <Test #x"8686b000" #!#!0>
embed2: <Test #x"b586b00086b10568656c6c6f84" [#!0 #!"hello"]>
record1: <Test #x"b4 b30763617074757265 b4 b30764697363617264 84 84" <capture <discard>>>
record2: <Test #x"b4 b3076f627365727665 b4 b305737065616b b4 b30764697363617264 84 b4 b30763617074757265 b4 b30764697363617264 84 84 84 84" <observe <speak <discard>, <capture <discard>>>>>
record2: <Test #x"b4 b3076f627365727665 b4 b305737065616b b4 b30764697363617264 84 b4 b30763617074757265 b4 b30764697363617264 84 84 84 84" <observe <speak <discard> <capture <discard>>>>>
record2a: @"Commas not allowed in records" <ParseError "<observe <speak <discard>, <capture <discard>>>>">
record3: <Test #x"b4 b5 b3067469746c6564 b306706572736f6e b00102 b3057468696e67 b00101 84 b00165 b109426c61636b77656c6c b4 b30464617465 b002071d b00102 b00103 84 b1024472 84" <[titled person 2 thing 1] 101 "Blackwell" <date 1821 2 3> "Dr">>
record4: <Test #x"b4 b30764697363617264 84" <discard>>
record5: <Test #x"b4b00107b58484" <7[]>>