Default to Document-style treatment of whitespace-before-EOF

This commit is contained in:
Tony Garnock-Jones 2023-11-01 00:13:23 +01:00
parent 03cb5ab02f
commit 9ed9296fc0
4 changed files with 34 additions and 1 deletions

View File

@ -103,6 +103,7 @@ pub use repr::ValueClass;
pub use ser::to_value;
pub use ser::Serializer;
pub use text::TextReader;
pub use text::ToplevelWhitespaceMode;
pub use text::TextWriter;
pub use writer::Writer;

View File

@ -14,6 +14,7 @@ pub mod reader;
pub mod writer;
pub use reader::TextReader;
pub use reader::ToplevelWhitespaceMode;
pub use writer::TextWriter;
use crate::value::reader::BytesBinarySource;

View File

@ -40,9 +40,24 @@ pub struct TextReader<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>,
pub source: &'src mut S,
/// Decoder for producing Rust values embedded in the text.
pub dec: Dec,
/// Treatment of whitespace before a toplevel term.
pub toplevel_whitespace_mode: ToplevelWhitespaceMode,
phantom: PhantomData<&'de N>,
}
/// Selects how [TextReader] treats whitespace that precedes end-of-file at the top level.
///
/// [TextReader] chooses `Document` mode to treat whitespace preceding end-of-file as a "no
/// more values" non-error situation, or `Value` mode to treat it as an "expected more input"
/// situation.
///
/// The Preserves syntax for `Value` treats any input at all, even whitespace, as an indicator
/// that a term is to follow. However, when using a TextReader to parse a *series* of `Value`s
/// in a `Document`, whitespace followed by EOF is to be treated as the permitted optional
/// whitespace at the end of a `Document`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ToplevelWhitespaceMode {
    /// Whitespace followed by end-of-file is a "no more values" non-error situation.
    Document,
    /// Whitespace followed by end-of-file is an "expected more input" situation.
    Value,
}
/// Converts an owned byte buffer into a `String`.
///
/// If the bytes are not valid UTF-8, the underlying conversion error is discarded and the
/// error produced by `io_syntax_error("Invalid UTF-8")` is returned instead.
fn decode_utf8(bs: Vec<u8>) -> io::Result<String> {
    let decoded = String::from_utf8(bs);
    // Keep `?` so any `From` conversion on the error value is applied exactly as before.
    let text = decoded.map_err(|_| io_syntax_error("Invalid UTF-8"))?;
    Ok(text)
}
@ -63,10 +78,16 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
TextReader {
source,
dec,
toplevel_whitespace_mode: ToplevelWhitespaceMode::Document,
phantom: PhantomData,
}
}
/// Builder-style setter: consumes the reader, replaces its `toplevel_whitespace_mode`
/// field with `new_mode`, and hands the reader back so calls can be chained.
pub fn toplevel_whitespace_mode(self, new_mode: ToplevelWhitespaceMode) -> Self {
    let mut reader = self;
    reader.toplevel_whitespace_mode = new_mode;
    reader
}
/// Forwards to the underlying source's `peek` and returns its result unchanged.
fn peek(&mut self) -> io::Result<u8> {
self.source.peek()
}
@ -437,11 +458,18 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
{
fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>> {
'restart: loop {
match self.toplevel_whitespace_mode {
ToplevelWhitespaceMode::Document => self.skip_whitespace(),
ToplevelWhitespaceMode::Value => (),
}
match self.peek() {
Err(e) if is_eof_io_error(&e) => return Ok(None),
_ => (),
}
self.skip_whitespace();
match self.toplevel_whitespace_mode {
ToplevelWhitespaceMode::Document => (),
ToplevelWhitespaceMode::Value => self.skip_whitespace(),
}
return Ok(Some(match self.peek()? {
b'"' => {
self.skip()?;

View File

@ -7,6 +7,7 @@ use preserves::value::IOBinarySource;
use preserves::value::IOValue;
use preserves::value::PackedWriter;
use preserves::value::Reader;
use preserves::value::ToplevelWhitespaceMode;
use std::io;
use std::iter::Iterator;
@ -158,6 +159,7 @@ fn run() -> io::Result<()> {
TestCase::ParseShort(text) => {
assert!(if let Err(e) = BytesBinarySource::new(text.as_bytes())
.text_iovalues()
.toplevel_whitespace_mode(ToplevelWhitespaceMode::Value)
.configured(true)
.next()
.unwrap()
@ -170,6 +172,7 @@ fn run() -> io::Result<()> {
TestCase::ParseEOF(text) => {
assert!(BytesBinarySource::new(text.as_bytes())
.text_iovalues()
.toplevel_whitespace_mode(ToplevelWhitespaceMode::Value)
.configured(true)
.next()
.is_none());