From 9ed9296fc0a34482377b9d5b3e5bb3df7e970d1e Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Wed, 1 Nov 2023 00:13:23 +0100 Subject: [PATCH] Default to Document-style treatment of whitespace-before-EOF --- .../rust/preserves/src/value/mod.rs | 1 + .../rust/preserves/src/value/text/mod.rs | 1 + .../rust/preserves/src/value/text/reader.rs | 30 ++++++++++++++++++- .../rust/preserves/tests/samples_tests.rs | 3 ++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/implementations/rust/preserves/src/value/mod.rs b/implementations/rust/preserves/src/value/mod.rs index df627fb..19d6daf 100644 --- a/implementations/rust/preserves/src/value/mod.rs +++ b/implementations/rust/preserves/src/value/mod.rs @@ -103,6 +103,7 @@ pub use repr::ValueClass; pub use ser::to_value; pub use ser::Serializer; pub use text::TextReader; +pub use text::ToplevelWhitespaceMode; pub use text::TextWriter; pub use writer::Writer; diff --git a/implementations/rust/preserves/src/value/text/mod.rs b/implementations/rust/preserves/src/value/text/mod.rs index 7fe346b..b7943fd 100644 --- a/implementations/rust/preserves/src/value/text/mod.rs +++ b/implementations/rust/preserves/src/value/text/mod.rs @@ -14,6 +14,7 @@ pub mod reader; pub mod writer; pub use reader::TextReader; +pub use reader::ToplevelWhitespaceMode; pub use writer::TextWriter; use crate::value::reader::BytesBinarySource; diff --git a/implementations/rust/preserves/src/value/text/reader.rs b/implementations/rust/preserves/src/value/text/reader.rs index fd49333..de4fc04 100644 --- a/implementations/rust/preserves/src/value/text/reader.rs +++ b/implementations/rust/preserves/src/value/text/reader.rs @@ -40,9 +40,24 @@ pub struct TextReader<'de, 'src, N: NestedValue, Dec: DomainParse, pub source: &'src mut S, /// Decoder for producing Rust values embedded in the text. pub dec: Dec, + /// Treatment of whitespace before a toplevel term. 
+ pub toplevel_whitespace_mode: ToplevelWhitespaceMode, phantom: PhantomData<&'de N>, } +/// [TextReader] chooses `Document` mode to treat whitespace preceding end-of-file as a "no +/// more values" non-error situation, or `Value` mode to treat it as an "expected more input" +/// situation. +/// +/// The Preserves syntax for `Value` treats any input at all, even whitespace, as an indicator +/// that a term is to follow. However, when using a TextReader to parse a *series* of `Value`s +/// in a `Document`, whitespace followed by EOF is to be treated as the permitted optional +/// whitespace at the end of a `Document`. +pub enum ToplevelWhitespaceMode { + Document, + Value, +} + fn decode_utf8(bs: Vec) -> io::Result { Ok(String::from_utf8(bs).map_err(|_| io_syntax_error("Invalid UTF-8"))?) } @@ -63,10 +78,16 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' TextReader { source, dec, + toplevel_whitespace_mode: ToplevelWhitespaceMode::Document, phantom: PhantomData, } } + pub fn toplevel_whitespace_mode(mut self, new_mode: ToplevelWhitespaceMode) -> Self { + self.toplevel_whitespace_mode = new_mode; + self + } + fn peek(&mut self) -> io::Result { self.source.peek() } @@ -437,11 +458,18 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' { fn next(&mut self, read_annotations: bool) -> io::Result> { 'restart: loop { + match self.toplevel_whitespace_mode { + ToplevelWhitespaceMode::Document => self.skip_whitespace(), + ToplevelWhitespaceMode::Value => (), + } match self.peek() { Err(e) if is_eof_io_error(&e) => return Ok(None), _ => (), } - self.skip_whitespace(); + match self.toplevel_whitespace_mode { + ToplevelWhitespaceMode::Document => (), + ToplevelWhitespaceMode::Value => self.skip_whitespace(), + } return Ok(Some(match self.peek()? 
{ b'"' => { self.skip()?; diff --git a/implementations/rust/preserves/tests/samples_tests.rs b/implementations/rust/preserves/tests/samples_tests.rs index 5b0ad27..7df7212 100644 --- a/implementations/rust/preserves/tests/samples_tests.rs +++ b/implementations/rust/preserves/tests/samples_tests.rs @@ -7,6 +7,7 @@ use preserves::value::IOBinarySource; use preserves::value::IOValue; use preserves::value::PackedWriter; use preserves::value::Reader; +use preserves::value::ToplevelWhitespaceMode; use std::io; use std::iter::Iterator; @@ -158,6 +159,7 @@ fn run() -> io::Result<()> { TestCase::ParseShort(text) => { assert!(if let Err(e) = BytesBinarySource::new(text.as_bytes()) .text_iovalues() + .toplevel_whitespace_mode(ToplevelWhitespaceMode::Value) .configured(true) .next() .unwrap() @@ -170,6 +172,7 @@ fn run() -> io::Result<()> { TestCase::ParseEOF(text) => { assert!(BytesBinarySource::new(text.as_bytes()) .text_iovalues() + .toplevel_whitespace_mode(ToplevelWhitespaceMode::Value) .configured(true) .next() .is_none());