From d28901446d0716df7f47e9f0615f1510539a227c Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 3 Aug 2021 16:26:40 +0200 Subject: [PATCH] Decode text syntax from *byte* sources. --- implementations/rust/preserves/src/de.rs | 5 +- .../rust/preserves/src/value/text/mod.rs | 6 +- .../rust/preserves/src/value/text/reader.rs | 487 +++++++++--------- .../rust/preserves/tests/samples_tests.rs | 6 +- 4 files changed, 262 insertions(+), 242 deletions(-) diff --git a/implementations/rust/preserves/src/de.rs b/implementations/rust/preserves/src/de.rs index d035417..4140168 100644 --- a/implementations/rust/preserves/src/de.rs +++ b/implementations/rust/preserves/src/de.rs @@ -26,8 +26,9 @@ where from_reader(&mut PackedReader::new(&mut BytesBinarySource::new(bytes), IOValueDomainCodec)) } -pub fn from_text<'de, T>(text: &str) -> Result where T: Deserialize<'de> { - from_reader(&mut TextReader::new(text, ViaCodec::new(IOValueDomainCodec))) +pub fn from_text<'de, T>(text: &'de str) -> Result where T: Deserialize<'de> { + from_reader(&mut TextReader::new(&mut BytesBinarySource::new(text.as_bytes()), + ViaCodec::new(IOValueDomainCodec))) } pub fn from_read<'de, 'r, IOR: io::Read + io::Seek, T>(read: &'r mut IOR) -> diff --git a/implementations/rust/preserves/src/value/text/mod.rs b/implementations/rust/preserves/src/value/text/mod.rs index 8949567..80854ff 100644 --- a/implementations/rust/preserves/src/value/text/mod.rs +++ b/implementations/rust/preserves/src/value/text/mod.rs @@ -4,6 +4,8 @@ pub mod writer; pub use reader::TextReader; pub use writer::TextWriter; +use crate::value::reader::BytesBinarySource; + use std::io; use super::{DomainParse, Embeddable, IOValue, IOValueDomainCodec, NestedValue, Reader, ViaCodec}; @@ -12,7 +14,7 @@ pub fn from_str, Dec: DomainParse>( s: &str, decode_embedded: Dec, ) -> io::Result { - TextReader::new(s, decode_embedded).demand_next(false) + TextReader::new(&mut BytesBinarySource::new(s.as_bytes()), decode_embedded).demand_next(false) } pub fn iovalue_from_str(s: &str) -> io::Result { @@ -23,7 +25,7 @@ pub fn annotated_from_str, Dec: DomainParse> s: &str, decode_embedded: Dec, ) -> io::Result { - TextReader::new(s, decode_embedded).demand_next(true) + TextReader::new(&mut BytesBinarySource::new(s.as_bytes()), decode_embedded).demand_next(true) } pub fn annotated_iovalue_from_str(s: &str) -> io::Result { diff --git a/implementations/rust/preserves/src/value/text/reader.rs b/implementations/rust/preserves/src/value/text/reader.rs index e4c2ce7..c2d3fe5 100644 --- a/implementations/rust/preserves/src/value/text/reader.rs +++ b/implementations/rust/preserves/src/value/text/reader.rs @@ -1,9 +1,8 @@ use crate::error::Error; use crate::error::ExpectedKind; use crate::error::Received; -use crate::error::eof; use crate::error::io_syntax_error; -use crate::error::is_eof_error; +use crate::error::is_eof_io_error; use crate::error::syntax_error; use crate::hex; @@ -33,55 +32,57 @@ use std::io; use std::iter::FromIterator; use std::marker::PhantomData; -pub struct TextReader<'a, D: Embeddable, Dec: DomainParse> { - buf: &'a str, - pos: usize, - dec: Dec, - phantom: PhantomData, +pub struct TextReader<'de, 'src, D: Embeddable, Dec: DomainParse, S: BinarySource<'de>> { + pub source: &'src mut S, + pub dec: Dec, + phantom: PhantomData<&'de D>, } -impl<'a, D: Embeddable, Dec: DomainParse> TextReader<'a, D, Dec> { - pub fn new(buf: &'a str, dec: Dec) -> Self { +fn decode_utf8(bs: Vec) -> io::Result { + Ok(String::from_utf8(bs).map_err(|_| io_syntax_error("Invalid UTF-8"))?) +} + +fn append_codepoint(bs: &mut Vec, n: u32) -> io::Result<()> { + let c = char::from_u32(n).ok_or_else(|| io_syntax_error("Bad code point"))?; + let mut buf = [0; 4]; + let _ = c.encode_utf8(&mut buf); + bs.extend(&buf[0 .. c.len_utf8()]); + Ok(()) +} + +impl<'de, 'src, D: Embeddable, Dec: DomainParse, S: BinarySource<'de>> TextReader<'de, 'src, D, Dec, S> { + pub fn new(source: &'src mut S, dec: Dec) -> Self { TextReader { - buf, - pos: 0, + source, dec, phantom: PhantomData, } } - fn remaining_input(&self) -> &str { - &self.buf[self.pos ..] + fn peek(&mut self) -> io::Result { + self.source.peek() } - fn peek(&self) -> ReaderResult { - if self.pos >= self.buf.len() { - Err(eof()) - } else { - Ok(self.buf[self.pos ..].chars().next().unwrap()) - } + fn skip(&mut self) -> io::Result<()> { + self.source.skip() } - fn drop(&mut self, count: usize) { - self.pos += count; - } - - fn undrop(&mut self, count: usize) { - self.pos -= count; - } - - fn next_char(&mut self) -> ReaderResult { - let c = self.peek()?; - self.drop(c.len_utf8()); - Ok(c) + fn next_byte(&mut self) -> io::Result { + let b = self.source.peek()?; + self.source.skip()?; + Ok(b) } fn skip_whitespace(&mut self) { + // Deliberately swallows errors. while let Ok(c) = self.peek() { - if !c.is_whitespace() && c != ',' { - break; + match c { + b' ' | b'\t' | b'\r' | b'\n' | b',' => { + let _ = self.skip(); + () + } + _ => break, } - self.drop(c.len_utf8()) } } @@ -98,8 +99,8 @@ impl<'a, D: Embeddable, Dec: DomainParse> TextReader<'a, D, Dec> { loop { self.skip_whitespace(); match self.peek()? { - ';' => { self.drop(1); vs.push(N::new(self.comment_line()?)) } - '@' => { self.drop(1); vs.push(self.demand_next(true)?) } + b';' => { self.skip()?; vs.push(N::new(self.comment_line()?)) } + b'@' => { self.skip()?; vs.push(self.demand_next(true)?) } _ => return Ok(vs), } } @@ -109,135 +110,134 @@ impl<'a, D: Embeddable, Dec: DomainParse> TextReader<'a, D, Dec> { loop { self.skip_whitespace(); match self.peek()? { - ';' => { self.drop(1); self.comment_line()?; }, - '@' => { self.drop(1); Reader::>::skip_value(self)?; }, + b';' => { self.skip()?; self.comment_line()?; }, + b'@' => { self.skip()?; Reader::>::skip_value(self)?; }, _ => return Ok(()), } } } pub fn next_iovalue(&mut self, read_annotations: bool) -> io::Result { - let mut r = TextReader::new(self.remaining_input(), ViaCodec::new(IOValueDomainCodec)); + let mut r = TextReader::new(self.source, ViaCodec::new(IOValueDomainCodec)); let v = r.demand_next(read_annotations)?; - self.pos += r.pos; Ok(v) } fn comment_line(&mut self) -> io::Result { - let mut s = String::new(); + let mut bs = Vec::new(); loop { - match self.next_char()? { - '\r' | '\n' => return Ok(s), - c => s.push(c), + let b = self.peek()?; + self.skip()?; + match b { + b'\r' | b'\n' => return Ok(decode_utf8(bs)?), + _ => bs.push(b), } } } - fn read_intpart>(&mut self, mut s: String, c: char) -> io::Result { + fn read_intpart>(&mut self, mut bs: Vec, c: u8) -> io::Result { match c { - '0' => { - s.push(c); - self.read_fracexp(s) + b'0' => { + bs.push(c); + self.read_fracexp(bs) } _ => { - self.read_digit1(&mut s, c)?; - self.read_fracexp(s) + self.read_digit1(&mut bs, c)?; + self.read_fracexp(bs) } } } - fn read_fracexp>(&mut self, mut s: String) -> io::Result { + fn read_fracexp>(&mut self, mut bs: Vec) -> io::Result { + let mut is_float = false; match self.peek()? { - '.' => { - s.push(self.next_char()?); - let c = self.next_char()?; - self.read_digit1(&mut s, c)?; + b'.' => { + is_float = true; + bs.push(self.next_byte()?); + let c = self.next_byte()?; + self.read_digit1(&mut bs, c)?; } _ => () } - self.read_exp(s) - } - - fn read_exp>(&mut self, mut s: String) -> io::Result { match self.peek()? { - 'e' | 'E' => { - s.push(self.next_char()?); - self.read_sign_and_exp(s) + b'e' | b'E' => { + bs.push(self.next_byte()?); + self.read_sign_and_exp(bs) } - _ => self.finish_number(s) + _ => self.finish_number(bs, is_float) } } - fn read_sign_and_exp>(&mut self, mut s: String) -> io::Result { + fn read_sign_and_exp>(&mut self, mut bs: Vec) -> io::Result { match self.peek()? { - '+' | '-' => s.push(self.next_char()?), + b'+' | b'-' => bs.push(self.next_byte()?), _ => (), } - let c = self.next_char()?; - self.read_digit1(&mut s, c)?; - self.finish_number(s) + let c = self.next_byte()?; + self.read_digit1(&mut bs, c)?; + self.finish_number(bs, true) } - fn finish_number>(&mut self, s: String) -> io::Result { - if let Ok(n) = s.parse::() { - return Ok(N::new(n)); - } - match self.peek()? { - 'f' | 'F' => { - self.drop(1); - Ok(N::new(s.parse::().map_err( - |_| io_syntax_error(&format!( - "Invalid single-precision number: {:?}", s)))?)) + fn finish_number>(&mut self, bs: Vec, is_float: bool) -> io::Result { + let s = decode_utf8(bs)?; + if is_float { + match self.peek()? { + b'f' | b'F' => { + self.skip()?; + Ok(N::new(s.parse::().map_err( + |_| io_syntax_error(&format!( + "Invalid single-precision number: {:?}", s)))?)) + } + _ => + Ok(N::new(s.parse::().map_err( + |_| io_syntax_error(&format!( + "Invalid double-precision number: {:?}", s)))?)) } - _ => - Ok(N::new(s.parse::().map_err( - |_| io_syntax_error(&format!( - "Invalid double-precision number: {:?}", s)))?)) + } else { + Ok(N::new(s.parse::().map_err( + |_| io_syntax_error(&format!( + "Invalid signed-integer number: {:?}", s)))?)) } } - fn read_digit1(&mut self, s: &mut String, c: char) -> io::Result<()> + fn read_digit1(&mut self, bs: &mut Vec, c: u8) -> io::Result<()> { - if !c.is_digit(10) { + if !(c as char).is_digit(10) { return Err(io_syntax_error("Incomplete number")); } - s.push(c); - while self.peek()?.is_digit(10) { - s.push(self.next_char()?); + bs.push(c); + while (self.peek()? as char).is_digit(10) { + bs.push(self.next_byte()?); } Ok(()) } - fn read_stringlike( + fn read_stringlike( &mut self, mut seed: R, - acc: Acc, xform_item: X, - terminator: char, - hexescape: char, + terminator: u8, + hexescape: u8, hexescaper: H, ) -> io::Result where - X: Fn(char) -> Element, - H: Fn(&mut Self) -> io::Result, - Acc: Fn(&mut R, Element) -> (), + X: Fn(&mut R, u8) -> io::Result<()>, + H: Fn(&mut R, &mut Self) -> io::Result<()>, { loop { - match self.next_char()? { + match self.next_byte()? { c if c == terminator => return Ok(seed), - '\\' => match self.next_char()? { - c if c == hexescape => - acc(&mut seed, hexescaper(self)?), - c if c == terminator || c == '\\' || c == '/' => - acc(&mut seed, xform_item(c)), - 'b' => acc(&mut seed, xform_item('\x08')), - 'f' => acc(&mut seed, xform_item('\x0c')), - 'n' => acc(&mut seed, xform_item('\x0a')), - 'r' => acc(&mut seed, xform_item('\x0d')), - 't' => acc(&mut seed, xform_item('\x09')), + b'\\' => match self.next_byte()? { + c if c == hexescape => hexescaper(&mut seed, self)?, + c if c == terminator || c == b'\\' || c == b'/' => xform_item(&mut seed, c)?, + b'b' => xform_item(&mut seed, b'\x08')?, + b'f' => xform_item(&mut seed, b'\x0c')?, + b'n' => xform_item(&mut seed, b'\x0a')?, + b'r' => xform_item(&mut seed, b'\x0d')?, + b't' => xform_item(&mut seed, b'\x09')?, _ => return Err(io_syntax_error("Invalid escape code")), }, - c => acc(&mut seed, xform_item(c)), + c => xform_item(&mut seed, c)?, } } } @@ -245,8 +245,8 @@ impl<'a, D: Embeddable, Dec: DomainParse> TextReader<'a, D, Dec> { fn hexnum(&mut self, count: usize) -> io::Result { let mut v: u32 = 0; for _ in 0 .. count { - let c = self.next_char()?; - match c.to_digit(16) { + let c = self.next_byte()?; + match (c as char).to_digit(16) { Some(d) => v = v << 4 | d, None => @@ -256,58 +256,54 @@ impl<'a, D: Embeddable, Dec: DomainParse> TextReader<'a, D, Dec> { Ok(v) } - fn read_string(&mut self, delimiter: char) -> io::Result { - self.read_stringlike( - String::new(), - |s, c| s.push(c), - |c| c, + fn read_string(&mut self, delimiter: u8) -> io::Result { + decode_utf8(self.read_stringlike( + Vec::new(), + |bs, c| Ok(bs.push(c)), delimiter, - 'u', - |r| { + b'u', + |bs, r| { let n1 = r.hexnum(4)?; if (0xd800 ..= 0xdbff).contains(&n1) { let mut ok = true; - ok = ok && r.next_char()? == '\\'; - ok = ok && r.next_char()? == 'u'; + ok = ok && r.next_byte()? == b'\\'; + ok = ok && r.next_byte()? == b'u'; if !ok { Err(io_syntax_error("Missing second half of surrogate pair")) } else { let n2 = r.hexnum(4)?; if (0xdc00 ..= 0xdfff).contains(&n2) { let n = ((n1 - 0xd800) << 10) + (n2 - 0xdc00) + 0x10000; - char::from_u32(n).ok_or_else( - || io_syntax_error("Bad code point from surrogate pair")) + append_codepoint(bs, n) } else { Err(io_syntax_error("Bad second half of surrogate pair")) } } } else { - char::from_u32(n1).ok_or_else( - || io_syntax_error("Bad code point")) + append_codepoint(bs, n1) } - }) + })?) } fn read_literal_binary>(&mut self) -> io::Result { Ok(N::new(&self.read_stringlike( Vec::new(), - |bs, b| bs.push(b), - |c| c as u8, - '"', - 'x', - |r| Ok(r.hexnum(2)? as u8))?[..])) + |bs, b| Ok(bs.push(b)), + b'"', + b'x', + |bs, r| Ok(bs.push(r.hexnum(2)? as u8)))?[..])) } fn read_hex_binary>(&mut self) -> io::Result { let mut s = String::new(); loop { self.skip_whitespace(); - let c1 = self.next_char()?; + let c1 = self.next_byte()? as char; if c1 == '"' { let bs = hex::HexParser::Strict.decode(&s).unwrap(); return Ok(N::new(&bs[..])); } - let c2 = self.next_char()?; + let c2 = self.next_byte()? as char; if !(c1.is_digit(16) && c2.is_digit(16)) { return Err(io_syntax_error("Invalid hex binary")); } @@ -317,28 +313,28 @@ impl<'a, D: Embeddable, Dec: DomainParse> TextReader<'a, D, Dec> { } fn read_base64_binary>(&mut self) -> io::Result { - let mut s = String::new(); + let mut bs = Vec::new(); loop { self.skip_whitespace(); - let mut c = self.next_char()?; - if c == ']' { - let bs = base64::decode_config(&s, base64::STANDARD_NO_PAD) + let mut c = self.next_byte()?; + if c == b']' { + let bs = base64::decode_config(&decode_utf8(bs)?, base64::STANDARD_NO_PAD) .map_err(|_| io_syntax_error("Invalid base64 character"))?; return Ok(N::new(&bs[..])); } - if c == '-' { c = '+'; } - if c == '_' { c = '/'; } - if c == '=' { continue; } - s.push(c); + if c == b'-' { c = b'+'; } + if c == b'_' { c = b'/'; } + if c == b'=' { continue; } + bs.push(c); } } - fn upto>(&mut self, delimiter: char, read_annotations: bool) -> io::Result> { + fn upto>(&mut self, delimiter: u8, read_annotations: bool) -> io::Result> { let mut vs = Vec::new(); loop { self.skip_whitespace(); if self.peek()? == delimiter { - self.drop(delimiter.len_utf8()); + self.skip()?; return Ok(vs); } vs.push(Reader::::demand_next(self, read_annotations)?); @@ -349,13 +345,13 @@ impl<'a, D: Embeddable, Dec: DomainParse> TextReader<'a, D, Dec> { let mut d = Map::new(); loop { self.skip_whitespace(); - if self.peek()? == '}' { - self.drop(1); + if self.peek()? == b'}' { + self.skip()?; return Ok(N::new(d)); } let k = Reader::::demand_next(self, read_annotations)?; self.skip_whitespace(); - if self.next_char()? != ':' { + if self.next_byte()? != b':' { return Err(io_syntax_error("Missing expected key/value separator")); } let v = Reader::::demand_next(self, read_annotations)?; @@ -363,48 +359,56 @@ impl<'a, D: Embeddable, Dec: DomainParse> TextReader<'a, D, Dec> { } } - fn read_raw_symbol>(&mut self, mut s: String) -> io::Result { + fn read_raw_symbol>(&mut self, mut bs: Vec) -> io::Result { loop { let c = match self.peek() { - Err(e) if is_eof_error(&e) => ' ', + Err(e) if is_eof_io_error(&e) => b' ', Err(e) => return Err(e)?, - Ok(c) if c.is_whitespace() => ' ', + Ok(c) if (c as char).is_whitespace() => b' ', Ok(c) => c }; match c { - '(' | ')' | '{' | '}' | '[' | ']' | '<' | '>' | - '"' | ';' | ',' | '@' | '#' | ':' | '|' | ' ' => - return Ok(Value::symbol(&s).wrap()), + b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | + b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => + return Ok(Value::symbol(&decode_utf8(bs)?).wrap()), c => { - self.drop(c.len_utf8()); - s.push(c) + self.skip()?; + bs.push(c) } } } } } -impl<'a, 'de, D: Embeddable, N: NestedValue, Dec: DomainParse> Reader<'de, D, N> for TextReader<'a, D, Dec> { +impl<'de, 'src, D: Embeddable, N: NestedValue, Dec: DomainParse, S: BinarySource<'de>> + Reader<'de, D, N> for TextReader<'de, 'src, D, Dec, S> +{ fn next(&mut self, read_annotations: bool) -> io::Result> { self.skip_whitespace(); - let c = match self.next_char() { + let c = match self.peek() { Ok(c) => c, - Err(e) if is_eof_error(&e) => return Ok(None), + Err(e) if is_eof_io_error(&e) => return Ok(None), Err(e) => return Err(e.into()), }; Ok(Some(match c { - '-' => { - let c1 = self.next_char()?; - self.read_intpart("-".to_owned(), c1)? + b'-' => { + self.skip()?; + let c1 = self.next_byte()?; + self.read_intpart(vec![b'-'], c1)? } - '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => - self.read_intpart(String::new(), c)?, - '"' => - N::new(self.read_string('"')?), - '|' => - Value::symbol(&self.read_string('|')?).wrap(), - ';' | '@' => { - self.undrop(1); + b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => { + self.skip()?; + self.read_intpart(Vec::new(), c)? + } + b'"' => { + self.skip()?; + N::new(self.read_string(b'"')?) + } + b'|' => { + self.skip()?; + Value::symbol(&self.read_string(b'|')?).wrap() + } + b';' | b'@' => { if read_annotations { let mut annotations = self.gather_annotations()?; let (existing_annotations, v) = @@ -416,61 +420,73 @@ impl<'a, 'de, D: Embeddable, N: NestedValue, Dec: DomainParse> Reader<'de, self.demand_next(read_annotations)? } } - ':' => { - // return Err(io_syntax_error("Unexpected key/value separator between items")), - return Err(io_syntax_error(&format!("Unexpected key/value separator between items (pos {:?})", self.pos))); + b':' => { + return Err(io_syntax_error("Unexpected key/value separator between items")); } - '#' => match self.next_char()? { - 'f' => N::new(false), - 't' => N::new(true), - '{' => N::new(Set::from_iter(self.upto('}', read_annotations)?.into_iter())), - '"' => self.read_literal_binary()?, - 'x' => if self.next_char()? == '"' { - self.read_hex_binary()? - } else { - return Err(io_syntax_error("Expected open-quote at start of hex ByteString")); - }, - '[' => self.read_base64_binary()?, - '=' => { - let bs_val: N = self.demand_next(true)?; - if bs_val.annotations().slice().len() > 0 { - return Err(io_syntax_error("Annotations not permitted after #=")); + b'#' => { + self.skip()?; + match self.next_byte()? { + b'f' => N::new(false), + b't' => N::new(true), + b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations)?.into_iter())), + b'"' => self.read_literal_binary()?, + b'x' => if self.next_byte()? == b'"' { + self.read_hex_binary()? + } else { + return Err(io_syntax_error("Expected open-quote at start of hex ByteString")); + }, + b'[' => self.read_base64_binary()?, + b'=' => { + let bs_val: N = self.demand_next(true)?; + if bs_val.annotations().slice().len() > 0 { + return Err(io_syntax_error("Annotations not permitted after #=")); + } + match bs_val.value().as_bytestring() { + None => + return Err(io_syntax_error("ByteString must follow #=")), + Some(bs) => + crate::value::BytesBinarySource::new(bs) + .packed(ViaCodec::new(&mut self.dec)) + .demand_next(read_annotations)? + } } - match bs_val.value().as_bytestring() { - None => - return Err(io_syntax_error("ByteString must follow #=")), - Some(bs) => - crate::value::BytesBinarySource::new(bs) - .packed(ViaCodec::new(&mut self.dec)) - .demand_next(read_annotations)? + b'!' => { + let v = self.next_iovalue(read_annotations)?; + Value::Embedded(self.dec.parse_embedded(&v)?).wrap() } + other => return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other))), } - '!' => { - let v = self.next_iovalue(read_annotations)?; - Value::Embedded(self.dec.parse_embedded(&v)?).wrap() - } - other => return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other))), - }, - '<' => { - let vs = self.upto('>', read_annotations)?; + } + b'<' => { + self.skip()?; + let vs = self.upto(b'>', read_annotations)?; if vs.is_empty() { return Err(io_syntax_error("Missing record label")); } Value::Record(Record(vs)).wrap() } - '[' => N::new(self.upto(']', read_annotations)?), - '{' => self.read_dictionary(read_annotations)?, - '>' => return Err(io_syntax_error("Unexpected >")), - ']' => return Err(io_syntax_error("Unexpected ]")), - '}' => return Err(io_syntax_error("Unexpected }")), - other => self.read_raw_symbol(other.to_string())?, + b'[' => { + self.skip()?; + N::new(self.upto(b']', read_annotations)?) + } + b'{' => { + self.skip()?; + self.read_dictionary(read_annotations)? + } + b'>' => return Err(io_syntax_error("Unexpected >")), + b']' => return Err(io_syntax_error("Unexpected ]")), + b'}' => return Err(io_syntax_error("Unexpected }")), + other => { + self.skip()?; + self.read_raw_symbol(vec![other])? + } })) } fn open_record(&mut self, arity: Option) -> ReaderResult { self.skip_annotations()?; - if self.peek()? != '<' { return Err(self.expected::(ExpectedKind::Record(arity))); } - self.drop(1); + if self.peek()? != b'<' { return Err(self.expected::(ExpectedKind::Record(arity))); } + self.skip()?; let mut b = B::Type::default(); Reader::::ensure_more_expected(self, &mut b, &B::Item::RecordLabel)?; Ok(b) @@ -479,12 +495,12 @@ impl<'a, 'de, D: Embeddable, N: NestedValue, Dec: DomainParse> Reader<'de, fn open_sequence_or_set(&mut self) -> ReaderResult { self.skip_annotations()?; let mark = Reader::::mark(self)?; - match self.next_char()? { - '#' => match self.next_char()? { - '{' => return Ok(B::Item::SetValue), + match self.next_byte()? { + b'#' => match self.next_byte()? { + b'{' => return Ok(B::Item::SetValue), _ => (), }, - '[' => return Ok(B::Item::SequenceValue), + b'[' => return Ok(B::Item::SequenceValue), _ => (), } Reader::::restore(self, &mark)?; @@ -493,17 +509,17 @@ impl<'a, 'de, D: Embeddable, N: NestedValue, Dec: DomainParse> Reader<'de, fn open_sequence(&mut self) -> ReaderResult<()> { self.skip_annotations()?; - if self.peek()? != '[' { return Err(self.expected::(ExpectedKind::Sequence)); } - self.drop(1); + if self.peek()? != b'[' { return Err(self.expected::(ExpectedKind::Sequence)); } + self.skip()?; Ok(()) } fn open_set(&mut self) -> ReaderResult<()> { self.skip_annotations()?; let mark = Reader::::mark(self)?; - match self.next_char()? { - '#' => match self.next_char()? { - '{' => return Ok(()), + match self.next_byte()? { + b'#' => match self.next_byte()? { + b'{' => return Ok(()), _ => (), }, _ => (), @@ -514,8 +530,8 @@ impl<'a, 'de, D: Embeddable, N: NestedValue, Dec: DomainParse> Reader<'de, fn open_dictionary(&mut self) -> ReaderResult<()> { self.skip_annotations()?; - if self.peek()? != '{' { return Err(self.expected::(ExpectedKind::Dictionary)); } - self.drop(1); + if self.peek()? != b'{' { return Err(self.expected::(ExpectedKind::Dictionary)); } + self.skip()?; Ok(()) } @@ -527,7 +543,7 @@ impl<'a, 'de, D: Embeddable, N: NestedValue, Dec: DomainParse> Reader<'de, opening: Some(B::Item::DictionaryValue), } => { self.skip_whitespace(); - if self.next_char()? != ':' { + if self.next_byte()? != b':' { return Err(syntax_error("Missing expected key/value separator")); } }, @@ -539,8 +555,8 @@ impl<'a, 'de, D: Embeddable, N: NestedValue, Dec: DomainParse> Reader<'de, fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult { self.skip_whitespace(); match self.peek()? { - '>' | ']' | '}' => { - self.drop(1); + b'>' | b']' | b'}' => { + self.skip()?; Ok(true) } _ => { @@ -554,9 +570,9 @@ impl<'a, 'de, D: Embeddable, N: NestedValue, Dec: DomainParse> Reader<'de, fn open_embedded(&mut self) -> ReaderResult<()> { self.skip_annotations()?; let mark = Reader::::mark(self)?; - match self.next_char()? { - '#' => match self.next_char()? { - '!' => return Ok(()), + match self.next_byte()? { + b'#' => match self.next_byte()? { + b'!' => return Ok(()), _ => (), }, _ => (), @@ -569,33 +585,32 @@ impl<'a, 'de, D: Embeddable, N: NestedValue, Dec: DomainParse> Reader<'de, Ok(()) } - type Mark = usize; + type Mark = S::Mark; fn mark(&mut self) -> io::Result { - Ok(self.pos) + self.source.mark() } fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> { - self.pos = *mark; - Ok(()) + self.source.restore(mark) } fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result> { self.skip_annotations()?; let mark = Reader::::mark(self)?; - Ok(match self.next_char()? { - '<' => Token::Compound(CompoundClass::Record), - '[' => Token::Compound(CompoundClass::Sequence), - '{' => Token::Compound(CompoundClass::Dictionary), - '>' => Token::End, - ']' => Token::End, - '}' => Token::End, - '#' => match self.next_char()? { - '!' => { + Ok(match self.next_byte()? { + b'<' => Token::Compound(CompoundClass::Record), + b'[' => Token::Compound(CompoundClass::Sequence), + b'{' => Token::Compound(CompoundClass::Dictionary), + b'>' => Token::End, + b']' => Token::End, + b'}' => Token::End, + b'#' => match self.next_byte()? { + b'!' => { let v = self.next_iovalue(read_embedded_annotations)?; Token::Embedded(self.dec.parse_embedded(&v)?) } - '{' => Token::Compound(CompoundClass::Set), + b'{' => Token::Compound(CompoundClass::Set), _ => { Reader::::restore(self, &mark)?; Token::Atom(self.demand_next(false)?) diff --git a/implementations/rust/preserves/tests/samples_tests.rs b/implementations/rust/preserves/tests/samples_tests.rs index d4e992a..68604e0 100644 --- a/implementations/rust/preserves/tests/samples_tests.rs +++ b/implementations/rust/preserves/tests/samples_tests.rs @@ -23,8 +23,10 @@ fn decode_all(bytes: &'_ [u8]) -> io::Result> { let mut fh = std::fs::File::open("../../../tests/samples.pr").unwrap(); let mut contents = String::new(); fh.read_to_string(&mut contents)?; - let mut d = preserves::value::TextReader::new(&contents, preserves::value::ViaCodec::new(preserves::value::IOValueDomainCodec)); - d.next_iovalue(true)? + preserves::value::TextReader::new( + &mut BytesBinarySource::new(contents.as_bytes()), + preserves::value::ViaCodec::new(preserves::value::IOValueDomainCodec)) + .next_iovalue(true)? }; let from_packed = { let mut fh = std::fs::File::open("../../../tests/samples.bin").unwrap();