Decode text syntax from *byte* sources.

This commit is contained in:
Tony Garnock-Jones 2021-08-03 16:26:40 +02:00
parent e913951b91
commit d28901446d
4 changed files with 262 additions and 242 deletions

View File

@ -26,8 +26,9 @@ where
from_reader(&mut PackedReader::new(&mut BytesBinarySource::new(bytes), IOValueDomainCodec))
}
pub fn from_text<'de, T>(text: &str) -> Result<T> where T: Deserialize<'de> {
from_reader(&mut TextReader::new(text, ViaCodec::new(IOValueDomainCodec)))
pub fn from_text<'de, T>(text: &'de str) -> Result<T> where T: Deserialize<'de> {
from_reader(&mut TextReader::new(&mut BytesBinarySource::new(text.as_bytes()),
ViaCodec::new(IOValueDomainCodec)))
}
pub fn from_read<'de, 'r, IOR: io::Read + io::Seek, T>(read: &'r mut IOR) ->

View File

@ -4,6 +4,8 @@ pub mod writer;
pub use reader::TextReader;
pub use writer::TextWriter;
use crate::value::reader::BytesBinarySource;
use std::io;
use super::{DomainParse, Embeddable, IOValue, IOValueDomainCodec, NestedValue, Reader, ViaCodec};
@ -12,7 +14,7 @@ pub fn from_str<D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>>(
s: &str,
decode_embedded: Dec,
) -> io::Result<N> {
TextReader::new(s, decode_embedded).demand_next(false)
TextReader::new(&mut BytesBinarySource::new(s.as_bytes()), decode_embedded).demand_next(false)
}
pub fn iovalue_from_str(s: &str) -> io::Result<IOValue> {
@ -23,7 +25,7 @@ pub fn annotated_from_str<D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>>
s: &str,
decode_embedded: Dec,
) -> io::Result<N> {
TextReader::new(s, decode_embedded).demand_next(true)
TextReader::new(&mut BytesBinarySource::new(s.as_bytes()), decode_embedded).demand_next(true)
}
pub fn annotated_iovalue_from_str(s: &str) -> io::Result<IOValue> {

View File

@ -1,9 +1,8 @@
use crate::error::Error;
use crate::error::ExpectedKind;
use crate::error::Received;
use crate::error::eof;
use crate::error::io_syntax_error;
use crate::error::is_eof_error;
use crate::error::is_eof_io_error;
use crate::error::syntax_error;
use crate::hex;
@ -33,55 +32,57 @@ use std::io;
use std::iter::FromIterator;
use std::marker::PhantomData;
pub struct TextReader<'a, D: Embeddable, Dec: DomainParse<D>> {
buf: &'a str,
pos: usize,
dec: Dec,
phantom: PhantomData<D>,
pub struct TextReader<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> {
pub source: &'src mut S,
pub dec: Dec,
phantom: PhantomData<&'de D>,
}
impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
pub fn new(buf: &'a str, dec: Dec) -> Self {
/// Converts bytes accumulated by the reader into a `String`.
///
/// # Errors
///
/// Returns the syntax error `"Invalid UTF-8"` when `bs` is not well-formed
/// UTF-8.
fn decode_utf8(bs: Vec<u8>) -> io::Result<String> {
    // `map_err` already yields the right `Result` type, so no `Ok(..?)` wrapper is needed.
    String::from_utf8(bs).map_err(|_| io_syntax_error("Invalid UTF-8"))
}
/// Appends the UTF-8 encoding of code point `n` to `bs`.
///
/// # Errors
///
/// Returns the syntax error `"Bad code point"` when `n` is not a valid Unicode
/// scalar value (for example an unpaired surrogate); `bs` is left untouched.
fn append_codepoint(bs: &mut Vec<u8>, n: u32) -> io::Result<()> {
    let c = char::from_u32(n).ok_or_else(|| io_syntax_error("Bad code point"))?;
    let mut buf = [0; 4];
    // `encode_utf8` returns exactly the encoded prefix of `buf`, so there is
    // no need to re-slice by `len_utf8`.
    bs.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
    Ok(())
}
impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> TextReader<'de, 'src, D, Dec, S> {
pub fn new(source: &'src mut S, dec: Dec) -> Self {
TextReader {
buf,
pos: 0,
source,
dec,
phantom: PhantomData,
}
}
fn remaining_input(&self) -> &str {
&self.buf[self.pos ..]
/// Delegates to the underlying source's `peek`, yielding the byte at the
/// current position; consuming it is done separately via `skip`.
fn peek(&mut self) -> io::Result<u8> {
self.source.peek()
}
fn peek(&self) -> ReaderResult<char> {
if self.pos >= self.buf.len() {
Err(eof())
} else {
Ok(self.buf[self.pos ..].chars().next().unwrap())
}
/// Delegates to the underlying source's `skip`; callers use it to consume a
/// byte they have just examined with `peek`.
fn skip(&mut self) -> io::Result<()> {
self.source.skip()
}
fn drop(&mut self, count: usize) {
self.pos += count;
}
fn undrop(&mut self, count: usize) {
self.pos -= count;
}
fn next_char(&mut self) -> ReaderResult<char> {
let c = self.peek()?;
self.drop(c.len_utf8());
Ok(c)
/// Reads one byte from the source and advances past it.
///
/// Fails with whatever error the source's `peek` or `skip` reports.
fn next_byte(&mut self) -> io::Result<u8> {
    let byte = self.source.peek()?;
    self.source.skip().map(|()| byte)
}
fn skip_whitespace(&mut self) {
// Deliberately swallows errors.
while let Ok(c) = self.peek() {
if !c.is_whitespace() && c != ',' {
break;
match c {
b' ' | b'\t' | b'\r' | b'\n' | b',' => {
let _ = self.skip();
()
}
_ => break,
}
self.drop(c.len_utf8())
}
}
@ -98,8 +99,8 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
loop {
self.skip_whitespace();
match self.peek()? {
';' => { self.drop(1); vs.push(N::new(self.comment_line()?)) }
'@' => { self.drop(1); vs.push(self.demand_next(true)?) }
b';' => { self.skip()?; vs.push(N::new(self.comment_line()?)) }
b'@' => { self.skip()?; vs.push(self.demand_next(true)?) }
_ => return Ok(vs),
}
}
@ -109,135 +110,134 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
loop {
self.skip_whitespace();
match self.peek()? {
';' => { self.drop(1); self.comment_line()?; },
'@' => { self.drop(1); Reader::<D, DummyValue<D>>::skip_value(self)?; },
b';' => { self.skip()?; self.comment_line()?; },
b'@' => { self.skip()?; Reader::<D, DummyValue<D>>::skip_value(self)?; },
_ => return Ok(()),
}
}
}
/// Reads the next value from the same source as an `IOValue`.
///
/// A temporary `TextReader` using `IOValueDomainCodec` is layered over this
/// reader's source, so the bytes it consumes are consumed from this reader
/// as well.
pub fn next_iovalue(&mut self, read_annotations: bool) -> io::Result<IOValue> {
    let mut inner = TextReader::new(self.source, ViaCodec::new(IOValueDomainCodec));
    let value = inner.demand_next(read_annotations)?;
    Ok(value)
}
fn comment_line(&mut self) -> io::Result<String> {
let mut s = String::new();
let mut bs = Vec::new();
loop {
match self.next_char()? {
'\r' | '\n' => return Ok(s),
c => s.push(c),
let b = self.peek()?;
self.skip()?;
match b {
b'\r' | b'\n' => return Ok(decode_utf8(bs)?),
_ => bs.push(b),
}
}
}
fn read_intpart<N: NestedValue<D>>(&mut self, mut s: String, c: char) -> io::Result<N> {
fn read_intpart<N: NestedValue<D>>(&mut self, mut bs: Vec<u8>, c: u8) -> io::Result<N> {
match c {
'0' => {
s.push(c);
self.read_fracexp(s)
b'0' => {
bs.push(c);
self.read_fracexp(bs)
}
_ => {
self.read_digit1(&mut s, c)?;
self.read_fracexp(s)
self.read_digit1(&mut bs, c)?;
self.read_fracexp(bs)
}
}
}
fn read_fracexp<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
fn read_fracexp<N: NestedValue<D>>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
let mut is_float = false;
match self.peek()? {
'.' => {
s.push(self.next_char()?);
let c = self.next_char()?;
self.read_digit1(&mut s, c)?;
b'.' => {
is_float = true;
bs.push(self.next_byte()?);
let c = self.next_byte()?;
self.read_digit1(&mut bs, c)?;
}
_ => ()
}
self.read_exp(s)
}
fn read_exp<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
match self.peek()? {
'e' | 'E' => {
s.push(self.next_char()?);
self.read_sign_and_exp(s)
b'e' | b'E' => {
bs.push(self.next_byte()?);
self.read_sign_and_exp(bs)
}
_ => self.finish_number(s)
_ => self.finish_number(bs, is_float)
}
}
fn read_sign_and_exp<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
fn read_sign_and_exp<N: NestedValue<D>>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
match self.peek()? {
'+' | '-' => s.push(self.next_char()?),
b'+' | b'-' => bs.push(self.next_byte()?),
_ => (),
}
let c = self.next_char()?;
self.read_digit1(&mut s, c)?;
self.finish_number(s)
let c = self.next_byte()?;
self.read_digit1(&mut bs, c)?;
self.finish_number(bs, true)
}
fn finish_number<N: NestedValue<D>>(&mut self, s: String) -> io::Result<N> {
if let Ok(n) = s.parse::<BigInt>() {
return Ok(N::new(n));
}
match self.peek()? {
'f' | 'F' => {
self.drop(1);
Ok(N::new(s.parse::<f32>().map_err(
|_| io_syntax_error(&format!(
"Invalid single-precision number: {:?}", s)))?))
fn finish_number<N: NestedValue<D>>(&mut self, bs: Vec<u8>, is_float: bool) -> io::Result<N> {
let s = decode_utf8(bs)?;
if is_float {
match self.peek()? {
b'f' | b'F' => {
self.skip()?;
Ok(N::new(s.parse::<f32>().map_err(
|_| io_syntax_error(&format!(
"Invalid single-precision number: {:?}", s)))?))
}
_ =>
Ok(N::new(s.parse::<f64>().map_err(
|_| io_syntax_error(&format!(
"Invalid double-precision number: {:?}", s)))?))
}
_ =>
Ok(N::new(s.parse::<f64>().map_err(
|_| io_syntax_error(&format!(
"Invalid double-precision number: {:?}", s)))?))
} else {
Ok(N::new(s.parse::<BigInt>().map_err(
|_| io_syntax_error(&format!(
"Invalid signed-integer number: {:?}", s)))?))
}
}
fn read_digit1(&mut self, s: &mut String, c: char) -> io::Result<()>
fn read_digit1(&mut self, bs: &mut Vec<u8>, c: u8) -> io::Result<()>
{
if !c.is_digit(10) {
if !(c as char).is_digit(10) {
return Err(io_syntax_error("Incomplete number"));
}
s.push(c);
while self.peek()?.is_digit(10) {
s.push(self.next_char()?);
bs.push(c);
while (self.peek()? as char).is_digit(10) {
bs.push(self.next_byte()?);
}
Ok(())
}
fn read_stringlike<X, H, Acc, Element, R>(
fn read_stringlike<X, H, R>(
&mut self,
mut seed: R,
acc: Acc,
xform_item: X,
terminator: char,
hexescape: char,
terminator: u8,
hexescape: u8,
hexescaper: H,
) -> io::Result<R>
where
X: Fn(char) -> Element,
H: Fn(&mut Self) -> io::Result<Element>,
Acc: Fn(&mut R, Element) -> (),
X: Fn(&mut R, u8) -> io::Result<()>,
H: Fn(&mut R, &mut Self) -> io::Result<()>,
{
loop {
match self.next_char()? {
match self.next_byte()? {
c if c == terminator => return Ok(seed),
'\\' => match self.next_char()? {
c if c == hexescape =>
acc(&mut seed, hexescaper(self)?),
c if c == terminator || c == '\\' || c == '/' =>
acc(&mut seed, xform_item(c)),
'b' => acc(&mut seed, xform_item('\x08')),
'f' => acc(&mut seed, xform_item('\x0c')),
'n' => acc(&mut seed, xform_item('\x0a')),
'r' => acc(&mut seed, xform_item('\x0d')),
't' => acc(&mut seed, xform_item('\x09')),
b'\\' => match self.next_byte()? {
c if c == hexescape => hexescaper(&mut seed, self)?,
c if c == terminator || c == b'\\' || c == b'/' => xform_item(&mut seed, c)?,
b'b' => xform_item(&mut seed, b'\x08')?,
b'f' => xform_item(&mut seed, b'\x0c')?,
b'n' => xform_item(&mut seed, b'\x0a')?,
b'r' => xform_item(&mut seed, b'\x0d')?,
b't' => xform_item(&mut seed, b'\x09')?,
_ => return Err(io_syntax_error("Invalid escape code")),
},
c => acc(&mut seed, xform_item(c)),
c => xform_item(&mut seed, c)?,
}
}
}
@ -245,8 +245,8 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
fn hexnum(&mut self, count: usize) -> io::Result<u32> {
let mut v: u32 = 0;
for _ in 0 .. count {
let c = self.next_char()?;
match c.to_digit(16) {
let c = self.next_byte()?;
match (c as char).to_digit(16) {
Some(d) =>
v = v << 4 | d,
None =>
@ -256,58 +256,54 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
Ok(v)
}
fn read_string(&mut self, delimiter: char) -> io::Result<String> {
self.read_stringlike(
String::new(),
|s, c| s.push(c),
|c| c,
fn read_string(&mut self, delimiter: u8) -> io::Result<String> {
decode_utf8(self.read_stringlike(
Vec::new(),
|bs, c| Ok(bs.push(c)),
delimiter,
'u',
|r| {
b'u',
|bs, r| {
let n1 = r.hexnum(4)?;
if (0xd800 ..= 0xdbff).contains(&n1) {
let mut ok = true;
ok = ok && r.next_char()? == '\\';
ok = ok && r.next_char()? == 'u';
ok = ok && r.next_byte()? == b'\\';
ok = ok && r.next_byte()? == b'u';
if !ok {
Err(io_syntax_error("Missing second half of surrogate pair"))
} else {
let n2 = r.hexnum(4)?;
if (0xdc00 ..= 0xdfff).contains(&n2) {
let n = ((n1 - 0xd800) << 10) + (n2 - 0xdc00) + 0x10000;
char::from_u32(n).ok_or_else(
|| io_syntax_error("Bad code point from surrogate pair"))
append_codepoint(bs, n)
} else {
Err(io_syntax_error("Bad second half of surrogate pair"))
}
}
} else {
char::from_u32(n1).ok_or_else(
|| io_syntax_error("Bad code point"))
append_codepoint(bs, n1)
}
})
})?)
}
fn read_literal_binary<N: NestedValue<D>>(&mut self) -> io::Result<N> {
Ok(N::new(&self.read_stringlike(
Vec::new(),
|bs, b| bs.push(b),
|c| c as u8,
'"',
'x',
|r| Ok(r.hexnum(2)? as u8))?[..]))
|bs, b| Ok(bs.push(b)),
b'"',
b'x',
|bs, r| Ok(bs.push(r.hexnum(2)? as u8)))?[..]))
}
fn read_hex_binary<N: NestedValue<D>>(&mut self) -> io::Result<N> {
let mut s = String::new();
loop {
self.skip_whitespace();
let c1 = self.next_char()?;
let c1 = self.next_byte()? as char;
if c1 == '"' {
let bs = hex::HexParser::Strict.decode(&s).unwrap();
return Ok(N::new(&bs[..]));
}
let c2 = self.next_char()?;
let c2 = self.next_byte()? as char;
if !(c1.is_digit(16) && c2.is_digit(16)) {
return Err(io_syntax_error("Invalid hex binary"));
}
@ -317,28 +313,28 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
}
fn read_base64_binary<N: NestedValue<D>>(&mut self) -> io::Result<N> {
let mut s = String::new();
let mut bs = Vec::new();
loop {
self.skip_whitespace();
let mut c = self.next_char()?;
if c == ']' {
let bs = base64::decode_config(&s, base64::STANDARD_NO_PAD)
let mut c = self.next_byte()?;
if c == b']' {
let bs = base64::decode_config(&decode_utf8(bs)?, base64::STANDARD_NO_PAD)
.map_err(|_| io_syntax_error("Invalid base64 character"))?;
return Ok(N::new(&bs[..]));
}
if c == '-' { c = '+'; }
if c == '_' { c = '/'; }
if c == '=' { continue; }
s.push(c);
if c == b'-' { c = b'+'; }
if c == b'_' { c = b'/'; }
if c == b'=' { continue; }
bs.push(c);
}
}
fn upto<N: NestedValue<D>>(&mut self, delimiter: char, read_annotations: bool) -> io::Result<Vec<N>> {
fn upto<N: NestedValue<D>>(&mut self, delimiter: u8, read_annotations: bool) -> io::Result<Vec<N>> {
let mut vs = Vec::new();
loop {
self.skip_whitespace();
if self.peek()? == delimiter {
self.drop(delimiter.len_utf8());
self.skip()?;
return Ok(vs);
}
vs.push(Reader::<D, N>::demand_next(self, read_annotations)?);
@ -349,13 +345,13 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
let mut d = Map::new();
loop {
self.skip_whitespace();
if self.peek()? == '}' {
self.drop(1);
if self.peek()? == b'}' {
self.skip()?;
return Ok(N::new(d));
}
let k = Reader::<D, N>::demand_next(self, read_annotations)?;
self.skip_whitespace();
if self.next_char()? != ':' {
if self.next_byte()? != b':' {
return Err(io_syntax_error("Missing expected key/value separator"));
}
let v = Reader::<D, N>::demand_next(self, read_annotations)?;
@ -363,48 +359,56 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
}
}
fn read_raw_symbol<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
fn read_raw_symbol<N: NestedValue<D>>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
loop {
let c = match self.peek() {
Err(e) if is_eof_error(&e) => ' ',
Err(e) if is_eof_io_error(&e) => b' ',
Err(e) => return Err(e)?,
Ok(c) if c.is_whitespace() => ' ',
Ok(c) if (c as char).is_whitespace() => b' ',
Ok(c) => c
};
match c {
'(' | ')' | '{' | '}' | '[' | ']' | '<' | '>' |
'"' | ';' | ',' | '@' | '#' | ':' | '|' | ' ' =>
return Ok(Value::symbol(&s).wrap()),
b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' |
b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' =>
return Ok(Value::symbol(&decode_utf8(bs)?).wrap()),
c => {
self.drop(c.len_utf8());
s.push(c)
self.skip()?;
bs.push(c)
}
}
}
}
}
impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de, D, N> for TextReader<'a, D, Dec> {
impl<'de, 'src, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>, S: BinarySource<'de>>
Reader<'de, D, N> for TextReader<'de, 'src, D, Dec, S>
{
fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>> {
self.skip_whitespace();
let c = match self.next_char() {
let c = match self.peek() {
Ok(c) => c,
Err(e) if is_eof_error(&e) => return Ok(None),
Err(e) if is_eof_io_error(&e) => return Ok(None),
Err(e) => return Err(e.into()),
};
Ok(Some(match c {
'-' => {
let c1 = self.next_char()?;
self.read_intpart("-".to_owned(), c1)?
b'-' => {
self.skip()?;
let c1 = self.next_byte()?;
self.read_intpart(vec![b'-'], c1)?
}
'0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
self.read_intpart(String::new(), c)?,
'"' =>
N::new(self.read_string('"')?),
'|' =>
Value::symbol(&self.read_string('|')?).wrap(),
';' | '@' => {
self.undrop(1);
b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => {
self.skip()?;
self.read_intpart(Vec::new(), c)?
}
b'"' => {
self.skip()?;
N::new(self.read_string(b'"')?)
}
b'|' => {
self.skip()?;
Value::symbol(&self.read_string(b'|')?).wrap()
}
b';' | b'@' => {
if read_annotations {
let mut annotations = self.gather_annotations()?;
let (existing_annotations, v) =
@ -416,61 +420,73 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
self.demand_next(read_annotations)?
}
}
':' => {
// return Err(io_syntax_error("Unexpected key/value separator between items")),
return Err(io_syntax_error(&format!("Unexpected key/value separator between items (pos {:?})", self.pos)));
b':' => {
return Err(io_syntax_error("Unexpected key/value separator between items"));
}
'#' => match self.next_char()? {
'f' => N::new(false),
't' => N::new(true),
'{' => N::new(Set::from_iter(self.upto('}', read_annotations)?.into_iter())),
'"' => self.read_literal_binary()?,
'x' => if self.next_char()? == '"' {
self.read_hex_binary()?
} else {
return Err(io_syntax_error("Expected open-quote at start of hex ByteString"));
},
'[' => self.read_base64_binary()?,
'=' => {
let bs_val: N = self.demand_next(true)?;
if bs_val.annotations().slice().len() > 0 {
return Err(io_syntax_error("Annotations not permitted after #="));
b'#' => {
self.skip()?;
match self.next_byte()? {
b'f' => N::new(false),
b't' => N::new(true),
b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations)?.into_iter())),
b'"' => self.read_literal_binary()?,
b'x' => if self.next_byte()? == b'"' {
self.read_hex_binary()?
} else {
return Err(io_syntax_error("Expected open-quote at start of hex ByteString"));
},
b'[' => self.read_base64_binary()?,
b'=' => {
let bs_val: N = self.demand_next(true)?;
if bs_val.annotations().slice().len() > 0 {
return Err(io_syntax_error("Annotations not permitted after #="));
}
match bs_val.value().as_bytestring() {
None =>
return Err(io_syntax_error("ByteString must follow #=")),
Some(bs) =>
crate::value::BytesBinarySource::new(bs)
.packed(ViaCodec::new(&mut self.dec))
.demand_next(read_annotations)?
}
}
match bs_val.value().as_bytestring() {
None =>
return Err(io_syntax_error("ByteString must follow #=")),
Some(bs) =>
crate::value::BytesBinarySource::new(bs)
.packed(ViaCodec::new(&mut self.dec))
.demand_next(read_annotations)?
b'!' => {
let v = self.next_iovalue(read_annotations)?;
Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
}
other => return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other))),
}
'!' => {
let v = self.next_iovalue(read_annotations)?;
Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
}
other => return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other))),
},
'<' => {
let vs = self.upto('>', read_annotations)?;
}
b'<' => {
self.skip()?;
let vs = self.upto(b'>', read_annotations)?;
if vs.is_empty() {
return Err(io_syntax_error("Missing record label"));
}
Value::Record(Record(vs)).wrap()
}
'[' => N::new(self.upto(']', read_annotations)?),
'{' => self.read_dictionary(read_annotations)?,
'>' => return Err(io_syntax_error("Unexpected >")),
']' => return Err(io_syntax_error("Unexpected ]")),
'}' => return Err(io_syntax_error("Unexpected }")),
other => self.read_raw_symbol(other.to_string())?,
b'[' => {
self.skip()?;
N::new(self.upto(b']', read_annotations)?)
}
b'{' => {
self.skip()?;
self.read_dictionary(read_annotations)?
}
b'>' => return Err(io_syntax_error("Unexpected >")),
b']' => return Err(io_syntax_error("Unexpected ]")),
b'}' => return Err(io_syntax_error("Unexpected }")),
other => {
self.skip()?;
self.read_raw_symbol(vec![other])?
}
}))
}
fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type> {
self.skip_annotations()?;
if self.peek()? != '<' { return Err(self.expected::<N>(ExpectedKind::Record(arity))); }
self.drop(1);
if self.peek()? != b'<' { return Err(self.expected::<N>(ExpectedKind::Record(arity))); }
self.skip()?;
let mut b = B::Type::default();
Reader::<D, N>::ensure_more_expected(self, &mut b, &B::Item::RecordLabel)?;
Ok(b)
@ -479,12 +495,12 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item> {
self.skip_annotations()?;
let mark = Reader::<D, N>::mark(self)?;
match self.next_char()? {
'#' => match self.next_char()? {
'{' => return Ok(B::Item::SetValue),
match self.next_byte()? {
b'#' => match self.next_byte()? {
b'{' => return Ok(B::Item::SetValue),
_ => (),
},
'[' => return Ok(B::Item::SequenceValue),
b'[' => return Ok(B::Item::SequenceValue),
_ => (),
}
Reader::<D, N>::restore(self, &mark)?;
@ -493,17 +509,17 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
/// Skips any annotations, then consumes the `[` that opens a sequence.
///
/// If the next byte is anything else it is left in place and an
/// "expected sequence" error is returned.
fn open_sequence(&mut self) -> ReaderResult<()> {
    self.skip_annotations()?;
    match self.peek()? {
        b'[' => {
            self.skip()?;
            Ok(())
        }
        _ => Err(self.expected::<N>(ExpectedKind::Sequence)),
    }
}
fn open_set(&mut self) -> ReaderResult<()> {
self.skip_annotations()?;
let mark = Reader::<D, N>::mark(self)?;
match self.next_char()? {
'#' => match self.next_char()? {
'{' => return Ok(()),
match self.next_byte()? {
b'#' => match self.next_byte()? {
b'{' => return Ok(()),
_ => (),
},
_ => (),
@ -514,8 +530,8 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
/// Skips any annotations, then consumes the `{` that opens a dictionary.
///
/// If the next byte is anything else it is left in place and an
/// "expected dictionary" error is returned.
fn open_dictionary(&mut self) -> ReaderResult<()> {
    self.skip_annotations()?;
    match self.peek()? {
        b'{' => {
            self.skip()?;
            Ok(())
        }
        _ => Err(self.expected::<N>(ExpectedKind::Dictionary)),
    }
}
@ -527,7 +543,7 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
opening: Some(B::Item::DictionaryValue),
} => {
self.skip_whitespace();
if self.next_char()? != ':' {
if self.next_byte()? != b':' {
return Err(syntax_error("Missing expected key/value separator"));
}
},
@ -539,8 +555,8 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool> {
self.skip_whitespace();
match self.peek()? {
'>' | ']' | '}' => {
self.drop(1);
b'>' | b']' | b'}' => {
self.skip()?;
Ok(true)
}
_ => {
@ -554,9 +570,9 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
fn open_embedded(&mut self) -> ReaderResult<()> {
self.skip_annotations()?;
let mark = Reader::<D, N>::mark(self)?;
match self.next_char()? {
'#' => match self.next_char()? {
'!' => return Ok(()),
match self.next_byte()? {
b'#' => match self.next_byte()? {
b'!' => return Ok(()),
_ => (),
},
_ => (),
@ -569,33 +585,32 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
Ok(())
}
type Mark = usize;
type Mark = S::Mark;
/// Obtains a mark from the underlying source; pass it to `restore` to return
/// to this point in the input.
fn mark(&mut self) -> io::Result<Self::Mark> {
    self.source.mark()
}
/// Hands `mark` (previously obtained from `mark`) back to the underlying
/// source's `restore`.
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
    self.source.restore(mark)
}
fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<D, N>> {
self.skip_annotations()?;
let mark = Reader::<D, N>::mark(self)?;
Ok(match self.next_char()? {
'<' => Token::Compound(CompoundClass::Record),
'[' => Token::Compound(CompoundClass::Sequence),
'{' => Token::Compound(CompoundClass::Dictionary),
'>' => Token::End,
']' => Token::End,
'}' => Token::End,
'#' => match self.next_char()? {
'!' => {
Ok(match self.next_byte()? {
b'<' => Token::Compound(CompoundClass::Record),
b'[' => Token::Compound(CompoundClass::Sequence),
b'{' => Token::Compound(CompoundClass::Dictionary),
b'>' => Token::End,
b']' => Token::End,
b'}' => Token::End,
b'#' => match self.next_byte()? {
b'!' => {
let v = self.next_iovalue(read_embedded_annotations)?;
Token::Embedded(self.dec.parse_embedded(&v)?)
}
'{' => Token::Compound(CompoundClass::Set),
b'{' => Token::Compound(CompoundClass::Set),
_ => {
Reader::<D, N>::restore(self, &mark)?;
Token::Atom(self.demand_next(false)?)

View File

@ -23,8 +23,10 @@ fn decode_all(bytes: &'_ [u8]) -> io::Result<Vec<IOValue>> {
let mut fh = std::fs::File::open("../../../tests/samples.pr").unwrap();
let mut contents = String::new();
fh.read_to_string(&mut contents)?;
let mut d = preserves::value::TextReader::new(&contents, preserves::value::ViaCodec::new(preserves::value::IOValueDomainCodec));
d.next_iovalue(true)?
preserves::value::TextReader::new(
&mut BytesBinarySource::new(contents.as_bytes()),
preserves::value::ViaCodec::new(preserves::value::IOValueDomainCodec))
.next_iovalue(true)?
};
let from_packed = {
let mut fh = std::fs::File::open("../../../tests/samples.bin").unwrap();