Decode text syntax from *byte* sources.
This commit is contained in:
parent
e913951b91
commit
d28901446d
|
@ -26,8 +26,9 @@ where
|
|||
from_reader(&mut PackedReader::new(&mut BytesBinarySource::new(bytes), IOValueDomainCodec))
|
||||
}
|
||||
|
||||
pub fn from_text<'de, T>(text: &str) -> Result<T> where T: Deserialize<'de> {
|
||||
from_reader(&mut TextReader::new(text, ViaCodec::new(IOValueDomainCodec)))
|
||||
pub fn from_text<'de, T>(text: &'de str) -> Result<T> where T: Deserialize<'de> {
|
||||
from_reader(&mut TextReader::new(&mut BytesBinarySource::new(text.as_bytes()),
|
||||
ViaCodec::new(IOValueDomainCodec)))
|
||||
}
|
||||
|
||||
pub fn from_read<'de, 'r, IOR: io::Read + io::Seek, T>(read: &'r mut IOR) ->
|
||||
|
|
|
@ -4,6 +4,8 @@ pub mod writer;
|
|||
pub use reader::TextReader;
|
||||
pub use writer::TextWriter;
|
||||
|
||||
use crate::value::reader::BytesBinarySource;
|
||||
|
||||
use std::io;
|
||||
|
||||
use super::{DomainParse, Embeddable, IOValue, IOValueDomainCodec, NestedValue, Reader, ViaCodec};
|
||||
|
@ -12,7 +14,7 @@ pub fn from_str<D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>>(
|
|||
s: &str,
|
||||
decode_embedded: Dec,
|
||||
) -> io::Result<N> {
|
||||
TextReader::new(s, decode_embedded).demand_next(false)
|
||||
TextReader::new(&mut BytesBinarySource::new(s.as_bytes()), decode_embedded).demand_next(false)
|
||||
}
|
||||
|
||||
pub fn iovalue_from_str(s: &str) -> io::Result<IOValue> {
|
||||
|
@ -23,7 +25,7 @@ pub fn annotated_from_str<D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>>
|
|||
s: &str,
|
||||
decode_embedded: Dec,
|
||||
) -> io::Result<N> {
|
||||
TextReader::new(s, decode_embedded).demand_next(true)
|
||||
TextReader::new(&mut BytesBinarySource::new(s.as_bytes()), decode_embedded).demand_next(true)
|
||||
}
|
||||
|
||||
pub fn annotated_iovalue_from_str(s: &str) -> io::Result<IOValue> {
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
use crate::error::Error;
|
||||
use crate::error::ExpectedKind;
|
||||
use crate::error::Received;
|
||||
use crate::error::eof;
|
||||
use crate::error::io_syntax_error;
|
||||
use crate::error::is_eof_error;
|
||||
use crate::error::is_eof_io_error;
|
||||
use crate::error::syntax_error;
|
||||
|
||||
use crate::hex;
|
||||
|
@ -33,55 +32,57 @@ use std::io;
|
|||
use std::iter::FromIterator;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
pub struct TextReader<'a, D: Embeddable, Dec: DomainParse<D>> {
|
||||
buf: &'a str,
|
||||
pos: usize,
|
||||
dec: Dec,
|
||||
phantom: PhantomData<D>,
|
||||
pub struct TextReader<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> {
|
||||
pub source: &'src mut S,
|
||||
pub dec: Dec,
|
||||
phantom: PhantomData<&'de D>,
|
||||
}
|
||||
|
||||
impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
|
||||
pub fn new(buf: &'a str, dec: Dec) -> Self {
|
||||
fn decode_utf8(bs: Vec<u8>) -> io::Result<String> {
|
||||
Ok(String::from_utf8(bs).map_err(|_| io_syntax_error("Invalid UTF-8"))?)
|
||||
}
|
||||
|
||||
fn append_codepoint(bs: &mut Vec<u8>, n: u32) -> io::Result<()> {
|
||||
let c = char::from_u32(n).ok_or_else(|| io_syntax_error("Bad code point"))?;
|
||||
let mut buf = [0; 4];
|
||||
let _ = c.encode_utf8(&mut buf);
|
||||
bs.extend(&buf[0 .. c.len_utf8()]);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> TextReader<'de, 'src, D, Dec, S> {
|
||||
pub fn new(source: &'src mut S, dec: Dec) -> Self {
|
||||
TextReader {
|
||||
buf,
|
||||
pos: 0,
|
||||
source,
|
||||
dec,
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn remaining_input(&self) -> &str {
|
||||
&self.buf[self.pos ..]
|
||||
fn peek(&mut self) -> io::Result<u8> {
|
||||
self.source.peek()
|
||||
}
|
||||
|
||||
fn peek(&self) -> ReaderResult<char> {
|
||||
if self.pos >= self.buf.len() {
|
||||
Err(eof())
|
||||
} else {
|
||||
Ok(self.buf[self.pos ..].chars().next().unwrap())
|
||||
}
|
||||
fn skip(&mut self) -> io::Result<()> {
|
||||
self.source.skip()
|
||||
}
|
||||
|
||||
fn drop(&mut self, count: usize) {
|
||||
self.pos += count;
|
||||
}
|
||||
|
||||
fn undrop(&mut self, count: usize) {
|
||||
self.pos -= count;
|
||||
}
|
||||
|
||||
fn next_char(&mut self) -> ReaderResult<char> {
|
||||
let c = self.peek()?;
|
||||
self.drop(c.len_utf8());
|
||||
Ok(c)
|
||||
fn next_byte(&mut self) -> io::Result<u8> {
|
||||
let b = self.source.peek()?;
|
||||
self.source.skip()?;
|
||||
Ok(b)
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) {
|
||||
// Deliberately swallows errors.
|
||||
while let Ok(c) = self.peek() {
|
||||
if !c.is_whitespace() && c != ',' {
|
||||
break;
|
||||
match c {
|
||||
b' ' | b'\t' | b'\r' | b'\n' | b',' => {
|
||||
let _ = self.skip();
|
||||
()
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
self.drop(c.len_utf8())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -98,8 +99,8 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
|
|||
loop {
|
||||
self.skip_whitespace();
|
||||
match self.peek()? {
|
||||
';' => { self.drop(1); vs.push(N::new(self.comment_line()?)) }
|
||||
'@' => { self.drop(1); vs.push(self.demand_next(true)?) }
|
||||
b';' => { self.skip()?; vs.push(N::new(self.comment_line()?)) }
|
||||
b'@' => { self.skip()?; vs.push(self.demand_next(true)?) }
|
||||
_ => return Ok(vs),
|
||||
}
|
||||
}
|
||||
|
@ -109,135 +110,134 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
|
|||
loop {
|
||||
self.skip_whitespace();
|
||||
match self.peek()? {
|
||||
';' => { self.drop(1); self.comment_line()?; },
|
||||
'@' => { self.drop(1); Reader::<D, DummyValue<D>>::skip_value(self)?; },
|
||||
b';' => { self.skip()?; self.comment_line()?; },
|
||||
b'@' => { self.skip()?; Reader::<D, DummyValue<D>>::skip_value(self)?; },
|
||||
_ => return Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_iovalue(&mut self, read_annotations: bool) -> io::Result<IOValue> {
|
||||
let mut r = TextReader::new(self.remaining_input(), ViaCodec::new(IOValueDomainCodec));
|
||||
let mut r = TextReader::new(self.source, ViaCodec::new(IOValueDomainCodec));
|
||||
let v = r.demand_next(read_annotations)?;
|
||||
self.pos += r.pos;
|
||||
Ok(v)
|
||||
}
|
||||
|
||||
fn comment_line(&mut self) -> io::Result<String> {
|
||||
let mut s = String::new();
|
||||
let mut bs = Vec::new();
|
||||
loop {
|
||||
match self.next_char()? {
|
||||
'\r' | '\n' => return Ok(s),
|
||||
c => s.push(c),
|
||||
let b = self.peek()?;
|
||||
self.skip()?;
|
||||
match b {
|
||||
b'\r' | b'\n' => return Ok(decode_utf8(bs)?),
|
||||
_ => bs.push(b),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_intpart<N: NestedValue<D>>(&mut self, mut s: String, c: char) -> io::Result<N> {
|
||||
fn read_intpart<N: NestedValue<D>>(&mut self, mut bs: Vec<u8>, c: u8) -> io::Result<N> {
|
||||
match c {
|
||||
'0' => {
|
||||
s.push(c);
|
||||
self.read_fracexp(s)
|
||||
b'0' => {
|
||||
bs.push(c);
|
||||
self.read_fracexp(bs)
|
||||
}
|
||||
_ => {
|
||||
self.read_digit1(&mut s, c)?;
|
||||
self.read_fracexp(s)
|
||||
self.read_digit1(&mut bs, c)?;
|
||||
self.read_fracexp(bs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_fracexp<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
|
||||
fn read_fracexp<N: NestedValue<D>>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
|
||||
let mut is_float = false;
|
||||
match self.peek()? {
|
||||
'.' => {
|
||||
s.push(self.next_char()?);
|
||||
let c = self.next_char()?;
|
||||
self.read_digit1(&mut s, c)?;
|
||||
b'.' => {
|
||||
is_float = true;
|
||||
bs.push(self.next_byte()?);
|
||||
let c = self.next_byte()?;
|
||||
self.read_digit1(&mut bs, c)?;
|
||||
}
|
||||
_ => ()
|
||||
}
|
||||
self.read_exp(s)
|
||||
}
|
||||
|
||||
fn read_exp<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
|
||||
match self.peek()? {
|
||||
'e' | 'E' => {
|
||||
s.push(self.next_char()?);
|
||||
self.read_sign_and_exp(s)
|
||||
b'e' | b'E' => {
|
||||
bs.push(self.next_byte()?);
|
||||
self.read_sign_and_exp(bs)
|
||||
}
|
||||
_ => self.finish_number(s)
|
||||
_ => self.finish_number(bs, is_float)
|
||||
}
|
||||
}
|
||||
|
||||
fn read_sign_and_exp<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
|
||||
fn read_sign_and_exp<N: NestedValue<D>>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
|
||||
match self.peek()? {
|
||||
'+' | '-' => s.push(self.next_char()?),
|
||||
b'+' | b'-' => bs.push(self.next_byte()?),
|
||||
_ => (),
|
||||
}
|
||||
let c = self.next_char()?;
|
||||
self.read_digit1(&mut s, c)?;
|
||||
self.finish_number(s)
|
||||
let c = self.next_byte()?;
|
||||
self.read_digit1(&mut bs, c)?;
|
||||
self.finish_number(bs, true)
|
||||
}
|
||||
|
||||
fn finish_number<N: NestedValue<D>>(&mut self, s: String) -> io::Result<N> {
|
||||
if let Ok(n) = s.parse::<BigInt>() {
|
||||
return Ok(N::new(n));
|
||||
}
|
||||
match self.peek()? {
|
||||
'f' | 'F' => {
|
||||
self.drop(1);
|
||||
Ok(N::new(s.parse::<f32>().map_err(
|
||||
|_| io_syntax_error(&format!(
|
||||
"Invalid single-precision number: {:?}", s)))?))
|
||||
fn finish_number<N: NestedValue<D>>(&mut self, bs: Vec<u8>, is_float: bool) -> io::Result<N> {
|
||||
let s = decode_utf8(bs)?;
|
||||
if is_float {
|
||||
match self.peek()? {
|
||||
b'f' | b'F' => {
|
||||
self.skip()?;
|
||||
Ok(N::new(s.parse::<f32>().map_err(
|
||||
|_| io_syntax_error(&format!(
|
||||
"Invalid single-precision number: {:?}", s)))?))
|
||||
}
|
||||
_ =>
|
||||
Ok(N::new(s.parse::<f64>().map_err(
|
||||
|_| io_syntax_error(&format!(
|
||||
"Invalid double-precision number: {:?}", s)))?))
|
||||
}
|
||||
_ =>
|
||||
Ok(N::new(s.parse::<f64>().map_err(
|
||||
|_| io_syntax_error(&format!(
|
||||
"Invalid double-precision number: {:?}", s)))?))
|
||||
} else {
|
||||
Ok(N::new(s.parse::<BigInt>().map_err(
|
||||
|_| io_syntax_error(&format!(
|
||||
"Invalid signed-integer number: {:?}", s)))?))
|
||||
}
|
||||
}
|
||||
|
||||
fn read_digit1(&mut self, s: &mut String, c: char) -> io::Result<()>
|
||||
fn read_digit1(&mut self, bs: &mut Vec<u8>, c: u8) -> io::Result<()>
|
||||
{
|
||||
if !c.is_digit(10) {
|
||||
if !(c as char).is_digit(10) {
|
||||
return Err(io_syntax_error("Incomplete number"));
|
||||
}
|
||||
s.push(c);
|
||||
while self.peek()?.is_digit(10) {
|
||||
s.push(self.next_char()?);
|
||||
bs.push(c);
|
||||
while (self.peek()? as char).is_digit(10) {
|
||||
bs.push(self.next_byte()?);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn read_stringlike<X, H, Acc, Element, R>(
|
||||
fn read_stringlike<X, H, R>(
|
||||
&mut self,
|
||||
mut seed: R,
|
||||
acc: Acc,
|
||||
xform_item: X,
|
||||
terminator: char,
|
||||
hexescape: char,
|
||||
terminator: u8,
|
||||
hexescape: u8,
|
||||
hexescaper: H,
|
||||
) -> io::Result<R>
|
||||
where
|
||||
X: Fn(char) -> Element,
|
||||
H: Fn(&mut Self) -> io::Result<Element>,
|
||||
Acc: Fn(&mut R, Element) -> (),
|
||||
X: Fn(&mut R, u8) -> io::Result<()>,
|
||||
H: Fn(&mut R, &mut Self) -> io::Result<()>,
|
||||
{
|
||||
loop {
|
||||
match self.next_char()? {
|
||||
match self.next_byte()? {
|
||||
c if c == terminator => return Ok(seed),
|
||||
'\\' => match self.next_char()? {
|
||||
c if c == hexescape =>
|
||||
acc(&mut seed, hexescaper(self)?),
|
||||
c if c == terminator || c == '\\' || c == '/' =>
|
||||
acc(&mut seed, xform_item(c)),
|
||||
'b' => acc(&mut seed, xform_item('\x08')),
|
||||
'f' => acc(&mut seed, xform_item('\x0c')),
|
||||
'n' => acc(&mut seed, xform_item('\x0a')),
|
||||
'r' => acc(&mut seed, xform_item('\x0d')),
|
||||
't' => acc(&mut seed, xform_item('\x09')),
|
||||
b'\\' => match self.next_byte()? {
|
||||
c if c == hexescape => hexescaper(&mut seed, self)?,
|
||||
c if c == terminator || c == b'\\' || c == b'/' => xform_item(&mut seed, c)?,
|
||||
b'b' => xform_item(&mut seed, b'\x08')?,
|
||||
b'f' => xform_item(&mut seed, b'\x0c')?,
|
||||
b'n' => xform_item(&mut seed, b'\x0a')?,
|
||||
b'r' => xform_item(&mut seed, b'\x0d')?,
|
||||
b't' => xform_item(&mut seed, b'\x09')?,
|
||||
_ => return Err(io_syntax_error("Invalid escape code")),
|
||||
},
|
||||
c => acc(&mut seed, xform_item(c)),
|
||||
c => xform_item(&mut seed, c)?,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -245,8 +245,8 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
|
|||
fn hexnum(&mut self, count: usize) -> io::Result<u32> {
|
||||
let mut v: u32 = 0;
|
||||
for _ in 0 .. count {
|
||||
let c = self.next_char()?;
|
||||
match c.to_digit(16) {
|
||||
let c = self.next_byte()?;
|
||||
match (c as char).to_digit(16) {
|
||||
Some(d) =>
|
||||
v = v << 4 | d,
|
||||
None =>
|
||||
|
@ -256,58 +256,54 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
|
|||
Ok(v)
|
||||
}
|
||||
|
||||
fn read_string(&mut self, delimiter: char) -> io::Result<String> {
|
||||
self.read_stringlike(
|
||||
String::new(),
|
||||
|s, c| s.push(c),
|
||||
|c| c,
|
||||
fn read_string(&mut self, delimiter: u8) -> io::Result<String> {
|
||||
decode_utf8(self.read_stringlike(
|
||||
Vec::new(),
|
||||
|bs, c| Ok(bs.push(c)),
|
||||
delimiter,
|
||||
'u',
|
||||
|r| {
|
||||
b'u',
|
||||
|bs, r| {
|
||||
let n1 = r.hexnum(4)?;
|
||||
if (0xd800 ..= 0xdbff).contains(&n1) {
|
||||
let mut ok = true;
|
||||
ok = ok && r.next_char()? == '\\';
|
||||
ok = ok && r.next_char()? == 'u';
|
||||
ok = ok && r.next_byte()? == b'\\';
|
||||
ok = ok && r.next_byte()? == b'u';
|
||||
if !ok {
|
||||
Err(io_syntax_error("Missing second half of surrogate pair"))
|
||||
} else {
|
||||
let n2 = r.hexnum(4)?;
|
||||
if (0xdc00 ..= 0xdfff).contains(&n2) {
|
||||
let n = ((n1 - 0xd800) << 10) + (n2 - 0xdc00) + 0x10000;
|
||||
char::from_u32(n).ok_or_else(
|
||||
|| io_syntax_error("Bad code point from surrogate pair"))
|
||||
append_codepoint(bs, n)
|
||||
} else {
|
||||
Err(io_syntax_error("Bad second half of surrogate pair"))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
char::from_u32(n1).ok_or_else(
|
||||
|| io_syntax_error("Bad code point"))
|
||||
append_codepoint(bs, n1)
|
||||
}
|
||||
})
|
||||
})?)
|
||||
}
|
||||
|
||||
fn read_literal_binary<N: NestedValue<D>>(&mut self) -> io::Result<N> {
|
||||
Ok(N::new(&self.read_stringlike(
|
||||
Vec::new(),
|
||||
|bs, b| bs.push(b),
|
||||
|c| c as u8,
|
||||
'"',
|
||||
'x',
|
||||
|r| Ok(r.hexnum(2)? as u8))?[..]))
|
||||
|bs, b| Ok(bs.push(b)),
|
||||
b'"',
|
||||
b'x',
|
||||
|bs, r| Ok(bs.push(r.hexnum(2)? as u8)))?[..]))
|
||||
}
|
||||
|
||||
fn read_hex_binary<N: NestedValue<D>>(&mut self) -> io::Result<N> {
|
||||
let mut s = String::new();
|
||||
loop {
|
||||
self.skip_whitespace();
|
||||
let c1 = self.next_char()?;
|
||||
let c1 = self.next_byte()? as char;
|
||||
if c1 == '"' {
|
||||
let bs = hex::HexParser::Strict.decode(&s).unwrap();
|
||||
return Ok(N::new(&bs[..]));
|
||||
}
|
||||
let c2 = self.next_char()?;
|
||||
let c2 = self.next_byte()? as char;
|
||||
if !(c1.is_digit(16) && c2.is_digit(16)) {
|
||||
return Err(io_syntax_error("Invalid hex binary"));
|
||||
}
|
||||
|
@ -317,28 +313,28 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
|
|||
}
|
||||
|
||||
fn read_base64_binary<N: NestedValue<D>>(&mut self) -> io::Result<N> {
|
||||
let mut s = String::new();
|
||||
let mut bs = Vec::new();
|
||||
loop {
|
||||
self.skip_whitespace();
|
||||
let mut c = self.next_char()?;
|
||||
if c == ']' {
|
||||
let bs = base64::decode_config(&s, base64::STANDARD_NO_PAD)
|
||||
let mut c = self.next_byte()?;
|
||||
if c == b']' {
|
||||
let bs = base64::decode_config(&decode_utf8(bs)?, base64::STANDARD_NO_PAD)
|
||||
.map_err(|_| io_syntax_error("Invalid base64 character"))?;
|
||||
return Ok(N::new(&bs[..]));
|
||||
}
|
||||
if c == '-' { c = '+'; }
|
||||
if c == '_' { c = '/'; }
|
||||
if c == '=' { continue; }
|
||||
s.push(c);
|
||||
if c == b'-' { c = b'+'; }
|
||||
if c == b'_' { c = b'/'; }
|
||||
if c == b'=' { continue; }
|
||||
bs.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
fn upto<N: NestedValue<D>>(&mut self, delimiter: char, read_annotations: bool) -> io::Result<Vec<N>> {
|
||||
fn upto<N: NestedValue<D>>(&mut self, delimiter: u8, read_annotations: bool) -> io::Result<Vec<N>> {
|
||||
let mut vs = Vec::new();
|
||||
loop {
|
||||
self.skip_whitespace();
|
||||
if self.peek()? == delimiter {
|
||||
self.drop(delimiter.len_utf8());
|
||||
self.skip()?;
|
||||
return Ok(vs);
|
||||
}
|
||||
vs.push(Reader::<D, N>::demand_next(self, read_annotations)?);
|
||||
|
@ -349,13 +345,13 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
|
|||
let mut d = Map::new();
|
||||
loop {
|
||||
self.skip_whitespace();
|
||||
if self.peek()? == '}' {
|
||||
self.drop(1);
|
||||
if self.peek()? == b'}' {
|
||||
self.skip()?;
|
||||
return Ok(N::new(d));
|
||||
}
|
||||
let k = Reader::<D, N>::demand_next(self, read_annotations)?;
|
||||
self.skip_whitespace();
|
||||
if self.next_char()? != ':' {
|
||||
if self.next_byte()? != b':' {
|
||||
return Err(io_syntax_error("Missing expected key/value separator"));
|
||||
}
|
||||
let v = Reader::<D, N>::demand_next(self, read_annotations)?;
|
||||
|
@ -363,48 +359,56 @@ impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
|
|||
}
|
||||
}
|
||||
|
||||
fn read_raw_symbol<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
|
||||
fn read_raw_symbol<N: NestedValue<D>>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
|
||||
loop {
|
||||
let c = match self.peek() {
|
||||
Err(e) if is_eof_error(&e) => ' ',
|
||||
Err(e) if is_eof_io_error(&e) => b' ',
|
||||
Err(e) => return Err(e)?,
|
||||
Ok(c) if c.is_whitespace() => ' ',
|
||||
Ok(c) if (c as char).is_whitespace() => b' ',
|
||||
Ok(c) => c
|
||||
};
|
||||
match c {
|
||||
'(' | ')' | '{' | '}' | '[' | ']' | '<' | '>' |
|
||||
'"' | ';' | ',' | '@' | '#' | ':' | '|' | ' ' =>
|
||||
return Ok(Value::symbol(&s).wrap()),
|
||||
b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' |
|
||||
b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' =>
|
||||
return Ok(Value::symbol(&decode_utf8(bs)?).wrap()),
|
||||
c => {
|
||||
self.drop(c.len_utf8());
|
||||
s.push(c)
|
||||
self.skip()?;
|
||||
bs.push(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de, D, N> for TextReader<'a, D, Dec> {
|
||||
impl<'de, 'src, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>, S: BinarySource<'de>>
|
||||
Reader<'de, D, N> for TextReader<'de, 'src, D, Dec, S>
|
||||
{
|
||||
fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>> {
|
||||
self.skip_whitespace();
|
||||
let c = match self.next_char() {
|
||||
let c = match self.peek() {
|
||||
Ok(c) => c,
|
||||
Err(e) if is_eof_error(&e) => return Ok(None),
|
||||
Err(e) if is_eof_io_error(&e) => return Ok(None),
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
Ok(Some(match c {
|
||||
'-' => {
|
||||
let c1 = self.next_char()?;
|
||||
self.read_intpart("-".to_owned(), c1)?
|
||||
b'-' => {
|
||||
self.skip()?;
|
||||
let c1 = self.next_byte()?;
|
||||
self.read_intpart(vec![b'-'], c1)?
|
||||
}
|
||||
'0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
|
||||
self.read_intpart(String::new(), c)?,
|
||||
'"' =>
|
||||
N::new(self.read_string('"')?),
|
||||
'|' =>
|
||||
Value::symbol(&self.read_string('|')?).wrap(),
|
||||
';' | '@' => {
|
||||
self.undrop(1);
|
||||
b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => {
|
||||
self.skip()?;
|
||||
self.read_intpart(Vec::new(), c)?
|
||||
}
|
||||
b'"' => {
|
||||
self.skip()?;
|
||||
N::new(self.read_string(b'"')?)
|
||||
}
|
||||
b'|' => {
|
||||
self.skip()?;
|
||||
Value::symbol(&self.read_string(b'|')?).wrap()
|
||||
}
|
||||
b';' | b'@' => {
|
||||
if read_annotations {
|
||||
let mut annotations = self.gather_annotations()?;
|
||||
let (existing_annotations, v) =
|
||||
|
@ -416,61 +420,73 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
|
|||
self.demand_next(read_annotations)?
|
||||
}
|
||||
}
|
||||
':' => {
|
||||
// return Err(io_syntax_error("Unexpected key/value separator between items")),
|
||||
return Err(io_syntax_error(&format!("Unexpected key/value separator between items (pos {:?})", self.pos)));
|
||||
b':' => {
|
||||
return Err(io_syntax_error("Unexpected key/value separator between items"));
|
||||
}
|
||||
'#' => match self.next_char()? {
|
||||
'f' => N::new(false),
|
||||
't' => N::new(true),
|
||||
'{' => N::new(Set::from_iter(self.upto('}', read_annotations)?.into_iter())),
|
||||
'"' => self.read_literal_binary()?,
|
||||
'x' => if self.next_char()? == '"' {
|
||||
self.read_hex_binary()?
|
||||
} else {
|
||||
return Err(io_syntax_error("Expected open-quote at start of hex ByteString"));
|
||||
},
|
||||
'[' => self.read_base64_binary()?,
|
||||
'=' => {
|
||||
let bs_val: N = self.demand_next(true)?;
|
||||
if bs_val.annotations().slice().len() > 0 {
|
||||
return Err(io_syntax_error("Annotations not permitted after #="));
|
||||
b'#' => {
|
||||
self.skip()?;
|
||||
match self.next_byte()? {
|
||||
b'f' => N::new(false),
|
||||
b't' => N::new(true),
|
||||
b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations)?.into_iter())),
|
||||
b'"' => self.read_literal_binary()?,
|
||||
b'x' => if self.next_byte()? == b'"' {
|
||||
self.read_hex_binary()?
|
||||
} else {
|
||||
return Err(io_syntax_error("Expected open-quote at start of hex ByteString"));
|
||||
},
|
||||
b'[' => self.read_base64_binary()?,
|
||||
b'=' => {
|
||||
let bs_val: N = self.demand_next(true)?;
|
||||
if bs_val.annotations().slice().len() > 0 {
|
||||
return Err(io_syntax_error("Annotations not permitted after #="));
|
||||
}
|
||||
match bs_val.value().as_bytestring() {
|
||||
None =>
|
||||
return Err(io_syntax_error("ByteString must follow #=")),
|
||||
Some(bs) =>
|
||||
crate::value::BytesBinarySource::new(bs)
|
||||
.packed(ViaCodec::new(&mut self.dec))
|
||||
.demand_next(read_annotations)?
|
||||
}
|
||||
}
|
||||
match bs_val.value().as_bytestring() {
|
||||
None =>
|
||||
return Err(io_syntax_error("ByteString must follow #=")),
|
||||
Some(bs) =>
|
||||
crate::value::BytesBinarySource::new(bs)
|
||||
.packed(ViaCodec::new(&mut self.dec))
|
||||
.demand_next(read_annotations)?
|
||||
b'!' => {
|
||||
let v = self.next_iovalue(read_annotations)?;
|
||||
Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
|
||||
}
|
||||
other => return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other))),
|
||||
}
|
||||
'!' => {
|
||||
let v = self.next_iovalue(read_annotations)?;
|
||||
Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
|
||||
}
|
||||
other => return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other))),
|
||||
},
|
||||
'<' => {
|
||||
let vs = self.upto('>', read_annotations)?;
|
||||
}
|
||||
b'<' => {
|
||||
self.skip()?;
|
||||
let vs = self.upto(b'>', read_annotations)?;
|
||||
if vs.is_empty() {
|
||||
return Err(io_syntax_error("Missing record label"));
|
||||
}
|
||||
Value::Record(Record(vs)).wrap()
|
||||
}
|
||||
'[' => N::new(self.upto(']', read_annotations)?),
|
||||
'{' => self.read_dictionary(read_annotations)?,
|
||||
'>' => return Err(io_syntax_error("Unexpected >")),
|
||||
']' => return Err(io_syntax_error("Unexpected ]")),
|
||||
'}' => return Err(io_syntax_error("Unexpected }")),
|
||||
other => self.read_raw_symbol(other.to_string())?,
|
||||
b'[' => {
|
||||
self.skip()?;
|
||||
N::new(self.upto(b']', read_annotations)?)
|
||||
}
|
||||
b'{' => {
|
||||
self.skip()?;
|
||||
self.read_dictionary(read_annotations)?
|
||||
}
|
||||
b'>' => return Err(io_syntax_error("Unexpected >")),
|
||||
b']' => return Err(io_syntax_error("Unexpected ]")),
|
||||
b'}' => return Err(io_syntax_error("Unexpected }")),
|
||||
other => {
|
||||
self.skip()?;
|
||||
self.read_raw_symbol(vec![other])?
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type> {
|
||||
self.skip_annotations()?;
|
||||
if self.peek()? != '<' { return Err(self.expected::<N>(ExpectedKind::Record(arity))); }
|
||||
self.drop(1);
|
||||
if self.peek()? != b'<' { return Err(self.expected::<N>(ExpectedKind::Record(arity))); }
|
||||
self.skip()?;
|
||||
let mut b = B::Type::default();
|
||||
Reader::<D, N>::ensure_more_expected(self, &mut b, &B::Item::RecordLabel)?;
|
||||
Ok(b)
|
||||
|
@ -479,12 +495,12 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
|
|||
fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item> {
|
||||
self.skip_annotations()?;
|
||||
let mark = Reader::<D, N>::mark(self)?;
|
||||
match self.next_char()? {
|
||||
'#' => match self.next_char()? {
|
||||
'{' => return Ok(B::Item::SetValue),
|
||||
match self.next_byte()? {
|
||||
b'#' => match self.next_byte()? {
|
||||
b'{' => return Ok(B::Item::SetValue),
|
||||
_ => (),
|
||||
},
|
||||
'[' => return Ok(B::Item::SequenceValue),
|
||||
b'[' => return Ok(B::Item::SequenceValue),
|
||||
_ => (),
|
||||
}
|
||||
Reader::<D, N>::restore(self, &mark)?;
|
||||
|
@ -493,17 +509,17 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
|
|||
|
||||
fn open_sequence(&mut self) -> ReaderResult<()> {
|
||||
self.skip_annotations()?;
|
||||
if self.peek()? != '[' { return Err(self.expected::<N>(ExpectedKind::Sequence)); }
|
||||
self.drop(1);
|
||||
if self.peek()? != b'[' { return Err(self.expected::<N>(ExpectedKind::Sequence)); }
|
||||
self.skip()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn open_set(&mut self) -> ReaderResult<()> {
|
||||
self.skip_annotations()?;
|
||||
let mark = Reader::<D, N>::mark(self)?;
|
||||
match self.next_char()? {
|
||||
'#' => match self.next_char()? {
|
||||
'{' => return Ok(()),
|
||||
match self.next_byte()? {
|
||||
b'#' => match self.next_byte()? {
|
||||
b'{' => return Ok(()),
|
||||
_ => (),
|
||||
},
|
||||
_ => (),
|
||||
|
@ -514,8 +530,8 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
|
|||
|
||||
fn open_dictionary(&mut self) -> ReaderResult<()> {
|
||||
self.skip_annotations()?;
|
||||
if self.peek()? != '{' { return Err(self.expected::<N>(ExpectedKind::Dictionary)); }
|
||||
self.drop(1);
|
||||
if self.peek()? != b'{' { return Err(self.expected::<N>(ExpectedKind::Dictionary)); }
|
||||
self.skip()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -527,7 +543,7 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
|
|||
opening: Some(B::Item::DictionaryValue),
|
||||
} => {
|
||||
self.skip_whitespace();
|
||||
if self.next_char()? != ':' {
|
||||
if self.next_byte()? != b':' {
|
||||
return Err(syntax_error("Missing expected key/value separator"));
|
||||
}
|
||||
},
|
||||
|
@ -539,8 +555,8 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
|
|||
fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool> {
|
||||
self.skip_whitespace();
|
||||
match self.peek()? {
|
||||
'>' | ']' | '}' => {
|
||||
self.drop(1);
|
||||
b'>' | b']' | b'}' => {
|
||||
self.skip()?;
|
||||
Ok(true)
|
||||
}
|
||||
_ => {
|
||||
|
@ -554,9 +570,9 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
|
|||
fn open_embedded(&mut self) -> ReaderResult<()> {
|
||||
self.skip_annotations()?;
|
||||
let mark = Reader::<D, N>::mark(self)?;
|
||||
match self.next_char()? {
|
||||
'#' => match self.next_char()? {
|
||||
'!' => return Ok(()),
|
||||
match self.next_byte()? {
|
||||
b'#' => match self.next_byte()? {
|
||||
b'!' => return Ok(()),
|
||||
_ => (),
|
||||
},
|
||||
_ => (),
|
||||
|
@ -569,33 +585,32 @@ impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de,
|
|||
Ok(())
|
||||
}
|
||||
|
||||
type Mark = usize;
|
||||
type Mark = S::Mark;
|
||||
|
||||
fn mark(&mut self) -> io::Result<Self::Mark> {
|
||||
Ok(self.pos)
|
||||
self.source.mark()
|
||||
}
|
||||
|
||||
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
|
||||
self.pos = *mark;
|
||||
Ok(())
|
||||
self.source.restore(mark)
|
||||
}
|
||||
|
||||
fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<D, N>> {
|
||||
self.skip_annotations()?;
|
||||
let mark = Reader::<D, N>::mark(self)?;
|
||||
Ok(match self.next_char()? {
|
||||
'<' => Token::Compound(CompoundClass::Record),
|
||||
'[' => Token::Compound(CompoundClass::Sequence),
|
||||
'{' => Token::Compound(CompoundClass::Dictionary),
|
||||
'>' => Token::End,
|
||||
']' => Token::End,
|
||||
'}' => Token::End,
|
||||
'#' => match self.next_char()? {
|
||||
'!' => {
|
||||
Ok(match self.next_byte()? {
|
||||
b'<' => Token::Compound(CompoundClass::Record),
|
||||
b'[' => Token::Compound(CompoundClass::Sequence),
|
||||
b'{' => Token::Compound(CompoundClass::Dictionary),
|
||||
b'>' => Token::End,
|
||||
b']' => Token::End,
|
||||
b'}' => Token::End,
|
||||
b'#' => match self.next_byte()? {
|
||||
b'!' => {
|
||||
let v = self.next_iovalue(read_embedded_annotations)?;
|
||||
Token::Embedded(self.dec.parse_embedded(&v)?)
|
||||
}
|
||||
'{' => Token::Compound(CompoundClass::Set),
|
||||
b'{' => Token::Compound(CompoundClass::Set),
|
||||
_ => {
|
||||
Reader::<D, N>::restore(self, &mark)?;
|
||||
Token::Atom(self.demand_next(false)?)
|
||||
|
|
|
@ -23,8 +23,10 @@ fn decode_all(bytes: &'_ [u8]) -> io::Result<Vec<IOValue>> {
|
|||
let mut fh = std::fs::File::open("../../../tests/samples.pr").unwrap();
|
||||
let mut contents = String::new();
|
||||
fh.read_to_string(&mut contents)?;
|
||||
let mut d = preserves::value::TextReader::new(&contents, preserves::value::ViaCodec::new(preserves::value::IOValueDomainCodec));
|
||||
d.next_iovalue(true)?
|
||||
preserves::value::TextReader::new(
|
||||
&mut BytesBinarySource::new(contents.as_bytes()),
|
||||
preserves::value::ViaCodec::new(preserves::value::IOValueDomainCodec))
|
||||
.next_iovalue(true)?
|
||||
};
|
||||
let from_packed = {
|
||||
let mut fh = std::fs::File::open("../../../tests/samples.bin").unwrap();
|
||||
|
|
Loading…
Reference in New Issue