580 lines
20 KiB
Rust
580 lines
20 KiB
Rust
use crate::error::Error;
|
|
use crate::error::ExpectedKind;
|
|
use crate::error::Received;
|
|
use crate::error::io_syntax_error;
|
|
use crate::error::is_eof_io_error;
|
|
use crate::error::syntax_error;
|
|
|
|
use crate::hex;
|
|
|
|
use crate::value::CompoundClass;
|
|
use crate::value::DomainParse;
|
|
use crate::value::DummyValue;
|
|
use crate::value::Embeddable;
|
|
use crate::value::IOValue;
|
|
use crate::value::IOValueDomainCodec;
|
|
use crate::value::Map;
|
|
use crate::value::NestedValue;
|
|
use crate::value::Reader;
|
|
use crate::value::Record;
|
|
use crate::value::Set;
|
|
use crate::value::Token;
|
|
use crate::value::Value;
|
|
use crate::value::ViaCodec;
|
|
use crate::value::boundary as B;
|
|
use crate::value::reader::BinarySource;
|
|
use crate::value::reader::ReaderResult;
|
|
use crate::value::repr::Annotations;
|
|
|
|
use lazy_static::lazy_static;
|
|
|
|
use num::bigint::BigInt;
|
|
|
|
use std::convert::TryInto;
|
|
use std::io;
|
|
use std::iter::FromIterator;
|
|
use std::marker::PhantomData;
|
|
|
|
pub struct TextReader<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>> {
|
|
pub source: &'src mut S,
|
|
pub dec: Dec,
|
|
phantom: PhantomData<&'de D>,
|
|
}
|
|
|
|
fn decode_utf8(bs: Vec<u8>) -> io::Result<String> {
|
|
Ok(String::from_utf8(bs).map_err(|_| io_syntax_error("Invalid UTF-8"))?)
|
|
}
|
|
|
|
fn append_codepoint(bs: &mut Vec<u8>, n: u32) -> io::Result<()> {
|
|
let c = char::from_u32(n).ok_or_else(|| io_syntax_error("Bad code point"))?;
|
|
let mut buf = [0; 4];
|
|
let _ = c.encode_utf8(&mut buf);
|
|
bs.extend(&buf[0 .. c.len_utf8()]);
|
|
Ok(())
|
|
}
|
|
|
|
impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
|
|
TextReader<'de, 'src, D, Dec, S>
|
|
{
|
|
pub fn new(source: &'src mut S, dec: Dec) -> Self {
|
|
TextReader {
|
|
source,
|
|
dec,
|
|
phantom: PhantomData,
|
|
}
|
|
}
|
|
|
|
fn peek(&mut self) -> io::Result<u8> {
|
|
self.source.peek()
|
|
}
|
|
|
|
fn skip(&mut self) -> io::Result<()> {
|
|
self.source.skip()
|
|
}
|
|
|
|
fn next_byte(&mut self) -> io::Result<u8> {
|
|
let b = self.source.peek()?;
|
|
self.source.skip()?;
|
|
Ok(b)
|
|
}
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
// Deliberately swallows errors.
|
|
while let Ok(c) = self.peek() {
|
|
match c {
|
|
b' ' | b'\t' | b'\r' | b'\n' | b',' => {
|
|
let _ = self.skip();
|
|
()
|
|
}
|
|
_ => break,
|
|
}
|
|
}
|
|
}
|
|
|
|
// TODO: This is a duplicate of fn expected in PackedReader.
|
|
fn expected<N: NestedValue<Embedded = D>>(&mut self, k: ExpectedKind) -> Error {
|
|
match Reader::<N>::demand_next(self, true) {
|
|
Ok(v) => Error::Expected(k, Received::ReceivedOtherValue(format!("{:?}", v))),
|
|
Err(e) => e.into()
|
|
}
|
|
}
|
|
|
|
fn gather_annotations<N: NestedValue<Embedded = D>>(&mut self) -> ReaderResult<Vec<N>> {
|
|
let mut vs = Vec::new();
|
|
loop {
|
|
self.skip_whitespace();
|
|
match self.peek()? {
|
|
b';' => { self.skip()?; vs.push(N::new(self.comment_line()?)) }
|
|
b'@' => { self.skip()?; vs.push(self.demand_next(true)?) }
|
|
_ => return Ok(vs),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn skip_annotations(&mut self) -> ReaderResult<()> {
|
|
loop {
|
|
self.skip_whitespace();
|
|
match self.peek()? {
|
|
b';' => { self.skip()?; self.comment_line()?; },
|
|
b'@' => { self.skip()?; Reader::<DummyValue<D>>::skip_value(self)?; },
|
|
_ => return Ok(()),
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn next_iovalue(&mut self, read_annotations: bool) -> io::Result<IOValue> {
|
|
let mut r = TextReader::new(self.source, ViaCodec::new(IOValueDomainCodec));
|
|
let v = r.demand_next(read_annotations)?;
|
|
Ok(v)
|
|
}
|
|
|
|
fn comment_line(&mut self) -> io::Result<String> {
|
|
let mut bs = Vec::new();
|
|
loop {
|
|
let b = self.peek()?;
|
|
self.skip()?;
|
|
match b {
|
|
b'\r' | b'\n' => return Ok(decode_utf8(bs)?),
|
|
_ => bs.push(b),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn read_hex_float<N: NestedValue>(&mut self, bytecount: usize) -> io::Result<N> {
|
|
if self.next_byte()? != b'"' {
|
|
return Err(io_syntax_error("Missing open-double-quote in hex-encoded floating-point number"));
|
|
}
|
|
let bs = self.read_hex_binary()?;
|
|
if bs.len() != bytecount {
|
|
return Err(io_syntax_error("Incorrect number of bytes in hex-encoded floating-point number"));
|
|
}
|
|
match bytecount {
|
|
4 => Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()),
|
|
8 => Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()),
|
|
_ => Err(io_syntax_error("Unsupported byte count in hex-encoded floating-point number")),
|
|
}
|
|
}
|
|
|
|
fn read_stringlike<X, H, R>(
|
|
&mut self,
|
|
mut seed: R,
|
|
xform_item: X,
|
|
terminator: u8,
|
|
hexescape: u8,
|
|
hexescaper: H,
|
|
) -> io::Result<R>
|
|
where
|
|
X: Fn(&mut R, u8) -> io::Result<()>,
|
|
H: Fn(&mut R, &mut Self) -> io::Result<()>,
|
|
{
|
|
loop {
|
|
match self.next_byte()? {
|
|
c if c == terminator => return Ok(seed),
|
|
b'\\' => match self.next_byte()? {
|
|
c if c == hexescape => hexescaper(&mut seed, self)?,
|
|
c if c == terminator || c == b'\\' || c == b'/' => xform_item(&mut seed, c)?,
|
|
b'b' => xform_item(&mut seed, b'\x08')?,
|
|
b'f' => xform_item(&mut seed, b'\x0c')?,
|
|
b'n' => xform_item(&mut seed, b'\x0a')?,
|
|
b'r' => xform_item(&mut seed, b'\x0d')?,
|
|
b't' => xform_item(&mut seed, b'\x09')?,
|
|
_ => return Err(io_syntax_error("Invalid escape code")),
|
|
},
|
|
c => xform_item(&mut seed, c)?,
|
|
}
|
|
}
|
|
}
|
|
|
|
fn hexnum(&mut self, count: usize) -> io::Result<u32> {
|
|
let mut v: u32 = 0;
|
|
for _ in 0 .. count {
|
|
let c = self.next_byte()?;
|
|
match (c as char).to_digit(16) {
|
|
Some(d) =>
|
|
v = v << 4 | d,
|
|
None =>
|
|
return Err(io_syntax_error("Bad hex escape")),
|
|
}
|
|
}
|
|
Ok(v)
|
|
}
|
|
|
|
fn read_string(&mut self, delimiter: u8) -> io::Result<String> {
|
|
decode_utf8(self.read_stringlike(
|
|
Vec::new(),
|
|
|bs, c| Ok(bs.push(c)),
|
|
delimiter,
|
|
b'u',
|
|
|bs, r| {
|
|
let n1 = r.hexnum(4)?;
|
|
if (0xd800 ..= 0xdbff).contains(&n1) {
|
|
let mut ok = true;
|
|
ok = ok && r.next_byte()? == b'\\';
|
|
ok = ok && r.next_byte()? == b'u';
|
|
if !ok {
|
|
Err(io_syntax_error("Missing second half of surrogate pair"))
|
|
} else {
|
|
let n2 = r.hexnum(4)?;
|
|
if (0xdc00 ..= 0xdfff).contains(&n2) {
|
|
let n = ((n1 - 0xd800) << 10) + (n2 - 0xdc00) + 0x10000;
|
|
append_codepoint(bs, n)
|
|
} else {
|
|
Err(io_syntax_error("Bad second half of surrogate pair"))
|
|
}
|
|
}
|
|
} else {
|
|
append_codepoint(bs, n1)
|
|
}
|
|
})?)
|
|
}
|
|
|
|
/// Reads a `"`-terminated binary literal and wraps the bytes as a value.
///
/// Besides the simple escapes handled by `read_stringlike`, `\xHH` inserts the
/// byte given by two hex digits.
fn read_literal_binary<N: NestedValue>(&mut self) -> io::Result<N> {
    let bytes: Vec<u8> = self.read_stringlike(
        Vec::new(),
        |acc, b| {
            acc.push(b);
            Ok(())
        },
        b'"',
        b'x',
        |acc, r| {
            let byte = r.hexnum(2)? as u8;
            acc.push(byte);
            Ok(())
        },
    )?;
    Ok(N::new(&bytes[..]))
}
|
|
|
|
fn read_hex_binary(&mut self) -> io::Result<Vec<u8>> {
|
|
let mut s = String::new();
|
|
loop {
|
|
self.skip_whitespace();
|
|
let c1 = self.next_byte()? as char;
|
|
if c1 == '"' {
|
|
return Ok(hex::HexParser::Strict.decode(&s).unwrap());
|
|
}
|
|
let c2 = self.next_byte()? as char;
|
|
if !(c1.is_digit(16) && c2.is_digit(16)) {
|
|
return Err(io_syntax_error("Invalid hex binary"));
|
|
}
|
|
s.push(c1);
|
|
s.push(c2);
|
|
}
|
|
}
|
|
|
|
fn read_base64_binary<N: NestedValue>(&mut self) -> io::Result<N> {
|
|
let mut bs = Vec::new();
|
|
loop {
|
|
self.skip_whitespace();
|
|
let mut c = self.next_byte()?;
|
|
if c == b']' {
|
|
let bs = base64::decode_config(&decode_utf8(bs)?, base64::STANDARD_NO_PAD)
|
|
.map_err(|_| io_syntax_error("Invalid base64 character"))?;
|
|
return Ok(N::new(&bs[..]));
|
|
}
|
|
if c == b'-' { c = b'+'; }
|
|
if c == b'_' { c = b'/'; }
|
|
if c == b'=' { continue; }
|
|
bs.push(c);
|
|
}
|
|
}
|
|
|
|
fn upto<N: NestedValue<Embedded = D>>(&mut self, delimiter: u8, read_annotations: bool) -> io::Result<Vec<N>> {
|
|
let mut vs = Vec::new();
|
|
loop {
|
|
self.skip_whitespace();
|
|
if self.peek()? == delimiter {
|
|
self.skip()?;
|
|
return Ok(vs);
|
|
}
|
|
vs.push(Reader::<N>::demand_next(self, read_annotations)?);
|
|
}
|
|
}
|
|
|
|
fn read_dictionary<N: NestedValue<Embedded = D>>(&mut self, read_annotations: bool) -> io::Result<N> {
|
|
let mut d = Map::new();
|
|
loop {
|
|
self.skip_whitespace();
|
|
if self.peek()? == b'}' {
|
|
self.skip()?;
|
|
return Ok(N::new(d));
|
|
}
|
|
let k = Reader::<N>::demand_next(self, read_annotations)?;
|
|
self.skip_whitespace();
|
|
if self.next_byte()? != b':' {
|
|
return Err(io_syntax_error("Missing expected key/value separator"));
|
|
}
|
|
let v = Reader::<N>::demand_next(self, read_annotations)?;
|
|
d.insert(k, v);
|
|
}
|
|
}
|
|
|
|
fn read_raw_symbol_or_number<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
|
|
lazy_static! {
|
|
static ref NUMBER_RE: regex::Regex = regex::Regex::new(
|
|
r"^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$").unwrap();
|
|
}
|
|
loop {
|
|
let c = match self.peek() {
|
|
Err(e) if is_eof_io_error(&e) => b' ',
|
|
Err(e) => return Err(e)?,
|
|
Ok(c) if (c as char).is_whitespace() => b' ',
|
|
Ok(c) => c
|
|
};
|
|
match c {
|
|
b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' |
|
|
b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => {
|
|
let s = decode_utf8(bs)?;
|
|
return match NUMBER_RE.captures(&s) {
|
|
None => Ok(N::symbol(&s)),
|
|
Some(m) => match m.get(2) {
|
|
None => Ok(N::new(s.parse::<BigInt>().map_err(
|
|
|_| io_syntax_error(&format!(
|
|
"Invalid signed-integer number: {:?}", s)))?)),
|
|
Some(_) => {
|
|
if let Some(maybe_f) = m.get(7) {
|
|
let s = m[1].to_owned() + &m[3];
|
|
if maybe_f.range().is_empty() {
|
|
Ok(N::new(s.parse::<f64>().map_err(
|
|
|_| io_syntax_error(&format!(
|
|
"Invalid double-precision number: {:?}", s)))?))
|
|
} else {
|
|
Ok(N::new(s.parse::<f32>().map_err(
|
|
|_| io_syntax_error(&format!(
|
|
"Invalid single-precision number: {:?}", s)))?))
|
|
}
|
|
} else {
|
|
panic!("Internal error: cannot analyze number {:?}", s)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
c => {
|
|
self.skip()?;
|
|
bs.push(c)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'de>>
|
|
Reader<'de, N> for TextReader<'de, 'src, N::Embedded, Dec, S>
|
|
{
|
|
fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>> {
|
|
self.skip_whitespace();
|
|
let c = match self.peek() {
|
|
Ok(c) => c,
|
|
Err(e) if is_eof_io_error(&e) => return Ok(None),
|
|
Err(e) => return Err(e.into()),
|
|
};
|
|
Ok(Some(match c {
|
|
b'"' => {
|
|
self.skip()?;
|
|
N::new(self.read_string(b'"')?)
|
|
}
|
|
b'|' => {
|
|
self.skip()?;
|
|
N::symbol(&self.read_string(b'|')?)
|
|
}
|
|
b';' | b'@' => {
|
|
if read_annotations {
|
|
let mut annotations = self.gather_annotations()?;
|
|
let (existing_annotations, v) =
|
|
Reader::<N>::demand_next(self, read_annotations)?.pieces();
|
|
annotations.extend_from_slice(existing_annotations.slice());
|
|
N::wrap(Annotations::new(Some(annotations)), v)
|
|
} else {
|
|
self.skip_annotations()?;
|
|
self.demand_next(read_annotations)?
|
|
}
|
|
}
|
|
b':' => {
|
|
return Err(io_syntax_error("Unexpected key/value separator between items"));
|
|
}
|
|
b'#' => {
|
|
self.skip()?;
|
|
match self.next_byte()? {
|
|
b'f' => N::new(false),
|
|
b't' => N::new(true),
|
|
b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations)?.into_iter())),
|
|
b'"' => self.read_literal_binary()?,
|
|
b'x' => match self.next_byte()? {
|
|
b'"' => N::new(&self.read_hex_binary()?[..]),
|
|
b'f' => self.read_hex_float(4)?,
|
|
b'd' => self.read_hex_float(8)?,
|
|
_ => return Err(io_syntax_error("Invalid #x syntax")),
|
|
},
|
|
b'[' => self.read_base64_binary()?,
|
|
b'!' => {
|
|
let v = self.next_iovalue(read_annotations)?;
|
|
Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
|
|
}
|
|
other => return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other))),
|
|
}
|
|
}
|
|
b'<' => {
|
|
self.skip()?;
|
|
let vs = self.upto(b'>', read_annotations)?;
|
|
if vs.is_empty() {
|
|
return Err(io_syntax_error("Missing record label"));
|
|
}
|
|
Value::Record(Record(vs)).wrap()
|
|
}
|
|
b'[' => {
|
|
self.skip()?;
|
|
N::new(self.upto(b']', read_annotations)?)
|
|
}
|
|
b'{' => {
|
|
self.skip()?;
|
|
self.read_dictionary(read_annotations)?
|
|
}
|
|
b'>' => return Err(io_syntax_error("Unexpected >")),
|
|
b']' => return Err(io_syntax_error("Unexpected ]")),
|
|
b'}' => return Err(io_syntax_error("Unexpected }")),
|
|
other => {
|
|
self.skip()?;
|
|
self.read_raw_symbol_or_number(vec![other])?
|
|
}
|
|
}))
|
|
}
|
|
|
|
fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type> {
|
|
self.skip_annotations()?;
|
|
if self.peek()? != b'<' { return Err(self.expected::<N>(ExpectedKind::Record(arity))); }
|
|
self.skip()?;
|
|
let mut b = B::Type::default();
|
|
Reader::<N>::ensure_more_expected(self, &mut b, &B::Item::RecordLabel)?;
|
|
Ok(b)
|
|
}
|
|
|
|
fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item> {
|
|
self.skip_annotations()?;
|
|
let mark = Reader::<N>::mark(self)?;
|
|
match self.next_byte()? {
|
|
b'#' => match self.next_byte()? {
|
|
b'{' => return Ok(B::Item::SetValue),
|
|
_ => (),
|
|
},
|
|
b'[' => return Ok(B::Item::SequenceValue),
|
|
_ => (),
|
|
}
|
|
Reader::<N>::restore(self, &mark)?;
|
|
Err(self.expected::<N>(ExpectedKind::SequenceOrSet))
|
|
}
|
|
|
|
fn open_sequence(&mut self) -> ReaderResult<()> {
|
|
self.skip_annotations()?;
|
|
if self.peek()? != b'[' { return Err(self.expected::<N>(ExpectedKind::Sequence)); }
|
|
self.skip()?;
|
|
Ok(())
|
|
}
|
|
|
|
fn open_set(&mut self) -> ReaderResult<()> {
|
|
self.skip_annotations()?;
|
|
let mark = Reader::<N>::mark(self)?;
|
|
match self.next_byte()? {
|
|
b'#' => match self.next_byte()? {
|
|
b'{' => return Ok(()),
|
|
_ => (),
|
|
},
|
|
_ => (),
|
|
}
|
|
Reader::<N>::restore(self, &mark)?;
|
|
Err(self.expected::<N>(ExpectedKind::Set))
|
|
}
|
|
|
|
fn open_dictionary(&mut self) -> ReaderResult<()> {
|
|
self.skip_annotations()?;
|
|
if self.peek()? != b'{' { return Err(self.expected::<N>(ExpectedKind::Dictionary)); }
|
|
self.skip()?;
|
|
Ok(())
|
|
}
|
|
|
|
#[inline]
|
|
fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> {
|
|
match b {
|
|
B::Type {
|
|
closing: Some(B::Item::DictionaryKey),
|
|
opening: Some(B::Item::DictionaryValue),
|
|
} => {
|
|
self.skip_whitespace();
|
|
if self.next_byte()? != b':' {
|
|
return Err(syntax_error("Missing expected key/value separator"));
|
|
}
|
|
},
|
|
_ => (),
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool> {
|
|
self.skip_whitespace();
|
|
match self.peek()? {
|
|
b'>' | b']' | b'}' => {
|
|
self.skip()?;
|
|
Ok(true)
|
|
}
|
|
_ => {
|
|
b.shift(Some(i.clone()));
|
|
Reader::<N>::boundary(self, b)?;
|
|
Ok(false)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn open_embedded(&mut self) -> ReaderResult<()> {
|
|
self.skip_annotations()?;
|
|
let mark = Reader::<N>::mark(self)?;
|
|
match self.next_byte()? {
|
|
b'#' => match self.next_byte()? {
|
|
b'!' => return Ok(()),
|
|
_ => (),
|
|
},
|
|
_ => (),
|
|
}
|
|
Reader::<N>::restore(self, &mark)?;
|
|
Err(self.expected::<N>(ExpectedKind::Embedded))
|
|
}
|
|
|
|
/// Ends an embedded value; nothing is consumed here.
fn close_embedded(&mut self) -> ReaderResult<()> {
    let done = ();
    Ok(done)
}
|
|
|
|
/// Position marker type, taken directly from the underlying source `S`.
type Mark = S::Mark;
|
|
|
|
/// Obtains a position marker from the underlying source.
fn mark(&mut self) -> io::Result<Self::Mark> {
    let source = &mut *self.source;
    source.mark()
}
|
|
|
|
/// Hands `mark` back to the underlying source's `restore`.
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
    let source = &mut *self.source;
    source.restore(mark)
}
|
|
|
|
fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>> {
|
|
self.skip_annotations()?;
|
|
let mark = Reader::<N>::mark(self)?;
|
|
Ok(match self.next_byte()? {
|
|
b'<' => Token::Compound(CompoundClass::Record),
|
|
b'[' => Token::Compound(CompoundClass::Sequence),
|
|
b'{' => Token::Compound(CompoundClass::Dictionary),
|
|
b'>' => Token::End,
|
|
b']' => Token::End,
|
|
b'}' => Token::End,
|
|
b'#' => match self.next_byte()? {
|
|
b'!' => {
|
|
let v = self.next_iovalue(read_embedded_annotations)?;
|
|
Token::Embedded(self.dec.parse_embedded(&v)?)
|
|
}
|
|
b'{' => Token::Compound(CompoundClass::Set),
|
|
_ => {
|
|
Reader::<N>::restore(self, &mark)?;
|
|
Token::Atom(self.demand_next(false)?)
|
|
}
|
|
},
|
|
_ => {
|
|
Reader::<N>::restore(self, &mark)?;
|
|
Token::Atom(self.demand_next(false)?)
|
|
}
|
|
})
|
|
}
|
|
|
|
fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)> {
|
|
let annotations = self.gather_annotations()?;
|
|
Ok((annotations, self.next_token(true)?))
|
|
}
|
|
}
|