preserves/implementations/rust/preserves/src/value/text/reader.rs

615 lines
20 KiB
Rust
Raw Normal View History

2021-08-02 09:42:48 +00:00
use crate::error::Error;
use crate::error::ExpectedKind;
use crate::error::Received;
use crate::error::eof;
use crate::error::io_syntax_error;
use crate::error::is_eof_error;
use crate::error::syntax_error;
use crate::hex;
use crate::value::CompoundClass;
use crate::value::DomainParse;
use crate::value::DummyValue;
use crate::value::Embeddable;
use crate::value::IOValue;
use crate::value::IOValueDomainCodec;
use crate::value::Map;
use crate::value::NestedValue;
use crate::value::Reader;
use crate::value::Record;
use crate::value::Set;
use crate::value::Token;
use crate::value::Value;
use crate::value::ViaCodec;
use crate::value::boundary as B;
use crate::value::reader::BinarySource;
use crate::value::reader::ReaderResult;
use crate::value::repr::Annotations;
use num::bigint::BigInt;
use std::io;
use std::iter::FromIterator;
use std::marker::PhantomData;
/// A reader over Preserves text held entirely in a string slice.
///
/// `D` is the embedded-value type and `Dec` is the parser used to turn
/// embedded values read from the text into `D`.
pub struct TextReader<'a, D, Dec>
where
    D: Embeddable,
    Dec: DomainParse<D>,
{
    /// The complete input text.
    buf: &'a str,
    /// Byte offset into `buf` of the next unread character.
    pos: usize,
    /// Parser for embedded values (used for `#!` and `#=` syntax).
    dec: Dec,
    /// Marker tying the reader to `D`, which is not otherwise stored.
    phantom: PhantomData<D>,
}
impl<'a, D: Embeddable, Dec: DomainParse<D>> TextReader<'a, D, Dec> {
pub fn new(buf: &'a str, dec: Dec) -> Self {
TextReader {
buf,
pos: 0,
dec,
phantom: PhantomData,
}
}
/// Returns the portion of the input that has not been consumed yet.
fn remaining_input(&self) -> &str {
    let start = self.pos;
    &self.buf[start ..]
}
/// Returns the next character without consuming it.
///
/// Yields the end-of-file error when the position is at or beyond the end
/// of the buffer.
fn peek(&self) -> ReaderResult<char> {
    if self.pos < self.buf.len() {
        let upcoming = self.buf[self.pos ..].chars().next().unwrap();
        return Ok(upcoming);
    }
    Err(eof())
}
/// Advances the read position by `count` bytes.
///
/// Callers in this file pass either the UTF-8 length of a character they
/// have just peeked, or `1` for a known single-byte character.
fn drop(&mut self, count: usize) {
self.pos += count;
}
/// Moves the read position back by `count` bytes, un-consuming input that
/// was previously dropped.
fn undrop(&mut self, count: usize) {
self.pos -= count;
}
/// Consumes and returns the next character, or the error from `peek` when
/// no input remains.
fn next_char(&mut self) -> ReaderResult<char> {
    self.peek().map(|ch| {
        self.drop(ch.len_utf8());
        ch
    })
}
/// Skips over any run of whitespace characters and commas, stopping at the
/// first other character or when no input remains.
fn skip_whitespace(&mut self) {
    loop {
        match self.peek() {
            Ok(ch) if ch.is_whitespace() || ch == ',' => self.drop(ch.len_utf8()),
            _ => break,
        }
    }
}
// TODO: This is a duplicate of fn expected in PackedReader.
fn expected<N: NestedValue<D>>(&mut self, k: ExpectedKind) -> Error {
match Reader::<D, N>::demand_next(self, true) {
Ok(v) => Error::Expected(k, Received::ReceivedOtherValue(format!("{:?}", v))),
Err(e) => e.into()
}
}
fn gather_annotations<N: NestedValue<D>>(&mut self) -> ReaderResult<Vec<N>> {
let mut vs = Vec::new();
loop {
self.skip_whitespace();
match self.peek()? {
';' => { self.drop(1); vs.push(N::new(self.comment_line()?)) }
'@' => { self.drop(1); vs.push(self.demand_next(true)?) }
_ => return Ok(vs),
}
}
}
/// Skips the annotations that precede the next value without building them.
///
/// Comments (`;`) are read to the end of the line and discarded; `@`
/// annotations are skipped as whole values.
fn skip_annotations(&mut self) -> ReaderResult<()> {
    loop {
        self.skip_whitespace();
        let marker = self.peek()?;
        if marker == ';' {
            self.drop(1);
            self.comment_line()?;
        } else if marker == '@' {
            self.drop(1);
            Reader::<D, DummyValue<D>>::skip_value(self)?;
        } else {
            return Ok(());
        }
    }
}
/// Reads the next value as an `IOValue`.
///
/// A temporary reader using `IOValueDomainCodec` is run over the unread
/// input, and this reader is then advanced by however much it consumed.
pub fn next_iovalue(&mut self, read_annotations: bool) -> io::Result<IOValue> {
    let mut sub = TextReader::new(self.remaining_input(), ViaCodec::new(IOValueDomainCodec));
    let value = sub.demand_next(read_annotations)?;
    let consumed = sub.pos;
    self.drop(consumed);
    Ok(value)
}
/// Reads the rest of the current line as comment text.
///
/// Consumes characters up to and including the first `\r` or `\n`; the
/// terminator is not part of the returned text. Running out of input before
/// a terminator is an error.
fn comment_line(&mut self) -> io::Result<String> {
    let mut text = String::new();
    loop {
        let ch = self.next_char()?;
        if ch == '\r' || ch == '\n' {
            return Ok(text);
        }
        text.push(ch);
    }
}
/// Parses the integer part of a number, then the remainder of the number.
///
/// `c` is the first digit, already consumed from the input; `s` holds the
/// text gathered so far (e.g. a leading `-`). A leading `0` is taken as the
/// whole integer part; otherwise a run of one or more digits is read.
fn read_intpart<N: NestedValue<D>>(&mut self, mut s: String, c: char) -> io::Result<N> {
    if c == '0' {
        s.push(c);
    } else {
        self.read_digit1(&mut s, c)?;
    }
    self.read_fracexp(s)
}
/// Parses an optional fractional part (`.` followed by at least one digit),
/// then continues with the optional exponent.
fn read_fracexp<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
    // End of input here just means the number has no fractional part, so a
    // failed peek is treated the same as "next character is not a dot".
    if let Ok('.') = self.peek() {
        s.push(self.next_char()?);
        // A dot must be followed by a digit; running out of input now is an error.
        let c = self.next_char()?;
        self.read_digit1(&mut s, c)?;
    }
    self.read_exp(s)
}
/// Parses an optional exponent (`e` or `E`, optional sign, digits) and then
/// finishes the number.
fn read_exp<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
    // End of input means there is no exponent; the number is complete.
    match self.peek() {
        Ok('e') | Ok('E') => {
            s.push(self.next_char()?);
            self.read_sign_and_exp(s)
        }
        _ => self.finish_number(s)
    }
}
/// Parses the optional sign and the mandatory digits of an exponent, then
/// finishes the number.
fn read_sign_and_exp<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
    // An exponent marker must be followed by at least one digit, so end of
    // input at this point is a genuine error and is propagated.
    match self.peek()? {
        '+' | '-' => s.push(self.next_char()?),
        _ => (),
    }
    let c = self.next_char()?;
    self.read_digit1(&mut s, c)?;
    self.finish_number(s)
}
/// Converts the accumulated number text `s` into a value.
///
/// Text that parses as a `BigInt` becomes an integer. Otherwise it becomes a
/// single-precision float if immediately followed by `f` or `F` (which is
/// consumed), and a double-precision float in every other case.
fn finish_number<N: NestedValue<D>>(&mut self, s: String) -> io::Result<N> {
    if let Ok(n) = s.parse::<BigInt>() {
        return Ok(N::new(n));
    }
    // End of input means there is no `f` suffix, hence a double.
    match self.peek() {
        Ok('f') | Ok('F') => {
            self.drop(1);
            Ok(N::new(s.parse::<f32>().map_err(
                |_| io_syntax_error(&format!(
                    "Invalid single-precision number: {:?}", s)))?))
        }
        _ =>
            Ok(N::new(s.parse::<f64>().map_err(
                |_| io_syntax_error(&format!(
                    "Invalid double-precision number: {:?}", s)))?))
    }
}
/// Appends a run of one or more decimal digits to `s`.
///
/// `c` is the first character of the run, already consumed; it must be a
/// digit or a syntax error is returned. Further digits are consumed from the
/// input until a non-digit or the end of input is reached.
fn read_digit1(&mut self, s: &mut String, c: char) -> io::Result<()>
{
    if !c.is_digit(10) {
        return Err(io_syntax_error("Incomplete number"));
    }
    s.push(c);
    // A failed peek means the input is exhausted, which ends the digit run
    // rather than being an error: a number may be the last thing in the text.
    while let Ok(d) = self.peek() {
        if !d.is_digit(10) {
            break;
        }
        s.push(d);
        self.drop(d.len_utf8());
    }
    Ok(())
}
/// Reads the body of a quoted, escape-bearing literal up to its terminator.
///
/// Each plain character is converted with `xform_item` and folded into
/// `seed` with `acc`. After a backslash, `hexescape` hands control to
/// `hexescaper` to read a hex escape; the terminator, `\` and `/` stand for
/// themselves; `b`, `f`, `n`, `r` and `t` are the usual control characters.
/// Any other escape is a syntax error. The opening delimiter must already
/// have been consumed; the terminator is consumed but not accumulated.
fn read_stringlike<X, H, Acc, Element, R>(
    &mut self,
    mut seed: R,
    acc: Acc,
    xform_item: X,
    terminator: char,
    hexescape: char,
    hexescaper: H,
) -> io::Result<R>
where
    X: Fn(char) -> Element,
    H: Fn(&mut Self) -> io::Result<Element>,
    Acc: Fn(&mut R, Element) -> (),
{
    loop {
        let ch = self.next_char()?;
        if ch == terminator {
            return Ok(seed);
        }
        if ch != '\\' {
            acc(&mut seed, xform_item(ch));
            continue;
        }
        let escaped = self.next_char()?;
        let element = if escaped == hexescape {
            hexescaper(self)?
        } else if escaped == terminator || escaped == '\\' || escaped == '/' {
            xform_item(escaped)
        } else {
            match escaped {
                'b' => xform_item('\x08'),
                'f' => xform_item('\x0c'),
                'n' => xform_item('\x0a'),
                'r' => xform_item('\x0d'),
                't' => xform_item('\x09'),
                _ => return Err(io_syntax_error("Invalid escape code")),
            }
        };
        acc(&mut seed, element);
    }
}
/// Reads exactly `count` hexadecimal digits and returns their value,
/// most significant digit first.
fn hexnum(&mut self, count: usize) -> io::Result<u32> {
    let mut value: u32 = 0;
    for _ in 0 .. count {
        let digit = self.next_char()?
            .to_digit(16)
            .ok_or_else(|| io_syntax_error("Bad hex escape"))?;
        value = (value << 4) | digit;
    }
    Ok(value)
}
/// Reads the body of a string-like literal terminated by `delimiter`.
///
/// `\uXXXX` escapes are decoded; a high surrogate must be immediately
/// followed by a second `\uXXXX` escape holding a low surrogate, and the
/// pair is combined into a single character.
fn read_string(&mut self, delimiter: char) -> io::Result<String> {
    self.read_stringlike(
        String::new(),
        |text, ch| text.push(ch),
        |ch| ch,
        delimiter,
        'u',
        |r| {
            let high = r.hexnum(4)?;
            if !(0xd800 ..= 0xdbff).contains(&high) {
                // Not a high surrogate: the escape denotes the character directly.
                return char::from_u32(high).ok_or_else(
                    || io_syntax_error("Bad code point"));
            }
            // The second character is only consumed when the first one matched.
            let continues = r.next_char()? == '\\' && r.next_char()? == 'u';
            if !continues {
                return Err(io_syntax_error("Missing second half of surrogate pair"));
            }
            let low = r.hexnum(4)?;
            if !(0xdc00 ..= 0xdfff).contains(&low) {
                return Err(io_syntax_error("Bad second half of surrogate pair"));
            }
            let scalar = ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
            char::from_u32(scalar).ok_or_else(
                || io_syntax_error("Bad code point from surrogate pair"))
        })
}
fn read_literal_binary<N: NestedValue<D>>(&mut self) -> io::Result<N> {
Ok(N::new(&self.read_stringlike(
Vec::new(),
|bs, b| bs.push(b),
|c| c as u8,
'"',
'x',
|r| Ok(r.hexnum(2)? as u8))?[..]))
}
/// Reads the body of a `#x"..."` byte-string literal (opening quote already
/// consumed) and returns it as a byte-string value.
///
/// Hex digits must come in adjacent pairs; whitespace (as defined by
/// `skip_whitespace`) may appear between pairs.
fn read_hex_binary<N: NestedValue<D>>(&mut self) -> io::Result<N> {
    let mut digits = String::new();
    loop {
        self.skip_whitespace();
        let first = self.next_char()?;
        if first == '"' {
            // Only validated hex digit pairs were accumulated.
            let bytes = hex::HexParser::Strict.decode(&digits).unwrap();
            return Ok(N::new(&bytes[..]));
        }
        let second = self.next_char()?;
        if first.is_digit(16) && second.is_digit(16) {
            digits.push(first);
            digits.push(second);
        } else {
            return Err(io_syntax_error("Invalid hex binary"));
        }
    }
}
/// Reads the body of a `#[...]` base64 byte-string literal (opening bracket
/// already consumed) and returns it as a byte-string value.
///
/// Whitespace is skipped, `=` padding is ignored, and the URL-safe
/// characters `-` and `_` are mapped to `+` and `/` before decoding.
fn read_base64_binary<N: NestedValue<D>>(&mut self) -> io::Result<N> {
    let mut encoded = String::new();
    loop {
        self.skip_whitespace();
        match self.next_char()? {
            ']' => {
                let bytes = base64::decode_config(&encoded, base64::STANDARD_NO_PAD)
                    .map_err(|_| io_syntax_error("Invalid base64 character"))?;
                return Ok(N::new(&bytes[..]));
            }
            '=' => continue,
            '-' => encoded.push('+'),
            '_' => encoded.push('/'),
            other => encoded.push(other),
        }
    }
}
/// Reads values until `delimiter` is the next non-whitespace character,
/// consumes the delimiter, and returns the values in order.
fn upto<N: NestedValue<D>>(&mut self, delimiter: char, read_annotations: bool) -> io::Result<Vec<N>> {
    let mut items: Vec<N> = Vec::new();
    loop {
        self.skip_whitespace();
        if self.peek()? != delimiter {
            let item = Reader::<D, N>::demand_next(self, read_annotations)?;
            items.push(item);
            continue;
        }
        self.drop(delimiter.len_utf8());
        return Ok(items);
    }
}
/// Reads the body of a `{...}` dictionary (opening brace already consumed).
///
/// Each entry is a key, a `:` separator and a value; reading stops at the
/// closing `}`, which is consumed.
fn read_dictionary<N: NestedValue<D>>(&mut self, read_annotations: bool) -> io::Result<N> {
    let mut entries = Map::new();
    loop {
        self.skip_whitespace();
        if self.peek()? == '}' {
            self.drop(1);
            return Ok(N::new(entries));
        }
        let key = Reader::<D, N>::demand_next(self, read_annotations)?;
        self.skip_whitespace();
        match self.next_char()? {
            ':' => {
                let value = Reader::<D, N>::demand_next(self, read_annotations)?;
                entries.insert(key, value);
            }
            _ => return Err(io_syntax_error("Missing expected key/value separator")),
        }
    }
}
/// Reads the remainder of a bare (unquoted) symbol whose initial text is `s`.
///
/// Characters are appended until whitespace, a delimiter character or the
/// end of input is reached; the terminating character is left unconsumed.
fn read_raw_symbol<N: NestedValue<D>>(&mut self, mut s: String) -> io::Result<N> {
    loop {
        let upcoming = match self.peek() {
            Ok(ch) => ch,
            Err(e) => {
                // End of input ends the symbol; anything else is a real error.
                if is_eof_error(&e) {
                    break;
                }
                return Err(e.into());
            }
        };
        let is_delimiter = match upcoming {
            '(' | ')' | '{' | '}' | '[' | ']' | '<' | '>' |
            '"' | ';' | ',' | '@' | '#' | ':' | '|' => true,
            _ => false,
        };
        if upcoming.is_whitespace() || is_delimiter {
            break;
        }
        self.drop(upcoming.len_utf8());
        s.push(upcoming);
    }
    Ok(Value::symbol(&s).wrap())
}
}
impl<'a, 'de, D: Embeddable, N: NestedValue<D>, Dec: DomainParse<D>> Reader<'de, D, N> for TextReader<'a, D, Dec> {
/// Reads the next value from the input.
///
/// Returns `Ok(None)` when only whitespace remains. Otherwise the first
/// character selects the kind of value to parse. When `read_annotations` is
/// true, leading annotations are attached to the returned value; when false
/// they are skipped.
fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>> {
self.skip_whitespace();
// End of input before any value starts is not an error: there is simply no next value.
let c = match self.next_char() {
Ok(c) => c,
Err(e) if is_eof_error(&e) => return Ok(None),
Err(e) => return Err(e.into()),
};
Ok(Some(match c {
// A minus sign must introduce a number.
'-' => {
let c1 = self.next_char()?;
self.read_intpart("-".to_owned(), c1)?
}
'0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
self.read_intpart(String::new(), c)?,
// Double-quoted string.
'"' =>
N::new(self.read_string('"')?),
// Bar-quoted symbol.
'|' =>
Value::symbol(&self.read_string('|')?).wrap(),
// Comment or annotation: put the marker back so the annotation helpers see it.
';' | '@' => {
self.undrop(1);
if read_annotations {
let mut annotations = self.gather_annotations()?;
// Annotations already attached to the annotated value follow the ones gathered here.
let (existing_annotations, v) =
Reader::<D, N>::demand_next(self, read_annotations)?.pieces();
annotations.extend_from_slice(existing_annotations.slice());
N::wrap(Annotations::new(Some(annotations)), v)
} else {
self.skip_annotations()?;
self.demand_next(read_annotations)?
}
}
':' => {
// return Err(io_syntax_error("Unexpected key/value separator between items")),
return Err(io_syntax_error(&format!("Unexpected key/value separator between items (pos {:?})", self.pos)));
}
// `#` introduces booleans, sets, byte strings, encoded values and embedded values.
'#' => match self.next_char()? {
'f' => N::new(false),
't' => N::new(true),
'{' => N::new(Set::from_iter(self.upto('}', read_annotations)?.into_iter())),
'"' => self.read_literal_binary()?,
'x' => if self.next_char()? == '"' {
self.read_hex_binary()?
} else {
return Err(io_syntax_error("Expected open-quote at start of hex ByteString"));
},
'[' => self.read_base64_binary()?,
// `#=` is followed by an unannotated byte string, whose contents are
// decoded with the packed reader to produce the value.
'=' => {
let bs_val: N = self.demand_next(true)?;
if bs_val.annotations().slice().len() > 0 {
return Err(io_syntax_error("Annotations not permitted after #="));
}
match bs_val.value().as_bytestring() {
None =>
return Err(io_syntax_error("ByteString must follow #=")),
Some(bs) =>
crate::value::BytesBinarySource::new(bs)
.packed(ViaCodec::new(&mut self.dec))
.demand_next(read_annotations)?
}
}
// `#!` is followed by a value that is handed to the embedded-value parser.
'!' => {
let v = self.next_iovalue(read_annotations)?;
Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
}
other => return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other))),
},
// Record: the first item is the label, so at least one item is required.
'<' => {
let vs = self.upto('>', read_annotations)?;
if vs.is_empty() {
return Err(io_syntax_error("Missing record label"));
}
Value::Record(Record(vs)).wrap()
}
'[' => N::new(self.upto(']', read_annotations)?),
'{' => self.read_dictionary(read_annotations)?,
// A closing delimiter where a value should start is an error.
'>' => return Err(io_syntax_error("Unexpected >")),
']' => return Err(io_syntax_error("Unexpected ]")),
'}' => return Err(io_syntax_error("Unexpected }")),
// Anything else starts a bare symbol.
other => self.read_raw_symbol(other.to_string())?,
}))
}
/// Consumes the `<` that opens a record, after skipping any annotations.
///
/// If the next value is not a record, returns an "expected record" error
/// built by `expected`. On success returns a fresh boundary that has been
/// advanced to expect the record label.
fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type> {
    self.skip_annotations()?;
    match self.peek()? {
        '<' => self.drop(1),
        _ => return Err(self.expected::<N>(ExpectedKind::Record(arity))),
    }
    let mut boundary = B::Type::default();
    Reader::<D, N>::ensure_more_expected(self, &mut boundary, &B::Item::RecordLabel)?;
    Ok(boundary)
}
/// Consumes the opener of either a sequence (`[`) or a set (`#{`), after
/// skipping any annotations, and reports which one was found.
///
/// If neither is present the position is restored to just after the
/// annotations and an "expected sequence or set" error is returned.
fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item> {
    self.skip_annotations()?;
    let start = Reader::<D, N>::mark(self)?;
    let first = self.next_char()?;
    if first == '[' {
        return Ok(B::Item::SequenceValue);
    }
    if first == '#' && self.next_char()? == '{' {
        return Ok(B::Item::SetValue);
    }
    Reader::<D, N>::restore(self, &start)?;
    Err(self.expected::<N>(ExpectedKind::SequenceOrSet))
}
/// Consumes the `[` that opens a sequence, after skipping any annotations.
///
/// Returns an "expected sequence" error if the next value is something else.
fn open_sequence(&mut self) -> ReaderResult<()> {
    self.skip_annotations()?;
    if self.peek()? == '[' {
        self.drop(1);
        Ok(())
    } else {
        Err(self.expected::<N>(ExpectedKind::Sequence))
    }
}
/// Consumes the `#{` that opens a set, after skipping any annotations.
///
/// If it is not present the position is restored to just after the
/// annotations and an "expected set" error is returned.
fn open_set(&mut self) -> ReaderResult<()> {
    self.skip_annotations()?;
    let start = Reader::<D, N>::mark(self)?;
    // The second character is only consumed when the first one is `#`.
    let opened = self.next_char()? == '#' && self.next_char()? == '{';
    if opened {
        return Ok(());
    }
    Reader::<D, N>::restore(self, &start)?;
    Err(self.expected::<N>(ExpectedKind::Set))
}
/// Consumes the `{` that opens a dictionary, after skipping any annotations.
///
/// Returns an "expected dictionary" error if the next value is something else.
fn open_dictionary(&mut self) -> ReaderResult<()> {
    self.skip_annotations()?;
    if self.peek()? == '{' {
        self.drop(1);
        Ok(())
    } else {
        Err(self.expected::<N>(ExpectedKind::Dictionary))
    }
}
/// Consumes whatever separator the text syntax requires at boundary `b`.
///
/// Only the transition from a dictionary key to its value has one: a `:`,
/// optionally preceded by whitespace. Every other boundary consumes nothing.
fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> {
    if let B::Type {
        closing: Some(B::Item::DictionaryKey),
        opening: Some(B::Item::DictionaryValue),
    } = b {
        self.skip_whitespace();
        if self.next_char()? != ':' {
            return Err(syntax_error("Missing expected key/value separator"));
        }
    }
    Ok(())
}
/// Checks whether the current compound value ends here.
///
/// If the next non-whitespace character is `>`, `]` or `}` it is consumed
/// and `true` is returned. Otherwise the boundary `b` is shifted to expect
/// item `i`, any separator needed at that boundary is consumed, and `false`
/// is returned.
fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool> {
    self.skip_whitespace();
    let upcoming = self.peek()?;
    if upcoming == '>' || upcoming == ']' || upcoming == '}' {
        self.drop(1);
        return Ok(true);
    }
    b.shift(Some(i.clone()));
    Reader::<D, N>::boundary(self, b)?;
    Ok(false)
}
/// Consumes the `#!` that introduces an embedded value, after skipping any
/// annotations.
///
/// If it is not present the position is restored to just after the
/// annotations and an "expected embedded" error is returned.
fn open_embedded(&mut self) -> ReaderResult<()> {
    self.skip_annotations()?;
    let start = Reader::<D, N>::mark(self)?;
    // The second character is only consumed when the first one is `#`.
    let opened = self.next_char()? == '#' && self.next_char()? == '!';
    if opened {
        return Ok(());
    }
    Reader::<D, N>::restore(self, &start)?;
    Err(self.expected::<N>(ExpectedKind::Embedded))
}
/// Finishes reading an embedded value. This implementation consumes no
/// input and always succeeds.
fn close_embedded(&mut self) -> ReaderResult<()> {
Ok(())
}
/// A saved reader position: the value of `pos` at the time `mark` was called.
type Mark = usize;
/// Returns the current position so that it can later be passed to `restore`.
fn mark(&mut self) -> io::Result<Self::Mark> {
Ok(self.pos)
}
/// Rewinds (or advances) the reader to a position previously obtained from `mark`.
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
self.pos = *mark;
Ok(())
}
/// Reads the next token, after skipping any annotations.
///
/// Compound openers and closers are consumed and reported as
/// `Token::Compound` / `Token::End`. `#!` reads the following value and
/// parses it as an embedded value. Anything else is rewound and read as a
/// complete value without annotations, reported as `Token::Atom`.
fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<D, N>> {
    self.skip_annotations()?;
    let start = Reader::<D, N>::mark(self)?;
    let token = match self.next_char()? {
        '<' => Token::Compound(CompoundClass::Record),
        '[' => Token::Compound(CompoundClass::Sequence),
        '{' => Token::Compound(CompoundClass::Dictionary),
        '>' | ']' | '}' => Token::End,
        '#' => match self.next_char()? {
            '!' => {
                let embedded = self.next_iovalue(read_embedded_annotations)?;
                Token::Embedded(self.dec.parse_embedded(&embedded)?)
            }
            '{' => Token::Compound(CompoundClass::Set),
            _ => {
                // Some other `#` form: rewind and read it as a whole value.
                Reader::<D, N>::restore(self, &start)?;
                Token::Atom(Reader::<D, N>::demand_next(self, false)?)
            }
        },
        _ => {
            Reader::<D, N>::restore(self, &start)?;
            Token::Atom(Reader::<D, N>::demand_next(self, false)?)
        }
    };
    Ok(token)
}
fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<D, N>)> {
let annotations = self.gather_annotations()?;
Ok((annotations, self.next_token(true)?))
}
}