// Reader over textual input.
//
// `TextReader` holds a borrowed `&str` (`buf`), a byte offset into it (`pos`) and a domain
// decoder (`dec`). The inherent `impl` provides character-level scanning helpers; a second
// `impl` implements the `Reader` trait on top of them.
//
// NOTE(review): the text below appears to have lost its line structure and its generic
// argument lists (e.g. `PhantomData,`, `fn expected>(`, `Reader::::demand_next`,
// `s.parse::()`), and the `//` line comments now swallow whatever follows them on the same
// physical line. Confirm against the canonical copy of this file before changing any code;
// the code tokens here are left exactly as found.
//
// First physical line: imports, the `TextReader` struct, and these helpers:
//   new             - starts reading `buf` at offset 0.
//   remaining_input - the slice of `buf` from `pos` to the end.
//   peek            - next char without consuming it; `Err(eof())` once `pos >= buf.len()`.
//   drop / undrop   - move `pos` forward / backward by a byte count.
//   next_char       - `peek`, then advance by that char's UTF-8 length.
//   skip_whitespace - skips chars that are whitespace or `,`.
use crate::error::Error; use crate::error::ExpectedKind; use crate::error::Received; use crate::error::eof; use crate::error::io_syntax_error; use crate::error::is_eof_error; use crate::error::syntax_error; use crate::hex; use crate::value::CompoundClass; use crate::value::DomainParse; use crate::value::DummyValue; use crate::value::Embeddable; use crate::value::IOValue; use crate::value::IOValueDomainCodec; use crate::value::Map; use crate::value::NestedValue; use crate::value::Reader; use crate::value::Record; use crate::value::Set; use crate::value::Token; use crate::value::Value; use crate::value::ViaCodec; use crate::value::boundary as B; use crate::value::reader::BinarySource; use crate::value::reader::ReaderResult; use crate::value::repr::Annotations; use num::bigint::BigInt; use std::io; use std::iter::FromIterator; use std::marker::PhantomData; pub struct TextReader<'a, D: Embeddable, Dec: DomainParse> { buf: &'a str, pos: usize, dec: Dec, phantom: PhantomData, } impl<'a, D: Embeddable, Dec: DomainParse> TextReader<'a, D, Dec> { pub fn new(buf: &'a str, dec: Dec) -> Self { TextReader { buf, pos: 0, dec, phantom: PhantomData, } } fn remaining_input(&self) -> &str { &self.buf[self.pos ..] } fn peek(&self) -> ReaderResult { if self.pos >= self.buf.len() { Err(eof()) } else { Ok(self.buf[self.pos ..].chars().next().unwrap()) } } fn drop(&mut self, count: usize) { self.pos += count; } fn undrop(&mut self, count: usize) { self.pos -= count; } fn next_char(&mut self) -> ReaderResult { let c = self.peek()?; self.drop(c.len_utf8()); Ok(c) } fn skip_whitespace(&mut self) { while let Ok(c) = self.peek() { if !c.is_whitespace() && c != ',' { break; } self.drop(c.len_utf8()) } } // TODO: This is a duplicate of fn expected in PackedReader. 
// Second physical line:
//   expected           - builds the error for an unexpected value: reads the next value and
//                        reports it as `Error::Expected(k, Received::ReceivedOtherValue(..))`,
//                        or passes through the error from that read.
//   gather_annotations - collects `;` comment lines (wrapped as values) and `@`-prefixed
//                        values until some other char is seen.
//   skip_annotations   - the same scan, discarding what it reads.
//   next_iovalue       - runs a nested `TextReader` with `IOValueDomainCodec` over the
//                        remaining input, then advances `pos` by what that reader consumed.
//   comment_line       - accumulates chars up to (not including) the first `\r` or `\n`.
//                        NOTE(review): a comment that runs to end of input propagates the
//                        error from `next_char` - confirm that is intended.
//   read_intpart / read_fracexp / read_exp / read_sign_and_exp
//                      - accumulate the text of a number: integer digits, optional `.` plus
//                        digits, optional `e`/`E` with optional sign plus digits.
// NOTE(review): the number helpers (including `read_digit1` and `finish_number` further down)
// use `self.peek()?`, so a number that ends exactly at the end of `buf` appears to yield the
// EOF error rather than a value, whereas `read_raw_symbol` treats EOF as a terminator.
// Confirm which behaviour is intended.
fn expected>(&mut self, k: ExpectedKind) -> Error { match Reader::::demand_next(self, true) { Ok(v) => Error::Expected(k, Received::ReceivedOtherValue(format!("{:?}", v))), Err(e) => e.into() } } fn gather_annotations>(&mut self) -> ReaderResult> { let mut vs = Vec::new(); loop { self.skip_whitespace(); match self.peek()? { ';' => { self.drop(1); vs.push(N::new(self.comment_line()?)) } '@' => { self.drop(1); vs.push(self.demand_next(true)?) } _ => return Ok(vs), } } } fn skip_annotations(&mut self) -> ReaderResult<()> { loop { self.skip_whitespace(); match self.peek()? { ';' => { self.drop(1); self.comment_line()?; }, '@' => { self.drop(1); Reader::>::skip_value(self)?; }, _ => return Ok(()), } } } pub fn next_iovalue(&mut self, read_annotations: bool) -> io::Result { let mut r = TextReader::new(self.remaining_input(), ViaCodec::new(IOValueDomainCodec)); let v = r.demand_next(read_annotations)?; self.pos += r.pos; Ok(v) } fn comment_line(&mut self) -> io::Result { let mut s = String::new(); loop { match self.next_char()? { '\r' | '\n' => return Ok(s), c => s.push(c), } } } fn read_intpart>(&mut self, mut s: String, c: char) -> io::Result { match c { '0' => { s.push(c); self.read_fracexp(s) } _ => { self.read_digit1(&mut s, c)?; self.read_fracexp(s) } } } fn read_fracexp>(&mut self, mut s: String) -> io::Result { match self.peek()? { '.' => { s.push(self.next_char()?); let c = self.next_char()?; self.read_digit1(&mut s, c)?; } _ => () } self.read_exp(s) } fn read_exp>(&mut self, mut s: String) -> io::Result { match self.peek()? { 'e' | 'E' => { s.push(self.next_char()?); self.read_sign_and_exp(s) } _ => self.finish_number(s) } } fn read_sign_and_exp>(&mut self, mut s: String) -> io::Result { match self.peek()? 
// Third physical line (continues `read_sign_and_exp`):
//   finish_number   - makes a first parse attempt on the accumulated text and returns on
//                     success; otherwise a following `f`/`F` is consumed and selects the
//                     "single-precision" parse, anything else the "double-precision" parse.
//   read_digit1     - requires `c` to be a decimal digit ("Incomplete number" otherwise),
//                     then consumes the decimal digits that follow.
//   read_stringlike - generic delimited-literal scanner. Ends at `terminator`. After `\`:
//                     `hexescape` invokes `hexescaper`; the terminator, `\` and `/` stand for
//                     themselves; `b f n r t` map to control chars; anything else is
//                     "Invalid escape code".
//   hexnum          - (starts here) reads `count` hex digits into a `u32`.
{ '+' | '-' => s.push(self.next_char()?), _ => (), } let c = self.next_char()?; self.read_digit1(&mut s, c)?; self.finish_number(s) } fn finish_number>(&mut self, s: String) -> io::Result { if let Ok(n) = s.parse::() { return Ok(N::new(n)); } match self.peek()? { 'f' | 'F' => { self.drop(1); Ok(N::new(s.parse::().map_err( |_| io_syntax_error(&format!( "Invalid single-precision number: {:?}", s)))?)) } _ => Ok(N::new(s.parse::().map_err( |_| io_syntax_error(&format!( "Invalid double-precision number: {:?}", s)))?)) } } fn read_digit1(&mut self, s: &mut String, c: char) -> io::Result<()> { if !c.is_digit(10) { return Err(io_syntax_error("Incomplete number")); } s.push(c); while self.peek()?.is_digit(10) { s.push(self.next_char()?); } Ok(()) } fn read_stringlike( &mut self, mut seed: R, acc: Acc, xform_item: X, terminator: char, hexescape: char, hexescaper: H, ) -> io::Result where X: Fn(char) -> Element, H: Fn(&mut Self) -> io::Result, Acc: Fn(&mut R, Element) -> (), { loop { match self.next_char()? { c if c == terminator => return Ok(seed), '\\' => match self.next_char()? { c if c == hexescape => acc(&mut seed, hexescaper(self)?), c if c == terminator || c == '\\' || c == '/' => acc(&mut seed, xform_item(c)), 'b' => acc(&mut seed, xform_item('\x08')), 'f' => acc(&mut seed, xform_item('\x0c')), 'n' => acc(&mut seed, xform_item('\x0a')), 'r' => acc(&mut seed, xform_item('\x0d')), 't' => acc(&mut seed, xform_item('\x09')), _ => return Err(io_syntax_error("Invalid escape code")), }, c => acc(&mut seed, xform_item(c)), } } } fn hexnum(&mut self, count: usize) -> io::Result { let mut v: u32 = 0; for _ in 0 .. 
// Fourth physical line (continues `hexnum`):
//   read_string         - string-like literal ended by `delimiter`, with `\u` escapes of four
//                         hex digits. A value in 0xd800..=0xdbff must be followed by `\u` and
//                         a value in 0xdc00..=0xdfff; the pair is combined into one code point.
//   read_literal_binary - `"`-terminated bytes with `\x` escapes of two hex digits; plain
//                         chars are converted with `c as u8`.
//                         NOTE(review): `c as u8` keeps only the low 8 bits of the char's
//                         code point - confirm how non-ASCII input is meant to be handled.
//   read_hex_binary     - pairs of hex digits up to a closing `"`; whitespace and `,` are
//                         skipped before each pair. The `unwrap()` on the decode relies on
//                         the per-pair `is_digit(16)` check - presumably sufficient; verify
//                         against `hex::HexParser`.
//   read_base64_binary  - chars up to `]`; `-` becomes `+`, `_` becomes `/`, `=` is ignored;
//                         decoded with `base64::STANDARD_NO_PAD`.
//   upto                - (starts here) reads values until `delimiter` is seen.
count { let c = self.next_char()?; match c.to_digit(16) { Some(d) => v = v << 4 | d, None => return Err(io_syntax_error("Bad hex escape")), } } Ok(v) } fn read_string(&mut self, delimiter: char) -> io::Result { self.read_stringlike( String::new(), |s, c| s.push(c), |c| c, delimiter, 'u', |r| { let n1 = r.hexnum(4)?; if (0xd800 ..= 0xdbff).contains(&n1) { let mut ok = true; ok = ok && r.next_char()? == '\\'; ok = ok && r.next_char()? == 'u'; if !ok { Err(io_syntax_error("Missing second half of surrogate pair")) } else { let n2 = r.hexnum(4)?; if (0xdc00 ..= 0xdfff).contains(&n2) { let n = ((n1 - 0xd800) << 10) + (n2 - 0xdc00) + 0x10000; char::from_u32(n).ok_or_else( || io_syntax_error("Bad code point from surrogate pair")) } else { Err(io_syntax_error("Bad second half of surrogate pair")) } } } else { char::from_u32(n1).ok_or_else( || io_syntax_error("Bad code point")) } }) } fn read_literal_binary>(&mut self) -> io::Result { Ok(N::new(&self.read_stringlike( Vec::new(), |bs, b| bs.push(b), |c| c as u8, '"', 'x', |r| Ok(r.hexnum(2)? as u8))?[..])) } fn read_hex_binary>(&mut self) -> io::Result { let mut s = String::new(); loop { self.skip_whitespace(); let c1 = self.next_char()?; if c1 == '"' { let bs = hex::HexParser::Strict.decode(&s).unwrap(); return Ok(N::new(&bs[..])); } let c2 = self.next_char()?; if !(c1.is_digit(16) && c2.is_digit(16)) { return Err(io_syntax_error("Invalid hex binary")); } s.push(c1); s.push(c2); } } fn read_base64_binary>(&mut self) -> io::Result { let mut s = String::new(); loop { self.skip_whitespace(); let mut c = self.next_char()?; if c == ']' { let bs = base64::decode_config(&s, base64::STANDARD_NO_PAD) .map_err(|_| io_syntax_error("Invalid base64 character"))?; return Ok(N::new(&bs[..])); } if c == '-' { c = '+'; } if c == '_' { c = '/'; } if c == '=' { continue; } s.push(c); } } fn upto>(&mut self, delimiter: char, read_annotations: bool) -> io::Result> { let mut vs = Vec::new(); loop { self.skip_whitespace(); if self.peek()? 
// Fifth physical line (continues `upto`):
//   read_dictionary - reads `key : value` entries until `}`; a missing `:` is
//                     "Missing expected key/value separator".
//   read_raw_symbol - extends `s` until whitespace, end of input, or one of the punctuation
//                     chars listed in the match, then returns the symbol.
// The `Reader` impl begins on this line:
//   next            - skips whitespace, returns `Ok(None)` at end of input, otherwise
//                     dispatches on the first char: `-` or a digit starts a number, `"` a
//                     string, `|` a quoted symbol, `;` / `@` annotations (gathered or skipped
//                     depending on `read_annotations`).
== delimiter { self.drop(delimiter.len_utf8()); return Ok(vs); } vs.push(Reader::::demand_next(self, read_annotations)?); } } fn read_dictionary>(&mut self, read_annotations: bool) -> io::Result { let mut d = Map::new(); loop { self.skip_whitespace(); if self.peek()? == '}' { self.drop(1); return Ok(N::new(d)); } let k = Reader::::demand_next(self, read_annotations)?; self.skip_whitespace(); if self.next_char()? != ':' { return Err(io_syntax_error("Missing expected key/value separator")); } let v = Reader::::demand_next(self, read_annotations)?; d.insert(k, v); } } fn read_raw_symbol>(&mut self, mut s: String) -> io::Result { loop { let c = match self.peek() { Err(e) if is_eof_error(&e) => ' ', Err(e) => return Err(e)?, Ok(c) if c.is_whitespace() => ' ', Ok(c) => c }; match c { '(' | ')' | '{' | '}' | '[' | ']' | '<' | '>' | '"' | ';' | ',' | '@' | '#' | ':' | '|' | ' ' => return Ok(Value::symbol(&s).wrap()), c => { self.drop(c.len_utf8()); s.push(c) } } } } } impl<'a, 'de, D: Embeddable, N: NestedValue, Dec: DomainParse> Reader<'de, D, N> for TextReader<'a, D, Dec> { fn next(&mut self, read_annotations: bool) -> io::Result> { self.skip_whitespace(); let c = match self.next_char() { Ok(c) => c, Err(e) if is_eof_error(&e) => return Ok(None), Err(e) => return Err(e.into()), }; Ok(Some(match c { '-' => { let c1 = self.next_char()?; self.read_intpart("-".to_owned(), c1)? } '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => self.read_intpart(String::new(), c)?, '"' => N::new(self.read_string('"')?), '|' => Value::symbol(&self.read_string('|')?).wrap(), ';' | '@' => { self.undrop(1); if read_annotations { let mut annotations = self.gather_annotations()?; let (existing_annotations, v) = Reader::::demand_next(self, read_annotations)?.pieces(); annotations.extend_from_slice(existing_annotations.slice()); N::wrap(Annotations::new(Some(annotations)), v) } else { self.skip_annotations()?; self.demand_next(read_annotations)? 
// Sixth physical line (continues `next`):
//   `:` at value position is an error that reports `pos`.
//   `#` introduces `#f`, `#t`, `#{` (set), `#"` (binary literal), `#x"` (hex binary),
//   `#[` (base64 binary), `#=` (an unannotated byte string whose bytes are then read with the
//   packed reader) and `#!` (embedded value parsed via `dec.parse_embedded`).
//   `<` reads a record (an empty one is "Missing record label"), `[` a sequence, `{` a
//   dictionary; stray `>`, `]`, `}` are errors; any other char starts a raw symbol.
//   open_record - (starts here).
} } ':' => { // return Err(io_syntax_error("Unexpected key/value separator between items")), return Err(io_syntax_error(&format!("Unexpected key/value separator between items (pos {:?})", self.pos))); } '#' => match self.next_char()? { 'f' => N::new(false), 't' => N::new(true), '{' => N::new(Set::from_iter(self.upto('}', read_annotations)?.into_iter())), '"' => self.read_literal_binary()?, 'x' => if self.next_char()? == '"' { self.read_hex_binary()? } else { return Err(io_syntax_error("Expected open-quote at start of hex ByteString")); }, '[' => self.read_base64_binary()?, '=' => { let bs_val: N = self.demand_next(true)?; if bs_val.annotations().slice().len() > 0 { return Err(io_syntax_error("Annotations not permitted after #=")); } match bs_val.value().as_bytestring() { None => return Err(io_syntax_error("ByteString must follow #=")), Some(bs) => crate::value::BytesBinarySource::new(bs) .packed(ViaCodec::new(&mut self.dec)) .demand_next(read_annotations)? } } '!' => { let v = self.next_iovalue(read_annotations)?; Value::Embedded(self.dec.parse_embedded(&v)?).wrap() } other => return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other))), }, '<' => { let vs = self.upto('>', read_annotations)?; if vs.is_empty() { return Err(io_syntax_error("Missing record label")); } Value::Record(Record(vs)).wrap() } '[' => N::new(self.upto(']', read_annotations)?), '{' => self.read_dictionary(read_annotations)?, '>' => return Err(io_syntax_error("Unexpected >")), ']' => return Err(io_syntax_error("Unexpected ]")), '}' => return Err(io_syntax_error("Unexpected }")), other => self.read_raw_symbol(other.to_string())?, })) } fn open_record(&mut self, arity: Option) -> ReaderResult { self.skip_annotations()?; if self.peek()? 
// Seventh physical line (continues `open_record`):
//   open_record / open_sequence / open_dictionary
//                  - skip annotations, then require `<` / `[` / `{`; otherwise return the
//                    error built by `expected(..)`. `open_record` additionally calls
//                    `ensure_more_expected` for the record label and returns the boundary.
//   open_sequence_or_set / open_set / open_embedded
//                  - take a mark, look for `#{`, `[` or `#!` as appropriate; on mismatch
//                    restore the mark and return the error built by `expected(..)`.
//   boundary       - when moving from a dictionary key to a dictionary value, skips
//                    whitespace and requires `:`; does nothing for other boundaries.
//   close_compound - consumes `>`, `]` or `}` and returns `true`; otherwise shifts `b` to
//                    item `i`, runs `boundary`, and returns `false`.
!= '<' { return Err(self.expected::(ExpectedKind::Record(arity))); } self.drop(1); let mut b = B::Type::default(); Reader::::ensure_more_expected(self, &mut b, &B::Item::RecordLabel)?; Ok(b) } fn open_sequence_or_set(&mut self) -> ReaderResult { self.skip_annotations()?; let mark = Reader::::mark(self)?; match self.next_char()? { '#' => match self.next_char()? { '{' => return Ok(B::Item::SetValue), _ => (), }, '[' => return Ok(B::Item::SequenceValue), _ => (), } Reader::::restore(self, &mark)?; Err(self.expected::(ExpectedKind::SequenceOrSet)) } fn open_sequence(&mut self) -> ReaderResult<()> { self.skip_annotations()?; if self.peek()? != '[' { return Err(self.expected::(ExpectedKind::Sequence)); } self.drop(1); Ok(()) } fn open_set(&mut self) -> ReaderResult<()> { self.skip_annotations()?; let mark = Reader::::mark(self)?; match self.next_char()? { '#' => match self.next_char()? { '{' => return Ok(()), _ => (), }, _ => (), } Reader::::restore(self, &mark)?; Err(self.expected::(ExpectedKind::Set)) } fn open_dictionary(&mut self) -> ReaderResult<()> { self.skip_annotations()?; if self.peek()? != '{' { return Err(self.expected::(ExpectedKind::Dictionary)); } self.drop(1); Ok(()) } #[inline] fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> { match b { B::Type { closing: Some(B::Item::DictionaryKey), opening: Some(B::Item::DictionaryValue), } => { self.skip_whitespace(); if self.next_char()? != ':' { return Err(syntax_error("Missing expected key/value separator")); } }, _ => (), } Ok(()) } fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult { self.skip_whitespace(); match self.peek()? { '>' | ']' | '}' => { self.drop(1); Ok(true) } _ => { b.shift(Some(i.clone())); Reader::::boundary(self, b)?; Ok(false) } } } fn open_embedded(&mut self) -> ReaderResult<()> { self.skip_annotations()?; let mark = Reader::::mark(self)?; match self.next_char()? { '#' => match self.next_char()? { '!' 
// Eighth physical line (continues `open_embedded`):
//   close_embedded - does nothing and returns `Ok(())`.
//   mark / restore - `Mark` is `usize`: the byte offset `pos` is saved and reinstated.
//   next_token     - skips annotations, then classifies the next input: `<` `[` `{` `#{`
//                    open compounds, `>` `]` `}` are `Token::End`, `#!` reads an embedded
//                    value; anything else restores the mark and reads a whole value as
//                    `Token::Atom` with annotations not read.
//   next_annotations_and_token
//                  - `gather_annotations` followed by `next_token(true)`.
=> return Ok(()), _ => (), }, _ => (), } Reader::::restore(self, &mark)?; Err(self.expected::(ExpectedKind::Embedded)) } fn close_embedded(&mut self) -> ReaderResult<()> { Ok(()) } type Mark = usize; fn mark(&mut self) -> io::Result { Ok(self.pos) } fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> { self.pos = *mark; Ok(()) } fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result> { self.skip_annotations()?; let mark = Reader::::mark(self)?; Ok(match self.next_char()? { '<' => Token::Compound(CompoundClass::Record), '[' => Token::Compound(CompoundClass::Sequence), '{' => Token::Compound(CompoundClass::Dictionary), '>' => Token::End, ']' => Token::End, '}' => Token::End, '#' => match self.next_char()? { '!' => { let v = self.next_iovalue(read_embedded_annotations)?; Token::Embedded(self.dec.parse_embedded(&v)?) } '{' => Token::Compound(CompoundClass::Set), _ => { Reader::::restore(self, &mark)?; Token::Atom(self.demand_next(false)?) } }, _ => { Reader::::restore(self, &mark)?; Token::Atom(self.demand_next(false)?) } }) } fn next_annotations_and_token(&mut self) -> io::Result<(Vec, Token)> { let annotations = self.gather_annotations()?; Ok((annotations, self.next_token(true)?)) } }