Core preserves tests pass

This commit is contained in:
Tony Garnock-Jones 2022-07-10 17:39:25 +02:00
parent c2a48a9b78
commit 7f9f609a84
5 changed files with 183 additions and 130 deletions

View File

@ -50,7 +50,7 @@ impl From<Error> for io::Error {
match e { match e {
Error::Io(ioe) => ioe, Error::Io(ioe) => ioe,
Error::Message(str) => io::Error::new(io::ErrorKind::Other, str), Error::Message(str) => io::Error::new(io::ErrorKind::Other, str),
_ => io::Error::new(io::ErrorKind::Other, e.to_string()), _ => io::Error::new(io::ErrorKind::Other, e),
} }
} }
} }
@ -93,18 +93,6 @@ pub fn is_eof_error(e: &Error) -> bool {
} }
} }
/// Builds an [`Error::Io`] wrapping the `io::Error` that `io_syntax_error`
/// produces for the message `s`.
pub fn syntax_error(s: &str) -> Error {
Error::Io(io_syntax_error(s))
}
pub fn is_syntax_error(e: &Error) -> bool {
if let Error::Io(ioe) = e {
is_syntax_io_error(ioe)
} else {
false
}
}
//--------------------------------------------------------------------------- //---------------------------------------------------------------------------
pub fn io_eof() -> io::Error { pub fn io_eof() -> io::Error {
@ -115,10 +103,6 @@ pub fn is_eof_io_error(e: &io::Error) -> bool {
matches!(e.kind(), io::ErrorKind::UnexpectedEof) matches!(e.kind(), io::ErrorKind::UnexpectedEof)
} }
/// Creates an `io::Error` of kind [`io::ErrorKind::InvalidData`] whose payload
/// is the message `s`.
pub fn io_syntax_error(s: &str) -> io::Error {
    let kind = io::ErrorKind::InvalidData;
    io::Error::new(kind, s)
}
pub fn is_syntax_io_error(e: &io::Error) -> bool { pub fn is_syntax_io_error(e: &io::Error) -> bool {
matches!(e.kind(), io::ErrorKind::InvalidData) matches!(e.kind(), io::ErrorKind::InvalidData)
} }

View File

@ -1,4 +1,4 @@
use crate::error::{self, ExpectedKind, io_eof, io_syntax_error}; use crate::error::{self, ExpectedKind, io_eof};
use num::bigint::BigInt; use num::bigint::BigInt;
use num::traits::cast::{FromPrimitive, ToPrimitive}; use num::traits::cast::{FromPrimitive, ToPrimitive};
@ -101,7 +101,7 @@ impl<'de, 'src, S: BinarySource<'de>> PackedReaderSource<'de, 'src, S> {
if acc & 0x80 != 0 { return Ok(acc - 0x80) } if acc & 0x80 != 0 { return Ok(acc - 0x80) }
loop { loop {
if acc & 0xfe0000000000000 != 0 { if acc & 0xfe0000000000000 != 0 {
return Err(io_syntax_error("Varint length marker overflow")); return Err(self.syntax_error("Varint length marker overflow"));
} }
acc <<= 7; acc <<= 7;
let v = self.read()? as u64; let v = self.read()? as u64;
@ -113,7 +113,7 @@ impl<'de, 'src, S: BinarySource<'de>> PackedReaderSource<'de, 'src, S> {
fn narrow_to_len(&mut self) -> io::Result<Option<u64>> { fn narrow_to_len(&mut self) -> io::Result<Option<u64>> {
let item_len = self.varint()?; let item_len = self.varint()?;
if !self.advance(item_len) { if !self.advance(item_len) {
return Err(io_syntax_error("Bad item length")); return Err(self.syntax_error("Bad item length"));
} }
let remaining = self.count; let remaining = self.count;
self.count = Some(item_len); self.count = Some(item_len);
@ -122,7 +122,8 @@ impl<'de, 'src, S: BinarySource<'de>> PackedReaderSource<'de, 'src, S> {
fn narrow(&mut self) -> io::Result<()> { fn narrow(&mut self) -> io::Result<()> {
if !self.expect_length { if !self.expect_length {
self.expect_length = true; // after the first value, always true! self.expect_length = true;
self.stack.push(Continuation::Sequence { count: Some(0) });
} else { } else {
let count = self.narrow_to_len()?; let count = self.narrow_to_len()?;
self.stack.push(Continuation::Sequence { count }); self.stack.push(Continuation::Sequence { count });
@ -223,6 +224,10 @@ impl<'de, 'src, S: BinarySource<'de>> BinarySource<'de> for PackedReaderSource<'
Ok(()) Ok(())
} }
/// Reports the current input position by delegating to the wrapped source's
/// `input_position`; the result is passed through unchanged.
fn input_position(&mut self) -> io::Result<Option<usize>> {
self.source.input_position()
}
#[inline(always)] #[inline(always)]
fn skip(&mut self) -> io::Result<()> { fn skip(&mut self) -> io::Result<()> {
self.advance_or_eof(1)?; self.advance_or_eof(1)?;
@ -356,6 +361,10 @@ impl<'de, 'src, S: BinarySource<'de>> PackedReader<'de, 'src, S> {
} }
} }
/// Builds a syntax-error `io::Error` carrying `message` by delegating to
/// `self.source.syntax_error`.
fn syntax_error(&mut self, message: &str) -> io::Error {
self.source.syntax_error(message)
}
fn _next<N: NestedValue, Dec: DomainDecode<N::Embedded>>( fn _next<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
&mut self, &mut self,
read_annotations: bool, read_annotations: bool,
@ -370,20 +379,26 @@ impl<'de, 'src, S: BinarySource<'de>> PackedReader<'de, 'src, S> {
match bs.len() { match bs.len() {
4 => Value::from(f32::from_bits(u32::from_be_bytes((&bs[..]).try_into().unwrap()))).wrap(), 4 => Value::from(f32::from_bits(u32::from_be_bytes((&bs[..]).try_into().unwrap()))).wrap(),
8 => Value::from(f64::from_bits(u64::from_be_bytes((&bs[..]).try_into().unwrap()))).wrap(), 8 => Value::from(f64::from_bits(u64::from_be_bytes((&bs[..]).try_into().unwrap()))).wrap(),
_ => Err(io_syntax_error("Invalid floating-point width"))?, _ => Err(self.syntax_error("Invalid floating-point width"))?,
} }
} }
Tag::SignedInteger => Value::SignedInteger(self.source.read_signed_integer()?).wrap(), Tag::SignedInteger => Value::SignedInteger(self.source.read_signed_integer()?).wrap(),
Tag::String => Value::String(decode_nul_str(self.source.read_to_end()?)?.into_owned()).wrap(), Tag::String => {
let bs = self.source.read_to_end()?;
Value::String(self.decode_nul_str(bs)?.into_owned()).wrap()
}
Tag::ByteString => Value::ByteString(self.source.read_to_end()?.into_owned()).wrap(), Tag::ByteString => Value::ByteString(self.source.read_to_end()?.into_owned()).wrap(),
Tag::Symbol => Value::Symbol(decodestr(self.source.read_to_end()?)?.into_owned()).wrap(), Tag::Symbol => {
let bs = self.source.read_to_end()?;
Value::Symbol(self.decodestr(bs)?.into_owned()).wrap()
},
Tag::Record => { Tag::Record => {
let mut vs = Vec::new(); let mut vs = Vec::new();
while let Some(v) = self.next(read_annotations, decode_embedded)? { while let Some(v) = self.next(read_annotations, decode_embedded)? {
vs.push(v); vs.push(v);
} }
if vs.is_empty() { if vs.is_empty() {
return Err(io_syntax_error("Too few elements in encoded record")) return Err(self.syntax_error("Too few elements in encoded record"))
} }
Value::Record(Record(vs)).wrap() Value::Record(Record(vs)).wrap()
} }
@ -406,13 +421,17 @@ impl<'de, 'src, S: BinarySource<'de>> PackedReader<'de, 'src, S> {
while let Some(k) = self.next(read_annotations, decode_embedded)? { while let Some(k) = self.next(read_annotations, decode_embedded)? {
match self.next(read_annotations, decode_embedded)? { match self.next(read_annotations, decode_embedded)? {
Some(v) => { d.insert(k, v); } Some(v) => { d.insert(k, v); }
None => return Err(io_syntax_error("Missing dictionary value")), None => return Err(self.syntax_error("Missing dictionary value")),
} }
} }
Value::Dictionary(d).wrap() Value::Dictionary(d).wrap()
} }
Tag::Embedded => Value::Embedded( Tag::Embedded => {
decode_embedded.decode_embedded(self, read_annotations)?).wrap(), self.source.expect_length = false;
let d = decode_embedded.decode_embedded(self, read_annotations)?;
self.source.expect_length = true;
Value::Embedded(d).wrap()
}
Tag::Annotation => { Tag::Annotation => {
if read_annotations { if read_annotations {
let underlying: Option<N> = self.next(read_annotations, decode_embedded)?; let underlying: Option<N> = self.next(read_annotations, decode_embedded)?;
@ -426,7 +445,7 @@ impl<'de, 'src, S: BinarySource<'de>> PackedReader<'de, 'src, S> {
existing_annotations.modify(|ws| ws.extend_from_slice(&vs[..])); existing_annotations.modify(|ws| ws.extend_from_slice(&vs[..]));
N::wrap(existing_annotations, v) N::wrap(existing_annotations, v)
} }
None => return Err(io_syntax_error("Missing value in encoded annotation")), None => return Err(self.syntax_error("Missing value in encoded annotation")),
} }
} else { } else {
self.source.narrow_to_annotated_value()?; self.source.narrow_to_annotated_value()?;
@ -436,6 +455,37 @@ impl<'de, 'src, S: BinarySource<'de>> PackedReader<'de, 'src, S> {
}); });
} }
} }
/// Reinterprets a possibly-borrowed byte buffer as a possibly-borrowed string,
/// keeping the `Cow` variant: borrowed input stays borrowed, owned input stays
/// owned.
///
/// # Errors
/// Returns `self.syntax_error("Invalid UTF-8")` when the bytes are not valid
/// UTF-8.
#[inline(always)]
fn decodestr<'a>(&mut self, cow: Cow<'a, [u8]>) -> io::Result<Cow<'a, str>> {
    match cow {
        Cow::Borrowed(bytes) => match std::str::from_utf8(bytes) {
            Ok(text) => Ok(Cow::Borrowed(text)),
            Err(_) => Err(self.syntax_error("Invalid UTF-8")),
        },
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            Ok(text) => Ok(Cow::Owned(text)),
            Err(_) => Err(self.syntax_error("Invalid UTF-8")),
        },
    }
}
/// Verifies that `bs` is non-empty and ends with a NUL (zero) byte.
///
/// # Errors
/// Returns `self.syntax_error("Missing trailing NUL byte on string")` when
/// `bs` is empty or its final byte is not zero.
fn check_nul(&mut self, bs: &[u8]) -> io::Result<()> {
    match bs.last() {
        Some(&0) => Ok(()),
        _ => Err(self.syntax_error("Missing trailing NUL byte on string")),
    }
}
/// Decodes a NUL-terminated byte buffer into a string, dropping the trailing
/// NUL byte and keeping the `Cow` variant of the input.
///
/// # Errors
/// Propagates the error from `check_nul` when the trailing NUL is missing, and
/// the error from `decodestr` when the remaining bytes are not valid UTF-8.
fn decode_nul_str<'a>(&mut self, cow: Cow<'a, [u8]>) -> io::Result<Cow<'a, str>> {
    // Both variants are validated the same way, so check once up front.
    self.check_nul(&cow)?;
    // `check_nul` guarantees at least one byte, so removing the last is safe.
    let trimmed = match cow {
        Cow::Borrowed(bytes) => Cow::Borrowed(&bytes[..bytes.len() - 1]),
        Cow::Owned(mut bytes) => {
            bytes.pop();
            Cow::Owned(bytes)
        }
    };
    self.decodestr(trimmed)
}
} }
impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, 'src, S> { impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, 'src, S> {
@ -490,11 +540,14 @@ impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, 'src, S>
#[inline(always)] #[inline(always)]
fn open_embedded(&mut self) -> ReaderResult<()> { fn open_embedded(&mut self) -> ReaderResult<()> {
self.next_compound(Tag::Embedded, ExpectedKind::Embedded) self.next_compound(Tag::Embedded, ExpectedKind::Embedded)?;
self.source.expect_length = false;
Ok(())
} }
#[inline(always)] #[inline(always)]
fn close_embedded(&mut self) -> ReaderResult<()> { fn close_embedded(&mut self) -> ReaderResult<()> {
self.source.expect_length = true;
Ok(self.source.widen(())?) Ok(self.source.widen(())?)
} }
@ -539,8 +592,10 @@ impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, 'src, S>
Tag::Embedded => { Tag::Embedded => {
self.source.skip()?; self.source.skip()?;
self.source.expect_length = false;
let t = Token::Embedded(decode_embedded.decode_embedded( let t = Token::Embedded(decode_embedded.decode_embedded(
self, read_embedded_annotations)?); self, read_embedded_annotations)?);
self.source.expect_length = true;
self.source.widen(t) self.source.widen(t)
} }
@ -591,7 +646,7 @@ impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, 'src, S>
match bs.len() { match bs.len() {
4 => Ok(f32::from_bits(u32::from_be_bytes((&bs[..]).try_into().unwrap()))), 4 => Ok(f32::from_bits(u32::from_be_bytes((&bs[..]).try_into().unwrap()))),
8 => Ok(f64::from_bits(u64::from_be_bytes((&bs[..]).try_into().unwrap())) as f32), 8 => Ok(f64::from_bits(u64::from_be_bytes((&bs[..]).try_into().unwrap())) as f32),
_ => Err(io_syntax_error("Invalid floating-point width"))?, _ => Err(self.syntax_error("Invalid floating-point width"))?,
} }
} }
@ -600,12 +655,13 @@ impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, 'src, S>
match bs.len() { match bs.len() {
4 => Ok(f32::from_bits(u32::from_be_bytes((&bs[..]).try_into().unwrap())) as f64), 4 => Ok(f32::from_bits(u32::from_be_bytes((&bs[..]).try_into().unwrap())) as f64),
8 => Ok(f64::from_bits(u64::from_be_bytes((&bs[..]).try_into().unwrap()))), 8 => Ok(f64::from_bits(u64::from_be_bytes((&bs[..]).try_into().unwrap()))),
_ => Err(io_syntax_error("Invalid floating-point width"))?, _ => Err(self.syntax_error("Invalid floating-point width"))?,
} }
} }
fn next_str(&mut self) -> ReaderResult<Cow<'de, str>> { fn next_str(&mut self) -> ReaderResult<Cow<'de, str>> {
Ok(decode_nul_str(self.next_atomic(Tag::String, ExpectedKind::String)?)?) let bs = self.next_atomic(Tag::String, ExpectedKind::String)?;
Ok(self.decode_nul_str(bs)?)
} }
fn next_bytestring(&mut self) -> ReaderResult<Cow<'de, [u8]>> { fn next_bytestring(&mut self) -> ReaderResult<Cow<'de, [u8]>> {
@ -613,37 +669,7 @@ impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, 'src, S>
} }
fn next_symbol(&mut self) -> ReaderResult<Cow<'de, str>> { fn next_symbol(&mut self) -> ReaderResult<Cow<'de, str>> {
Ok(decodestr(self.next_atomic(Tag::Symbol, ExpectedKind::Symbol)?)?) let bs = self.next_atomic(Tag::Symbol, ExpectedKind::Symbol)?;
} Ok(self.decodestr(bs)?)
}
/// Reinterprets a possibly-borrowed byte buffer as a possibly-borrowed string,
/// keeping the `Cow` variant: borrowed input stays borrowed, owned input stays
/// owned.
///
/// # Errors
/// Returns `io_syntax_error("Invalid UTF-8")` when the bytes are not valid
/// UTF-8.
#[inline(always)]
fn decodestr(cow: Cow<'_, [u8]>) -> io::Result<Cow<'_, str>> {
match cow {
Cow::Borrowed(bs) =>
Ok(Cow::Borrowed(std::str::from_utf8(bs).map_err(|_| io_syntax_error("Invalid UTF-8"))?)),
Cow::Owned(bs) =>
Ok(Cow::Owned(String::from_utf8(bs).map_err(|_| io_syntax_error("Invalid UTF-8"))?)),
}
}
/// Verifies that `bs` is non-empty and ends with a NUL (zero) byte.
///
/// # Errors
/// Returns `io_syntax_error("Missing trailing NUL byte on string")` when `bs`
/// is empty or its final byte is not zero.
fn check_nul(bs: &[u8]) -> io::Result<()> {
    match bs.last() {
        Some(&0) => Ok(()),
        _ => Err(io_syntax_error("Missing trailing NUL byte on string")),
    }
}
fn decode_nul_str(cow: Cow<'_, [u8]>) -> io::Result<Cow<'_, str>> {
match cow {
Cow::Borrowed(bs) => {
check_nul(bs)?;
decodestr(Cow::Borrowed(&bs[0..bs.len()-1]))
}
Cow::Owned(mut bs) => {
check_nul(&bs)?;
bs.truncate(bs.len() - 1);
decodestr(Cow::Owned(bs))
}
} }
} }

View File

@ -35,15 +35,16 @@ impl PackedWriter<&mut Vec<u8>> {
} }
} }
pub fn varint(iol: &mut IOList, mut v: usize) { pub fn varint(iol: &mut IOList, v: usize) {
loop { _varint(iol, v, 0x80)
if v < 128 { }
iol.write((v + 0x80) as u8);
return; fn _varint(iol: &mut IOList, v: usize, d: u8) {
} else { if v < 128 {
iol.write((v & 0x7f) as u8); iol.write(v as u8 + d);
v >>= 7; } else {
} _varint(iol, v >> 7, 0);
iol.write((v & 0x7f) as u8 + d);
} }
} }

View File

@ -7,6 +7,7 @@ pub trait BinarySource<'de>: Sized {
type Mark; type Mark;
fn mark(&mut self) -> io::Result<Self::Mark>; fn mark(&mut self) -> io::Result<Self::Mark>;
fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>; fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>;
fn input_position(&mut self) -> io::Result<Option<usize>>;
fn skip(&mut self) -> io::Result<()>; fn skip(&mut self) -> io::Result<()>;
fn peek(&mut self) -> io::Result<Option<u8>>; fn peek(&mut self) -> io::Result<Option<u8>>;
@ -17,6 +18,16 @@ pub trait BinarySource<'de>: Sized {
//--------------------------------------------------------------------------- //---------------------------------------------------------------------------
/// Builds an `io::Error` of kind `InvalidData` wrapping a [`SyntaxError`] that
/// records `message` together with the current input position.
///
/// If `input_position` itself fails, that failure is discarded and the
/// position is recorded as `None`.
fn syntax_error(&mut self, message: &str) -> io::Error {
    let position = self.input_position().unwrap_or(None);
    let detail = SyntaxError {
        position,
        message: message.to_owned(),
    };
    io::Error::new(io::ErrorKind::InvalidData, detail)
}
fn packed(&mut self) -> super::PackedReader<'de, '_, Self> { fn packed(&mut self) -> super::PackedReader<'de, '_, Self> {
super::PackedReader::new(self) super::PackedReader::new(self)
} }
@ -26,6 +37,25 @@ pub trait BinarySource<'de>: Sized {
} }
} }
/// A syntax error detected while reading input, tagged with the input position
/// (when one was available) at which it was reported.
#[derive(Debug)]
pub struct SyntaxError {
    /// Input position at which the error was reported; `None` when unknown.
    position: Option<usize>,
    /// Human-readable description of the problem.
    message: String,
}

impl std::fmt::Display for SyntaxError {
    /// Formats the error as `position <p>: <message>`, printing `??` in place
    /// of the position when it is unknown.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Write straight into the formatter instead of building a temporary
        // `String` for the position on every call.
        match self.position {
            Some(p) => write!(f, "position {}: {}", p, self.message),
            None => write!(f, "position ??: {}", self.message),
        }
    }
}

impl std::error::Error for SyntaxError {}
pub struct IOBinarySource<R: io::Read + io::Seek> { pub struct IOBinarySource<R: io::Read + io::Seek> {
pub read: R, pub read: R,
pub buf: Option<u8>, pub buf: Option<u8>,
@ -53,6 +83,10 @@ impl<'de, R: io::Read + io::Seek> BinarySource<'de> for IOBinarySource<R> {
Ok(()) Ok(())
} }
/// Reports the current input position as `Some(position)`, where the position
/// is the value returned by `self.mark()`.
///
/// # Errors
/// Propagates any error returned by `mark()`.
fn input_position(&mut self) -> io::Result<Option<usize>> {
// NOTE(review): `as usize` presumably narrows a wider integer mark; on targets
// where `usize` is smaller this would truncate silently — confirm `Self::Mark`.
Ok(Some(self.mark()? as usize))
}
#[inline(always)] #[inline(always)]
fn skip(&mut self) -> io::Result<()> { fn skip(&mut self) -> io::Result<()> {
if self.buf.is_none() { unreachable!(); } if self.buf.is_none() { unreachable!(); }
@ -133,6 +167,10 @@ impl<'de> BinarySource<'de> for BytesBinarySource<'de> {
Ok(()) Ok(())
} }
/// Reports the current input position as `Some(self.index)`, converted to
/// `usize`. This implementation always returns `Ok`.
fn input_position(&mut self) -> io::Result<Option<usize>> {
Ok(Some(self.index as usize))
}
#[inline(always)] #[inline(always)]
fn skip(&mut self) -> io::Result<()> { fn skip(&mut self) -> io::Result<()> {
if self.index as usize >= self.bytes.len() { unreachable!(); } if self.index as usize >= self.bytes.len() { unreachable!(); }

View File

@ -1,7 +1,6 @@
use crate::error::Error; use crate::error::Error;
use crate::error::ExpectedKind; use crate::error::ExpectedKind;
use crate::error::io_eof; use crate::error::io_eof;
use crate::error::io_syntax_error;
use crate::hex; use crate::hex;
@ -30,18 +29,6 @@ pub struct TextReader<'de, 'src, S: BinarySource<'de>> {
phantom: PhantomData<&'de ()>, phantom: PhantomData<&'de ()>,
} }
/// Converts an owned byte buffer into a `String`.
///
/// # Errors
/// Returns `io_syntax_error("Invalid UTF-8")` when `bs` is not valid UTF-8.
fn decode_utf8(bs: Vec<u8>) -> io::Result<String> {
    match String::from_utf8(bs) {
        Ok(text) => Ok(text),
        Err(_) => Err(io_syntax_error("Invalid UTF-8")),
    }
}
/// Appends the UTF-8 encoding of the Unicode scalar value `n` to `bs`.
///
/// # Errors
/// Returns `io_syntax_error("Bad code point")` when `n` is not a valid `char`
/// (as decided by `char::from_u32`); `bs` is left untouched in that case.
fn append_codepoint(bs: &mut Vec<u8>, n: u32) -> io::Result<()> {
    match char::from_u32(n) {
        Some(ch) => {
            let mut utf8 = [0u8; 4];
            bs.extend_from_slice(ch.encode_utf8(&mut utf8).as_bytes());
            Ok(())
        }
        None => Err(io_syntax_error("Bad code point")),
    }
}
impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> { impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> {
pub fn new(source: &'src mut S) -> Self { pub fn new(source: &'src mut S) -> Self {
TextReader { TextReader {
@ -50,6 +37,10 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> {
} }
} }
/// Builds a syntax-error `io::Error` carrying `message` by delegating to
/// `self.source.syntax_error`.
fn syntax_error(&mut self, message: &str) -> io::Error {
self.source.syntax_error(message)
}
fn peek(&mut self) -> io::Result<Option<u8>> { fn peek(&mut self) -> io::Result<Option<u8>> {
self.source.peek() self.source.peek()
} }
@ -112,13 +103,17 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> {
} }
} }
/// Converts an owned byte buffer into a `String`.
///
/// # Errors
/// Returns `self.syntax_error("Invalid UTF-8")` when `bs` is not valid UTF-8.
fn decode_utf8(&mut self, bs: Vec<u8>) -> io::Result<String> {
    match String::from_utf8(bs) {
        Ok(text) => Ok(text),
        Err(_) => Err(self.syntax_error("Invalid UTF-8")),
    }
}
fn comment_line(&mut self) -> io::Result<String> { fn comment_line(&mut self) -> io::Result<String> {
let mut bs = Vec::new(); let mut bs = Vec::new();
loop { loop {
let b = self.peek_noeof()?; let b = self.peek_noeof()?;
self.skip()?; self.skip()?;
match b { match b {
b'\r' | b'\n' => return Ok(decode_utf8(bs)?), b'\r' | b'\n' => return Ok(self.decode_utf8(bs)?),
_ => bs.push(b), _ => bs.push(b),
} }
} }
@ -168,23 +163,23 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> {
} }
fn finish_number<N: NestedValue>(&mut self, bs: Vec<u8>, is_float: bool) -> io::Result<N> { fn finish_number<N: NestedValue>(&mut self, bs: Vec<u8>, is_float: bool) -> io::Result<N> {
let s = decode_utf8(bs)?; let s = self.decode_utf8(bs)?;
if is_float { if is_float {
match self.peek_noeof() { match self.peek_noeof() {
Ok(b'f') | Ok(b'F') => { Ok(b'f') | Ok(b'F') => {
self.skip()?; self.skip()?;
Ok(N::new(s.parse::<f32>().map_err( Ok(N::new(s.parse::<f32>().map_err(
|_| io_syntax_error(&format!( |_| self.syntax_error(&format!(
"Invalid single-precision number: {:?}", s)))?)) "Invalid single-precision number: {:?}", s)))?))
} }
_ => _ =>
Ok(N::new(s.parse::<f64>().map_err( Ok(N::new(s.parse::<f64>().map_err(
|_| io_syntax_error(&format!( |_| self.syntax_error(&format!(
"Invalid double-precision number: {:?}", s)))?)) "Invalid double-precision number: {:?}", s)))?))
} }
} else { } else {
Ok(N::new(s.parse::<BigInt>().map_err( Ok(N::new(s.parse::<BigInt>().map_err(
|_| io_syntax_error(&format!( |_| self.syntax_error(&format!(
"Invalid signed-integer number: {:?}", s)))?)) "Invalid signed-integer number: {:?}", s)))?))
} }
} }
@ -192,7 +187,7 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> {
fn read_digit1(&mut self, bs: &mut Vec<u8>, c: u8) -> io::Result<()> fn read_digit1(&mut self, bs: &mut Vec<u8>, c: u8) -> io::Result<()>
{ {
if !(c as char).is_digit(10) { if !(c as char).is_digit(10) {
return Err(io_syntax_error("Incomplete number")); return Err(self.syntax_error("Incomplete number"));
} }
bs.push(c); bs.push(c);
while let Ok(Some(c)) = self.peek() { while let Ok(Some(c)) = self.peek() {
@ -213,23 +208,23 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> {
hexescaper: H, hexescaper: H,
) -> io::Result<R> ) -> io::Result<R>
where where
X: Fn(&mut R, u8) -> io::Result<()>, X: Fn(&mut Self, &mut R, u8) -> io::Result<()>,
H: Fn(&mut R, &mut Self) -> io::Result<()>, H: Fn(&mut Self, &mut R) -> io::Result<()>,
{ {
loop { loop {
match self.next_byte()? { match self.next_byte()? {
c if c == terminator => return Ok(seed), c if c == terminator => return Ok(seed),
b'\\' => match self.next_byte()? { b'\\' => match self.next_byte()? {
c if c == hexescape => hexescaper(&mut seed, self)?, c if c == hexescape => hexescaper(self, &mut seed)?,
c if c == terminator || c == b'\\' || c == b'/' => xform_item(&mut seed, c)?, c if c == terminator || c == b'\\' || c == b'/' => xform_item(self, &mut seed, c)?,
b'b' => xform_item(&mut seed, b'\x08')?, b'b' => xform_item(self, &mut seed, b'\x08')?,
b'f' => xform_item(&mut seed, b'\x0c')?, b'f' => xform_item(self, &mut seed, b'\x0c')?,
b'n' => xform_item(&mut seed, b'\x0a')?, b'n' => xform_item(self, &mut seed, b'\x0a')?,
b'r' => xform_item(&mut seed, b'\x0d')?, b'r' => xform_item(self, &mut seed, b'\x0d')?,
b't' => xform_item(&mut seed, b'\x09')?, b't' => xform_item(self, &mut seed, b'\x09')?,
_ => return Err(io_syntax_error("Invalid escape code")), _ => return Err(self.syntax_error("Invalid escape code")),
}, },
c => xform_item(&mut seed, c)?, c => xform_item(self, &mut seed, c)?,
} }
} }
} }
@ -242,48 +237,57 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> {
Some(d) => Some(d) =>
v = v << 4 | d, v = v << 4 | d,
None => None =>
return Err(io_syntax_error("Bad hex escape")), return Err(self.syntax_error("Bad hex escape")),
} }
} }
Ok(v) Ok(v)
} }
/// Appends the UTF-8 encoding of the Unicode scalar value `n` to `bs`.
///
/// # Errors
/// Returns `self.syntax_error("Bad code point")` when `n` is not a valid
/// `char` (as decided by `char::from_u32`); `bs` is left untouched in that
/// case.
fn append_codepoint(&mut self, bs: &mut Vec<u8>, n: u32) -> io::Result<()> {
    match char::from_u32(n) {
        Some(ch) => {
            let mut utf8 = [0u8; 4];
            bs.extend_from_slice(ch.encode_utf8(&mut utf8).as_bytes());
            Ok(())
        }
        None => Err(self.syntax_error("Bad code point")),
    }
}
fn read_string(&mut self, delimiter: u8) -> io::Result<String> { fn read_string(&mut self, delimiter: u8) -> io::Result<String> {
decode_utf8(self.read_stringlike( let raw = self.read_stringlike(
Vec::new(), Vec::new(),
|bs, c| Ok(bs.push(c)), |_r, bs, c| Ok(bs.push(c)),
delimiter, delimiter,
b'u', b'u',
|bs, r| { |r, bs| {
let n1 = r.hexnum(4)?; let n1 = r.hexnum(4)?;
if (0xd800 ..= 0xdbff).contains(&n1) { if (0xd800 ..= 0xdbff).contains(&n1) {
let mut ok = true; let mut ok = true;
ok = ok && r.next_byte()? == b'\\'; ok = ok && r.next_byte()? == b'\\';
ok = ok && r.next_byte()? == b'u'; ok = ok && r.next_byte()? == b'u';
if !ok { if !ok {
Err(io_syntax_error("Missing second half of surrogate pair")) Err(r.syntax_error("Missing second half of surrogate pair"))
} else { } else {
let n2 = r.hexnum(4)?; let n2 = r.hexnum(4)?;
if (0xdc00 ..= 0xdfff).contains(&n2) { if (0xdc00 ..= 0xdfff).contains(&n2) {
let n = ((n1 - 0xd800) << 10) + (n2 - 0xdc00) + 0x10000; let n = ((n1 - 0xd800) << 10) + (n2 - 0xdc00) + 0x10000;
append_codepoint(bs, n) r.append_codepoint(bs, n)
} else { } else {
Err(io_syntax_error("Bad second half of surrogate pair")) Err(r.syntax_error("Bad second half of surrogate pair"))
} }
} }
} else { } else {
append_codepoint(bs, n1) r.append_codepoint(bs, n1)
} }
})?) })?;
self.decode_utf8(raw)
} }
fn read_literal_binary<N: NestedValue>(&mut self) -> io::Result<N> { fn read_literal_binary<N: NestedValue>(&mut self) -> io::Result<N> {
Ok(N::new(&self.read_stringlike( Ok(N::new(&self.read_stringlike(
Vec::new(), Vec::new(),
|bs, b| Ok(bs.push(b)), |_r, bs, b| Ok(bs.push(b)),
b'"', b'"',
b'x', b'x',
|bs, r| Ok(bs.push(r.hexnum(2)? as u8)))?[..])) |r, bs| Ok(bs.push(r.hexnum(2)? as u8)))?[..]))
} }
fn read_hex_binary<N: NestedValue>(&mut self) -> io::Result<N> { fn read_hex_binary<N: NestedValue>(&mut self) -> io::Result<N> {
@ -297,7 +301,7 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> {
} }
let c2 = self.next_byte()? as char; let c2 = self.next_byte()? as char;
if !(c1.is_digit(16) && c2.is_digit(16)) { if !(c1.is_digit(16) && c2.is_digit(16)) {
return Err(io_syntax_error("Invalid hex binary")); return Err(self.syntax_error("Invalid hex binary"));
} }
s.push(c1); s.push(c1);
s.push(c2); s.push(c2);
@ -310,8 +314,8 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> {
self.skip_whitespace(); self.skip_whitespace();
let mut c = self.next_byte()?; let mut c = self.next_byte()?;
if c == b']' { if c == b']' {
let bs = base64::decode_config(&decode_utf8(bs)?, base64::STANDARD_NO_PAD) let bs = base64::decode_config(&self.decode_utf8(bs)?, base64::STANDARD_NO_PAD)
.map_err(|_| io_syntax_error("Invalid base64 character"))?; .map_err(|_| self.syntax_error("Invalid base64 character"))?;
return Ok(N::new(&bs[..])); return Ok(N::new(&bs[..]));
} }
if c == b'-' { c = b'+'; } if c == b'-' { c = b'+'; }
@ -353,7 +357,7 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> {
let k = self.demand_next(read_annotations, decode_embedded)?; let k = self.demand_next(read_annotations, decode_embedded)?;
self.skip_whitespace(); self.skip_whitespace();
if self.next_byte()? != b':' { if self.next_byte()? != b':' {
return Err(io_syntax_error("Missing expected key/value separator")); return Err(self.syntax_error("Missing expected key/value separator"));
} }
let v = self.demand_next(read_annotations, decode_embedded)?; let v = self.demand_next(read_annotations, decode_embedded)?;
d.insert(k, v); d.insert(k, v);
@ -370,7 +374,7 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> {
match c { match c {
b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' |
b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' =>
return Ok(N::symbol(&decode_utf8(bs)?)), return Ok(N::symbol(&self.decode_utf8(bs)?)),
c => { c => {
self.skip()?; self.skip()?;
bs.push(c) bs.push(c)
@ -422,7 +426,7 @@ impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for TextReader<'de, 'src, S> {
} }
} }
b':' => { b':' => {
return Err(io_syntax_error("Unexpected key/value separator between items")); return Err(self.syntax_error("Unexpected key/value separator between items"));
} }
b'#' => { b'#' => {
self.skip()?; self.skip()?;
@ -435,17 +439,17 @@ impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for TextReader<'de, 'src, S> {
b'x' => if self.next_byte()? == b'"' { b'x' => if self.next_byte()? == b'"' {
self.read_hex_binary()? self.read_hex_binary()?
} else { } else {
return Err(io_syntax_error("Expected open-quote at start of hex ByteString")); return Err(self.syntax_error("Expected open-quote at start of hex ByteString"));
}, },
b'[' => self.read_base64_binary()?, b'[' => self.read_base64_binary()?,
b'=' => { b'=' => {
let bs_val = self.next_iovalue(true)?; let bs_val = self.next_iovalue(true)?;
if bs_val.annotations().slice().len() > 0 { if bs_val.annotations().slice().len() > 0 {
return Err(io_syntax_error("Annotations not permitted after #=")); return Err(self.syntax_error("Annotations not permitted after #="));
} }
match bs_val.value().as_bytestring() { match bs_val.value().as_bytestring() {
None => None =>
return Err(io_syntax_error("ByteString must follow #=")), return Err(self.syntax_error("ByteString must follow #=")),
Some(bs) => Some(bs) =>
crate::value::BytesBinarySource::new(bs) crate::value::BytesBinarySource::new(bs)
.packed() .packed()
@ -454,14 +458,14 @@ impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for TextReader<'de, 'src, S> {
} }
b'!' => Value::Embedded( b'!' => Value::Embedded(
decode_embedded.decode_embedded(self, read_annotations)?).wrap(), decode_embedded.decode_embedded(self, read_annotations)?).wrap(),
other => return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other))), other => return Err(self.syntax_error(&format!("Invalid # syntax: {:?}", other))),
} }
} }
b'<' => { b'<' => {
self.skip()?; self.skip()?;
let vs = self.upto(b'>', read_annotations, decode_embedded)?; let vs = self.upto(b'>', read_annotations, decode_embedded)?;
if vs.is_empty() { if vs.is_empty() {
return Err(io_syntax_error("Missing record label")); return Err(self.syntax_error("Missing record label"));
} }
Value::Record(Record(vs)).wrap() Value::Record(Record(vs)).wrap()
} }
@ -473,9 +477,9 @@ impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for TextReader<'de, 'src, S> {
self.skip()?; self.skip()?;
self.read_dictionary(read_annotations, decode_embedded)? self.read_dictionary(read_annotations, decode_embedded)?
} }
b'>' => return Err(io_syntax_error("Unexpected >")), b'>' => return Err(self.syntax_error("Unexpected >")),
b']' => return Err(io_syntax_error("Unexpected ]")), b']' => return Err(self.syntax_error("Unexpected ]")),
b'}' => return Err(io_syntax_error("Unexpected }")), b'}' => return Err(self.syntax_error("Unexpected }")),
other => { other => {
self.skip()?; self.skip()?;
self.read_raw_symbol(vec![other])? self.read_raw_symbol(vec![other])?
@ -527,7 +531,7 @@ impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for TextReader<'de, 'src, S> {
} => { } => {
self.skip_whitespace(); self.skip_whitespace();
if self.next_byte()? != b':' { if self.next_byte()? != b':' {
return Err(io_syntax_error("Missing expected key/value separator"))?; return Err(self.syntax_error("Missing expected key/value separator"))?;
} }
}, },
_ => (), _ => (),