preserves/implementations/rust/preserves/src/packed/view.rs

568 lines
18 KiB
Rust

use std::borrow::Cow;
use std::fmt::Debug;
use std::io;
use std::marker::PhantomData;
use std::ops::Range;
use std::sync::Arc;
use num_bigint::BigInt;
use crate::BinarySource;
use crate::BytesBinarySource;
use crate::Domain;
use crate::DomainDecode;
use crate::IOValue;
use crate::NoEmbeddedDomainCodec;
use crate::PackedWriter;
use crate::SignedInteger;
use crate::Value;
use crate::ValueClass;
use crate::ValueImpl;
use crate::error;
use crate::error::io_eof;
use crate::reader::NextToken;
use super::constants::Tag;
/// Side table of embedded values that were decoded while a packed buffer was scanned.
///
/// Each entry pairs a byte range of the packed buffer with the `D` decoded from that range.
/// `None` means no entries were recorded.
#[derive(Debug, Clone)]
struct Index<D> {
    /// Recorded `(byte range, decoded value)` pairs, if any.
    embedded: Option<Box<[(Range<usize>, D)]>>,
}
/// A packed byte buffer together with the index of embedded values found in it.
///
/// `Packed` is any byte container (`AsRef<[u8]>`); the `'de` lifetime is carried only through
/// `phantom`.
#[derive(Debug, Clone)]
struct IndexedRepr<'de, Packed: AsRef<[u8]> + 'de, D> {
    /// The encoded bytes.
    packed: Packed,
    /// Embedded values decoded from `packed`.
    index: Index<D>,
    /// Ties the representation to `'de` without storing a reference.
    phantom: PhantomData<&'de ()>,
}
/// A value read in place from a shared packed buffer.
///
/// The view does not own a decoded tree; it records where in the buffer the value (and any
/// annotations preceding it) lives and interprets the bytes on demand.
#[derive(Debug, Clone)]
pub struct View<'de, Packed: AsRef<[u8]> + 'de, D: Domain> {
    /// Shared buffer plus embedded-value index.
    repr: Arc<IndexedRepr<'de, Packed, D>>,
    /// Offset at which scanning for this value started (its first annotation, if it has any).
    annotation_offset: usize,
    /// Byte range of the value itself, after its annotations.
    value_range: Range<usize>,
}
/// Accumulator used while a buffer is being scanned with a decoder for embedded values.
struct IndexInProgress<'dec, D, Dec> {
    /// Embedded values decoded so far, each with the byte range it was decoded from.
    embedded: Vec<(Range<usize>, D)>,
    /// Decoder borrowed for the duration of the scan.
    dec: &'dec mut Dec,
}
/// Cursor that walks over packed bytes one value at a time.
///
/// When `index` is `Some`, embedded values met during the walk are decoded and recorded;
/// when it is `None`, they are only skipped.
struct Indexer<'de, 'dec, D: Domain, Dec: DomainDecode<D>> {
    /// The bytes being scanned.
    packed: &'de [u8],
    /// Current position within `packed`.
    offset: usize,
    /// Optional recorder for embedded values.
    index: Option<IndexInProgress<'dec, D, Dec>>,
}
#[inline(always)]
fn at(packed: &[u8], i: usize) -> io::Result<u8> {
match packed.get(i) {
Some(v) => Ok(*v),
None => Err(io_eof()),
}
}
/// Reads the byte at index `i` of `packed` and converts it to a [`Tag`].
///
/// Fails when `i` is out of range or when the byte is rejected by `Tag::try_from`.
#[inline(always)]
fn tag_at(packed: &[u8], i: usize) -> io::Result<Tag> {
    let byte = at(packed, i)?;
    let tag = Tag::try_from(byte)?;
    Ok(tag)
}
fn varint(packed: &[u8], mut i: usize) -> io::Result<(u64, usize)> {
let mut shift = 0;
let mut acc: u64 = 0;
loop {
let v = at(packed, i)?;
i = i + 1;
if shift == 63 && v > 1 { Err(error::Error::Message("PackedReader length too long".to_string()))? }
acc |= ((v & 0x7f) as u64) << shift;
shift += 7;
if v & 0x80 == 0 { return Ok((acc, i)) }
if shift >= 70 { Err(error::Error::Message("PackedReader length too long".to_string()))? }
}
}
impl<'de, 'dec, D: Domain, Dec: DomainDecode<D>> Indexer<'de, 'dec, D, Dec> {
fn skip_annotations(&mut self) -> io::Result<()> {
loop {
if tag_at(&self.packed, self.offset)? == Tag::Annotation {
self.skip_annotation()?;
} else {
return Ok(());
}
}
}
fn tag(&mut self) -> io::Result<Tag> {
let tag = tag_at(&self.packed, self.offset)?;
Ok(tag)
}
fn varint(&mut self) -> io::Result<u64> {
let (n, i) = varint(&self.packed, self.offset + 1)?;
self.offset = i;
Ok(n)
}
fn skip_annotation(&mut self) -> io::Result<()> {
self.offset += 1;
let saved = self.index.take();
self.skip_value()?;
self.index = saved;
Ok(())
}
fn skip_value(&mut self) -> io::Result<()> {
loop {
match self.tag()? {
Tag::False => self.offset += 1,
Tag::True => self.offset += 1,
Tag::Float => self.offset += 5,
Tag::Double => self.offset += 9,
Tag::End => Err(io::Error::new(io::ErrorKind::InvalidData,
format!("Unexpected end tag at offset {}", self.offset)))?,
Tag::Annotation => {
self.skip_annotation()?;
continue;
}
Tag::Embedded => {
let pos = self.offset;
self.offset += 1;
if let Some(p) = &mut self.index {
let mut r = BytesBinarySource::new(&self.packed[self.offset ..]).into_packed();
let d = p.dec.decode_embedded(&mut r, true)?;
let endpos = self.offset + r.source.index as usize;
p.embedded.push((pos .. endpos, d));
self.offset = endpos;
} else {
continue;
}
}
Tag::SmallInteger(_) => self.offset += 1,
Tag::MediumInteger(n) => self.offset += 1 + n as usize,
Tag::SignedInteger | Tag::ByteString => {
let n = self.varint()?;
self.offset += n as usize;
}
Tag::String | Tag::Symbol => {
let n = self.varint()?;
std::str::from_utf8(&self.packed[self.offset .. self.offset + (n as usize)])
.map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid UTF-8"))?;
self.offset += n as usize;
}
Tag::Record | Tag::Sequence | Tag::Set | Tag::Dictionary => {
self.offset += 1;
while tag_at(&self.packed, self.offset)? != Tag::End {
self.skip_value()?;
}
self.offset += 1;
}
}
return Ok(());
}
}
}
impl<'de, Packed: AsRef<[u8]> + 'de, D: Domain> IndexedRepr<'de, Packed, D> {
    /// Copies the bytes in `range` into a new, owned representation.
    ///
    /// Embedded-index entries overlapping `range` are copied too, with their byte ranges
    /// shifted by `range.start` so that they address the new buffer, which starts at 0.
    /// Entries are clamped to the window, so the shifted ranges never point outside it.
    ///
    /// # Panics
    ///
    /// Panics if `range` is not a valid range of the packed buffer.
    fn trim(&self, range: Range<usize>) -> Arc<IndexedRepr<'static, Box<[u8]>, D>> {
        let packed = self.packed.as_ref()[range.clone()].to_vec().into_boxed_slice();
        let embedded = self.index.embedded.as_ref().and_then(|e| {
            // The index is ordered by offset, so the overlapping entries are contiguous.
            let lo = e.partition_point(|(r, _)| r.end <= range.start);
            let hi = e.partition_point(|(r, _)| r.start < range.end);
            if hi > lo {
                let rebased: Vec<(Range<usize>, D)> = e[lo..hi]
                    .iter()
                    .map(|(r, d)| {
                        let start = r.start.saturating_sub(range.start);
                        // `r.end > range.start` holds for every entry at or after `lo`.
                        let end = r.end.min(range.end) - range.start;
                        (start .. end, d.clone())
                    })
                    .collect();
                Some(rebased.into_boxed_slice())
            } else {
                None
            }
        });
        Arc::new(IndexedRepr {
            packed,
            index: Index { embedded },
            phantom: PhantomData,
        })
    }
}
impl<'de, Packed: AsRef<[u8]> + 'de, D: Domain> View<'de, Packed, D> {
    /// Builds a view of the first value in `packed`.
    ///
    /// Equivalent to [`View::new_offset`] with an offset of 0.
    pub fn new<Dec: DomainDecode<D>>(packed: Packed, dec: Option<&mut Dec>) -> io::Result<Self> {
        Self::new_offset(packed, dec, 0)
    }

    /// Builds a view of the value that starts at `offset` in `packed`.
    ///
    /// The value and any annotations in front of it are scanned once. When `dec` is given,
    /// embedded values met during the scan are decoded with it and recorded in the view's
    /// index.
    ///
    /// # Errors
    ///
    /// Returns an error when the bytes do not form a complete value, including when the
    /// scanned value would extend past the end of `packed`.
    pub fn new_offset<Dec: DomainDecode<D>>(packed: Packed, dec: Option<&mut Dec>, offset: usize) -> io::Result<Self> {
        let mut indexer = Indexer {
            packed: packed.as_ref(),
            offset,
            index: dec.map(|dec| IndexInProgress { embedded: Vec::new(), dec }),
        };
        indexer.skip_annotations()?;
        let value_start = indexer.offset;
        indexer.skip_value()?;
        let value_end = indexer.offset;
        if value_end > packed.as_ref().len() {
            return Err(io_eof());
        }
        // An empty index is stored as `None`.
        let embedded = indexer
            .index
            .and_then(|i| if i.embedded.is_empty() { None } else { Some(i.embedded.into_boxed_slice()) });
        Ok(View {
            repr: Arc::new(IndexedRepr {
                packed,
                index: Index { embedded },
                phantom: PhantomData,
            }),
            annotation_offset: offset,
            value_range: (value_start .. value_end),
        })
    }

    /// Builds a view of the value at `offset` that shares the existing `repr`.
    ///
    /// No embedded values are indexed by this scan; the view reuses the index in `repr`.
    fn inner_new(repr: &Arc<IndexedRepr<'de, Packed, D>>, offset: usize) -> io::Result<Self> {
        let mut indexer: Indexer<D, NoEmbeddedDomainCodec> = Indexer {
            packed: repr.packed.as_ref(),
            offset,
            index: None,
        };
        indexer.skip_annotations()?;
        let value_start = indexer.offset;
        indexer.skip_value()?;
        let value_end = indexer.offset;
        Ok(View {
            repr: Arc::clone(repr),
            annotation_offset: offset,
            value_range: (value_start .. value_end),
        })
    }

    /// Tag of the viewed value. Panics if the tag byte cannot be read.
    #[inline(always)]
    fn tag(&self) -> Tag {
        tag_at(self.repr.packed.as_ref(), self.value_range.start).unwrap()
    }

    /// Varint following the value's tag byte, with the index of the byte after the varint.
    /// Panics if it cannot be read.
    #[inline(always)]
    fn varint(&self) -> (u64, usize) {
        varint(self.repr.packed.as_ref(), self.value_range.start + 1).unwrap()
    }

    /// Returns the `len` bytes of the packed buffer that start at `offset`.
    #[inline(always)]
    fn sub(&self, offset: usize, len: usize) -> &[u8] {
        let packed = self.repr.packed.as_ref();
        &packed[offset .. offset + len]
    }

    /// Payload of a length-prefixed atom: the bytes counted by the value's varint.
    #[inline(always)]
    fn atom_chunk(&self) -> &[u8] {
        let (n, i) = self.varint();
        self.sub(i, n as usize)
    }

    /// Decodes the `len` bytes at `offset` as a big-endian two's-complement integer.
    ///
    /// Values fitting in 128 bits use the primitive representation; larger ones use
    /// `BigInt`. An empty payload decodes as zero.
    #[inline(always)]
    fn signed_integer(&self, offset: usize, len: usize) -> SignedInteger {
        let bs = self.sub(offset, len);
        let negative = bs.first().map_or(false, |b| b & 0x80 != 0);
        if !negative {
            // Positive or zero: leading zero bytes carry no information.
            let digits = &bs[bs.iter().take_while(|b| **b == 0).count() ..];
            if digits.len() <= 16 {
                let v = digits.iter().fold(0u128, |acc, b| acc << 8 | u128::from(*b));
                SignedInteger::from(v)
            } else {
                SignedInteger::from(Cow::Owned(BigInt::from_bytes_be(num_bigint::Sign::Plus, digits)))
            }
        } else {
            // Negative: a leading 0xff is redundant only while the byte after it still has
            // its sign bit set. Dropping a 0xff that is followed by a byte with a clear high
            // bit would let a 17-byte negative pass for a positive 16-byte value.
            let mut i = 0;
            while i + 1 < len && bs[i] == 0xff && bs[i + 1] & 0x80 != 0 {
                i += 1;
            }
            if len - i <= 16 {
                // Starting from -1 sign-extends payloads shorter than 16 bytes.
                let v = bs[i ..].iter().fold(-1i128, |acc, b| acc << 8 | i128::from(*b));
                SignedInteger::from(v)
            } else {
                SignedInteger::from(Cow::Owned(BigInt::from_signed_bytes_be(bs)))
            }
        }
    }
}
impl<'de, Packed: AsRef<[u8]> + 'de, D: Domain> ValueImpl<D> for View<'de, Packed, D> {
    /// Writes this value, annotations included, to `w`.
    ///
    /// When `w` offers the `"packed"` specialization, the viewed bytes are copied through
    /// unchanged, except that every indexed embedded value inside this view is re-encoded
    /// with `enc`. Any other writer goes through `crate::write_value`.
    fn write(&self, w: &mut dyn crate::Writer, enc: &mut dyn crate::DomainEncode<D>) -> io::Result<()> {
        match w.specialized() {
            Some(("packed", mut w)) => {
                let packed = self.repr.packed.as_ref();
                let mut lo = self.annotation_offset;
                if let Some(embedded) = &self.repr.index.embedded {
                    // The index is shared with every view of the same buffer and is ordered
                    // by offset; only the entries inside this view's bytes are relevant.
                    let first = embedded.partition_point(|(r, _)| r.end <= self.annotation_offset);
                    let last = embedded.partition_point(|(r, _)| r.start < self.value_range.end);
                    for (range, d) in embedded[first .. last].iter() {
                        w.write_all(&packed[lo .. range.start])?;
                        lo = range.end;
                        enc.encode_embedded(&mut PackedWriter::new(&mut w), d)?;
                    }
                }
                w.write_all(&packed[lo .. self.value_range.end])
            }
            _ => crate::write_value(w, self, enc)
        }
    }

    /// Copies the bytes of this view (annotations included) into a new owned buffer and
    /// returns a value viewing that copy.
    fn value_clone(&self) -> Value<D> {
        let shift = self.annotation_offset;
        Value::new(View {
            repr: self.repr.trim(self.annotation_offset .. self.value_range.end),
            annotation_offset: 0,
            value_range: (self.value_range.start - shift .. self.value_range.end - shift),
        })
    }

    /// Class of the viewed value, derived from its tag.
    fn value_class(&self) -> ValueClass {
        match self.tag().into() {
            Some(NextToken::Annotation) | None => unreachable!(),
            Some(NextToken::Value(v)) => v,
        }
    }

    /// The boolean this value holds, if its tag is `True` or `False`.
    fn as_boolean(&self) -> Option<bool> {
        match self.tag() {
            Tag::False => Some(false),
            Tag::True => Some(true),
            _ => None,
        }
    }

    /// The big-endian `f32` stored in the four bytes after a `Float` tag.
    fn as_float(&self) -> Option<f32> {
        match self.tag() {
            Tag::Float => Some(f32::from_be_bytes(self.sub(self.value_range.start + 1, 4).try_into().unwrap())),
            _ => None,
        }
    }

    /// The big-endian `f64` stored in the eight bytes after a `Double` tag.
    fn as_double(&self) -> Option<f64> {
        match self.tag() {
            Tag::Double => Some(f64::from_be_bytes(self.sub(self.value_range.start + 1, 8).try_into().unwrap())),
            _ => None,
        }
    }

    /// The integer this value holds, for any of the three integer tags.
    fn as_signed_integer(&self) -> Option<Cow<'_, SignedInteger>> {
        match self.tag() {
            Tag::SmallInteger(v) => Some(Cow::Owned(v.into())),
            Tag::MediumInteger(n) => Some(Cow::Owned(self.signed_integer(self.value_range.start + 1, n as usize))),
            Tag::SignedInteger => {
                let (n, i) = self.varint();
                Some(Cow::Owned(self.signed_integer(i, n as usize)))
            }
            _ => None,
        }
    }

    /// The string payload, borrowed from the packed buffer, if the tag is `String`.
    fn as_string(&self) -> Option<Cow<'_, str>> {
        match self.tag() {
            Tag::String => Some(Cow::Borrowed(unsafe {
                // SAFETY: we already checked in the View constructor
                std::str::from_utf8_unchecked(self.atom_chunk())
            })),
            _ => None,
        }
    }

    /// The byte-string payload, borrowed from the packed buffer, if the tag is `ByteString`.
    fn as_bytestring(&self) -> Option<Cow<'_, [u8]>> {
        match self.tag() {
            Tag::ByteString => Some(Cow::Borrowed(self.atom_chunk())),
            _ => None,
        }
    }

    /// The symbol text, borrowed from the packed buffer, if the tag is `Symbol`.
    fn as_symbol(&self) -> Option<Cow<'_, str>> {
        match self.tag() {
            Tag::Symbol => Some(Cow::Borrowed(unsafe {
                // SAFETY: we already checked in the View constructor
                std::str::from_utf8_unchecked(self.atom_chunk())
            })),
            _ => None,
        }
    }

    /// Whether the tag is `Record`.
    fn is_record(&self) -> bool {
        self.tag() == Tag::Record
    }

    /// The record's label: the first value after the record tag.
    ///
    /// # Panics
    ///
    /// Panics if this value is not a record.
    fn label(&self) -> Value<D> {
        if !self.is_record() { panic!("Not a record") }
        Value::new(View::inner_new(&self.repr, self.value_range.start + 1).unwrap())
    }

    /// Whether the tag is `Sequence`.
    fn is_sequence(&self) -> bool {
        self.tag() == Tag::Sequence
    }

    /// Number of elements (fields for a record, entries for a dictionary), found by
    /// walking the contents.
    ///
    /// # Panics
    ///
    /// Panics if this value is not a record, sequence, set or dictionary.
    fn len(&self) -> usize {
        match self.tag() {
            Tag::Record | Tag::Sequence | Tag::Set => self.iter().count(),
            Tag::Dictionary => self.entries().count(),
            _ => panic!("Has no length"),
        }
    }

    /// The `i`th element produced by [`Self::iter`].
    ///
    /// # Panics
    ///
    /// Panics if `i` is out of range or this value is not iterable.
    fn index(&self, i: usize) -> Value<D> {
        self.iter().nth(i).unwrap()
    }

    /// Iterates over the elements of a sequence or set, or over the fields of a record
    /// (the label is skipped).
    ///
    /// # Panics
    ///
    /// Panics if this value is not a record, sequence or set.
    fn iter(&self) -> Box<dyn Iterator<Item = Value<D>> + '_> {
        let mut i = Box::new(ViewIterator::inner_new(&self.repr, self.value_range.start + 1));
        match self.tag() {
            // The first item of a record is its label, which is not a field.
            Tag::Record => { i.next(); () }
            Tag::Sequence => (),
            Tag::Set => (),
            _ => panic!("Not iterable"),
        }
        i
    }

    /// Whether the tag is `Set`.
    fn is_set(&self) -> bool {
        self.tag() == Tag::Set
    }

    /// Whether any element produced by [`Self::iter`] equals `v`.
    fn has(&self, v: &Value<D>) -> bool {
        self.iter().any(|e| v == &e)
    }

    /// Whether the tag is `Dictionary`.
    fn is_dictionary(&self) -> bool {
        self.tag() == Tag::Dictionary
    }

    /// The value stored under the first key equal to `k`, found by a linear scan.
    ///
    /// # Panics
    ///
    /// Panics if this value is not a dictionary.
    fn get(&self, k: &Value<D>) -> Option<Value<D>> {
        self.entries().find(|(kk, _)| k == kk).map(|(_, vv)| vv)
    }

    /// Iterates over the dictionary's key/value pairs in encoded order.
    ///
    /// # Panics
    ///
    /// Panics if this value is not a dictionary.
    fn entries(&self) -> Box<dyn Iterator<Item = (Value<D>, Value<D>)> + '_> {
        if !self.is_dictionary() { panic!("Not a dictionary") }
        Box::new(DictionaryAdapter(ViewIterator::inner_new(&self.repr, self.value_range.start + 1)))
    }

    /// The embedded value, if the tag is `Embedded`.
    ///
    /// With an index present the decoded value is borrowed from it; otherwise the payload
    /// is decoded on the spot with `D::Decode::default()`.
    ///
    /// # Panics
    ///
    /// Panics if an index exists but holds no entry for this value, or if on-the-spot
    /// decoding fails.
    fn as_embedded(&self) -> Option<Cow<'_, D>> {
        if self.tag() != Tag::Embedded { return None }
        Some(match self.repr.index.embedded.as_ref() {
            Some(e) => {
                // Entries are ordered by offset, so the one starting at this value can be
                // located by binary search.
                let ix = e
                    .binary_search_by_key(&self.value_range.start, |(r, _)| r.start)
                    .expect("embedded value missing from index");
                Cow::Borrowed(&e[ix].1)
            }
            None => {
                let mut r = BytesBinarySource::new(
                    &self.repr.packed.as_ref()[self.value_range.start + 1 .. self.value_range.end])
                    .into_packed();
                let d = D::Decode::default().decode_embedded(&mut r, true).unwrap();
                Cow::Owned(d)
            }
        })
    }

    /// The annotations in front of this value, or `None` when it has none.
    fn annotations(&self) -> Option<Cow<'_, [IOValue]>> {
        if self.value_range.start == self.annotation_offset {
            None
        } else {
            // Annotations are read through a borrowed representation without an index.
            let repr = Arc::new(IndexedRepr {
                packed: self.repr.packed.as_ref(),
                index: Index { embedded: None },
                phantom: PhantomData,
            });
            let anns: Vec<IOValue> = AnnotationAdapter(ViewIterator::inner_new(&repr, self.annotation_offset))
                .map(|ann| ann.into())
                .collect();
            Some(Cow::Owned(anns))
        }
    }
}
/// Iterator state for reading values stored back-to-back in one byte buffer.
pub struct ViewStream<'de, 'dec, D: Domain, Dec: DomainDecode<D> = <D as Domain>::Decode> {
    /// The buffer holding the encoded values.
    buf: &'de [u8],
    /// Optional decoder passed on to each view's construction.
    dec: Option<&'dec mut Dec>,
    /// Offset at which the next value starts.
    offset: usize,
    /// Marker for the otherwise unused `D` parameter.
    phantom: PhantomData<&'de D>,
}
impl<'de, 'dec, D: Domain, Dec: DomainDecode<D>> ViewStream<'de, 'dec, D, Dec> {
    /// Creates a stream positioned at the start of `buf`.
    ///
    /// `dec`, when given, is handed to every view the stream constructs.
    pub fn new(buf: &'de [u8], dec: Option<&'dec mut Dec>) -> Self {
        Self { buf, dec, offset: 0, phantom: PhantomData }
    }
}
impl<'de, 'dec, D: Domain, Dec: DomainDecode<D>> Iterator for ViewStream<'de, 'dec, D, Dec> {
    type Item = io::Result<Value<D>>;

    /// Yields the next value in the buffer, or `None` once the buffer is exhausted.
    ///
    /// A value that fails to parse is reported once as `Some(Err(..))`. The stream then
    /// ends, because the offset cannot advance past bytes that did not parse and retrying
    /// would return the same error indefinitely.
    fn next(&mut self) -> Option<Self::Item> {
        if self.offset >= self.buf.len() {
            return None;
        }
        match View::new_offset(self.buf, self.dec.as_mut().map(|dec| &mut *dec), self.offset) {
            Ok(v) => {
                self.offset = v.value_range.end;
                Some(Ok(Value::new(v)))
            }
            Err(e) => {
                // Mark the stream as exhausted so the next call returns `None`.
                self.offset = self.buf.len();
                Some(Err(e))
            }
        }
    }
}
/// Cursor over consecutive values stored in a shared representation.
pub struct ViewIterator<Repr> {
    /// The representation being read.
    repr: Arc<Repr>,
    /// Offset at which the next value starts.
    offset: usize,
}
impl<'de, Packed: AsRef<[u8]> + 'de, D: Domain> ViewIterator<IndexedRepr<'de, Packed, D>> {
    /// Creates an iterator that shares `repr` and starts reading at `offset`.
    pub fn inner_new(repr: &Arc<IndexedRepr<'de, Packed, D>>, offset: usize) -> Self {
        let repr = Arc::clone(repr);
        Self { repr, offset }
    }
}
impl<'de, Packed: AsRef<[u8]> + 'de, D: Domain> Iterator for ViewIterator<IndexedRepr<'de, Packed, D>> {
    type Item = Value<D>;

    /// Yields the value at the current offset and moves past it; stops at an end tag.
    ///
    /// # Panics
    ///
    /// Panics if the bytes at the current offset are not an end tag and do not parse as a
    /// value.
    fn next(&mut self) -> Option<Self::Item> {
        if matches!(tag_at(self.repr.packed.as_ref(), self.offset), Ok(Tag::End)) {
            return None;
        }
        let view = View::inner_new(&self.repr, self.offset).unwrap();
        self.offset = view.value_range.end;
        Some(Value::new(view))
    }
}
/// Wraps a [`ViewIterator`] so that consecutive values are yielded as `(key, value)` pairs.
pub struct DictionaryAdapter<Repr>(pub ViewIterator<Repr>);
impl<'de, Packed: AsRef<[u8]> + 'de, D: Domain> Iterator for DictionaryAdapter<IndexedRepr<'de, Packed, D>> {
    type Item = (Value<D>, Value<D>);

    /// Takes two values from the wrapped iterator and returns them as a pair; yields `None`
    /// as soon as either one is missing.
    fn next(&mut self) -> Option<Self::Item> {
        let key = self.0.next()?;
        self.0.next().map(|value| (key, value))
    }
}
/// Wraps a [`ViewIterator`] so that it yields only the values that follow annotation tags.
pub struct AnnotationAdapter<Repr>(pub ViewIterator<Repr>);
impl<'de, Packed: AsRef<[u8]> + 'de, D: Domain> Iterator for AnnotationAdapter<IndexedRepr<'de, Packed, D>> {
    type Item = Value<D>;

    /// While the byte at the current offset is an annotation tag, steps over the tag and
    /// yields the value after it; anything else (including a read failure) ends iteration.
    fn next(&mut self) -> Option<Self::Item> {
        match tag_at(self.0.repr.packed.as_ref(), self.0.offset) {
            Ok(Tag::Annotation) => {
                self.0.offset += 1;
                self.0.next()
            }
            _ => None,
        }
    }
}