use std::borrow::Borrow;
use std::borrow::Cow;
use std::fmt::Debug;
use std::hash::Hash;
use std::hash::Hasher;
use std::io;
use std::marker::PhantomData;
use std::ops::Range;
use std::sync::Arc;

use num_bigint::BigInt;

use crate::BinarySource;
use crate::BytesBinarySource;
use crate::Domain;
use crate::DomainDecode;
use crate::DomainEncode;
use crate::NoEmbeddedDomainCodec;
use crate::PackedWriter;
use crate::SignedInteger;
use crate::ValueClass;
use crate::ValueImpl;
use crate::Writer;
use crate::error;
use crate::error::io_eof;
use crate::reader::NextToken;
use crate::value_eq;
use crate::write_value;

use super::constants::Tag;

/// Table of embedded values discovered while indexing a packed buffer.
///
/// Each entry pairs the byte range an embedded value occupies (starting at its
/// `Tag::Embedded` byte) with the decoded value. Entries are appended in the
/// order the indexer meets them, i.e. by increasing offset. `None` means no
/// embedded value was indexed.
#[derive(Debug)]
struct Index {
    embedded: Option, D)]>>,
}

/// Packed bytes together with the embedded-value index built over them.
#[derive(Debug)]
struct IndexedRepr<'de, D> {
    packed: Cow<'de, [u8]>,
    index: Index,
    phantom: PhantomData<&'de ()>,
}

/// A value seen as a window onto shared packed bytes.
///
/// `annotation_offset` is where the value's annotations (if any) begin and
/// `value_range` spans the value proper; when the two starts coincide the
/// value carries no annotations.
pub struct View<'de, D: Domain<'de>> {
    repr: Arc>,
    annotation_offset: usize,
    value_range: Range,
}

/// Transparent newtype around [`IOViewImpl`].
#[repr(transparent)]
pub struct IOView<'de>(IOViewImpl<'de>);

/// A [`View`] whose embedded values are themselves [`IOView`]s.
pub type IOViewImpl<'de> = View<'de, IOView<'de>>;

/// Embedded values collected so far, plus the decoder used to produce them.
struct IndexInProgress<'dec, D, Dec> {
    embedded: Vec<(Range, D)>,
    dec: &'dec mut Dec,
}

/// Cursor that walks packed bytes, validating structure and (optionally)
/// recording embedded values. `index == None` disables recording.
struct Indexer<'de, 'dec, D: Domain<'de>, Dec: DomainDecode<'de, D>> {
    packed: &'de [u8],
    offset: usize,
    index: Option>,
}

/// Returns the byte at `i`, or an EOF error when `i` is out of bounds.
#[inline(always)]
fn at(packed: &[u8], i: usize) -> io::Result {
    match packed.get(i) {
        Some(v) => Ok(*v),
        None => Err(io_eof()),
    }
}

/// Reads the byte at `i` and converts it to a [`Tag`].
#[inline(always)]
fn tag_at(packed: &[u8], i: usize) -> io::Result {
    Ok(Tag::try_from(at(packed, i)?)?)
}

/// Decodes a base-128 varint (7 payload bits per byte, least significant
/// group first, high bit set on every byte but the last) starting at `i`.
///
/// Returns the decoded value and the index of the first byte after it.
/// Fails with EOF if the buffer ends early, or with a "length too long"
/// error if the encoding does not fit in 64 bits.
fn varint(packed: &[u8], mut i: usize) -> io::Result<(u64, usize)> {
    let mut shift = 0;
    let mut acc: u64 = 0;
    loop {
        let v = at(packed, i)?;
        i = i + 1;
        // The tenth byte may only contribute the single remaining bit.
        if shift == 63 && v > 1 {
            Err(error::Error::Message("PackedReader length too long".to_string()))?
        }
        acc |= ((v & 0x7f) as u64) << shift;
        shift += 7;
        if v & 0x80 == 0 {
            return Ok((acc, i))
        }
        // Defensive: never shift by 64 or more bits.
        if shift >= 70 {
            Err(error::Error::Message("PackedReader length too long".to_string()))?
        }
    }
}

impl<'de, 'dec, D: Domain<'de>, Dec: DomainDecode<'de, D>> Indexer<'de, 'dec, D, Dec> {
    /// Skips every annotation at the cursor, stopping at the first
    /// non-annotation tag.
    fn skip_annotations(&mut self) -> io::Result<()> {
        while tag_at(&self.packed, self.offset)? == Tag::Annotation {
            self.skip_annotation()?;
        }
        Ok(())
    }

    /// Peeks at the tag under the cursor without moving it.
    fn tag(&mut self) -> io::Result {
        let tag = tag_at(&self.packed, self.offset)?;
        Ok(tag)
    }

    /// Reads the varint that follows the tag byte under the cursor and leaves
    /// the cursor just past it.
    fn varint(&mut self) -> io::Result {
        let (n, i) = varint(&self.packed, self.offset + 1)?;
        self.offset = i;
        Ok(n)
    }

    /// Moves the cursor forward by `n` bytes, reporting EOF instead of
    /// overflowing when `n` cannot be represented or added.
    fn advance(&mut self, n: u64) -> io::Result<()> {
        let n = usize::try_from(n).map_err(|_| io_eof())?;
        self.offset = self.offset.checked_add(n).ok_or_else(|| io_eof())?;
        Ok(())
    }

    /// Skips one annotation: the annotation tag plus the annotating value.
    ///
    /// Indexing is suspended while inside the annotation, so embedded values
    /// that occur there are not recorded.
    fn skip_annotation(&mut self) -> io::Result<()> {
        self.offset += 1;
        let saved = self.index.take();
        self.skip_value()?;
        self.index = saved;
        Ok(())
    }

    /// Skips one complete value (including any annotations in front of it),
    /// validating UTF-8 in strings and symbols along the way.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` for a stray end tag or invalid UTF-8, EOF when a
    /// tag or a string/symbol payload lies beyond the buffer, and whatever
    /// the embedded decoder reports.
    fn skip_value(&mut self) -> io::Result<()> {
        loop {
            match self.tag()? {
                Tag::False => self.offset += 1,
                Tag::True => self.offset += 1,
                Tag::Float => self.offset += 5,
                Tag::Double => self.offset += 9,
                Tag::End => Err(io::Error::new(io::ErrorKind::InvalidData, format!("Unexpected end tag at offset {}", self.offset)))?,
                Tag::Annotation => {
                    self.skip_annotation()?;
                    continue;
                }
                Tag::Embedded => {
                    let pos = self.offset;
                    self.offset += 1;
                    if let Some(p) = &mut self.index {
                        let mut r = BytesBinarySource::new(&self.packed[self.offset ..]).into_packed();
                        let d = p.dec.decode_embedded(&mut r, true)?;
                        let endpos = self.offset + r.source.index as usize;
                        p.embedded.push((pos .. endpos, d));
                        self.offset = endpos;
                    } else {
                        // Not indexing: the embedded payload is skipped as an
                        // ordinary value on the next loop iteration.
                        continue;
                    }
                }
                Tag::SmallInteger(_) => self.offset += 1,
                Tag::MediumInteger(n) => self.offset += 1 + n as usize,
                Tag::SignedInteger | Tag::ByteString => {
                    let n = self.varint()?;
                    self.advance(n)?;
                }
                Tag::String | Tag::Symbol => {
                    let n = self.varint()?;
                    let start = self.offset;
                    self.advance(n)?;
                    // Checked slice: a truncated payload is an EOF error, not a panic.
                    let text = self.packed.get(start .. self.offset).ok_or_else(|| io_eof())?;
                    std::str::from_utf8(text)
                        .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid UTF-8"))?;
                }
                Tag::Record | Tag::Sequence | Tag::Set | Tag::Dictionary => {
                    self.offset += 1;
                    while tag_at(&self.packed, self.offset)? != Tag::End {
                        self.skip_value()?;
                    }
                    self.offset += 1;
                }
            }
            return Ok(());
        }
    }
}

impl<'de, D: Domain<'de>> IndexedRepr<'de, D> {
    /// Copies `range` of the packed bytes into a fresh, owned representation.
    ///
    /// Index entries that start inside `range` are carried over with their
    /// offsets rebased so that they are relative to the new buffer, which
    /// begins at `range.start` of the old one.
    fn trim(&self, range: Range) -> Arc> {
        let packed = self.packed.as_ref()[range.clone()].to_vec().into();
        let embedded = self.index.embedded.as_ref().and_then(|e| {
            let lo = e.partition_point(|(r, _)| r.start < range.start);
            let hi = e.partition_point(|(r, _)| r.start < range.end);
            if hi > lo {
                Some(e[lo..hi]
                    .iter()
                    .map(|(r, d)| (r.start - range.start .. r.end - range.start, d.clone()))
                    .collect::<Vec<_>>()
                    .into_boxed_slice())
            } else {
                None
            }
        });
        Arc::new(IndexedRepr {
            packed,
            index: Index { embedded },
            phantom: PhantomData,
        })
    }
}

impl<'de, D: Domain<'de>> View<'de, D> {
    /// Builds a view of the first value in `packed`; see [`View::new_offset`].
    pub fn new>(packed: Cow<'de, [u8]>, dec: Option<&mut Dec>) -> io::Result {
        Self::new_offset(packed, dec, 0)
    }

    // pub fn trim(&self) -> View<'static, D> {
    //     View {
    //         repr: self.repr.trim(self.annotation_offset .. self.value_range.end),
    //         annotation_offset: 0,
    //         value_range: (self.value_range.start - self.annotation_offset
    //                       .. self.value_range.end - self.annotation_offset),
    //     }
    // }

    /// Builds a view of the value that starts at `offset` in `packed`.
    ///
    /// The value is walked once to validate it and find where it ends. When
    /// `dec` is supplied, embedded values met outside annotations are decoded
    /// with it and recorded in the index.
    ///
    /// # Errors
    ///
    /// Propagates any error from the walk and returns EOF if the value claims
    /// to extend past the end of `packed`.
    pub fn new_offset>(packed: Cow<'de, [u8]>, dec: Option<&mut Dec>, offset: usize) -> io::Result {
        let mut indexer = Indexer {
            packed: packed.as_ref(),
            offset,
            index: dec.map(|dec| IndexInProgress { embedded: Vec::new(), dec }),
        };
        indexer.skip_annotations()?;
        let value_start = indexer.offset;
        indexer.skip_value()?;
        let value_end = indexer.offset;
        // Fixed-width skips are not bounds-checked individually; check once here.
        if value_end > packed.as_ref().len() {
            Err(io_eof())?
        }
        let embedded = indexer.index.and_then(
            |i| if i.embedded.is_empty() { None } else { Some(i.embedded.into_boxed_slice()) });
        Ok(View {
            repr: Arc::new(IndexedRepr {
                packed,
                index: Index { embedded },
                phantom: PhantomData,
            }),
            annotation_offset: offset,
            value_range: (value_start .. value_end),
        })
    }

    /// Builds a view of the value at `offset` inside an existing
    /// representation, sharing `repr` and recording no embedded values.
    fn inner_new(repr: &Arc>, offset: usize) -> io::Result {
        let mut indexer: Indexer = Indexer {
            packed: repr.packed.as_ref(),
            offset,
            index: None,
        };
        indexer.skip_annotations()?;
        let value_start = indexer.offset;
        indexer.skip_value()?;
        let value_end = indexer.offset;
        Ok(View {
            repr: Arc::clone(repr),
            annotation_offset: offset,
            value_range: (value_start .. value_end),
        })
    }

    /// Tag of the viewed value. Panics if the byte is missing or not a tag.
    #[inline(always)]
    fn tag(&self) -> Tag {
        tag_at(self.repr.packed.as_ref(), self.value_range.start).unwrap()
    }

    /// Varint following the value's tag byte, plus the index just past it.
    #[inline(always)]
    fn varint(&self) -> (u64, usize) {
        varint(self.repr.packed.as_ref(), self.value_range.start + 1).unwrap()
    }

    /// Borrows `len` packed bytes starting at `offset`.
    #[inline(always)]
    fn sub(&self, offset: usize, len: usize) -> &[u8] {
        let packed = self.repr.packed.as_ref();
        &packed[offset .. offset + len]
    }

    /// Payload of a length-prefixed atom: the bytes that follow the varint.
    #[inline(always)]
    fn atom_chunk(&self) -> &[u8] {
        let (n, i) = self.varint();
        self.sub(i, n as usize)
    }

    /// Interprets `len` bytes at `offset` as a big-endian two's-complement
    /// integer.
    ///
    /// Values that fit in 128 bits use the machine-integer representation;
    /// anything wider goes through `BigInt`. An empty byte run is read as
    /// zero.
    #[inline(always)]
    fn signed_integer(&self, offset: usize, len: usize) -> SignedInteger {
        let bs = self.sub(offset, len);
        let negative = match bs.first() {
            Some(b) => (*b & 0x80) != 0,
            None => return SignedInteger::from(0u128),
        };
        if !negative {
            // Positive or zero.
            let skip = bs.iter().take_while(|b| **b == 0).count();
            let digits = &bs[skip..];
            if digits.len() <= 16 {
                let mut v: u128 = 0;
                for b in digits {
                    v = v << 8 | (*b as u128);
                }
                SignedInteger::from(v)
            } else {
                SignedInteger::from(Cow::Owned(BigInt::from_bytes_be(num_bigint::Sign::Plus, digits)))
            }
        } else {
            // Negative.
            let skip = bs.iter().take_while(|b| **b == 0xff).count();
            let digits = &bs[skip..];
            // Sixteen remaining bytes only fit in an i128 when the first of
            // them still carries the sign bit; otherwise the value needs a
            // 17th (sign) byte and must go through BigInt.
            let fits = digits.len() < 16 || (digits.len() == 16 && (digits[0] & 0x80) != 0);
            if fits {
                let mut v: i128 = -1;
                for b in digits {
                    v = v << 8 | (*b as i128);
                }
                SignedInteger::from(v)
            } else {
                SignedInteger::from(Cow::Owned(BigInt::from_signed_bytes_be(bs)))
            }
        }
    }
}

impl<'de, D: Domain<'de>> Domain<'de> for View<'de, D> {
    type Decode = NoEmbeddedDomainCodec;
    type Encode = NoEmbeddedDomainCodec;
}

impl<'de, D: Domain<'de>> Clone for View<'de, D> {
    /// Cheap clone: bumps the shared representation's reference count.
    fn clone(&self) -> Self {
        View {
            repr: self.repr.clone(),
            annotation_offset: self.annotation_offset,
            value_range: self.value_range.clone(),
        }
    }
}

impl<'de, D: Domain<'de>> ValueImpl<'de> for View<'de, D> {
    type Handle = Self;
    type Embedded = D;
    type Mapped> = View<'de, E>;
    type Items<'a> = ViewIterator> where Self: 'a;
    type Entries<'a> = DictionaryAdapter> where Self: 'a;
    type IOEmbedded = IOView<'de>;

    fn wrap(self) -> Self::Handle {
        self
    }

    /// Writes this value, annotations included.
    ///
    /// When `w` is the "packed" specialization the stored bytes are copied
    /// through directly, except that each indexed embedded value inside this
    /// view's byte range is re-encoded with `enc`. Any other writer goes
    /// through `write_value`.
    fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> {
        match w.specialized() {
            Some(("packed", mut w)) => {
                let packed = self.repr.packed.as_ref();
                let mut lo = self.annotation_offset;
                if let Some(embedded) = &self.repr.index.embedded {
                    // The index covers the whole shared buffer; only entries
                    // that start inside this view's bytes belong to it.
                    let first = embedded.partition_point(|(r, _)| r.start < self.annotation_offset);
                    let last = embedded.partition_point(|(r, _)| r.start < self.value_range.end);
                    for (range, d) in embedded[first..last].iter() {
                        w.write_all(&packed[lo .. range.start])?;
                        lo = range.end;
                        enc.encode_embedded(&mut PackedWriter::new(&mut w), d)?;
                    }
                }
                w.write_all(&packed[lo .. self.value_range.end])
            }
            _ => write_value(w, self, enc)
        }
    }

    fn value_class(&self) -> ValueClass {
        match self.tag().into() {
            Some(NextToken::Annotation) | None => unreachable!(),
            Some(NextToken::Value(v)) => v,
        }
    }

    fn as_boolean(&self) -> Option {
        match self.tag() {
            Tag::False => Some(false),
            Tag::True => Some(true),
            _ => None,
        }
    }

    fn as_float(&self) -> Option {
        match self.tag() {
            Tag::Float => Some(f32::from_be_bytes(self.sub(self.value_range.start + 1, 4).try_into().unwrap())),
            _ => None,
        }
    }

    fn as_double(&self) -> Option {
        match self.tag() {
            Tag::Double => Some(f64::from_be_bytes(self.sub(self.value_range.start + 1, 8).try_into().unwrap())),
            _ => None,
        }
    }

    fn as_signed_integer(&self) -> Option> {
        match self.tag() {
            Tag::SmallInteger(v) => Some(Cow::Owned(v.into())),
            Tag::MediumInteger(n) => Some(Cow::Owned(self.signed_integer(self.value_range.start + 1, n as usize))),
            Tag::SignedInteger => {
                let (n, i) = self.varint();
                Some(Cow::Owned(self.signed_integer(i, n as usize)))
            }
            _ => None,
        }
    }

    fn as_string(&self) -> Option> {
        match self.tag() {
            Tag::String => Some(Cow::Borrowed(unsafe {
                // SAFETY: we already checked in the View constructor
                std::str::from_utf8_unchecked(self.atom_chunk())
            })),
            _ => None,
        }
    }

    fn as_bytestring(&self) -> Option> {
        match self.tag() {
            Tag::ByteString => Some(Cow::Borrowed(self.atom_chunk())),
            _ => None,
        }
    }

    fn as_symbol(&self) -> Option> {
        match self.tag() {
            Tag::Symbol => Some(Cow::Borrowed(unsafe {
                // SAFETY: we already checked in the View constructor
                std::str::from_utf8_unchecked(self.atom_chunk())
            })),
            _ => None,
        }
    }

    fn is_record(&self) -> bool {
        self.tag() == Tag::Record
    }

    /// Label of a record: the first value after the record tag.
    ///
    /// # Panics
    ///
    /// Panics with "Not a record" on any other kind of value.
    fn label(&self) -> Self::Handle {
        if !self.is_record() {
            panic!("Not a record")
        }
        View::inner_new(&self.repr, self.value_range.start + 1).unwrap()
    }

    fn is_sequence(&self) -> bool {
        self.tag() == Tag::Sequence
    }

    /// Number of fields/items/entries, found by walking the children.
    ///
    /// # Panics
    ///
    /// Panics with "Has no length" for non-compound values.
    fn len(&self) -> usize {
        match self.tag() {
            Tag::Record | Tag::Sequence | Tag::Set => self.iter().count(),
            Tag::Dictionary => self.entries().count(),
            _ => panic!("Has no length"),
        }
    }

    /// The `i`th child, found by walking from the start. Panics if absent.
    fn index(&self, i: usize) -> Self::Handle {
        self.iter().nth(i).unwrap()
    }

    /// Iterates the children of a record (label excluded), sequence or set.
    ///
    /// # Panics
    ///
    /// Panics with "Not iterable" for any other kind of value.
    fn iter(&self) -> Self::Items<'_> {
        let mut i = ViewIterator::inner_new(&self.repr, self.value_range.start + 1);
        match self.tag() {
            Tag::Record => {
                // Step over the label so that only the fields are yielded.
                i.next();
                ()
            }
            Tag::Sequence => (),
            Tag::Set => (),
            _ => panic!("Not iterable"),
        }
        i
    }

    fn is_set(&self) -> bool {
        self.tag() == Tag::Set
    }

    /// Linear membership test using `value_eq`.
    fn has>(&self, v: &E::Handle) -> bool {
        self.iter().any(|e| value_eq(v.borrow(), &e))
    }

    fn is_dictionary(&self) -> bool {
        self.tag() == Tag::Dictionary
    }

    /// Linear key lookup using `value_eq`; returns the first matching value.
    fn get>(&self, k: &K::Handle) -> Option {
        for (kk, vv) in self.entries() {
            if value_eq(k.borrow(), &kk) {
                return Some(vv);
            }
        }
        None
    }

    /// Iterates the key/value pairs of a dictionary.
    ///
    /// # Panics
    ///
    /// Panics with "Not a dictionary" for any other kind of value.
    fn entries(&self) -> Self::Entries<'_> {
        if !self.is_dictionary() {
            panic!("Not a dictionary")
        }
        DictionaryAdapter(ViewIterator::inner_new(&self.repr, self.value_range.start + 1))
    }

    /// The embedded value, if this view is one.
    ///
    /// With an index present the pre-decoded entry is borrowed; without one
    /// the payload is decoded on demand with `D::Decode::default()`.
    ///
    /// # Panics
    ///
    /// Panics if an index exists but holds no entry for this value, or if
    /// on-demand decoding fails.
    fn as_embedded(&self) -> Option> {
        if self.tag() != Tag::Embedded {
            return None
        }
        match self.repr.index.embedded.as_ref() {
            Some(e) => {
                // Entries are in offset order, so a binary search finds ours.
                let i = e
                    .binary_search_by_key(&self.value_range.start, |(r, _)| r.start)
                    .expect("embedded value missing from index");
                Some(Cow::Borrowed(&e[i].1))
            }
            None => {
                let mut r = BytesBinarySource::new(
                    &self.repr.packed.as_ref()[self.value_range.start + 1 .. self.value_range.end])
                    .into_packed();
                let d = D::Decode::default().decode_embedded(&mut r, true).unwrap();
                Some(Cow::Owned(d))
            }
        }
    }

    /// The annotations in front of this value, or `None` when there are none.
    ///
    /// The annotations are viewed through a separate representation that has
    /// no embedded index.
    fn annotations(&self) -> Option as ValueImpl<'de>>::Handle]>> {
        if self.value_range.start == self.annotation_offset {
            None
        } else {
            let repr = Arc::new(IndexedRepr {
                packed: self.repr.packed.as_ref().into(),
                index: Index:: { embedded: None },
                phantom: PhantomData,
            });
            let anns: Vec = AnnotationAdapter(ViewIterator::inner_new(&repr, self.annotation_offset)).collect();
            Some(Cow::Owned(anns))
        }
    }

    /// The same value with its annotations dropped from the view.
    fn peeled(v: &Self::Handle) -> Self::Handle {
        View {
            repr: v.repr.clone(),
            annotation_offset: v.value_range.start,
            value_range: v.value_range.clone(),
        }
    }

    fn copy, F, Err>(w: &E::Handle, f: &mut F) -> Result
    where
        F: FnMut(&E::Embedded) -> Result
    {
        todo!()
    }

    fn map_embedded, F, Err>(v: &Self::Handle, f: &mut F) -> Result< as ValueImpl<'de>>::Handle, Err>
    where
        F: FnMut(&Self::Embedded) -> Result
    {
        todo!()
    }
}

crate::impl_value_methods!({'de, D: Domain<'de>}, View<'de, D>);

impl<'de> Clone for IOView<'de> {
    fn clone(&self) -> Self {
        Self(self.0.clone())
    }
}

impl<'de> Debug for IOView<'de> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.fmt(f)
    }
}

impl<'de> Ord for IOView<'de> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.0.cmp(&other.0)
    }
}

impl<'de> Hash for IOView<'de> {
    fn hash(&self, state: &mut H) {
        self.0.hash(state);
    }
}

impl<'de> PartialOrd for IOView<'de> {
    fn partial_cmp(&self, other: &Self) -> Option {
        self.0.partial_cmp(&other.0)
    }
}

impl<'de> PartialEq for IOView<'de> {
    fn eq(&self, other: &Self) -> bool {
        self.0 == other.0
    }
}

impl<'de> Eq for IOView<'de> {}

impl<'de> Domain<'de> for IOView<'de> {
    type Decode = NoEmbeddedDomainCodec;
    type Encode = NoEmbeddedDomainCodec;
}

impl<'de> From> for IOView<'de> {
    fn from(v: IOViewImpl<'de>) -> Self {
        IOView(v)
    }
}

impl<'de> Into> for IOView<'de> {
    fn into(self) -> IOViewImpl<'de> {
        self.0
    }
}

impl<'de> AsRef> for IOView<'de> {
    fn as_ref(&self) -> &IOViewImpl<'de> {
        &self.0
    }
}

/// Iterator over consecutive top-level values stored in one buffer.
pub struct ViewStream<'de, 'dec, D: Domain<'de>, Dec: DomainDecode<'de, D> = >::Decode> {
    buf: Cow<'de, [u8]>,
    dec: Option<&'dec mut Dec>,
    offset: usize,
    phantom: PhantomData<&'de D>,
}

impl<'de, 'dec, D: Domain<'de>, Dec: DomainDecode<'de, D>> ViewStream<'de, 'dec, D, Dec> {
    /// Starts a stream at the beginning of `buf`. `dec`, when given, is used
    /// to decode embedded values of each view.
    pub fn new(buf: Cow<'de, [u8]>, dec: Option<&'dec mut Dec>) -> Self {
        ViewStream {
            buf,
            dec,
            offset: 0,
            phantom: PhantomData,
        }
    }
}

impl<'de, 'dec, D: Domain<'de>, Dec: DomainDecode<'de, D>> Iterator for ViewStream<'de, 'dec, D, Dec> {
    type Item = io::Result>;

    /// Yields the next value, or `None` once the buffer is exhausted.
    ///
    /// Each view receives its own clone of the buffer `Cow`; that is a
    /// pointer copy for borrowed input and a byte copy for owned input.
    /// After an error is yielded the stream ends, so a caller that keeps
    /// iterating does not receive the same error forever.
    fn next(&mut self) -> Option {
        if self.offset >= self.buf.len() {
            return None;
        }
        match View::new_offset(self.buf.clone(), self.dec.as_deref_mut(), self.offset) {
            Ok(v) => {
                self.offset = v.value_range.end;
                Some(Ok(v))
            }
            Err(e) => {
                self.offset = self.buf.len();
                Some(Err(e))
            }
        }
    }
}

/// Cursor over sibling values inside a shared representation.
pub struct ViewIterator {
    repr: Arc,
    offset: usize,
}

impl<'de, D: Domain<'de>> ViewIterator> {
    /// Creates a cursor positioned at `offset` within `repr`.
    pub fn inner_new(repr: &Arc>, offset: usize) -> Self {
        ViewIterator { repr: Arc::clone(repr), offset }
    }
}

impl<'de, D: Domain<'de>> Iterator for ViewIterator> {
    type Item = View<'de, D>;

    /// Yields the value under the cursor and steps past it; stops at an end
    /// tag. Panics if the bytes under the cursor are not a well-formed value.
    fn next(&mut self) -> Option {
        if let Ok(Tag::End) = tag_at(self.repr.packed.as_ref(), self.offset) {
            None
        } else {
            let v = View::inner_new(&self.repr, self.offset).unwrap();
            self.offset = v.value_range.end;
            Some(v)
        }
    }
}

/// Pairs up consecutive items of a [`ViewIterator`] as (key, value).
pub struct DictionaryAdapter(pub ViewIterator);

impl<'de, D: Domain<'de>> Iterator for DictionaryAdapter> {
    type Item = (View<'de, D>, View<'de, D>);

    fn next(&mut self) -> Option {
        let k = self.0.next()?;
        let v = self.0.next()?;
        Some((k, v))
    }
}

/// Yields the annotating value after each annotation tag, stopping at the
/// first tag that is not an annotation.
pub struct AnnotationAdapter(pub ViewIterator);

impl<'de> Iterator for AnnotationAdapter>> {
    type Item = IOViewImpl<'de>;

    fn next(&mut self) -> Option {
        if let Ok(Tag::Annotation) = tag_at(self.0.repr.packed.as_ref(), self.0.offset) {
            self.0.offset += 1;
            self.0.next()
        } else {
            None
        }
    }
}