From 6033968a1399c16b042d475dbbb330712f76f627 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Thu, 10 Nov 2022 15:02:08 +0100 Subject: [PATCH] Packed-binary view-backed ValueImpl and little demo --- implementations/rust/oo/src/lib.rs | 1 + implementations/rust/oo/src/packed/mod.rs | 2 +- implementations/rust/oo/src/packed/view.rs | 370 ++++++++++++++++++ implementations/rust/oo/src/repr.rs | 70 +--- implementations/rust/oo/src/text/writer.rs | 3 + implementations/rust/oo/src/writer.rs | 25 +- implementations/rust/oo/tests/read_samples.rs | 10 + 7 files changed, 420 insertions(+), 61 deletions(-) create mode 100644 implementations/rust/oo/src/packed/view.rs diff --git a/implementations/rust/oo/src/lib.rs b/implementations/rust/oo/src/lib.rs index cac3ba2..4534777 100644 --- a/implementations/rust/oo/src/lib.rs +++ b/implementations/rust/oo/src/lib.rs @@ -38,6 +38,7 @@ pub use repr::ValueImpl; pub use repr::copy_via; pub use repr::iovalue; pub use repr::owned; +pub use repr::shell; pub use repr::value; pub use signed_integer::SignedInteger; pub use source::BinarySource; diff --git a/implementations/rust/oo/src/packed/mod.rs b/implementations/rust/oo/src/packed/mod.rs index 566519a..55f1648 100644 --- a/implementations/rust/oo/src/packed/mod.rs +++ b/implementations/rust/oo/src/packed/mod.rs @@ -1,6 +1,6 @@ pub mod constants; pub mod reader; -// pub mod view; +pub mod view; pub mod writer; pub use reader::PackedReader; diff --git a/implementations/rust/oo/src/packed/view.rs b/implementations/rust/oo/src/packed/view.rs new file mode 100644 index 0000000..2f4e236 --- /dev/null +++ b/implementations/rust/oo/src/packed/view.rs @@ -0,0 +1,370 @@ +use std::borrow::Cow; +use std::io; +use std::marker::PhantomData; + +use num_bigint::BigInt; + +use crate::IOValue; +use crate::PlainValue; +use crate::SignedInteger; +use crate::Value; +use crate::ValueClass; +use crate::ValueImpl; +use crate::error; +use crate::error::io_eof; +use crate::iovalue; +use crate::owned; +use crate::shell; +use crate::reader::NextToken; + +use super::constants::Tag; + +#[derive(Debug, Clone)] +pub struct View<'de, Packed: AsRef<[u8]> + 'de> { + packed: Packed, + value_offset: usize, // annotation_offset is implicitly 0 + value_end: usize, + phantom: PhantomData<&'de ()>, +} + +#[inline(always)] +fn at(packed: &[u8], i: usize) -> io::Result { + match packed.get(i) { + Some(v) => Ok(*v), + None => Err(io_eof()), + } +} + +#[inline(always)] +fn tag_at(packed: &[u8], i: usize) -> io::Result { + Ok(Tag::try_from(at(packed, i)?)?) +} + +fn skip_annotations(packed: &[u8], mut i: usize) -> io::Result { + loop { + if tag_at(packed, i)? == Tag::Annotation { + i = skip_value(packed, i + 1)?; + } else { + return Ok(i); + } + } +} + +fn varint(packed: &[u8], mut i: usize) -> io::Result<(u64, usize)> { + let mut shift = 0; + let mut acc: u64 = 0; + loop { + let v = at(packed, i)?; + i = i + 1; + if shift == 63 && v > 1 { Err(error::Error::Message("PackedReader length too long".to_string()))? } + acc |= ((v & 0x7f) as u64) << shift; + shift += 7; + if v & 0x80 == 0 { return Ok((acc, i)) } + if shift >= 70 { Err(error::Error::Message("PackedReader length too long".to_string()))? } + } +} + +fn skip_value(packed: &[u8], mut i: usize) -> io::Result { + loop { + let next_i = match tag_at(packed, i)? { + Tag::False => i + 1, + Tag::True => i + 1, + Tag::Float => i + 5, + Tag::Double => i + 9, + Tag::End => Err(io::Error::new(io::ErrorKind::InvalidData, "Unexpected end tag"))?, + Tag::Annotation => { + i = skip_value(packed, i + 1)?; + continue; + } + Tag::Embedded => { + i = i + 1; + continue; + } + Tag::SmallInteger(_) => i + 1, + Tag::MediumInteger(n) => i + 1 + (n as usize), + Tag::SignedInteger | Tag::String | Tag:: ByteString | Tag:: Symbol => { + let (n, i) = varint(packed, i + 1)?; + i + (n as usize) + } + Tag::Record | Tag::Sequence | Tag::Set | Tag::Dictionary => { + i = i + 1; + while tag_at(packed, i)? != Tag::End { + i = skip_value(packed, i)?; + } + i + 1 + } + }; + return Ok(next_i); + } +} + +impl<'de, Packed: AsRef<[u8]> + 'de> View<'de, Packed> { + pub fn new(packed: Packed) -> io::Result { + // println!("packed {:?}", &packed.as_ref()); + let value_offset = skip_annotations(packed.as_ref(), 0)?; + let value_end = skip_value(packed.as_ref(), value_offset)?; + if value_end > packed.as_ref().len() { Err(io_eof())? } + Ok(View { packed, value_offset, value_end, phantom: PhantomData }) + } + + #[inline(always)] + fn tag(&self) -> Tag { + tag_at(self.packed.as_ref(), self.value_offset).unwrap() + } + + #[inline(always)] + fn varint(&self) -> (u64, usize) { + varint(self.packed.as_ref(), self.value_offset + 1).unwrap() + } + + #[inline(always)] + fn sub(&self, offset: usize, len: usize) -> &[u8] { + let packed = self.packed.as_ref(); + &packed[offset .. offset + len] + } + + #[inline(always)] + fn atom_chunk(&self) -> &[u8] { + let (n, i) = self.varint(); + self.sub(i, n as usize) + } + + #[inline(always)] + fn signed_integer(&self, offset: usize, len: usize) -> SignedInteger { + let bs = self.sub(offset, len); + if (bs[0] & 0x80) == 0 { + // Positive or zero. + let mut i = 0; + while i < len && bs[i] == 0 { i += 1; } + if len - i <= 16 { + let mut v: u128 = 0; + for b in &bs[i..] { v = v << 8 | (*b as u128); } + SignedInteger::from(v) + } else { + SignedInteger::from(Cow::Owned(BigInt::from_bytes_be(num_bigint::Sign::Plus, &bs[i..]))) + } + } else { + // Negative. + let mut i = 0; + while i < len && bs[i] == 0xff { i += 1; } + if len - i <= 16 { + let mut v: i128 = -1; + for b in &bs[i..] { v = v << 8 | (*b as i128); } + SignedInteger::from(v) + } else { + SignedInteger::from(Cow::Owned(BigInt::from_signed_bytes_be(&bs))) + } + } + } +} + +impl<'de, Packed: AsRef<[u8]> + 'de> ValueImpl for View<'de, Packed> { + fn write(&self, w: &mut dyn crate::Writer, enc: &mut dyn crate::DomainEncode) -> io::Result<()> { + crate::write_value(w, self, enc) + } + + fn value_clone(&self) -> PlainValue<'static, IOValue> where IOValue: 'static { + owned(View { + packed: self.packed.as_ref()[..self.value_end].to_owned(), + value_offset: self.value_offset, + value_end: self.value_end, + phantom: PhantomData, + }) + } + + fn value_class(&self) -> ValueClass { + match self.tag().into() { + Some(NextToken::Annotation) | None => unreachable!(), + Some(NextToken::Value(v)) => v, + } + } + + fn as_boolean(&self) -> Option { + match self.tag() { + Tag::False => Some(false), + Tag::True => Some(true), + _ => None, + } + } + + fn as_float(&self) -> Option { + match self.tag() { + Tag::Float => Some(f32::from_be_bytes(self.sub(self.value_offset + 1, 4).try_into().unwrap())), + _ => None, + } + } + + fn as_double(&self) -> Option { + match self.tag() { + Tag::Double => Some(f64::from_be_bytes(self.sub(self.value_offset + 1, 8).try_into().unwrap())), + _ => None, + } + } + + fn is_signed_integer(&self) -> bool { + match self.tag() { + Tag::SmallInteger(_) => true, + Tag::MediumInteger(_) => true, + Tag::SignedInteger => true, + _ => false, + } + } + + fn as_signed_integer(&self) -> Option { + match self.tag() { + Tag::SmallInteger(v) => Some(v.into()), + Tag::MediumInteger(n) => Some(self.signed_integer(self.value_offset + 1, n as usize)), + Tag::SignedInteger => { + let (n, i) = self.varint(); + Some(self.signed_integer(i, n as usize)) + } + _ => None, + } + } + + fn as_string(&self) -> Option> { + match self.tag() { + Tag::String => Some(Cow::Borrowed(std::str::from_utf8(self.atom_chunk()).unwrap())), + _ => None, + } + } + + fn as_bytestring(&self) -> Option> { + match self.tag() { + Tag::ByteString => Some(Cow::Borrowed(self.atom_chunk())), + _ => None, + } + } + + fn as_symbol(&self) -> Option> { + match self.tag() { + Tag::Symbol => Some(Cow::Borrowed(std::str::from_utf8(self.atom_chunk()).unwrap())), + _ => None, + } + } + + fn is_record(&self) -> bool { + self.tag() == Tag::Record + } + + fn label(&self) -> Value<'_, IOValue> { + if !self.is_record() { panic!("Not a record") } + shell(View::new(&self.packed.as_ref()[self.value_offset + 1 .. self.value_end]).unwrap()) + } + + fn is_sequence(&self) -> bool { + self.tag() == Tag::Sequence + } + + fn len(&self) -> usize { + self.iter().count() + } + + fn index(&self, i: usize) -> Value<'_, IOValue> { + self.iter().nth(i).unwrap() + } + + fn iter(&self) -> Box> + '_> { + let mut i = Box::new(ViewIterator::new(&self.packed.as_ref()[self.value_offset + 1 ..])); + match self.tag() { + Tag::Record => { i.next(); () } + Tag::Sequence => (), + Tag::Set => (), + _ => panic!("Not iterable"), + } + i + } + + fn is_set(&self) -> bool { + self.tag() == Tag::Set + } + + fn has(&self, v: &dyn ValueImpl) -> bool { + self.iter().find(|e| v == &**e).is_some() + } + + fn is_dictionary(&self) -> bool { + self.tag() == Tag::Dictionary + } + + fn get(&self, k: &dyn ValueImpl) -> Option> { + for (kk, vv) in self.entries() { + if &*kk == k { return Some(vv); } + } + None + } + + fn entries(&self) -> Box, Value<'_, IOValue>)> + '_> { + if !self.is_dictionary() { panic!("Not a dictionary") } + Box::new(DictionaryAdapter(ViewIterator::new(&self.packed.as_ref()[self.value_offset + 1 ..]))) + } + + fn is_embedded(&self) -> bool { + self.tag() == Tag::Embedded + } + + fn embedded(&self) -> Cow<'_, IOValue> { + let bs = self.packed.as_ref()[self.value_offset + 1 .. self.value_end].to_vec(); + Cow::Owned(iovalue(View::new(bs).unwrap())) + } + + fn annotations(&self) -> Option> { + if self.value_offset == 0 { + None + } else { + let anns: Vec = AnnotationAdapter(ViewIterator::new(&self.packed.as_ref()[.. self.value_offset])) + .map(|ann| iovalue(ann.into_owned())) + .collect(); + Some(Cow::Owned(anns)) + } + } +} + +pub struct ViewIterator<'de> { + packed: &'de [u8], + offset: usize, +} + +impl<'de> ViewIterator<'de> { + pub fn new(packed: &'de [u8]) -> Self { + ViewIterator { packed, offset: 0 } + } +} + +impl<'de> Iterator for ViewIterator<'de> { + type Item = Value<'de, IOValue>; + + fn next(&mut self) -> Option { + let v = View::new(&self.packed[self.offset..]).ok()?; + if v.tag() == Tag::End { return None; } + self.offset += v.value_end; + Some(shell(v)) + } +} + +pub struct DictionaryAdapter<'de>(pub ViewIterator<'de>); + +impl<'de> Iterator for DictionaryAdapter<'de> { + type Item = (Value<'de, IOValue>, Value<'de, IOValue>); + + fn next(&mut self) -> Option { + let k = self.0.next()?; + let v = self.0.next()?; + Some((k, v)) + } +} + +pub struct AnnotationAdapter<'de>(pub ViewIterator<'de>); + +impl<'de> Iterator for AnnotationAdapter<'de> { + type Item = Value<'de, IOValue>; + + fn next(&mut self) -> Option { + if let Ok(Tag::Annotation) = tag_at(self.0.packed.as_ref(), self.0.offset) { + self.0.offset += 1; + self.0.next() + } else { + None + } + } +} diff --git a/implementations/rust/oo/src/repr.rs b/implementations/rust/oo/src/repr.rs index 79f8a36..8055d7e 100644 --- a/implementations/rust/oo/src/repr.rs +++ b/implementations/rust/oo/src/repr.rs @@ -1,6 +1,5 @@ use bytemuck::TransparentWrapper; -use std::any::Any; use std::borrow::Borrow; use std::borrow::Cow; use std::cmp::Ordering; @@ -31,14 +30,16 @@ pub type PlainValue<'va, D = IOValue> = Box + 'va>; pub enum Value<'r, D: Domain = IOValue> { Borrowed(&'r (dyn ValueImpl + 'r)), + Shell(PlainValue<'r, D>), Owned(PlainValue<'static, D>), } impl<'r, D: Domain> Value<'r, D> { pub fn into_owned(self: Value<'r, D>) -> PlainValue<'static, D> { match self { - Value::Borrowed(r) => return r.value_clone(), - Value::Owned(v) => return v, + Value::Borrowed(r) => r.value_clone(), + Value::Shell(v) => v.value_clone(), + Value::Owned(v) => v, } } } @@ -61,6 +62,7 @@ impl<'r, D: Domain> Deref for Value<'r, D> { fn deref(&self) -> &Self::Target { match self { Value::Borrowed(r) => r, + Value::Shell(v) => v, Value::Owned(v) => v, } } @@ -148,15 +150,17 @@ pub trait ValueImpl { fn is_embedded(&self) -> bool { false } fn embedded(&self) -> Cow<'_, D> { panic!("Not an embedded value") } - fn annotations(&self) -> Option<&[IOValue]> { None } - - fn specialized(&self) -> Option<&dyn Any> { None } + fn annotations(&self) -> Option> { None } } pub fn value>(v: &V) -> Value<'_, D> { Value::Borrowed(v) } +pub fn shell<'r, D: Domain, V: ValueImpl + 'r>(v: V) -> Value<'r, D> { + Value::Shell(Box::new(v)) +} + pub fn owned<'va, D: Domain, V: ValueImpl + 'static>(v: V) -> PlainValue<'va, D> { Box::new(v) } @@ -234,7 +238,7 @@ impl<'a, D: Domain, V: ValueImpl + ?Sized> ValueImpl for &'a V { fn entries(&self) -> Box, Value<'_, D>)> + '_> { (*self).entries() } fn is_embedded(&self) -> bool { (*self).is_embedded() } fn embedded(&self) -> Cow<'_, D> { (*self).embedded() } - fn annotations(&self) -> Option<&[IOValue]> { (*self).annotations() } + fn annotations(&self) -> Option> { (*self).annotations() } } impl<'va, D: Domain> ValueImpl for PlainValue<'va, D> { @@ -262,7 +266,7 @@ impl<'va, D: Domain> ValueImpl for PlainValue<'va, D> { fn entries(&self) -> Box, Value<'_, D>)> + '_> { self.as_ref().entries() } fn is_embedded(&self) -> bool { self.as_ref().is_embedded() } fn embedded(&self) -> Cow<'_, D> { self.as_ref().embedded() } - fn annotations(&self) -> Option<&[IOValue]> { self.as_ref().annotations() } + fn annotations(&self) -> Option> { self.as_ref().annotations() } } impl<'a, D: Domain> Debug for dyn ValueImpl + 'a { @@ -315,21 +319,6 @@ impl<'a, D: Domain> Hash for dyn ValueImpl + 'a { } } -fn iters_eq<'a, D: Domain>( - mut i1: Box> + 'a>, - mut i2: Box> + 'a>, -) -> bool { - loop { - match i1.next() { - None => return i2.next().is_none(), - Some(v1) => match i2.next() { - None => return false, - Some(v2) => if v1 != v2 { return false; }, - } - } - } -} - impl<'a, D: Domain> PartialEq for dyn ValueImpl + 'a { fn eq(&self, other: &Self) -> bool { let cls = self.value_class(); @@ -354,10 +343,10 @@ impl<'a, D: Domain> PartialEq for dyn ValueImpl + 'a { ValueClass::Compound(c) => match c { CompoundClass::Record => { if self.label() != other.label() { return false; } - iters_eq(self.iter(), other.iter()) + self.iter().eq(other.iter()) } CompoundClass::Sequence => { - iters_eq(self.iter(), other.iter()) + self.iter().eq(other.iter()) } CompoundClass::Set => { let s1 = self.iter().collect::>(); @@ -375,27 +364,6 @@ impl<'a, D: Domain> PartialEq for dyn ValueImpl + 'a { } } -fn iters_cmp<'a, D: Domain>( - mut i1: Box> + 'a>, - mut i2: Box> + 'a>, -) -> Ordering { - loop { - match i1.next() { - None => match i2.next() { - None => return Ordering::Equal, - Some(_) => return Ordering::Less, - } - Some(v1) => match i2.next() { - None => return Ordering::Greater, - Some(v2) => match v1.cmp(&v2) { - Ordering::Equal => (), - other => return other, - } - } - } - } -} - impl<'a, D: Domain> Ord for dyn ValueImpl + 'a { fn cmp(&self, other: &Self) -> Ordering { let cls = self.value_class(); @@ -419,8 +387,8 @@ impl<'a, D: Domain> Ord for dyn ValueImpl + 'a { ValueClass::Compound(c) => match c { CompoundClass::Record => self.label().cmp(&other.label()).then_with( - || iters_cmp(self.iter(), other.iter())), - CompoundClass::Sequence => iters_cmp(self.iter(), other.iter()), + || self.iter().cmp(other.iter())), + CompoundClass::Sequence => self.iter().cmp(other.iter()), CompoundClass::Set => { let s1 = self.iter().collect::>(); let s2 = other.iter().collect::>(); @@ -921,7 +889,7 @@ impl> ValueImpl for Annotations { fn entries(&self) -> Box, Value<'_, D>)> + '_> { self.value().entries() } fn is_embedded(&self) -> bool { self.value().is_embedded() } fn embedded(&self) -> Cow<'_, D> { self.value().embedded() } - fn annotations(&self) -> Option<&[IOValue]> { Some(&self.1) } + fn annotations(&self) -> Option> { Some(Cow::Borrowed(&self.1)) } } impl> PartialEq for Annotations { @@ -1047,7 +1015,7 @@ impl ValueImpl for ArcValue { fn entries(&self) -> Box, Value<'_, D>)> + '_> { self.0.entries() } fn is_embedded(&self) -> bool { self.0.is_embedded() } fn embedded(&self) -> Cow<'_, D> { self.0.embedded() } - fn annotations(&self) -> Option<&[IOValue]> { self.0.annotations() } + fn annotations(&self) -> Option> { self.0.annotations() } } impl ValueImpl for IOValue { @@ -1075,5 +1043,5 @@ impl ValueImpl for IOValue { fn entries(&self) -> Box, Value<'_, IOValue>)> + '_> { self.0.entries() } fn is_embedded(&self) -> bool { self.0.is_embedded() } fn embedded(&self) -> Cow<'_, IOValue> { self.0.embedded() } - fn annotations(&self) -> Option<&[IOValue]> { self.0.annotations() } + fn annotations(&self) -> Option> { self.0.annotations() } } diff --git a/implementations/rust/oo/src/text/writer.rs b/implementations/rust/oo/src/text/writer.rs index 1e8afbb..fb559ab 100644 --- a/implementations/rust/oo/src/text/writer.rs +++ b/implementations/rust/oo/src/text/writer.rs @@ -163,6 +163,9 @@ impl Writer for TextWriter { (Some(B::Item::Annotation), Some(B::Item::AnnotatedValue)) => { return write!(self.w, " ") } + (None, Some(B::Item::AnnotatedValue)) | + // ^ strictly speaking, this combination is not permitted; a ValueImpl that yields + // a zero-length vector of annotations instead of `None` is in error. (Some(B::Item::AnnotatedValue), None) => return Ok(()), diff --git a/implementations/rust/oo/src/writer.rs b/implementations/rust/oo/src/writer.rs index 40b0c51..c477bc5 100644 --- a/implementations/rust/oo/src/writer.rs +++ b/implementations/rust/oo/src/writer.rs @@ -76,16 +76,23 @@ pub fn write_value>( let annotations = v.annotations(); let mut annotation_b = B::Type::default(); - if let Some(anns) = annotations { - w.start_annotations()?; - for ann in anns { - annotation_b.shift(Some(B::Item::Annotation)); + let has_annotations = if let Some(anns) = annotations { + if anns.is_empty() { + false + } else { + w.start_annotations()?; + for ann in &anns[..] { + annotation_b.shift(Some(B::Item::Annotation)); + w.boundary(&annotation_b)?; + ann.write(w, &mut IOValueDomainCodec)?; + } + annotation_b.shift(Some(B::Item::AnnotatedValue)); w.boundary(&annotation_b)?; - ann.write(w, &mut IOValueDomainCodec)?; + true } - annotation_b.shift(Some(B::Item::AnnotatedValue)); - w.boundary(&annotation_b)?; - } + } else { + false + }; match v.value_class() { ValueClass::Atomic(a) => match a { @@ -159,7 +166,7 @@ pub fn write_value>( } } - if let Some(_) = annotations { + if has_annotations { annotation_b.shift(None); w.boundary(&annotation_b)?; w.end_annotations()? diff --git a/implementations/rust/oo/tests/read_samples.rs b/implementations/rust/oo/tests/read_samples.rs index c3861e3..9a83e1a 100644 --- a/implementations/rust/oo/tests/read_samples.rs +++ b/implementations/rust/oo/tests/read_samples.rs @@ -20,3 +20,13 @@ fn read_samples_bin() -> io::Result<()> { println!("{:#?}", annotated_iovalue_from_bytes(&contents)?); Ok(()) } + +#[test] +fn read_samples_view() -> io::Result<()> { + let mut fh = std::fs::File::open("../../../tests/samples.bin")?; + let mut contents = Vec::new(); + fh.read_to_end(&mut contents)?; + let v = packed::view::View::new(contents)?; + println!("{:#?}", value(&v)); + Ok(()) +}