Packed-binary view-backed ValueImpl and little demo

This commit is contained in:
Tony Garnock-Jones 2022-11-10 15:02:08 +01:00
parent 13ec9cc67e
commit 6033968a13
7 changed files with 420 additions and 61 deletions

View File

@ -38,6 +38,7 @@ pub use repr::ValueImpl;
pub use repr::copy_via;
pub use repr::iovalue;
pub use repr::owned;
pub use repr::shell;
pub use repr::value;
pub use signed_integer::SignedInteger;
pub use source::BinarySource;

View File

@ -1,6 +1,6 @@
pub mod constants;
pub mod reader;
// pub mod view;
pub mod view;
pub mod writer;
pub use reader::PackedReader;

View File

@ -0,0 +1,370 @@
use std::borrow::Cow;
use std::io;
use std::marker::PhantomData;
use num_bigint::BigInt;
use crate::IOValue;
use crate::PlainValue;
use crate::SignedInteger;
use crate::Value;
use crate::ValueClass;
use crate::ValueImpl;
use crate::error;
use crate::error::io_eof;
use crate::iovalue;
use crate::owned;
use crate::shell;
use crate::reader::NextToken;
use super::constants::Tag;
/// A read-only view over one packed-binary value held in any byte container
/// `Packed` (anything that is `AsRef<[u8]>`).
///
/// `View::new` locates the value inside the buffer once; the accessors then
/// decode directly from the bytes on demand.
#[derive(Debug, Clone)]
pub struct View<'de, Packed: AsRef<[u8]> + 'de> {
// The backing bytes: any annotations first, then the value itself.
packed: Packed,
// Offset of the value's own tag byte, i.e. just past any leading annotations.
value_offset: usize, // annotation_offset is implicitly 0
// Offset one past the last byte of the value.
value_end: usize,
// Carries the otherwise-unused `'de` lifetime parameter.
phantom: PhantomData<&'de ()>,
}
/// Reads the byte at index `i` of `packed`.
///
/// Yields the `io_eof()` error when `i` lies outside the buffer.
#[inline(always)]
fn at(packed: &[u8], i: usize) -> io::Result<u8> {
    if let Some(byte) = packed.get(i) {
        Ok(*byte)
    } else {
        Err(io_eof())
    }
}
/// Reads the byte at index `i` of `packed` and decodes it as a [`Tag`].
///
/// Fails if `i` is out of range or if the byte is rejected by `Tag::try_from`.
#[inline(always)]
fn tag_at(packed: &[u8], i: usize) -> io::Result<Tag> {
    let byte = at(packed, i)?;
    let tag = Tag::try_from(byte)?;
    Ok(tag)
}
/// Steps over every annotation that starts at offset `i`, returning the offset
/// of the first tag that is not `Tag::Annotation`.
fn skip_annotations(packed: &[u8], mut i: usize) -> io::Result<usize> {
    // Each annotation is the annotation tag followed by one complete value.
    while tag_at(packed, i)? == Tag::Annotation {
        i = skip_value(packed, i + 1)?;
    }
    Ok(i)
}
fn varint(packed: &[u8], mut i: usize) -> io::Result<(u64, usize)> {
let mut shift = 0;
let mut acc: u64 = 0;
loop {
let v = at(packed, i)?;
i = i + 1;
if shift == 63 && v > 1 { Err(error::Error::Message("PackedReader length too long".to_string()))? }
acc |= ((v & 0x7f) as u64) << shift;
shift += 7;
if v & 0x80 == 0 { return Ok((acc, i)) }
if shift >= 70 { Err(error::Error::Message("PackedReader length too long".to_string()))? }
}
}
fn skip_value(packed: &[u8], mut i: usize) -> io::Result<usize> {
loop {
let next_i = match tag_at(packed, i)? {
Tag::False => i + 1,
Tag::True => i + 1,
Tag::Float => i + 5,
Tag::Double => i + 9,
Tag::End => Err(io::Error::new(io::ErrorKind::InvalidData, "Unexpected end tag"))?,
Tag::Annotation => {
i = skip_value(packed, i + 1)?;
continue;
}
Tag::Embedded => {
i = i + 1;
continue;
}
Tag::SmallInteger(_) => i + 1,
Tag::MediumInteger(n) => i + 1 + (n as usize),
Tag::SignedInteger | Tag::String | Tag:: ByteString | Tag:: Symbol => {
let (n, i) = varint(packed, i + 1)?;
i + (n as usize)
}
Tag::Record | Tag::Sequence | Tag::Set | Tag::Dictionary => {
i = i + 1;
while tag_at(packed, i)? != Tag::End {
i = skip_value(packed, i)?;
}
i + 1
}
};
return Ok(next_i);
}
}
impl<'de, Packed: AsRef<[u8]> + 'de> View<'de, Packed> {
    /// Builds a view of the first value in `packed`.
    ///
    /// Leading annotations are skipped to find the value's tag, and the value's
    /// extent is measured once up front.
    ///
    /// # Errors
    ///
    /// Fails if the bytes cannot be walked as a value, or if the value's
    /// computed end lies beyond the end of `packed`.
    pub fn new(packed: Packed) -> io::Result<Self> {
        let value_offset = skip_annotations(packed.as_ref(), 0)?;
        let value_end = skip_value(packed.as_ref(), value_offset)?;
        if value_end > packed.as_ref().len() {
            return Err(io_eof());
        }
        Ok(View { packed, value_offset, value_end, phantom: PhantomData })
    }

    /// The tag of the viewed value. `new` has already decoded this byte
    /// successfully, so the `unwrap` cannot fail.
    #[inline(always)]
    fn tag(&self) -> Tag {
        tag_at(self.packed.as_ref(), self.value_offset).unwrap()
    }

    /// Decodes the length prefix that follows the tag byte, returning the
    /// length and the offset of the first body byte. Only meaningful for
    /// length-prefixed tags, for which `new` already decoded it successfully.
    #[inline(always)]
    fn varint(&self) -> (u64, usize) {
        varint(self.packed.as_ref(), self.value_offset + 1).unwrap()
    }

    /// The `len` bytes of the backing buffer starting at `offset`.
    /// Panics if that range is out of bounds.
    #[inline(always)]
    fn sub(&self, offset: usize, len: usize) -> &[u8] {
        let packed = self.packed.as_ref();
        &packed[offset..offset + len]
    }

    /// The body bytes of a length-prefixed atom.
    #[inline(always)]
    fn atom_chunk(&self) -> &[u8] {
        let (n, i) = self.varint();
        self.sub(i, n as usize)
    }

    /// Decodes the `len` bytes at `offset` as a big-endian two's-complement
    /// integer.
    ///
    /// Values that fit in 128 bits are produced without allocating; anything
    /// wider goes through `BigInt`. A zero-length body decodes as zero.
    #[inline(always)]
    fn signed_integer(&self, offset: usize, len: usize) -> SignedInteger {
        let bs = self.sub(offset, len);
        let negative = match bs.first() {
            Some(first) => *first & 0x80 != 0,
            // No bytes at all: treat as zero rather than indexing out of bounds.
            None => return SignedInteger::from(0u128),
        };
        if !negative {
            // Positive or zero: drop redundant leading zero bytes.
            let start = bs.iter().position(|b| *b != 0).unwrap_or(len);
            let digits = &bs[start..];
            if digits.len() <= 16 {
                let v = digits.iter().fold(0u128, |v, b| v << 8 | u128::from(*b));
                SignedInteger::from(v)
            } else {
                SignedInteger::from(Cow::Owned(BigInt::from_bytes_be(
                    num_bigint::Sign::Plus,
                    digits,
                )))
            }
        } else {
            // Negative: drop redundant leading 0xff sign-extension bytes.
            let start = bs.iter().position(|b| *b != 0xff).unwrap_or(len);
            let digits = &bs[start..];
            // With fewer than 16 residual bytes the seeded -1 keeps the sign
            // bits. With exactly 16, every seeded bit is shifted out, so the
            // residue itself must carry the sign bit; otherwise the value is
            // below i128::MIN and needs a BigInt.
            let fits = digits.len() < 16 || (digits.len() == 16 && digits[0] & 0x80 != 0);
            if fits {
                let v = digits.iter().fold(-1i128, |v, b| v << 8 | i128::from(*b));
                SignedInteger::from(v)
            } else {
                SignedInteger::from(Cow::Owned(BigInt::from_signed_bytes_be(bs)))
            }
        }
    }
}
/// `ValueImpl` backed directly by packed bytes: every accessor decodes from the
/// buffer on demand instead of materialising a tree of values.
///
/// Accessors that decode text (`as_string`, `as_symbol`) panic if the stored
/// bytes are not valid UTF-8, because `View::new` does not validate atom bodies.
impl<'de, Packed: AsRef<[u8]> + 'de> ValueImpl<IOValue> for View<'de, Packed> {
    fn write(&self, w: &mut dyn crate::Writer, enc: &mut dyn crate::DomainEncode<IOValue>) -> io::Result<()> {
        crate::write_value(w, self, enc)
    }

    /// Copies the annotations and value bytes into an owned, `'static` view.
    fn value_clone(&self) -> PlainValue<'static, IOValue> where IOValue: 'static {
        owned(View {
            packed: self.packed.as_ref()[..self.value_end].to_owned(),
            value_offset: self.value_offset,
            value_end: self.value_end,
            phantom: PhantomData,
        })
    }

    fn value_class(&self) -> ValueClass {
        match self.tag().into() {
            // `value_offset` never points at an annotation or End tag.
            Some(NextToken::Annotation) | None => unreachable!(),
            Some(NextToken::Value(v)) => v,
        }
    }

    fn as_boolean(&self) -> Option<bool> {
        match self.tag() {
            Tag::False => Some(false),
            Tag::True => Some(true),
            _ => None,
        }
    }

    fn as_float(&self) -> Option<f32> {
        match self.tag() {
            Tag::Float => Some(f32::from_be_bytes(self.sub(self.value_offset + 1, 4).try_into().unwrap())),
            _ => None,
        }
    }

    fn as_double(&self) -> Option<f64> {
        match self.tag() {
            Tag::Double => Some(f64::from_be_bytes(self.sub(self.value_offset + 1, 8).try_into().unwrap())),
            _ => None,
        }
    }

    fn is_signed_integer(&self) -> bool {
        matches!(
            self.tag(),
            Tag::SmallInteger(_) | Tag::MediumInteger(_) | Tag::SignedInteger
        )
    }

    fn as_signed_integer(&self) -> Option<SignedInteger> {
        match self.tag() {
            Tag::SmallInteger(v) => Some(v.into()),
            Tag::MediumInteger(n) => Some(self.signed_integer(self.value_offset + 1, n as usize)),
            Tag::SignedInteger => {
                let (n, i) = self.varint();
                Some(self.signed_integer(i, n as usize))
            }
            _ => None,
        }
    }

    fn as_string(&self) -> Option<Cow<'_, str>> {
        match self.tag() {
            Tag::String => Some(Cow::Borrowed(std::str::from_utf8(self.atom_chunk()).unwrap())),
            _ => None,
        }
    }

    fn as_bytestring(&self) -> Option<Cow<'_, [u8]>> {
        match self.tag() {
            Tag::ByteString => Some(Cow::Borrowed(self.atom_chunk())),
            _ => None,
        }
    }

    fn as_symbol(&self) -> Option<Cow<'_, str>> {
        match self.tag() {
            Tag::Symbol => Some(Cow::Borrowed(std::str::from_utf8(self.atom_chunk()).unwrap())),
            _ => None,
        }
    }

    fn is_record(&self) -> bool {
        self.tag() == Tag::Record
    }

    /// The record's label, which is the first value after the record tag.
    /// Panics if this value is not a record.
    fn label(&self) -> Value<'_, IOValue> {
        if !self.is_record() { panic!("Not a record") }
        shell(View::new(&self.packed.as_ref()[self.value_offset + 1 .. self.value_end]).unwrap())
    }

    fn is_sequence(&self) -> bool {
        self.tag() == Tag::Sequence
    }

    /// Number of members, found by walking the encoded children.
    ///
    /// Dictionaries are counted by entry; `iter` rejects them, so routing them
    /// through `iter` would panic with "Not iterable".
    fn len(&self) -> usize {
        if self.is_dictionary() {
            self.entries().count()
        } else {
            self.iter().count()
        }
    }

    /// The `i`th member, found by a linear walk. Panics if there is no such member.
    fn index(&self, i: usize) -> Value<'_, IOValue> {
        self.iter().nth(i).unwrap()
    }

    /// Iterates the fields of a record (label excluded) or the members of a
    /// sequence or set. Panics for any other kind of value.
    fn iter(&self) -> Box<dyn Iterator<Item = Value<'_, IOValue>> + '_> {
        let mut i = Box::new(ViewIterator::new(&self.packed.as_ref()[self.value_offset + 1 ..]));
        match self.tag() {
            // The first encoded child of a record is its label, not a field.
            Tag::Record => { i.next(); }
            Tag::Sequence | Tag::Set => (),
            _ => panic!("Not iterable"),
        }
        i
    }

    fn is_set(&self) -> bool {
        self.tag() == Tag::Set
    }

    fn has(&self, v: &dyn ValueImpl<IOValue>) -> bool {
        self.iter().any(|e| v == &*e)
    }

    fn is_dictionary(&self) -> bool {
        self.tag() == Tag::Dictionary
    }

    /// Linear search of the dictionary's entries for a key equal to `k`.
    fn get(&self, k: &dyn ValueImpl<IOValue>) -> Option<Value<'_, IOValue>> {
        for (kk, vv) in self.entries() {
            if &*kk == k { return Some(vv); }
        }
        None
    }

    /// Iterates the key/value pairs of a dictionary. Panics if this value is
    /// not a dictionary.
    fn entries(&self) -> Box<dyn Iterator<Item = (Value<'_, IOValue>, Value<'_, IOValue>)> + '_> {
        if !self.is_dictionary() { panic!("Not a dictionary") }
        Box::new(DictionaryAdapter(ViewIterator::new(&self.packed.as_ref()[self.value_offset + 1 ..])))
    }

    fn is_embedded(&self) -> bool {
        self.tag() == Tag::Embedded
    }

    /// The embedded value, copied out into its own owned view.
    /// Panics if this value is not an embedded value.
    fn embedded(&self) -> Cow<'_, IOValue> {
        // Without this guard the body of an arbitrary value would be
        // reinterpreted as a value in its own right.
        if !self.is_embedded() { panic!("Not an embedded value") }
        let bs = self.packed.as_ref()[self.value_offset + 1 .. self.value_end].to_vec();
        Cow::Owned(iovalue(View::new(bs).unwrap()))
    }

    /// The value's annotations, decoded into owned values, or `None` when the
    /// value starts at offset 0 and therefore has none.
    fn annotations(&self) -> Option<Cow<'_, [IOValue]>> {
        if self.value_offset == 0 {
            return None;
        }
        let prefix = &self.packed.as_ref()[.. self.value_offset];
        let anns: Vec<IOValue> = AnnotationAdapter(ViewIterator::new(prefix))
            .map(|ann| iovalue(ann.into_owned()))
            .collect();
        Some(Cow::Owned(anns))
    }
}
/// Cursor over a run of consecutive packed values stored in a byte slice.
pub struct ViewIterator<'de> {
    /// The bytes being walked.
    packed: &'de [u8],
    /// Offset into `packed` of the next value to yield.
    offset: usize,
}

impl<'de> ViewIterator<'de> {
    /// Creates a cursor positioned at the first byte of `packed`.
    pub fn new(packed: &'de [u8]) -> Self {
        Self { offset: 0, packed }
    }
}
/// Yields each value in turn, each as a `View` over the remaining bytes.
///
/// Iteration stops as soon as the remaining bytes cannot be viewed as a value;
/// the underlying error, if any, is discarded.
impl<'de> Iterator for ViewIterator<'de> {
    type Item = Value<'de, IOValue>;

    fn next(&mut self) -> Option<Self::Item> {
        let remaining = &self.packed[self.offset..];
        match View::new(remaining) {
            Ok(v) if v.tag() != Tag::End => {
                // `value_end` is relative to `remaining`, so it is exactly the
                // distance to advance.
                self.offset += v.value_end;
                Some(shell(v))
            }
            _ => None,
        }
    }
}
/// Groups the values produced by a [`ViewIterator`] into consecutive
/// `(key, value)` pairs.
pub struct DictionaryAdapter<'de>(pub ViewIterator<'de>);

impl<'de> Iterator for DictionaryAdapter<'de> {
    type Item = (Value<'de, IOValue>, Value<'de, IOValue>);

    /// Pulls two values from the inner iterator; ends as soon as either is missing.
    fn next(&mut self) -> Option<Self::Item> {
        let inner = &mut self.0;
        let key = inner.next()?;
        inner.next().map(|value| (key, value))
    }
}
/// Walks a run of annotations, yielding the value carried by each one.
pub struct AnnotationAdapter<'de>(pub ViewIterator<'de>);

impl<'de> Iterator for AnnotationAdapter<'de> {
    type Item = Value<'de, IOValue>;

    /// Ends at the first position that does not hold an annotation tag
    /// (including the end of the buffer).
    fn next(&mut self) -> Option<Self::Item> {
        let inner = &mut self.0;
        match tag_at(inner.packed, inner.offset) {
            Ok(Tag::Annotation) => {
                // Step over the annotation tag; the annotation's value follows.
                inner.offset += 1;
                inner.next()
            }
            _ => None,
        }
    }
}

View File

@ -1,6 +1,5 @@
use bytemuck::TransparentWrapper;
use std::any::Any;
use std::borrow::Borrow;
use std::borrow::Cow;
use std::cmp::Ordering;
@ -31,14 +30,16 @@ pub type PlainValue<'va, D = IOValue> = Box<dyn ValueImpl<D> + 'va>;
pub enum Value<'r, D: Domain = IOValue> {
Borrowed(&'r (dyn ValueImpl<D> + 'r)),
Shell(PlainValue<'r, D>),
Owned(PlainValue<'static, D>),
}
impl<'r, D: Domain> Value<'r, D> {
pub fn into_owned(self: Value<'r, D>) -> PlainValue<'static, D> {
match self {
Value::Borrowed(r) => return r.value_clone(),
Value::Owned(v) => return v,
Value::Borrowed(r) => r.value_clone(),
Value::Shell(v) => v.value_clone(),
Value::Owned(v) => v,
}
}
}
@ -61,6 +62,7 @@ impl<'r, D: Domain> Deref for Value<'r, D> {
fn deref(&self) -> &Self::Target {
match self {
Value::Borrowed(r) => r,
Value::Shell(v) => v,
Value::Owned(v) => v,
}
}
@ -148,15 +150,17 @@ pub trait ValueImpl<D: Domain> {
fn is_embedded(&self) -> bool { false }
fn embedded(&self) -> Cow<'_, D> { panic!("Not an embedded value") }
fn annotations(&self) -> Option<&[IOValue]> { None }
fn specialized(&self) -> Option<&dyn Any> { None }
fn annotations(&self) -> Option<Cow<'_, [IOValue]>> { None }
}
/// Wraps a reference to `v` as a borrowed [`Value`]; nothing is copied or boxed.
pub fn value<D: Domain, V: ValueImpl<D>>(v: &V) -> Value<'_, D> {
Value::Borrowed(v)
}
/// Boxes `v` and wraps it as a `Value::Shell`, which owns the box while `v`
/// itself may still borrow data for `'r`.
pub fn shell<'r, D: Domain, V: ValueImpl<D> + 'r>(v: V) -> Value<'r, D> {
Value::Shell(Box::new(v))
}
/// Boxes a `'static` implementation `v` as a [`PlainValue`].
pub fn owned<'va, D: Domain, V: ValueImpl<D> + 'static>(v: V) -> PlainValue<'va, D> {
Box::new(v)
}
@ -234,7 +238,7 @@ impl<'a, D: Domain, V: ValueImpl<D> + ?Sized> ValueImpl<D> for &'a V {
fn entries(&self) -> Box<dyn Iterator<Item = (Value<'_, D>, Value<'_, D>)> + '_> { (*self).entries() }
fn is_embedded(&self) -> bool { (*self).is_embedded() }
fn embedded(&self) -> Cow<'_, D> { (*self).embedded() }
fn annotations(&self) -> Option<&[IOValue]> { (*self).annotations() }
fn annotations(&self) -> Option<Cow<'_, [IOValue]>> { (*self).annotations() }
}
impl<'va, D: Domain> ValueImpl<D> for PlainValue<'va, D> {
@ -262,7 +266,7 @@ impl<'va, D: Domain> ValueImpl<D> for PlainValue<'va, D> {
fn entries(&self) -> Box<dyn Iterator<Item = (Value<'_, D>, Value<'_, D>)> + '_> { self.as_ref().entries() }
fn is_embedded(&self) -> bool { self.as_ref().is_embedded() }
fn embedded(&self) -> Cow<'_, D> { self.as_ref().embedded() }
fn annotations(&self) -> Option<&[IOValue]> { self.as_ref().annotations() }
fn annotations(&self) -> Option<Cow<'_, [IOValue]>> { self.as_ref().annotations() }
}
impl<'a, D: Domain> Debug for dyn ValueImpl<D> + 'a {
@ -315,21 +319,6 @@ impl<'a, D: Domain> Hash for dyn ValueImpl<D> + 'a {
}
}
fn iters_eq<'a, D: Domain>(
mut i1: Box<dyn Iterator<Item = Value<'_, D>> + 'a>,
mut i2: Box<dyn Iterator<Item = Value<'_, D>> + 'a>,
) -> bool {
loop {
match i1.next() {
None => return i2.next().is_none(),
Some(v1) => match i2.next() {
None => return false,
Some(v2) => if v1 != v2 { return false; },
}
}
}
}
impl<'a, D: Domain> PartialEq for dyn ValueImpl<D> + 'a {
fn eq(&self, other: &Self) -> bool {
let cls = self.value_class();
@ -354,10 +343,10 @@ impl<'a, D: Domain> PartialEq for dyn ValueImpl<D> + 'a {
ValueClass::Compound(c) => match c {
CompoundClass::Record => {
if self.label() != other.label() { return false; }
iters_eq(self.iter(), other.iter())
self.iter().eq(other.iter())
}
CompoundClass::Sequence => {
iters_eq(self.iter(), other.iter())
self.iter().eq(other.iter())
}
CompoundClass::Set => {
let s1 = self.iter().collect::<Set<_>>();
@ -375,27 +364,6 @@ impl<'a, D: Domain> PartialEq for dyn ValueImpl<D> + 'a {
}
}
fn iters_cmp<'a, D: Domain>(
mut i1: Box<dyn Iterator<Item = Value<'_, D>> + 'a>,
mut i2: Box<dyn Iterator<Item = Value<'_, D>> + 'a>,
) -> Ordering {
loop {
match i1.next() {
None => match i2.next() {
None => return Ordering::Equal,
Some(_) => return Ordering::Less,
}
Some(v1) => match i2.next() {
None => return Ordering::Greater,
Some(v2) => match v1.cmp(&v2) {
Ordering::Equal => (),
other => return other,
}
}
}
}
}
impl<'a, D: Domain> Ord for dyn ValueImpl<D> + 'a {
fn cmp(&self, other: &Self) -> Ordering {
let cls = self.value_class();
@ -419,8 +387,8 @@ impl<'a, D: Domain> Ord for dyn ValueImpl<D> + 'a {
ValueClass::Compound(c) => match c {
CompoundClass::Record =>
self.label().cmp(&other.label()).then_with(
|| iters_cmp(self.iter(), other.iter())),
CompoundClass::Sequence => iters_cmp(self.iter(), other.iter()),
|| self.iter().cmp(other.iter())),
CompoundClass::Sequence => self.iter().cmp(other.iter()),
CompoundClass::Set => {
let s1 = self.iter().collect::<Set<_>>();
let s2 = other.iter().collect::<Set<_>>();
@ -921,7 +889,7 @@ impl<D: Domain, V: ValueImpl<D>> ValueImpl<D> for Annotations<D, V> {
fn entries(&self) -> Box<dyn Iterator<Item = (Value<'_, D>, Value<'_, D>)> + '_> { self.value().entries() }
fn is_embedded(&self) -> bool { self.value().is_embedded() }
fn embedded(&self) -> Cow<'_, D> { self.value().embedded() }
fn annotations(&self) -> Option<&[IOValue]> { Some(&self.1) }
fn annotations(&self) -> Option<Cow<'_, [IOValue]>> { Some(Cow::Borrowed(&self.1)) }
}
impl<D: Domain, V: ValueImpl<D>> PartialEq for Annotations<D, V> {
@ -1047,7 +1015,7 @@ impl<D: Domain> ValueImpl<D> for ArcValue<D> {
fn entries(&self) -> Box<dyn Iterator<Item = (Value<'_, D>, Value<'_, D>)> + '_> { self.0.entries() }
fn is_embedded(&self) -> bool { self.0.is_embedded() }
fn embedded(&self) -> Cow<'_, D> { self.0.embedded() }
fn annotations(&self) -> Option<&[IOValue]> { self.0.annotations() }
fn annotations(&self) -> Option<Cow<'_, [IOValue]>> { self.0.annotations() }
}
impl ValueImpl<IOValue> for IOValue {
@ -1075,5 +1043,5 @@ impl ValueImpl<IOValue> for IOValue {
fn entries(&self) -> Box<dyn Iterator<Item = (Value<'_, IOValue>, Value<'_, IOValue>)> + '_> { self.0.entries() }
fn is_embedded(&self) -> bool { self.0.is_embedded() }
fn embedded(&self) -> Cow<'_, IOValue> { self.0.embedded() }
fn annotations(&self) -> Option<&[IOValue]> { self.0.annotations() }
fn annotations(&self) -> Option<Cow<'_, [IOValue]>> { self.0.annotations() }
}

View File

@ -163,6 +163,9 @@ impl<W: io::Write> Writer for TextWriter<W> {
(Some(B::Item::Annotation), Some(B::Item::AnnotatedValue)) => {
return write!(self.w, " ")
}
(None, Some(B::Item::AnnotatedValue)) |
// ^ strictly speaking, this combination is not permitted; a ValueImpl that yields
// a zero-length vector of annotations instead of `None` is in error.
(Some(B::Item::AnnotatedValue), None) =>
return Ok(()),

View File

@ -76,16 +76,23 @@ pub fn write_value<D: Domain, V: ValueImpl<D>>(
let annotations = v.annotations();
let mut annotation_b = B::Type::default();
if let Some(anns) = annotations {
w.start_annotations()?;
for ann in anns {
annotation_b.shift(Some(B::Item::Annotation));
let has_annotations = if let Some(anns) = annotations {
if anns.is_empty() {
false
} else {
w.start_annotations()?;
for ann in &anns[..] {
annotation_b.shift(Some(B::Item::Annotation));
w.boundary(&annotation_b)?;
ann.write(w, &mut IOValueDomainCodec)?;
}
annotation_b.shift(Some(B::Item::AnnotatedValue));
w.boundary(&annotation_b)?;
ann.write(w, &mut IOValueDomainCodec)?;
true
}
annotation_b.shift(Some(B::Item::AnnotatedValue));
w.boundary(&annotation_b)?;
}
} else {
false
};
match v.value_class() {
ValueClass::Atomic(a) => match a {
@ -159,7 +166,7 @@ pub fn write_value<D: Domain, V: ValueImpl<D>>(
}
}
if let Some(_) = annotations {
if has_annotations {
annotation_b.shift(None);
w.boundary(&annotation_b)?;
w.end_annotations()?

View File

@ -20,3 +20,13 @@ fn read_samples_bin() -> io::Result<()> {
println!("{:#?}", annotated_iovalue_from_bytes(&contents)?);
Ok(())
}
/// Smoke test: loads the binary samples file, views it in place and
/// pretty-prints the result.
#[test]
fn read_samples_view() -> io::Result<()> {
    let contents = std::fs::read("../../../tests/samples.bin")?;
    let v = packed::view::View::new(contents)?;
    println!("{:#?}", value(&v));
    Ok(())
}