From dbc2a0f14c605d0f84fe11dfb2509f828d88cf7b Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Thu, 3 Nov 2022 17:17:31 +0100 Subject: [PATCH] Progress! --- implementations/rust/oo/Cargo.toml | 2 - implementations/rust/oo/src/boundary.rs | 46 + implementations/rust/oo/src/domain.rs | 144 +++ implementations/rust/oo/src/error.rs | 67 ++ implementations/rust/oo/src/lib.rs | 551 +----------- .../rust/oo/src/packed/constants.rs | 91 ++ implementations/rust/oo/src/packed/mod.rs | 32 + implementations/rust/oo/src/packed/reader.rs | 584 +++++++++++++ implementations/rust/oo/src/packed/writer.rs | 395 +++++++++ implementations/rust/oo/src/reader.rs | 362 ++++++++ implementations/rust/oo/src/repr.rs | 826 ++++++++++++++++++ implementations/rust/oo/src/signed_integer.rs | 36 +- implementations/rust/oo/src/source.rs | 223 +++++ implementations/rust/oo/src/text/mod.rs | 33 + implementations/rust/oo/src/text/reader.rs | 613 +++++++++++++ implementations/rust/oo/src/text/writer.rs | 305 +++++++ implementations/rust/oo/src/types.rs | 28 + implementations/rust/oo/src/writer.rs | 62 ++ 18 files changed, 3861 insertions(+), 539 deletions(-) create mode 100644 implementations/rust/oo/src/boundary.rs create mode 100644 implementations/rust/oo/src/domain.rs create mode 100644 implementations/rust/oo/src/error.rs create mode 100644 implementations/rust/oo/src/packed/constants.rs create mode 100644 implementations/rust/oo/src/packed/mod.rs create mode 100644 implementations/rust/oo/src/packed/reader.rs create mode 100644 implementations/rust/oo/src/packed/writer.rs create mode 100644 implementations/rust/oo/src/reader.rs create mode 100644 implementations/rust/oo/src/repr.rs create mode 100644 implementations/rust/oo/src/source.rs create mode 100644 implementations/rust/oo/src/text/mod.rs create mode 100644 implementations/rust/oo/src/text/reader.rs create mode 100644 implementations/rust/oo/src/text/writer.rs create mode 100644 implementations/rust/oo/src/types.rs create mode 
100644 implementations/rust/oo/src/writer.rs diff --git a/implementations/rust/oo/Cargo.toml b/implementations/rust/oo/Cargo.toml index b796a95..66fba05 100644 --- a/implementations/rust/oo/Cargo.toml +++ b/implementations/rust/oo/Cargo.toml @@ -11,8 +11,6 @@ bytemuck = "1.12" dtoa = "0.4" num = "0.4" regex = "1.5" -serde = { version = "1.0", features = ["derive"] } -serde_bytes = "0.11" [package.metadata.workspaces] independent = true diff --git a/implementations/rust/oo/src/boundary.rs b/implementations/rust/oo/src/boundary.rs new file mode 100644 index 0000000..86e85cf --- /dev/null +++ b/implementations/rust/oo/src/boundary.rs @@ -0,0 +1,46 @@ +#[derive(Default, Clone, Debug)] +pub struct Type { + pub closing: Option, + pub opening: Option, +} + +#[derive(Clone, Debug)] +pub enum Item { + Annotation, + AnnotatedValue, + DictionaryKey, + DictionaryValue, + RecordField, + RecordLabel, + SequenceValue, + SetValue, +} + +impl Type { + #[inline] + pub fn shift(&mut self, i: Option) { + let tmp = std::mem::replace(&mut self.opening, i); + self.closing = tmp; + } +} + +pub fn start(i: Item) -> Type { + Type { + closing: None, + opening: Some(i), + } +} + +pub fn mid(c: Item, o: Item) -> Type { + Type { + closing: Some(c), + opening: Some(o), + } +} + +pub fn end(i: Item) -> Type { + Type { + closing: Some(i), + opening: None, + } +} diff --git a/implementations/rust/oo/src/domain.rs b/implementations/rust/oo/src/domain.rs new file mode 100644 index 0000000..d3ada3a --- /dev/null +++ b/implementations/rust/oo/src/domain.rs @@ -0,0 +1,144 @@ +use std::io; + +use super::IOValue; +use super::Reader; +use super::Writer; +use super::Value; + +pub trait Domain: std::fmt::Debug + Eq + std::hash::Hash + Ord + Clone { + type Decode: DomainDecode + Default; + type Encode: DomainEncode + Default; +} + +pub trait DomainDecode { + fn decode_embedded<'de, R: Reader<'de>>( + &mut self, + r: &mut R, + read_annotations: bool, + ) -> io::Result; +} + +pub trait DomainEncode { + fn 
encode_embedded( + &mut self, + w: &mut dyn Writer, + d: &D, + ) -> io::Result<()>; +} + +impl<'a, D: Domain, T: DomainDecode> DomainDecode for &'a mut T { + fn decode_embedded<'de, R: Reader<'de>>( + &mut self, + r: &mut R, + read_annotations: bool, + ) -> io::Result { + (**self).decode_embedded(r, read_annotations) + } +} + +impl<'a, D: Domain, T: DomainEncode> DomainEncode for &'a mut T { + fn encode_embedded( + &mut self, + w: &mut dyn Writer, + d: &D, + ) -> io::Result<()> { + (**self).encode_embedded(w, d) + } +} + +#[derive(Default)] +pub struct DefaultDomainCodec; + +impl DomainDecode for DefaultDomainCodec { + fn decode_embedded<'de, R: Reader<'de>>( + &mut self, + r: &mut R, + read_annotations: bool, + ) -> io::Result { + D::Decode::default().decode_embedded(r, read_annotations) + } +} + +impl DomainEncode for DefaultDomainCodec { + fn encode_embedded( + &mut self, + w: &mut dyn Writer, + d: &D, + ) -> io::Result<()> { + D::Encode::default().encode_embedded(w, d) + } +} + +#[derive(Default)] +pub struct DebugDomainCodec; + +impl, D: Domain + std::str::FromStr> DomainDecode for DebugDomainCodec { + fn decode_embedded<'de, R: Reader<'de>>( + &mut self, + r: &mut R, + _read_annotations: bool, + ) -> io::Result { + r.next_str()?.parse().map_err(|e: Err| e.into()) + } +} + +impl DomainEncode for DebugDomainCodec { + fn encode_embedded( + &mut self, + w: &mut dyn Writer, + d: &D, + ) -> io::Result<()> { + w.write_string(&format!("{:?}", d)) + } +} + +#[derive(Default)] +pub struct NoEmbeddedDomainCodec; + +impl DomainDecode for NoEmbeddedDomainCodec { + fn decode_embedded<'de, R: Reader<'de>>( + &mut self, + _r: &mut R, + _read_annotations: bool, + ) -> io::Result { + Err(io::Error::new(io::ErrorKind::Unsupported, "Embedded values not supported here")) + } +} + +impl DomainEncode for NoEmbeddedDomainCodec { + fn encode_embedded( + &mut self, + _w: &mut dyn Writer, + _d: &D, + ) -> io::Result<()> { + Err(io::Error::new(io::ErrorKind::Unsupported, "Embedded 
values not supported here")) + } +} + +#[derive(Default)] +pub struct IOValueDomainCodec; + +impl Domain for IOValue { + type Decode = IOValueDomainCodec; + type Encode = IOValueDomainCodec; +} + +impl DomainDecode for IOValueDomainCodec { + fn decode_embedded<'de, R: Reader<'de>>( + &mut self, + r: &mut R, + read_annotations: bool, + ) -> io::Result { + Ok(r.next_iovalue(read_annotations)?) + } +} + +impl DomainEncode for IOValueDomainCodec { + fn encode_embedded( + &mut self, + w: &mut dyn Writer, + d: &IOValue, + ) -> io::Result<()> { + d.write(w, self) + } +} diff --git a/implementations/rust/oo/src/error.rs b/implementations/rust/oo/src/error.rs new file mode 100644 index 0000000..d68063d --- /dev/null +++ b/implementations/rust/oo/src/error.rs @@ -0,0 +1,67 @@ +use num::bigint::BigInt; +use std::convert::From; +use std::io; + +#[derive(Debug)] +pub enum Error { + Io(io::Error), + Message(String), + InvalidUnicodeScalar(u32), + NumberOutOfRange(BigInt), + MissingCloseDelimiter, + MissingItem, + Expected(ExpectedKind), +} + +#[derive(Debug, PartialEq)] +pub enum ExpectedKind { + Boolean, + Float, + Double, + + SignedIntegerI128, + SignedIntegerU128, + SignedInteger, + String, + ByteString, + Symbol, + + Record, + SimpleRecord(String), + Sequence, + Set, + Dictionary, + + Embedded, + + Option, + UnicodeScalar, +} + +impl From for Error { + fn from(e: io::Error) -> Self { + Error::Io(e) + } +} + +impl From for io::Error { + fn from(e: Error) -> Self { + match e { + Error::Io(ioe) => ioe, + Error::Message(str) => io::Error::new(io::ErrorKind::Other, str), + _ => io::Error::new(io::ErrorKind::Other, e), + } + } +} + +impl std::error::Error for Error {} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +pub fn io_eof() -> io::Error { + io::Error::new(io::ErrorKind::UnexpectedEof, "EOF") +} diff --git a/implementations/rust/oo/src/lib.rs b/implementations/rust/oo/src/lib.rs 
index 62953ee..767c6d1 100644 --- a/implementations/rust/oo/src/lib.rs +++ b/implementations/rust/oo/src/lib.rs @@ -1,529 +1,34 @@ -use bytemuck::TransparentWrapper; -use float::{eq_f32, eq_f64, cmp_f32, cmp_f64}; - -use std::borrow::{Cow, Borrow}; -use std::cmp::Ordering; -use std::fmt::Debug; -use std::hash::{Hash, Hasher}; -use std::vec::Vec; - -pub use std::collections::BTreeSet as Set; -pub use std::collections::BTreeMap as Map; - +pub mod boundary; +pub mod domain; +pub mod error; pub mod float; +pub mod reader; +pub mod repr; pub mod signed_integer; +pub mod source; +pub mod types; +pub mod writer; +pub use domain::Domain; +pub use reader::Reader; +pub use repr::Annotations; +pub use repr::Atom; +pub use repr::Bytes; +pub use repr::Embedded; +pub use repr::IOValue; +pub use repr::Map; +pub use repr::NoValue; +pub use repr::Record; +pub use repr::Set; +pub use repr::Symbol; +pub use repr::Value; +pub use repr::owned; +pub use repr::value; pub use signed_integer::SignedInteger; - -/// The kinds of `Value` from the specification. -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum ValueClass { - Atomic(AtomClass), - Compound(CompoundClass), - Embedded, -} - -/// The kinds of `Atom` from the specification. -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum AtomClass { - Boolean, - Float, - Double, - SignedInteger, - String, - ByteString, - Symbol, -} - -/// The kinds of `Compound` from the specification. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum CompoundClass { - Record, - Sequence, - Set, - Dictionary, -} - -pub trait Domain: Debug + Eq + Hash + Ord + Clone {} - -#[derive(Debug)] -pub struct OutOfRange; - -pub trait Value: Debug { - fn value_class(&self) -> ValueClass; - - fn as_boolean(&self) -> Option { None } - fn as_float(&self) -> Option { None } - fn as_double(&self) -> Option { None } - - fn is_signed_integer(&self) -> bool { false } - fn as_signed_integer(&self) -> Option { None } - - fn as_string(&self) -> Option> { None } - fn as_bytestring(&self) -> Option> { None } - fn as_symbol(&self) -> Option> { None } - - fn is_record(&self) -> bool { false } - fn label(&self) -> &dyn Value { panic!("Not a record") } - - fn is_sequence(&self) -> bool { false } - fn len(&self) -> usize { panic!("Has no length") } - fn index(&self, _i: usize) -> &dyn Value { panic!("Not indexable") } - fn iter(&self) -> Box> + '_> { panic!("Not iterable") } - - fn is_set(&self) -> bool { false } - fn has(&self, _v: &dyn Value) -> bool { false } - - fn is_dictionary(&self) -> bool { false } - fn get(&self, _k: &dyn Value) -> Option<&dyn Value> { None } - fn entries(&self) -> Box, &dyn Value)> + '_> { panic!("Not a dictionary") } - - fn is_embedded(&self) -> bool { false } - fn embedded(&self) -> Cow<'_, D> { panic!("Not an embedded value") } - - fn annotations(&self) -> Option<&[Box>]> { None } -} - -pub fn value>(v: &V) -> &dyn Value { - v -} - -pub fn owned + 'static>(v: V) -> Box> { - Box::new(v) -} - -impl<'a, D: Domain, V: Value + ?Sized> Value for &'a V { - fn value_class(&self) -> ValueClass { (*self).value_class() } - fn as_boolean(&self) -> Option { (*self).as_boolean() } - fn as_float(&self) -> Option { (*self).as_float() } - fn as_double(&self) -> Option { (*self).as_double() } - fn is_signed_integer(&self) -> bool { (*self).is_signed_integer() } - fn as_signed_integer(&self) -> Option { (*self).as_signed_integer() } - fn as_string(&self) -> 
Option> { (*self).as_string() } - fn as_bytestring(&self) -> Option> { (*self).as_bytestring() } - fn as_symbol(&self) -> Option> { (*self).as_symbol() } - fn is_record(&self) -> bool { (*self).is_record() } - fn label(&self) -> &dyn Value { (*self).label() } - fn is_sequence(&self) -> bool { (*self).is_sequence() } - fn len(&self) -> usize { (*self).len() } - fn index(&self, i: usize) -> &dyn Value { (*self).index(i) } - fn iter(&self) -> Box> + '_> { (*self).iter() } - fn is_set(&self) -> bool { (*self).is_set() } - fn has(&self, v: &dyn Value) -> bool { (*self).has(v) } - fn is_dictionary(&self) -> bool { (*self).is_dictionary() } - fn get(&self, k: &dyn Value) -> Option<&dyn Value> { (*self).get(k) } - fn entries(&self) -> Box, &dyn Value)> + '_> { (*self).entries() } - fn is_embedded(&self) -> bool { (*self).is_embedded() } - fn embedded(&self) -> Cow<'_, D> { (*self).embedded() } - fn annotations(&self) -> Option<&[Box>]> { (*self).annotations() } -} - -impl Value for Box> { - fn value_class(&self) -> ValueClass { self.as_ref().value_class() } - fn as_boolean(&self) -> Option { self.as_ref().as_boolean() } - fn as_float(&self) -> Option { self.as_ref().as_float() } - fn as_double(&self) -> Option { self.as_ref().as_double() } - fn is_signed_integer(&self) -> bool { self.as_ref().is_signed_integer() } - fn as_signed_integer(&self) -> Option { self.as_ref().as_signed_integer() } - fn as_string(&self) -> Option> { self.as_ref().as_string() } - fn as_bytestring(&self) -> Option> { self.as_ref().as_bytestring() } - fn as_symbol(&self) -> Option> { self.as_ref().as_symbol() } - fn is_record(&self) -> bool { self.as_ref().is_record() } - fn label(&self) -> &dyn Value { self.as_ref().label() } - fn is_sequence(&self) -> bool { self.as_ref().is_sequence() } - fn len(&self) -> usize { self.as_ref().len() } - fn index(&self, i: usize) -> &dyn Value { self.as_ref().index(i) } - fn iter(&self) -> Box> + '_> { self.as_ref().iter() } - fn is_set(&self) -> bool { 
self.as_ref().is_set() } - fn has(&self, v: &dyn Value) -> bool { self.as_ref().has(v) } - fn is_dictionary(&self) -> bool { self.as_ref().is_dictionary() } - fn get(&self, k: &dyn Value) -> Option<&dyn Value> { self.as_ref().get(k) } - fn entries(&self) -> Box, &dyn Value)> + '_> { self.as_ref().entries() } - fn is_embedded(&self) -> bool { self.as_ref().is_embedded() } - fn embedded(&self) -> Cow<'_, D> { self.as_ref().embedded() } - fn annotations(&self) -> Option<&[Box>]> { self.as_ref().annotations() } -} - -impl<'a, D: Domain> Hash for dyn Value + 'a { - fn hash(&self, state: &mut H) { - match self.value_class() { - ValueClass::Atomic(a) => match a { - AtomClass::Boolean => self.as_boolean().unwrap().hash(state), - AtomClass::Float => self.as_float().unwrap().to_bits().hash(state), - AtomClass::Double => self.as_double().unwrap().to_bits().hash(state), - AtomClass::SignedInteger => self.as_signed_integer().unwrap().hash(state), - AtomClass::String => self.as_string().unwrap().hash(state), - AtomClass::ByteString => self.as_bytestring().unwrap().hash(state), - AtomClass::Symbol => self.as_symbol().unwrap().hash(state), - } - ValueClass::Compound(c) => match c { - CompoundClass::Sequence | - CompoundClass::Set => { - state.write_usize(self.len()); - for v in self.iter() { v.hash(state) } - } - CompoundClass::Record => { - self.label().hash(state); - state.write_usize(self.len()); - for v in self.iter() { v.hash(state) } - } - CompoundClass::Dictionary => { - state.write_usize(self.len()); - for (k, v) in self.entries() { - k.hash(state); - v.hash(state); - } - } - } - ValueClass::Embedded => self.embedded().hash(state), - } - } -} - -fn iters_eq<'a, D: Domain>( - mut i1: Box> + 'a>, - mut i2: Box> + 'a>, -) -> bool { - loop { - match i1.next() { - None => return i2.next().is_none(), - Some(v1) => match i2.next() { - None => return false, - Some(v2) => if v1 != v2 { return false; }, - } - } - } -} - -impl<'a, D: Domain> PartialEq for dyn Value + 'a { - fn 
eq(&self, other: &Self) -> bool { - let cls = self.value_class(); - if cls != other.value_class() { return false; } - match cls { - ValueClass::Atomic(a) => match a { - AtomClass::Boolean => - self.as_boolean().unwrap() == other.as_boolean().unwrap(), - AtomClass::Float => - eq_f32(self.as_float().unwrap(), other.as_float().unwrap()), - AtomClass::Double => - eq_f64(self.as_double().unwrap(), other.as_double().unwrap()), - AtomClass::SignedInteger => - self.as_signed_integer().unwrap() == other.as_signed_integer().unwrap(), - AtomClass::String => - self.as_string().unwrap() == other.as_string().unwrap(), - AtomClass::ByteString => - self.as_bytestring().unwrap() == other.as_bytestring().unwrap(), - AtomClass::Symbol => - self.as_symbol().unwrap() == other.as_symbol().unwrap(), - } - ValueClass::Compound(c) => match c { - CompoundClass::Record => { - if self.label() != other.label() { return false; } - iters_eq(self.iter(), other.iter()) - } - CompoundClass::Sequence => { - iters_eq(self.iter(), other.iter()) - } - CompoundClass::Set => { - let s1 = self.iter().collect::>(); - let s2 = other.iter().collect::>(); - s1 == s2 - } - CompoundClass::Dictionary => { - let d1 = self.entries().collect::>(); - let d2 = other.entries().collect::>(); - d1 == d2 - } - } - ValueClass::Embedded => self.embedded() == other.embedded(), - } - } -} - -fn iters_cmp<'a, D: Domain>( - mut i1: Box> + 'a>, - mut i2: Box> + 'a>, -) -> Ordering { - loop { - match i1.next() { - None => match i2.next() { - None => return Ordering::Equal, - Some(_) => return Ordering::Less, - } - Some(v1) => match i2.next() { - None => return Ordering::Greater, - Some(v2) => match v1.cmp(v2) { - Ordering::Equal => (), - other => return other, - } - } - } - } -} - -impl<'a, D: Domain> Ord for dyn Value + 'a { - fn cmp(&self, other: &Self) -> Ordering { - let cls = self.value_class(); - cls.cmp(&other.value_class()).then_with(|| match cls { - ValueClass::Atomic(a) => match a { - AtomClass::Boolean => - 
self.as_boolean().cmp(&other.as_boolean()), - AtomClass::Float => - cmp_f32(self.as_float().unwrap(), other.as_float().unwrap()), - AtomClass::Double => - cmp_f64(self.as_double().unwrap(), other.as_double().unwrap()), - AtomClass::SignedInteger => - self.as_signed_integer().cmp(&other.as_signed_integer()), - AtomClass::String => - self.as_string().cmp(&other.as_string()), - AtomClass::ByteString => - self.as_bytestring().cmp(&other.as_bytestring()), - AtomClass::Symbol => - self.as_symbol().cmp(&other.as_symbol()), - }, - ValueClass::Compound(c) => match c { - CompoundClass::Record => - self.label().cmp(other.label()).then_with( - || iters_cmp(self.iter(), other.iter())), - CompoundClass::Sequence => iters_cmp(self.iter(), other.iter()), - CompoundClass::Set => { - let s1 = self.iter().collect::>(); - let s2 = other.iter().collect::>(); - s1.cmp(&s2) - } - CompoundClass::Dictionary => { - let d1 = self.entries().collect::>(); - let d2 = other.entries().collect::>(); - d1.cmp(&d2) - } - }, - ValueClass::Embedded => self.embedded().cmp(&other.embedded()), - }) - } -} - -impl<'a, D: Domain> Eq for dyn Value + 'a {} - -impl<'a, D: Domain> PartialOrd for dyn Value + 'a { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum NoValue {} - -impl Domain for NoValue {} - -impl Value for NoValue { - fn value_class(&self) -> ValueClass { unreachable!() } -} - -impl Value for bool { - fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::Boolean) } - fn as_boolean(&self) -> Option { Some(*self) } -} - -impl Value for u64 { - fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::SignedInteger) } - fn as_signed_integer(&self) -> Option { - Some((*self).into()) - } -} - -impl Value for f32 { - fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::Float) } - fn as_float(&self) -> Option { Some(*self) } - fn as_double(&self) -> Option { 
Some(*self as f64) } -} - -impl Value for f64 { - fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::Float) } - fn as_float(&self) -> Option { Some(*self as f32) } - fn as_double(&self) -> Option { Some(*self) } -} - -impl Value for str { - fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::String) } - fn as_string(&self) -> Option> { Some(Cow::Borrowed(self)) } -} - -impl Value for String { - fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::String) } - fn as_string(&self) -> Option> { Some(Cow::Borrowed(self)) } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[repr(transparent)] -pub struct Bytes>(T); - -impl + Debug, D: Domain> Value for Bytes { - fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::ByteString) } - fn as_bytestring(&self) -> Option> { Some(Cow::Borrowed(self.0.as_ref())) } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[repr(transparent)] -pub struct Symbol + Debug>(T); - -impl + Debug, D: Domain> Value for Symbol { - fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::Symbol) } - fn as_symbol(&self) -> Option> { Some(Cow::Borrowed(self.0.as_ref())) } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[repr(transparent)] -pub struct Record(Vec /* at least one element, for the label */); - -impl Record { - pub fn new(label: V, mut fields: Vec) -> Self { - fields.insert(0, label); - Record(fields) - } -} - -impl> Value for Record { - fn value_class(&self) -> ValueClass { ValueClass::Compound(CompoundClass::Record) } - fn is_record(&self) -> bool { true } - fn label(&self) -> &dyn Value { &self.0[0] } - fn len(&self) -> usize { self.0.len() - 1 } - fn index(&self, i: usize) -> &dyn Value { &self.0[i + 1] } - fn iter(&self) -> Box> + '_> { - Box::new(self.0[1..].iter().map(value)) - } -} - -impl> Value for Vec { - fn value_class(&self) -> ValueClass { ValueClass::Compound(CompoundClass::Sequence) } - fn 
is_sequence(&self) -> bool { true } - fn len(&self) -> usize { self.len() } - fn index(&self, i: usize) -> &dyn Value { &self[i] } - fn iter(&self) -> Box> + '_> { - Box::new(self[..].iter().map(value)) - } -} - -impl> Value for [V] { - fn value_class(&self) -> ValueClass { ValueClass::Compound(CompoundClass::Sequence) } - fn is_sequence(&self) -> bool { true } - fn len(&self) -> usize { self.len() } - fn index(&self, i: usize) -> &dyn Value { &self[i] } - fn iter(&self) -> Box> + '_> { - Box::new(self[..].iter().map(value)) - } -} - -impl Value for Set>> { - fn value_class(&self) -> ValueClass { ValueClass::Compound(CompoundClass::Set) } - fn is_set(&self) -> bool { true } - fn len(&self) -> usize { self.len() } - fn has(&self, v: &dyn Value) -> bool { self.contains(v) } - fn iter(&self) -> Box> + '_> { - Box::new(self.iter().map(value)) - } -} - -#[derive(PartialEq, Eq, PartialOrd, Ord)] -#[repr(transparent)] -struct Key<'a, D: Domain>(dyn Value + 'a); - -// Many thanks to SkiFire13 and the other participants in -// https://users.rust-lang.org/t/is-the-lifetime-of-a-btreemap-get-result-attached-to-the-key-as-well-as-the-map/83568/7 -// for the idea of using TransparentWrapper here. 
-// -unsafe impl<'a, D: Domain> TransparentWrapper + 'a> for Key<'a, D> {} -impl<'a, 'b: 'a, D: Domain> Borrow> for Box + 'b> { - fn borrow(&self) -> &Key<'a, D> { - Key::wrap_ref(&**self) - } -} - -impl> Value for Map>, V> { - fn value_class(&self) -> ValueClass { ValueClass::Compound(CompoundClass::Dictionary) } - fn is_dictionary(&self) -> bool { true } - fn len(&self) -> usize { self.len() } - fn has(&self, v: &dyn Value) -> bool { self.contains_key(v) } - fn get(&self, k: &dyn Value) -> Option<&dyn Value> { - match Map::get(self, Key::wrap_ref(&k)) { - Some(v) => Some(v), - None => None, - } - } - fn entries(&self) -> Box, &dyn Value)> + '_> { - Box::new(self.iter().map(|(k,v)| (value(k), value(v)))) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[repr(transparent)] -pub struct Embedded(D); - -impl Value for Embedded { - fn value_class(&self) -> ValueClass { ValueClass::Embedded } - fn is_embedded(&self) -> bool { true } - fn embedded(&self) -> Cow<'_, D> { Cow::Borrowed(&self.0) } -} - -#[derive(Debug)] -pub struct Annotations>(V, Vec>>); - -impl> Annotations { - pub fn value(&self) -> &dyn Value { - &self.0 - } -} - -impl> Value for Annotations { - fn value_class(&self) -> ValueClass { self.value().value_class() } - fn as_boolean(&self) -> Option { self.value().as_boolean() } - fn as_float(&self) -> Option { self.value().as_float() } - fn as_double(&self) -> Option { self.value().as_double() } - fn is_signed_integer(&self) -> bool { self.value().is_signed_integer() } - fn as_signed_integer(&self) -> Option { self.value().as_signed_integer() } - fn as_string(&self) -> Option> { self.value().as_string() } - fn as_bytestring(&self) -> Option> { self.value().as_bytestring() } - fn as_symbol(&self) -> Option> { self.value().as_symbol() } - fn is_record(&self) -> bool { self.value().is_record() } - fn label(&self) -> &dyn Value { self.value().label() } - fn is_sequence(&self) -> bool { self.value().is_sequence() } - fn len(&self) -> 
usize { self.value().len() } - fn index(&self, i: usize) -> &dyn Value { self.value().index(i) } - fn iter(&self) -> Box> + '_> { self.value().iter() } - fn is_set(&self) -> bool { self.value().is_set() } - fn has(&self, v: &dyn Value) -> bool { self.value().has(v) } - fn is_dictionary(&self) -> bool { self.value().is_dictionary() } - fn get(&self, k: &dyn Value) -> Option<&dyn Value> { self.value().get(k) } - fn entries(&self) -> Box, &dyn Value)> + '_> { self.value().entries() } - fn is_embedded(&self) -> bool { self.value().is_embedded() } - fn embedded(&self) -> Cow<'_, D> { self.value().embedded() } - fn annotations(&self) -> Option<&[Box>]> { Some(&self.1) } -} - -impl> PartialEq for Annotations { - fn eq(&self, other: &Self) -> bool { - self.value().eq(&other.value()) - } -} - -impl> Eq for Annotations {} - -impl> Hash for Annotations { - fn hash(&self, state: &mut H) { - self.value().hash(state); - } -} - -impl> PartialOrd for Annotations { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl> Ord for Annotations { - fn cmp(&self, other: &Self) -> Ordering { - self.value().cmp(&other.value()) - } -} +pub use types::AtomClass; +pub use types::CompoundClass; +pub use types::ValueClass; +pub use writer::Writer; #[cfg(test)] mod demo { @@ -534,7 +39,7 @@ mod demo { } #[test] fn a() { - let l = Symbol("label"); + let l = Symbol::new("label"); let r = Record::new(owned::(l.clone()), vec![owned(1), owned(2), owned(3)]); let r2 = Record::new(owned::(l), vec![owned(1), owned(2), owned(4)]); diff --git a/implementations/rust/oo/src/packed/constants.rs b/implementations/rust/oo/src/packed/constants.rs new file mode 100644 index 0000000..6e001e0 --- /dev/null +++ b/implementations/rust/oo/src/packed/constants.rs @@ -0,0 +1,91 @@ +use std::convert::{TryFrom, From}; +use std::io; + +#[derive(Debug, PartialEq, Eq)] +pub enum Tag { + False, + True, + Float, + Double, + End, + Annotation, + Embedded, + SmallInteger(i8), + 
MediumInteger(u8), + SignedInteger, + String, + ByteString, + Symbol, + Record, + Sequence, + Set, + Dictionary, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct InvalidTag(u8); + +impl From for io::Error { + fn from(v: InvalidTag) -> Self { + io::Error::new(io::ErrorKind::InvalidData, format!("Invalid Preserves tag {}", v.0)) + } +} + +impl From for crate::error::Error { + fn from(v: InvalidTag) -> Self { + crate::error::Error::Io(v.into()) + } +} + +impl TryFrom for Tag { + type Error = InvalidTag; + #[inline(always)] + fn try_from(v: u8) -> Result { + match v { + 0x80 => Ok(Self::False), + 0x81 => Ok(Self::True), + 0x82 => Ok(Self::Float), + 0x83 => Ok(Self::Double), + 0x84 => Ok(Self::End), + 0x85 => Ok(Self::Annotation), + 0x86 => Ok(Self::Embedded), + 0x90..=0x9c => Ok(Self::SmallInteger((v - 0x90) as i8)), + 0x9d..=0x9f => Ok(Self::SmallInteger((v - 0x90) as i8 - 16)), + 0xa0..=0xaf => Ok(Self::MediumInteger(v - 0xa0 + 1)), + 0xb0 => Ok(Self::SignedInteger), + 0xb1 => Ok(Self::String), + 0xb2 => Ok(Self::ByteString), + 0xb3 => Ok(Self::Symbol), + 0xb4 => Ok(Self::Record), + 0xb5 => Ok(Self::Sequence), + 0xb6 => Ok(Self::Set), + 0xb7 => Ok(Self::Dictionary), + _ => Err(InvalidTag(v)) + } + } +} + +impl From for u8 { + #[inline(always)] + fn from(v: Tag) -> Self { + match v { + Tag::False => 0x80, + Tag::True => 0x81, + Tag::Float => 0x82, + Tag::Double => 0x83, + Tag::End => 0x84, + Tag::Annotation => 0x85, + Tag::Embedded => 0x86, + Tag::SmallInteger(v) => if v < 0 { (v + 16) as u8 + 0x90 } else { v as u8 + 0x90 }, + Tag::MediumInteger(count) => count - 1 + 0xa0, + Tag::SignedInteger => 0xb0, + Tag::String => 0xb1, + Tag::ByteString => 0xb2, + Tag::Symbol => 0xb3, + Tag::Record => 0xb4, + Tag::Sequence => 0xb5, + Tag::Set => 0xb6, + Tag::Dictionary => 0xb7, + } + } +} diff --git a/implementations/rust/oo/src/packed/mod.rs b/implementations/rust/oo/src/packed/mod.rs new file mode 100644 index 0000000..8696306 --- /dev/null +++ 
b/implementations/rust/oo/src/packed/mod.rs @@ -0,0 +1,32 @@ +pub mod constants; +pub mod reader; +pub mod writer; + +pub use reader::PackedReader; +pub use writer::PackedWriter; + +use std::io; + +use super::{BinarySource, DomainDecode, IOValue, IOValueDomainCodec, NestedValue, Reader}; + +pub fn from_bytes<'de, N: NestedValue, Dec: DomainDecode>( + bs: &'de [u8], + decode_embedded: &mut Dec, +) -> io::Result { + super::BytesBinarySource::new(bs).packed().demand_next_domain(false, decode_embedded) +} + +pub fn iovalue_from_bytes(bs: &[u8]) -> io::Result { + from_bytes(bs, &mut IOValueDomainCodec) +} + +pub fn annotated_from_bytes<'de, N: NestedValue, Dec: DomainDecode>( + bs: &'de [u8], + decode_embedded: &mut Dec, +) -> io::Result { + super::BytesBinarySource::new(bs).packed().demand_next_domain(true, decode_embedded) +} + +pub fn annotated_iovalue_from_bytes(bs: &[u8]) -> io::Result { + annotated_from_bytes(bs, &mut IOValueDomainCodec) +} diff --git a/implementations/rust/oo/src/packed/reader.rs b/implementations/rust/oo/src/packed/reader.rs new file mode 100644 index 0000000..1031995 --- /dev/null +++ b/implementations/rust/oo/src/packed/reader.rs @@ -0,0 +1,584 @@ +use crate::error::{self, ExpectedKind, io_eof}; +use crate::value::Domain; + +use num::bigint::BigInt; +use num::traits::cast::{FromPrimitive, ToPrimitive}; + +use std::borrow::Cow; +use std::convert::TryFrom; +use std::convert::TryInto; +use std::io; +use std::marker::PhantomData; + +use super::constants::Tag; +use super::super::{ + CompoundClass, + DomainDecode, + Map, + NestedValue, + Record, + Set, + Value, + + boundary as B, + reader::{ + Token, + Reader, + ReaderResult, + }, + signed_integer::SignedInteger, + source::BinarySource, +}; + +pub struct PackedReader<'de, 'src, S: BinarySource<'de>> { + pub source: &'src mut S, + phantom: PhantomData<&'de ()>, +} + +impl<'de, 'src, S: BinarySource<'de>> BinarySource<'de> for PackedReader<'de, 'src, S> { + type Mark = S::Mark; + #[inline(always)] + 
fn mark(&mut self) -> io::Result { + self.source.mark() + } + #[inline(always)] + fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> { + self.source.restore(mark) + } + #[inline(always)] + fn skip(&mut self) -> io::Result<()> { + self.source.skip() + } + #[inline(always)] + fn peek(&mut self) -> io::Result> { + self.source.peek() + } + #[inline(always)] + fn readbytes(&mut self, count: u64) -> io::Result> { + self.source.readbytes(count) + } + #[inline(always)] + fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> { + self.source.readbytes_into(bs) + } + #[inline(always)] + fn input_position(&mut self) -> io::Result> { + self.source.input_position() + } + #[inline(always)] + fn discard(&mut self, count: u64) -> io::Result<()> { + self.source.discard(count) + } + #[inline(always)] + fn read_to_end(&mut self) -> io::Result> { + self.source.read_to_end() + } +} + +fn out_of_range>(i: I) -> error::Error { + error::Error::NumberOutOfRange(i.into()) +} + +impl<'de, 'src, S: BinarySource<'de>> PackedReader<'de, 'src, S> { + #[inline(always)] + pub fn new(source: &'src mut S) -> Self { + PackedReader { source, phantom: PhantomData } + } + + #[inline(always)] + fn peek_noeof(&mut self) -> io::Result { + self.peek()?.ok_or_else(io_eof) + } + + #[inline(always)] + fn read(&mut self) -> io::Result { + let v = self.peek_noeof()?; + self.skip()?; + Ok(v) + } + + #[inline(always)] + fn varint(&mut self) -> io::Result { + let mut shift = 0; + let mut acc: u64 = 0; + loop { + let v = self.read()?; + if shift == 63 && v > 1 { Err(error::Error::Message("PackedReader length too long".to_string()))? } + acc |= ((v & 0x7f) as u64) << shift; + shift += 7; + if v & 0x80 == 0 { return Ok(acc) } + if shift >= 70 { Err(error::Error::Message("PackedReader length too long".to_string()))? } + } + } + + #[inline(always)] + fn peekend(&mut self) -> io::Result { + if self.peek()? 
== Some(Tag::End.into()) { + self.skip()?; + Ok(true) + } else { + Ok(false) + } + } + + #[inline(always)] + fn peek_next_nonannotation_tag(&mut self) -> ReaderResult { + loop { + match Tag::try_from(self.peek_noeof()?)? { + Tag::Annotation => { + self.skip()?; + self.skip_value()?; + }, + other => return Ok(other), + } + } + } + + fn next_atomic(&mut self, expected_tag: Tag, k: ExpectedKind) -> ReaderResult> { + let actual_tag = self.peek_next_nonannotation_tag()?; + if actual_tag == expected_tag { + self.skip()?; + let count = self.varint()?; + Ok(self.readbytes(count)?) + } else { + Err(error::Error::Expected(k)) + } + } + + fn next_compound(&mut self, expected_tag: Tag, k: ExpectedKind) -> ReaderResult<()> + { + let actual_tag = self.peek_next_nonannotation_tag()?; + if actual_tag == expected_tag { + self.skip()?; + Ok(()) + } else { + Err(error::Error::Expected(k)) + } + } + + #[inline(always)] + fn read_signed_integer(&mut self, count: u64) -> io::Result { + let count_u64 = count; + let count: usize = count.try_into().map_err( + |_| error::Error::Message("Signed integer too long".to_string()))?; + + if count == 0 { + return Ok(SignedInteger::from(0_i128)); + } + + if count > 16 { + let bs = self.readbytes(count_u64)?; + if (bs[0] & 0x80) == 0 { + // Positive or zero. + let mut i = 0; + while i < count && bs[i] == 0 { i += 1; } + if count - i <= 16 { + Ok(SignedInteger::from(u128::from_be_bytes(bs[bs.len() - 16..].try_into().unwrap()))) + } else { + Ok(SignedInteger::from(Cow::Owned(BigInt::from_bytes_be(num::bigint::Sign::Plus, &bs[i..])))) + } + } else { + // Negative. 
+ let mut i = 0; + while i < count && bs[i] == 0xff { i += 1; } + if count - i <= 16 { + Ok(SignedInteger::from(i128::from_be_bytes(bs[bs.len() - 16..].try_into().unwrap()))) + } else { + Ok(SignedInteger::from(Cow::Owned(BigInt::from_signed_bytes_be(&bs)))) + } + } + } else { + let first_byte = self.read()?; + let prefix_byte = if (first_byte & 0x80) == 0 { 0x00 } else { 0xff }; + let mut bs = [prefix_byte; 16]; + bs[16 - count] = first_byte; + self.readbytes_into(&mut bs[16 - (count - 1)..])?; + Ok(SignedInteger::from(i128::from_be_bytes(bs))) + } + } + + #[inline(always)] + fn next_unsigned(&mut self, f: F) -> ReaderResult + where + F: FnOnce(u128) -> Option + { + let tag = self.peek_next_nonannotation_tag()?; + match tag { + Tag::SmallInteger(v) => { + self.skip()?; + if v < 0 { + Err(out_of_range(v)) + } else { + f(v as u128).ok_or_else(|| out_of_range(v)) + } + } + Tag::MediumInteger(count) => { + self.skip()?; + let n = &self.read_signed_integer(count.into())?; + let i = n.try_into().map_err(|_| out_of_range(n))?; + f(i).ok_or_else(|| out_of_range(i)) + } + Tag::SignedInteger => { + self.skip()?; + let count = self.varint()?; + let n = &self.read_signed_integer(count)?; + let i = n.try_into().map_err(|_| out_of_range(n))?; + f(i).ok_or_else(|| out_of_range(i)) + } + _ => Err(error::Error::Expected(ExpectedKind::SignedInteger)) + } + } + + #[inline(always)] + fn next_signed(&mut self, f: F) -> ReaderResult + where + F: FnOnce(i128) -> Option + { + let tag = self.peek_next_nonannotation_tag()?; + match tag { + Tag::SmallInteger(v) => { + self.skip()?; + f(v.into()).ok_or_else(|| out_of_range(v)) + } + Tag::MediumInteger(count) => { + self.skip()?; + let n = &self.read_signed_integer(count.into())?; + let i = n.try_into().map_err(|_| out_of_range(n))?; + f(i).ok_or_else(|| out_of_range(i)) + } + Tag::SignedInteger => { + self.skip()?; + let count = self.varint()?; + let n = &self.read_signed_integer(count)?; + let i = n.try_into().map_err(|_| out_of_range(n))?; 
+ f(i).ok_or_else(|| out_of_range(i)) + } + _ => Err(error::Error::Expected(ExpectedKind::SignedInteger)) + } + } + + fn gather_annotations>( + &mut self, + dec: &mut Dec, + ) -> io::Result> { + let mut annotations = vec![self.demand_next_domain(true, dec)?]; + while Tag::try_from(self.peek_noeof()?)? == Tag::Annotation { + self.skip()?; + annotations.push(self.demand_next_domain(true, dec)?); + } + Ok(annotations) + } + + fn skip_annotations(&mut self) -> io::Result<()> { + self.skip_value()?; + while Tag::try_from(self.peek_noeof()?)? == Tag::Annotation { + self.skip()?; + self.skip_value()?; + } + Ok(()) + } + + fn next_upto_end>( + &mut self, + read_annotations: bool, + dec: &mut Dec, + ) -> io::Result> { + match self.peekend()? { + true => Ok(None), + false => Ok(Some(self.demand_next_domain(read_annotations, dec)?)), + } + } + + #[inline(always)] + fn decodestr<'a>(&mut self, cow: Cow<'a, [u8]>) -> io::Result> { + match cow { + Cow::Borrowed(bs) => + Ok(Cow::Borrowed(std::str::from_utf8(bs).map_err(|_| self.syntax_error("Invalid UTF-8"))?)), + Cow::Owned(bs) => + Ok(Cow::Owned(String::from_utf8(bs).map_err(|_| self.syntax_error("Invalid UTF-8"))?)), + } + } +} + +impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for PackedReader<'de, 'src, S> { + fn next_domain>( + &mut self, + read_annotations: bool, + dec: &mut Dec, + ) -> io::Result> { + match self.peek()? { + None => return Ok(None), + Some(_) => (), + } + Ok(Some(match Tag::try_from(self.read()?)? 
{ + Tag::False => N::new(false), + Tag::True => N::new(true), + Tag::Float => { + let mut bs = [0; 4]; + self.readbytes_into(&mut bs)?; + Value::from(f32::from_bits(u32::from_be_bytes(bs))).wrap() + } + Tag::Double => { + let mut bs = [0; 8]; + self.readbytes_into(&mut bs)?; + Value::from(f64::from_bits(u64::from_be_bytes(bs))).wrap() + } + Tag::Annotation => { + if read_annotations { + let mut annotations = self.gather_annotations(dec)?; + let (existing_annotations, v) = self.demand_next_domain::(read_annotations, dec)?.pieces(); + if let Some(vs) = existing_annotations { + annotations.extend_from_slice(&vs[..]); + } + N::wrap(Some(Box::new(annotations)), v) + } else { + self.skip_annotations()?; + self.demand_next_domain(read_annotations, dec)? + } + } + Tag::Embedded => { + Value::Embedded(dec.decode_embedded(self, read_annotations)?).wrap() + } + Tag::SmallInteger(v) => { + // TODO: prebuild these in value.rs + Value::from(v).wrap() + } + Tag::MediumInteger(count) => { + let n = self.read_signed_integer(count.into())?; + Value::SignedInteger(n).wrap() + } + Tag::SignedInteger => { + let count = self.varint()?; + let n = self.read_signed_integer(count)?; + Value::SignedInteger(n).wrap() + } + Tag::String => { + let count = self.varint()?; + let bs = self.readbytes(count)?; + Value::String(self.decodestr(bs)?.into_owned()).wrap() + } + Tag::ByteString => { + let count = self.varint()?; + Value::ByteString(self.readbytes(count)?.into_owned()).wrap() + } + Tag::Symbol => { + let count = self.varint()?; + let bs = self.readbytes(count)?; + Value::Symbol(self.decodestr(bs)?.into_owned()).wrap() + } + Tag::Record => { + let mut vs = Vec::new(); + while let Some(v) = self.next_upto_end(read_annotations, dec)? { vs.push(v); } + if vs.is_empty() { + return Err(self.syntax_error("Too few elements in encoded record")) + } + Value::Record(Record(vs)).wrap() + } + Tag::Sequence => { + let mut vs = Vec::new(); + while let Some(v) = self.next_upto_end(read_annotations, dec)? 
{ vs.push(v); } + Value::Sequence(vs).wrap() + } + Tag::Set => { + let mut s = Set::new(); + while let Some(v) = self.next_upto_end(read_annotations, dec)? { s.insert(v); } + Value::Set(s).wrap() + } + Tag::Dictionary => { + let mut d = Map::new(); + while let Some(k) = self.next_upto_end(read_annotations, dec)? { + match self.next_upto_end(read_annotations, dec)? { + Some(v) => { d.insert(k, v); } + None => return Err(self.syntax_error("Missing dictionary value")), + } + } + Value::Dictionary(d).wrap() + } + tag @ Tag::End => { + return Err(self.syntax_error(&format!("Invalid tag: {:?}", tag))); + } + })) + } + + #[inline(always)] + fn open_record(&mut self) -> ReaderResult<()> { + self.next_compound(Tag::Record, ExpectedKind::Record) + } + + #[inline(always)] + fn open_sequence(&mut self) -> ReaderResult<()> { + self.next_compound(Tag::Sequence, ExpectedKind::Sequence) + } + + #[inline(always)] + fn open_set(&mut self) -> ReaderResult<()> { + self.next_compound(Tag::Set, ExpectedKind::Set) + } + + #[inline(always)] + fn open_dictionary(&mut self) -> ReaderResult<()> { + self.next_compound(Tag::Dictionary, ExpectedKind::Dictionary) + } + + #[inline(always)] + fn boundary(&mut self, _b: &B::Type) -> ReaderResult<()> { + Ok(()) + } + + #[inline(always)] + fn close_compound(&mut self, _b: &mut B::Type, _i: &B::Item) -> ReaderResult { + Ok(self.peekend()?) 
+ } + + #[inline(always)] + fn open_embedded(&mut self) -> ReaderResult<()> { + self.next_compound(Tag::Embedded, ExpectedKind::Embedded) + } + + #[inline(always)] + fn close_embedded(&mut self) -> ReaderResult<()> { + Ok(()) + } + + type Mark = S::Mark; + + #[inline(always)] + fn mark(&mut self) -> io::Result { + self.source.mark() + } + + #[inline(always)] + fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> { + self.source.restore(mark) + } + + fn next_token>( + &mut self, + read_embedded_annotations: bool, + decode_embedded: &mut Dec, + ) -> io::Result> { + loop { + return Ok(match Tag::try_from(self.peek_noeof()?)? { + Tag::Embedded => { + self.skip()?; + Token::Embedded(decode_embedded.decode_embedded(self, read_embedded_annotations)?) + } + Tag::False | + Tag::True | + Tag::Float | + Tag::Double | + Tag::SmallInteger(_) | + Tag::MediumInteger(_) | + Tag::SignedInteger | + Tag::String | + Tag::ByteString | + Tag::Symbol => + Token::Atom(self.demand_next_domain(false, decode_embedded)?), + + Tag::Record => { self.skip()?; Token::Compound(CompoundClass::Record) } + Tag::Sequence => { self.skip()?; Token::Compound(CompoundClass::Sequence) } + Tag::Set => { self.skip()?; Token::Compound(CompoundClass::Set) } + Tag::Dictionary => { self.skip()?; Token::Compound(CompoundClass::Dictionary) } + + Tag::End => { self.skip()?; Token::End } + + Tag::Annotation => { + self.skip()?; + self.skip_annotations()?; + continue + } + }) + } + } + + #[inline(always)] + fn next_boolean(&mut self) -> ReaderResult { + match self.peek_next_nonannotation_tag()? 
{ + Tag::False => { self.skip()?; Ok(false) } + Tag::True => { self.skip()?; Ok(true) } + _ => Err(error::Error::Expected(ExpectedKind::Boolean)), + } + } + + fn next_signedinteger(&mut self) -> ReaderResult { + let tag = self.peek_next_nonannotation_tag()?; + match tag { + Tag::SmallInteger(v) => { + self.skip()?; + Ok(SignedInteger::from(v as i32)) + } + Tag::MediumInteger(count) => { + self.skip()?; + Ok(self.read_signed_integer(count.into())?) + } + Tag::SignedInteger => { + self.skip()?; + let count = self.varint()?; + Ok(self.read_signed_integer(count)?) + } + _ => Err(error::Error::Expected(ExpectedKind::SignedInteger)) + } + } + + fn next_i8(&mut self) -> ReaderResult { self.next_signed(|n| n.to_i8()) } + fn next_i16(&mut self) -> ReaderResult { self.next_signed(|n| n.to_i16()) } + fn next_i32(&mut self) -> ReaderResult { self.next_signed(|n| n.to_i32()) } + fn next_i64(&mut self) -> ReaderResult { self.next_signed(|n| n.to_i64()) } + fn next_i128(&mut self) -> ReaderResult { self.next_signed(|n| n.to_i128()) } + + fn next_u8(&mut self) -> ReaderResult { self.next_unsigned(|n| n.to_u8()) } + fn next_u16(&mut self) -> ReaderResult { self.next_unsigned(|n| n.to_u16()) } + fn next_u32(&mut self) -> ReaderResult { self.next_unsigned(|n| n.to_u32()) } + fn next_u64(&mut self) -> ReaderResult { self.next_unsigned(|n| n.to_u64()) } + fn next_u128(&mut self) -> ReaderResult { self.next_unsigned(|n| n.to_u128()) } + + fn next_f32(&mut self) -> ReaderResult { + match self.peek_next_nonannotation_tag()? { + Tag::Float => { + self.skip()?; + let mut bs = [0; 4]; + self.readbytes_into(&mut bs)?; + Ok(f32::from_bits(u32::from_be_bytes(bs))) + }, + Tag::Double => { + self.skip()?; + let mut bs = [0; 8]; + self.readbytes_into(&mut bs)?; + Ok(f64::from_bits(u64::from_be_bytes(bs)) as f32) + }, + _ => Err(error::Error::Expected(ExpectedKind::Float)), + } + } + + fn next_f64(&mut self) -> ReaderResult { + match self.peek_next_nonannotation_tag()? 
{ + Tag::Float => { + self.skip()?; + let mut bs = [0; 4]; + self.readbytes_into(&mut bs)?; + Ok(f32::from_bits(u32::from_be_bytes(bs)) as f64) + }, + Tag::Double => { + self.skip()?; + let mut bs = [0; 8]; + self.readbytes_into(&mut bs)?; + Ok(f64::from_bits(u64::from_be_bytes(bs))) + }, + _ => Err(error::Error::Expected(ExpectedKind::Double)), + } + } + + fn next_str(&mut self) -> ReaderResult> { + let bs = self.next_atomic(Tag::String, ExpectedKind::Symbol)?; + Ok(self.decodestr(bs)?) + } + + fn next_bytestring(&mut self) -> ReaderResult> { + self.next_atomic(Tag::ByteString, ExpectedKind::Symbol) + } + + fn next_symbol(&mut self) -> ReaderResult> { + let bs = self.next_atomic(Tag::Symbol, ExpectedKind::Symbol)?; + Ok(self.decodestr(bs)?) + } +} diff --git a/implementations/rust/oo/src/packed/writer.rs b/implementations/rust/oo/src/packed/writer.rs new file mode 100644 index 0000000..ea418aa --- /dev/null +++ b/implementations/rust/oo/src/packed/writer.rs @@ -0,0 +1,395 @@ +use num::bigint::BigInt; +use num::cast::ToPrimitive; +use std::convert::TryInto; +use std::io; +use std::io::Write; +use super::constants::Tag; +use super::super::DomainEncode; +use super::super::IOValue; +use super::super::IOValueDomainCodec; +use super::super::NestedValue; +use super::super::boundary as B; +use super::super::writer::Writer; + +struct Buffers { + base: W, + stack: Vec>>, +} + +impl io::Write for Buffers { + fn write(&mut self, buf: &[u8]) -> io::Result { + if self.stack.is_empty() { + self.base.write(buf) + } else { + self.stack + .last_mut().unwrap() + .last_mut().expect("Internal error: PackedWriter buffer sequence underflow") + .write(buf) + } + } + + fn flush(&mut self) -> io::Result<()> { + if self.stack.is_empty() { + self.base.flush() + } else { + Ok(()) + } + } +} + +pub struct PackedWriter(Buffers); + +impl PackedWriter<&mut Vec> { + #[inline(always)] + pub fn encode>( + enc: &mut Enc, + v: &N, + ) -> io::Result> { + let mut buf: Vec = Vec::new(); + 
PackedWriter::new(&mut buf).write(enc, v)?; + Ok(buf) + } + + #[inline(always)] + pub fn encode_iovalue(v: &IOValue) -> io::Result> { + Self::encode(&mut IOValueDomainCodec, v) + } +} + +pub fn varint(w: &mut W, mut v: u64) -> io::Result { + let mut byte_count = 0; + loop { + byte_count += 1; + if v < 128 { + w.write_all(&[v as u8])?; + return Ok(byte_count); + } else { + w.write_all(&[((v & 0x7f) + 128) as u8])?; + v >>= 7; + } + } +} + +impl PackedWriter { + #[inline(always)] + pub fn new(write: W) -> Self { + PackedWriter(Buffers { + base: write, + stack: vec![], + }) + } + + #[inline(always)] + pub fn write_byte(&mut self, b: u8) -> io::Result<()> { + self.0.write_all(&[b]) + } + + #[inline(always)] + pub fn write_medium_integer(&mut self, bs: &[u8]) -> io::Result<()> { + let count: u8 = bs.len().try_into().unwrap(); + if !(1..=16).contains(&count) { panic!("Invalid medium_integer count: {}", count) } + self.write_byte(Tag::MediumInteger(count).into())?; + self.0.write_all(bs) + } + + #[inline(always)] + pub fn write_atom(&mut self, tag: Tag, bs: &[u8]) -> io::Result<()> { + self.write_byte(tag.into())?; + varint(&mut self.0, bs.len().try_into().unwrap())?; + self.0.write_all(bs) + } + + fn push(&mut self) -> io::Result<()> { + self.0.stack.push(vec![vec![]]); + Ok(()) + } + + fn shift(&mut self) { + match self.0.stack.last_mut() { + Some(bss) => bss.push(vec![]), + None => panic!("Internal error: Preserves PackedWriter stack underflow"), + } + } + + fn pop(&mut self, sorted: bool) -> io::Result<()> { + match self.0.stack.pop() { + Some(mut bss) => { + if sorted { bss.sort(); } + for bs in bss { self.0.write_all(&bs)? } + Ok(()) + } + None => + panic!("Internal error: Preserves PackedWriter stack underflow"), + } + } + + #[inline(always)] + fn write_tag(&mut self, tag: Tag) -> io::Result<()> { + self.write_byte(tag.into()) + } +} + +macro_rules! 
fits_in_bytes { + ($v:ident, $limit:literal) => ({ + let bits = $limit * 8 - 1; + $v >= -(2 << bits) && $v < (2 << bits) + }) +} + +impl Writer for PackedWriter +{ + #[inline(always)] + fn boundary(&mut self, b: &B::Type) -> io::Result<()> { + match b.closing { + Some(B::Item::DictionaryValue) | + Some(B::Item::SetValue) => + self.shift(), + _ => + () + } + match b.opening { + Some(B::Item::Annotation) => + self.write_tag(Tag::Annotation)?, + _ => + () + } + Ok(()) + } + + #[inline(always)] + fn start_annotations(&mut self) -> io::Result<()> { + Ok(()) + } + + #[inline(always)] + fn end_annotations(&mut self) -> io::Result<()> { + Ok(()) + } + + #[inline(always)] + fn write_bool(&mut self, v: bool) -> io::Result<()> { + self.write_tag(if v { Tag::True } else { Tag::False }) + } + + #[inline(always)] + fn write_f32(&mut self, v: f32) -> io::Result<()> { + self.write_tag(Tag::Float)?; + self.0.write_all(&u32::to_be_bytes(f32::to_bits(v))) + } + + #[inline(always)] + fn write_f64(&mut self, v: f64) -> io::Result<()> { + self.write_tag(Tag::Double)?; + self.0.write_all(&u64::to_be_bytes(f64::to_bits(v))) + } + + #[inline(always)] + fn write_i8(&mut self, v: i8) -> io::Result<()> { + if v >= -3 && v <= 12 { return self.write_tag(Tag::SmallInteger(v)) } + self.write_medium_integer(&[v as u8]) + } + + #[inline(always)] + fn write_u8(&mut self, v: u8) -> io::Result<()> { + if let Ok(w) = v.try_into() { return self.write_i8(w) } + self.write_medium_integer(&[0, v]) + } + + #[inline(always)] + fn write_i16(&mut self, v: i16) -> io::Result<()> { + if let Ok(w) = v.try_into() { return self.write_i8(w) } + self.write_medium_integer(&[(v >> 8) as u8, (v & 255) as u8]) + } + + #[inline(always)] + fn write_u16(&mut self, v: u16) -> io::Result<()> { + if let Ok(w) = v.try_into() { return self.write_i16(w) } + self.write_medium_integer(&[0, (v >> 8) as u8, (v & 255) as u8]) + } + + #[inline(always)] + fn write_i32(&mut self, v: i32) -> io::Result<()> { + if let Ok(w) = v.try_into() 
{ return self.write_i16(w) } + if fits_in_bytes!(v, 3) { + return self.write_medium_integer(&[(v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + self.write_medium_integer(&[(v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]) + } + + #[inline(always)] + fn write_u32(&mut self, v: u32) -> io::Result<()> { + if let Ok(w) = v.try_into() { return self.write_i32(w) } + self.write_medium_integer(&[0, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]) + } + + #[inline(always)] + fn write_i64(&mut self, v: i64) -> io::Result<()> { + if let Ok(w) = v.try_into() { return self.write_i32(w) } + if fits_in_bytes!(v, 5) { + return self.write_medium_integer(&[(v >> 32) as u8, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + if fits_in_bytes!(v, 6) { + return self.write_medium_integer(&[(v >> 40) as u8, + (v >> 32) as u8, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + if fits_in_bytes!(v, 7) { + return self.write_medium_integer(&[(v >> 48) as u8, + (v >> 40) as u8, + (v >> 32) as u8, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + self.write_medium_integer(&[(v >> 56) as u8, + (v >> 48) as u8, + (v >> 40) as u8, + (v >> 32) as u8, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]) + } + + #[inline(always)] + fn write_u64(&mut self, v: u64) -> io::Result<()> { + if let Ok(w) = v.try_into() { return self.write_i64(w) } + self.write_medium_integer(&[0, + (v >> 56) as u8, + (v >> 48) as u8, + (v >> 40) as u8, + (v >> 32) as u8, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]) + } + + #[inline(always)] + fn write_i128(&mut self, v: i128) -> io::Result<()> { + if let Ok(w) = v.try_into() { return self.write_i64(w) } + let bs: [u8; 16] = v.to_be_bytes(); + if fits_in_bytes!(v, 9) { return self.write_medium_integer(&bs[7..]); } + if fits_in_bytes!(v, 10) { return 
self.write_medium_integer(&bs[6..]); } + if fits_in_bytes!(v, 11) { return self.write_medium_integer(&bs[5..]); } + if fits_in_bytes!(v, 12) { return self.write_medium_integer(&bs[4..]); } + if fits_in_bytes!(v, 13) { return self.write_medium_integer(&bs[3..]); } + if fits_in_bytes!(v, 14) { return self.write_medium_integer(&bs[2..]); } + if fits_in_bytes!(v, 15) { return self.write_medium_integer(&bs[1..]); } + self.write_medium_integer(&bs) + } + + #[inline(always)] + fn write_u128(&mut self, v: u128) -> io::Result<()> { + if let Ok(w) = v.try_into() { return self.write_i128(w) } + let bs: [u8; 16] = v.to_be_bytes(); + self.write_tag(Tag::SignedInteger)?; + varint(&mut self.0, 17)?; + self.write_byte(0)?; + self.0.write_all(&bs) + } + + #[inline(always)] + fn write_int(&mut self, v: &BigInt) -> io::Result<()> { + match v.to_i8() { + Some(n) => self.write_i8(n), + None => { + match v.to_i128() { + Some(n) => self.write_i128(n), + None => self.write_atom(Tag::SignedInteger, &v.to_signed_bytes_be()), + } + } + } + } + + #[inline(always)] + fn write_string(&mut self, v: &str) -> io::Result<()> { + self.write_atom(Tag::String, v.as_bytes()) + } + + #[inline(always)] + fn write_bytes(&mut self, v: &[u8]) -> io::Result<()> { + self.write_atom(Tag::ByteString, v) + } + + #[inline(always)] + fn write_symbol(&mut self, v: &str) -> io::Result<()> { + self.write_atom(Tag::Symbol, v.as_bytes()) + } + + #[inline(always)] + fn start_record(&mut self) -> io::Result<()> { + self.write_tag(Tag::Record) + } + + #[inline(always)] + fn end_record(&mut self) -> io::Result<()> { + self.write_tag(Tag::End) + } + + #[inline(always)] + fn start_sequence(&mut self) -> io::Result<()> { + self.write_tag(Tag::Sequence) + } + + #[inline(always)] + fn end_sequence(&mut self) -> io::Result<()> { + self.write_tag(Tag::End) + } + + #[inline(always)] + fn start_set(&mut self) -> io::Result<()> { + self.write_tag(Tag::Set)?; + self.push() + } + + #[inline(always)] + fn end_set(&mut self) -> 
io::Result<()> { + self.pop(true)?; + self.write_tag(Tag::End) + } + + #[inline(always)] + fn start_dictionary(&mut self) -> io::Result<()> { + self.write_tag(Tag::Dictionary)?; + self.push() + } + + #[inline(always)] + fn end_dictionary(&mut self) -> io::Result<()> { + self.pop(true)?; + self.write_tag(Tag::End) + } + + #[inline(always)] + fn start_embedded(&mut self) -> io::Result<()> { + self.write_tag(Tag::Embedded) + } + + #[inline(always)] + fn end_embedded(&mut self) -> io::Result<()> { + Ok(()) + } + + #[inline(always)] + fn flush(&mut self) -> io::Result<()> { + self.0.flush() + } +} diff --git a/implementations/rust/oo/src/reader.rs b/implementations/rust/oo/src/reader.rs new file mode 100644 index 0000000..5528a06 --- /dev/null +++ b/implementations/rust/oo/src/reader.rs @@ -0,0 +1,362 @@ +use std::borrow::Cow; +use std::convert::TryFrom; +use std::io; +use std::marker::PhantomData; + +use crate::CompoundClass; +use crate::SignedInteger; +use crate::ValueClass; +use crate::boundary as B; +use crate::error::Error; +use crate::error::ExpectedKind; +use crate::error::io_eof; +use crate::repr::Annotations; +use crate::repr::Atom; +use crate::repr::Embedded; +use crate::repr::IOValue; +use crate::repr::Map; +use crate::repr::Record; +use crate::repr::Set; +use crate::repr::Value; +use crate::repr::iovalue; + +pub type ReaderResult = std::result::Result; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NextToken { + Annotation, + Value(ValueClass), +} + +pub trait Reader<'de> { + fn peek_class(&mut self) -> io::Result>; + + fn next_atom(&mut self) -> ReaderResult>; + + fn boundary(&mut self, b: &B::Type) -> ReaderResult<()>; + + fn open_record(&mut self) -> ReaderResult<()>; + fn open_sequence(&mut self) -> ReaderResult<()>; + fn open_set(&mut self) -> ReaderResult<()>; + fn open_dictionary(&mut self) -> ReaderResult<()>; + + // Answers true for closed, false for more. + // Implies a b.shift of None if closed or of Some(i) if not closed, plus a .boundary. 
+ fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult; + + fn open_embedded(&mut self) -> ReaderResult<()>; + fn close_embedded(&mut self) -> ReaderResult<()>; + + fn open_annotation(&mut self) -> ReaderResult<()>; + fn close_annotation(&mut self) -> ReaderResult<()>; + + fn mark(&mut self) -> io::Result; + fn restore(&mut self, mark: usize) -> io::Result<()>; + + //--------------------------------------------------------------------------- + + fn skip_atom(&mut self) -> io::Result<()> { + let _ = self.next_atom()?; + Ok(()) + } + + fn skip_annotations(&mut self) -> io::Result> { + loop { + match self.peek_class()? { + None => return Ok(None), + Some(NextToken::Value(v)) => return Ok(Some(v)), + Some(NextToken::Annotation) => { + self.open_annotation()?; + self.skip_value()?; + self.close_annotation()?; + } + } + } + } + + fn skip_value(&mut self) -> io::Result<()> { + // TODO efficient skipping in specific impls of this trait + match self.skip_annotations()?.ok_or_else(io_eof)? { + ValueClass::Atomic(_) => self.skip_atom(), + ValueClass::Embedded => { + self.open_embedded()?; + self.skip_value()?; + self.close_embedded()?; + Ok(()) + } + ValueClass::Compound(CompoundClass::Record) => { + self.open_record()?; + let mut b = B::start(B::Item::RecordLabel); + self.boundary(&b)?; + self.skip_value()?; + while !self.close_compound(&mut b, &B::Item::RecordField)? { + self.skip_value()?; + } + Ok(()) + } + ValueClass::Compound(CompoundClass::Sequence) => { + self.open_sequence()?; + let mut b = B::Type::default(); + while !self.close_compound(&mut b, &B::Item::SequenceValue)? { + self.skip_value()?; + } + Ok(()) + } + ValueClass::Compound(CompoundClass::Set) => { + self.open_set()?; + let mut b = B::Type::default(); + while !self.close_compound(&mut b, &B::Item::SetValue)? 
{ + self.skip_value()?; + } + Ok(()) + } + ValueClass::Compound(CompoundClass::Dictionary) => { + self.open_dictionary()?; + let mut b = B::Type::default(); + while !self.close_compound(&mut b, &B::Item::DictionaryKey)? { + self.skip_value()?; + b.shift(Some(B::Item::DictionaryValue)); + self.boundary(&b)?; + self.skip_value()?; + } + Ok(()) + } + } + } + + fn gather_annotations(&mut self) -> io::Result, ValueClass)>> { + let mut anns = Vec::new(); + loop { + match self.peek_class()? { + None => return Ok(None), + Some(NextToken::Value(v)) => return Ok(Some((anns, v))), + Some(NextToken::Annotation) => { + self.open_annotation()?; + anns.push(self.next_iovalue(true)?); + self.close_annotation()?; + } + } + } + } + + fn next_iovalue(&mut self, read_annotations: bool) -> io::Result { + let (anns, v) = match read_annotations { + true => self.gather_annotations()?.ok_or_else(io_eof)?, + false => (Vec::new(), self.skip_annotations()?.ok_or_else(io_eof)?), + }; + let value = match v { + ValueClass::Atomic(_) => + self.next_atom()?.into_value(), + ValueClass::Embedded => { + self.open_embedded()?; + let v = self.next_iovalue(read_annotations)?; + self.close_embedded()?; + Box::new(Embedded::new(v)) + } + ValueClass::Compound(CompoundClass::Record) => { + let mut vs = Vec::new(); + self.open_record()?; + let mut b = B::start(B::Item::RecordLabel); + self.boundary(&b)?; + vs.push(self.next_iovalue(read_annotations)?); + while !self.close_compound(&mut b, &B::Item::RecordField)? { + vs.push(self.next_iovalue(read_annotations)?); + } + Box::new(Record::_from_vec(vs)) + } + ValueClass::Compound(CompoundClass::Sequence) => { + let mut vs = Vec::new(); + self.open_sequence()?; + let mut b = B::Type::default(); + while !self.close_compound(&mut b, &B::Item::SequenceValue)? 
{ + vs.push(self.next_iovalue(read_annotations)?); + } + Box::new(vs) + } + ValueClass::Compound(CompoundClass::Set) => { + let mut s = Set::new(); + self.open_set()?; + let mut b = B::Type::default(); + while !self.close_compound(&mut b, &B::Item::SetValue)? { + s.insert(self.next_iovalue(read_annotations)?); + } + Box::new(s) + } + ValueClass::Compound(CompoundClass::Dictionary) => { + let mut d = Map::new(); + self.open_dictionary()?; + let mut b = B::Type::default(); + while !self.close_compound(&mut b, &B::Item::DictionaryKey)? { + let k = self.next_iovalue(read_annotations)?; + b.shift(Some(B::Item::DictionaryValue)); + self.boundary(&b)?; + d.insert(k, self.next_iovalue(read_annotations)?); + } + Box::new(d) + } + }; + if anns.is_empty() { + Ok(value.into()) + } else { + Ok(iovalue(Annotations::new(value, anns))) + } + } + + fn next_boolean(&mut self) -> ReaderResult { + self.next_iovalue(false)?.as_boolean().ok_or(Error::Expected(ExpectedKind::Boolean)) + } + + fn next_float(&mut self) -> ReaderResult { + self.next_iovalue(false)?.as_float().ok_or(Error::Expected(ExpectedKind::Float)) + } + + fn next_double(&mut self) -> ReaderResult { + self.next_iovalue(false)?.as_double().ok_or(Error::Expected(ExpectedKind::Double)) + } + + fn next_signedinteger(&mut self) -> ReaderResult { + self.next_iovalue(false)?.as_signed_integer().ok_or(Error::Expected(ExpectedKind::SignedInteger)) + } + + fn next_i8(&mut self) -> ReaderResult { Ok(i8::try_from(&self.next_signedinteger()?)?) } + fn next_u8(&mut self) -> ReaderResult { Ok(u8::try_from(&self.next_signedinteger()?)?) } + fn next_i16(&mut self) -> ReaderResult { Ok(i16::try_from(&self.next_signedinteger()?)?) } + fn next_u16(&mut self) -> ReaderResult { Ok(u16::try_from(&self.next_signedinteger()?)?) } + fn next_i32(&mut self) -> ReaderResult { Ok(i32::try_from(&self.next_signedinteger()?)?) } + fn next_u32(&mut self) -> ReaderResult { Ok(u32::try_from(&self.next_signedinteger()?)?) 
} + fn next_i64(&mut self) -> ReaderResult { Ok(i64::try_from(&self.next_signedinteger()?)?) } + fn next_u64(&mut self) -> ReaderResult { Ok(u64::try_from(&self.next_signedinteger()?)?) } + fn next_i128(&mut self) -> ReaderResult { Ok(i128::try_from(&self.next_signedinteger()?)?) } + fn next_u128(&mut self) -> ReaderResult { Ok(u128::try_from(&self.next_signedinteger()?)?) } + + fn next_str(&mut self) -> ReaderResult> { + Ok(self.next_iovalue(false)?.as_string().ok_or(Error::Expected(ExpectedKind::String))?.into_owned().into()) + } + + fn next_bytestring(&mut self) -> ReaderResult> { + Ok(self.next_iovalue(false)?.as_bytestring().ok_or(Error::Expected(ExpectedKind::ByteString))?.into_owned().into()) + } + + fn next_symbol(&mut self) -> ReaderResult> { + Ok(self.next_iovalue(false)?.as_symbol().ok_or(Error::Expected(ExpectedKind::Symbol))?.into_owned().into()) + } + + fn open_simple_record(&mut self, name: &str) -> ReaderResult + { + self.open_record()?; + let b = B::start(B::Item::RecordLabel); + self.boundary(&b)?; + let label: &str = &self.next_symbol()?; + if label == name { + Ok(b) + } else { + Err(Error::Expected(ExpectedKind::SimpleRecord(name.to_owned()))) + } + } + + fn ensure_more_expected(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<()> { + if !self.close_compound(b, i)? { + Ok(()) + } else { + Err(Error::MissingItem) + } + } + + fn ensure_complete(&mut self, mut b: B::Type, i: &B::Item) -> ReaderResult<()> { + if !self.close_compound(&mut b, i)? 
{ + Err(Error::MissingCloseDelimiter) + } else { + Ok(()) + } + } +} + +impl<'r, 'de, R: Reader<'de>> Reader<'de> for &'r mut R { + fn peek_class(&mut self) -> io::Result> { + (*self).peek_class() + } + + fn next_atom(&mut self) -> ReaderResult> { + (*self).next_atom() + } + + fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> { + (*self).boundary(b) + } + + fn open_record(&mut self) -> ReaderResult<()> { + (*self).open_record() + } + + fn open_sequence(&mut self) -> ReaderResult<()> { + (*self).open_sequence() + } + + fn open_set(&mut self) -> ReaderResult<()> { + (*self).open_set() + } + + fn open_dictionary(&mut self) -> ReaderResult<()> { + (*self).open_dictionary() + } + + fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult { + (*self).close_compound(b, i) + } + + fn open_embedded(&mut self) -> ReaderResult<()> { + (*self).open_embedded() + } + + fn close_embedded(&mut self) -> ReaderResult<()> { + (*self).close_embedded() + } + + fn open_annotation(&mut self) -> ReaderResult<()> { + (*self).open_annotation() + } + + fn close_annotation(&mut self) -> ReaderResult<()> { + (*self).close_annotation() + } + + fn mark(&mut self) -> io::Result { + (*self).mark() + } + + fn restore(&mut self, mark: usize) -> io::Result<()> { + (*self).restore(mark) + } +} + +pub struct IOValues<'de, R: Reader<'de>> { + pub reader: R, + pub read_annotations: bool, + phantom: PhantomData<&'de ()>, +} + +impl<'de, R: Reader<'de>> IOValues<'de, R> { + pub fn new(reader: R) -> Self { + IOValues { + reader, + read_annotations: false, + phantom: PhantomData, + } + } + + pub fn read_annotations(mut self, read_annotations: bool) -> Self { + self.read_annotations = read_annotations; + self + } +} + +impl<'de, R: Reader<'de>> std::iter::Iterator for IOValues<'de, R> { + type Item = io::Result; + fn next(&mut self) -> Option { + match self.reader.peek_class() { + Err(e) => Some(Err(e)), + Ok(None) => None, + Ok(Some(_)) => 
Some(self.reader.next_iovalue(self.read_annotations)), + } + } +} diff --git a/implementations/rust/oo/src/repr.rs b/implementations/rust/oo/src/repr.rs new file mode 100644 index 0000000..8de22cb --- /dev/null +++ b/implementations/rust/oo/src/repr.rs @@ -0,0 +1,826 @@ +use bytemuck::TransparentWrapper; + +use std::borrow::{Cow, Borrow}; +use std::cmp::Ordering; +use std::fmt::Debug; +use std::hash::{Hash, Hasher}; +use std::io; +use std::marker::PhantomData; +use std::sync::Arc; +use std::vec::Vec; + +pub use std::collections::BTreeSet as Set; +pub use std::collections::BTreeMap as Map; + +use crate::AtomClass; +use crate::CompoundClass; +use crate::Domain; +use crate::SignedInteger; +use crate::ValueClass; +use crate::Writer; +use crate::boundary as B; +use crate::domain::{NoEmbeddedDomainCodec, DomainEncode, IOValueDomainCodec}; + +use super::float::{eq_f32, eq_f64, cmp_f32, cmp_f64}; + +/// Atomic values from the specification. +pub trait Value: Debug { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()>; + + fn value_clone(&self) -> Box> where D: 'static; + fn value_class(&self) -> ValueClass; + + fn as_boolean(&self) -> Option { None } + fn as_float(&self) -> Option { None } + fn as_double(&self) -> Option { None } + + fn is_signed_integer(&self) -> bool { false } + fn as_signed_integer(&self) -> Option { None } + + fn as_string(&self) -> Option> { None } + fn as_bytestring(&self) -> Option> { None } + fn as_symbol(&self) -> Option> { None } + + fn is_record(&self) -> bool { false } + fn label(&self) -> &dyn Value { panic!("Not a record") } + + fn is_sequence(&self) -> bool { false } + fn len(&self) -> usize { panic!("Has no length") } + fn index(&self, _i: usize) -> &dyn Value { panic!("Not indexable") } + fn iter(&self) -> Box> + '_> { panic!("Not iterable") } + + fn is_set(&self) -> bool { false } + fn has(&self, _v: &dyn Value) -> bool { false } + + fn is_dictionary(&self) -> bool { false } + fn get(&self, _k: &dyn Value) 
-> Option<&dyn Value> { None } + fn entries(&self) -> Box, &dyn Value)> + '_> { panic!("Not a dictionary") } + + fn is_embedded(&self) -> bool { false } + fn embedded(&self) -> Cow<'_, D> { panic!("Not an embedded value") } + + fn annotations(&self) -> Option<&[IOValue]> { None } +} + +pub fn value>(v: &V) -> &dyn Value { + v +} + +pub fn owned + 'static>(v: V) -> Box> { + Box::new(v) +} + +pub fn iovalue + 'static>(v: V) -> IOValue { + IOValue(Arc::new(v)) +} + +impl<'a, D: Domain, V: Value + ?Sized> Value for &'a V { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { (*self).write(w, enc) } + fn value_clone(&self) -> Box> where D: 'static { (*self).value_clone() } + fn value_class(&self) -> ValueClass { (*self).value_class() } + fn as_boolean(&self) -> Option { (*self).as_boolean() } + fn as_float(&self) -> Option { (*self).as_float() } + fn as_double(&self) -> Option { (*self).as_double() } + fn is_signed_integer(&self) -> bool { (*self).is_signed_integer() } + fn as_signed_integer(&self) -> Option { (*self).as_signed_integer() } + fn as_string(&self) -> Option> { (*self).as_string() } + fn as_bytestring(&self) -> Option> { (*self).as_bytestring() } + fn as_symbol(&self) -> Option> { (*self).as_symbol() } + fn is_record(&self) -> bool { (*self).is_record() } + fn label(&self) -> &dyn Value { (*self).label() } + fn is_sequence(&self) -> bool { (*self).is_sequence() } + fn len(&self) -> usize { (*self).len() } + fn index(&self, i: usize) -> &dyn Value { (*self).index(i) } + fn iter(&self) -> Box> + '_> { (*self).iter() } + fn is_set(&self) -> bool { (*self).is_set() } + fn has(&self, v: &dyn Value) -> bool { (*self).has(v) } + fn is_dictionary(&self) -> bool { (*self).is_dictionary() } + fn get<'value>(&'value self, k: &dyn Value) -> Option<&'value dyn Value> { (*self).get(k) } + fn entries(&self) -> Box, &dyn Value)> + '_> { (*self).entries() } + fn is_embedded(&self) -> bool { (*self).is_embedded() } + fn embedded(&self) -> 
Cow<'_, D> { (*self).embedded() } + fn annotations(&self) -> Option<&[IOValue]> { (*self).annotations() } +} + +impl Value for Box> { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { self.as_ref().write(w, enc) } + fn value_clone(&self) -> Box> where D: 'static { self.as_ref().value_clone() } + fn value_class(&self) -> ValueClass { self.as_ref().value_class() } + fn as_boolean(&self) -> Option { self.as_ref().as_boolean() } + fn as_float(&self) -> Option { self.as_ref().as_float() } + fn as_double(&self) -> Option { self.as_ref().as_double() } + fn is_signed_integer(&self) -> bool { self.as_ref().is_signed_integer() } + fn as_signed_integer(&self) -> Option { self.as_ref().as_signed_integer() } + fn as_string(&self) -> Option> { self.as_ref().as_string() } + fn as_bytestring(&self) -> Option> { self.as_ref().as_bytestring() } + fn as_symbol(&self) -> Option> { self.as_ref().as_symbol() } + fn is_record(&self) -> bool { self.as_ref().is_record() } + fn label(&self) -> &dyn Value { self.as_ref().label() } + fn is_sequence(&self) -> bool { self.as_ref().is_sequence() } + fn len(&self) -> usize { self.as_ref().len() } + fn index(&self, i: usize) -> &dyn Value { self.as_ref().index(i) } + fn iter(&self) -> Box> + '_> { self.as_ref().iter() } + fn is_set(&self) -> bool { self.as_ref().is_set() } + fn has(&self, v: &dyn Value) -> bool { self.as_ref().has(v) } + fn is_dictionary(&self) -> bool { self.as_ref().is_dictionary() } + fn get<'value>(&'value self, k: &dyn Value) -> Option<&'value dyn Value> { self.as_ref().get(k) } + fn entries(&self) -> Box, &dyn Value)> + '_> { self.as_ref().entries() } + fn is_embedded(&self) -> bool { self.as_ref().is_embedded() } + fn embedded(&self) -> Cow<'_, D> { self.as_ref().embedded() } + fn annotations(&self) -> Option<&[IOValue]> { self.as_ref().annotations() } +} + +impl<'a, D: Domain> Hash for dyn Value + 'a { + fn hash(&self, state: &mut H) { + match self.value_class() { + ValueClass::Atomic(a) 
=> match a { + AtomClass::Boolean => self.as_boolean().unwrap().hash(state), + AtomClass::Float => self.as_float().unwrap().to_bits().hash(state), + AtomClass::Double => self.as_double().unwrap().to_bits().hash(state), + AtomClass::SignedInteger => self.as_signed_integer().unwrap().hash(state), + AtomClass::String => self.as_string().unwrap().hash(state), + AtomClass::ByteString => self.as_bytestring().unwrap().hash(state), + AtomClass::Symbol => self.as_symbol().unwrap().hash(state), + } + ValueClass::Compound(c) => match c { + CompoundClass::Sequence | + CompoundClass::Set => { + state.write_usize(self.len()); + for v in self.iter() { v.hash(state) } + } + CompoundClass::Record => { + self.label().hash(state); + state.write_usize(self.len()); + for v in self.iter() { v.hash(state) } + } + CompoundClass::Dictionary => { + state.write_usize(self.len()); + for (k, v) in self.entries() { + k.hash(state); + v.hash(state); + } + } + } + ValueClass::Embedded => self.embedded().hash(state), + } + } +} + +fn iters_eq<'a, D: Domain>( + mut i1: Box> + 'a>, + mut i2: Box> + 'a>, +) -> bool { + loop { + match i1.next() { + None => return i2.next().is_none(), + Some(v1) => match i2.next() { + None => return false, + Some(v2) => if v1 != v2 { return false; }, + } + } + } +} + +impl<'a, D: Domain> PartialEq for dyn Value + 'a { + fn eq(&self, other: &Self) -> bool { + let cls = self.value_class(); + if cls != other.value_class() { return false; } + match cls { + ValueClass::Atomic(a) => match a { + AtomClass::Boolean => + self.as_boolean().unwrap() == other.as_boolean().unwrap(), + AtomClass::Float => + eq_f32(self.as_float().unwrap(), other.as_float().unwrap()), + AtomClass::Double => + eq_f64(self.as_double().unwrap(), other.as_double().unwrap()), + AtomClass::SignedInteger => + self.as_signed_integer().unwrap() == other.as_signed_integer().unwrap(), + AtomClass::String => + self.as_string().unwrap() == other.as_string().unwrap(), + AtomClass::ByteString => + 
self.as_bytestring().unwrap() == other.as_bytestring().unwrap(), + AtomClass::Symbol => + self.as_symbol().unwrap() == other.as_symbol().unwrap(), + } + ValueClass::Compound(c) => match c { + CompoundClass::Record => { + if self.label() != other.label() { return false; } + iters_eq(self.iter(), other.iter()) + } + CompoundClass::Sequence => { + iters_eq(self.iter(), other.iter()) + } + CompoundClass::Set => { + let s1 = self.iter().collect::>(); + let s2 = other.iter().collect::>(); + s1 == s2 + } + CompoundClass::Dictionary => { + let d1 = self.entries().collect::>(); + let d2 = other.entries().collect::>(); + d1 == d2 + } + } + ValueClass::Embedded => self.embedded() == other.embedded(), + } + } +} + +fn iters_cmp<'a, D: Domain>( + mut i1: Box> + 'a>, + mut i2: Box> + 'a>, +) -> Ordering { + loop { + match i1.next() { + None => match i2.next() { + None => return Ordering::Equal, + Some(_) => return Ordering::Less, + } + Some(v1) => match i2.next() { + None => return Ordering::Greater, + Some(v2) => match v1.cmp(v2) { + Ordering::Equal => (), + other => return other, + } + } + } + } +} + +impl<'a, D: Domain> Ord for dyn Value + 'a { + fn cmp(&self, other: &Self) -> Ordering { + let cls = self.value_class(); + cls.cmp(&other.value_class()).then_with(|| match cls { + ValueClass::Atomic(a) => match a { + AtomClass::Boolean => + self.as_boolean().cmp(&other.as_boolean()), + AtomClass::Float => + cmp_f32(self.as_float().unwrap(), other.as_float().unwrap()), + AtomClass::Double => + cmp_f64(self.as_double().unwrap(), other.as_double().unwrap()), + AtomClass::SignedInteger => + self.as_signed_integer().cmp(&other.as_signed_integer()), + AtomClass::String => + self.as_string().cmp(&other.as_string()), + AtomClass::ByteString => + self.as_bytestring().cmp(&other.as_bytestring()), + AtomClass::Symbol => + self.as_symbol().cmp(&other.as_symbol()), + }, + ValueClass::Compound(c) => match c { + CompoundClass::Record => + self.label().cmp(other.label()).then_with( + || 
iters_cmp(self.iter(), other.iter())), + CompoundClass::Sequence => iters_cmp(self.iter(), other.iter()), + CompoundClass::Set => { + let s1 = self.iter().collect::>(); + let s2 = other.iter().collect::>(); + s1.cmp(&s2) + } + CompoundClass::Dictionary => { + let d1 = self.entries().collect::>(); + let d2 = other.entries().collect::>(); + d1.cmp(&d2) + } + }, + ValueClass::Embedded => self.embedded().cmp(&other.embedded()), + }) + } +} + +impl<'a, D: Domain> Eq for dyn Value + 'a {} + +impl<'a, D: Domain> PartialOrd for dyn Value + 'a { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +#[derive(Debug, Clone)] +pub enum Atom<'a> { + Boolean(bool), + Float(f32), + Double(f64), + SignedInteger(SignedInteger), + String(Cow<'a, str>), + ByteString(Cow<'a, [u8]>), + Symbol(Cow<'a, str>), +} + +impl<'a> Atom<'a> { + pub fn into_value(self) -> Box> { + match self { + Atom::Boolean(b) => Box::new(b), + Atom::Float(f) => Box::new(f), + Atom::Double(d) => Box::new(d), + Atom::SignedInteger(i) => Box::new(i), + Atom::String(s) => Box::new(s.into_owned()), + Atom::ByteString(bs) => Box::new(Bytes(bs.into_owned())), + Atom::Symbol(s) => Box::new(Symbol(s.into_owned())), + } + } +} + +impl<'a, D: Domain> Value for Atom<'a> { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { + match self { + Atom::Boolean(b) => w.write_bool(*b), + Atom::Float(f) => w.write_f32(*f), + Atom::Double(d) => w.write_f64(*d), + Atom::SignedInteger(i) => w.write_signed_integer(i), + Atom::String(s) => w.write_string(s), + Atom::ByteString(bs) => w.write_bytes(bs), + Atom::Symbol(s) => w.write_symbol(s), + } + } + + fn value_clone(&self) -> Box> where D: 'static { + self.clone().into_value() + } + + fn value_class(&self) -> ValueClass { + ValueClass::Atomic(match self { + Atom::Boolean(_) => AtomClass::Boolean, + Atom::Float(_) => AtomClass::Float, + Atom::Double(_) => AtomClass::Double, + Atom::SignedInteger(_) => 
AtomClass::SignedInteger, + Atom::String(_) => AtomClass::String, + Atom::ByteString(_) => AtomClass::ByteString, + Atom::Symbol(_) => AtomClass::Symbol, + }) + } + + fn as_boolean(&self) -> Option { + if let Atom::Boolean(b) = self { Some(*b) } else { None } + } + + fn as_float(&self) -> Option { + if let Atom::Float(f) = self { Some(*f) } else { None } + } + + fn as_double(&self) -> Option { + if let Atom::Double(d) = self { Some(*d) } else { None } + } + + fn is_signed_integer(&self) -> bool { matches!(self, Atom::SignedInteger(_)) } + fn as_signed_integer(&self) -> Option { + if let Atom::SignedInteger(i) = self { Some(i.clone()) } else { None } + } + + fn as_string(&self) -> Option> { + if let Atom::String(s) = self { Some(Cow::Borrowed(s)) } else { None } + } + + fn as_bytestring(&self) -> Option> { + if let Atom::ByteString(s) = self { Some(Cow::Borrowed(s)) } else { None } + } + + fn as_symbol(&self) -> Option> { + if let Atom::Symbol(s) = self { Some(Cow::Borrowed(s)) } else { None } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum NoValue {} + +impl Domain for NoValue { + type Decode = NoEmbeddedDomainCodec; + type Encode = NoEmbeddedDomainCodec; +} + +impl Value for NoValue { + fn write(&self, _w: &mut dyn Writer, _enc: &mut dyn DomainEncode) -> io::Result<()> { unreachable!() } + fn value_clone(&self) -> Box> where D: 'static { unreachable!() } + fn value_class(&self) -> ValueClass { unreachable!() } +} + +impl Value for bool { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { w.write_bool(*self) } + fn value_clone(&self) -> Box> where D: 'static { Box::new(*self) } + fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::Boolean) } + fn as_boolean(&self) -> Option { Some(*self) } +} + +impl Value for u64 { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { w.write_u64(*self) } + fn value_clone(&self) -> Box> where D: 'static { 
Box::new(*self) } + fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::SignedInteger) } + fn as_signed_integer(&self) -> Option { + Some((*self).into()) + } +} + +impl Value for SignedInteger { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { w.write_signed_integer(self) } + fn value_clone(&self) -> Box> where D: 'static { Box::new(self.clone()) } + fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::SignedInteger) } + fn as_signed_integer(&self) -> Option { + Some(self.clone()) + } +} + +impl Value for f32 { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { w.write_f32(*self) } + fn value_clone(&self) -> Box> where D: 'static { Box::new(*self) } + fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::Float) } + fn as_float(&self) -> Option { Some(*self) } + fn as_double(&self) -> Option { Some(*self as f64) } +} + +impl Value for f64 { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { w.write_f64(*self) } + fn value_clone(&self) -> Box> where D: 'static { Box::new(*self) } + fn value_class(&self) -> ValueClass { /* f64 is the Double atom: Hash/Eq/Ord dispatch on this class, and Float would compare through the lossy `as f32` cast */ ValueClass::Atomic(AtomClass::Double) } + fn as_float(&self) -> Option { Some(*self as f32) } + fn as_double(&self) -> Option { Some(*self) } +} + +impl Value for str { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { w.write_string(self) } + fn value_clone(&self) -> Box> where D: 'static { Box::new(self.to_owned()) } + fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::String) } + fn as_string(&self) -> Option> { Some(Cow::Borrowed(self)) } +} + +impl Value for String { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { w.write_string(self) } + fn value_clone(&self) -> Box> where D: 'static { Box::new(self.clone()) } + fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::String) } + fn
as_string(&self) -> Option> { Some(Cow::Borrowed(self)) } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Bytes>(T); + +impl + Debug, D: Domain> Value for Bytes { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { w.write_bytes(self.0.as_ref()) } + fn value_clone(&self) -> Box> where D: 'static { Box::new(Bytes(self.0.as_ref().to_owned())) } + fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::ByteString) } + fn as_bytestring(&self) -> Option> { Some(Cow::Borrowed(self.0.as_ref())) } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Symbol + Debug>(T); + +impl + Debug> Symbol { + pub fn new(t: T) -> Self { + Symbol(t) + } +} + +impl + Debug, D: Domain> Value for Symbol { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { w.write_symbol(self.0.as_ref()) } + fn value_clone(&self) -> Box> where D: 'static { Box::new(Symbol(self.0.as_ref().to_owned())) } + fn value_class(&self) -> ValueClass { ValueClass::Atomic(AtomClass::Symbol) } + fn as_symbol(&self) -> Option> { Some(Cow::Borrowed(self.0.as_ref())) } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Record(Vec /* at least one element, for the label */); + +impl Record { + pub fn new(label: V, mut fields: Vec) -> Self { + fields.insert(0, label); + Record(fields) + } + + pub fn _from_vec(v: Vec) -> Self { + if v.is_empty() { panic!("Internal error: empty vec supplied to Record::_from_vec") } + Record(v) + } +} + +impl> Value for Record { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { + w.start_record()?; + let mut b = B::start(B::Item::RecordLabel); + w.boundary(&b)?; + self.0[0].write(w, enc)?; + for e in &self.0[1..] 
{ + b.shift(Some(B::Item::RecordField)); + w.boundary(&b)?; + e.write(w, enc)?; + } + b.shift(None); + w.boundary(&b)?; + w.end_record() + } + + fn value_clone(&self) -> Box> where D: 'static { + Box::new(Record(self.0.iter().map(|v| v.value_clone()).collect())) + } + + fn value_class(&self) -> ValueClass { ValueClass::Compound(CompoundClass::Record) } + fn is_record(&self) -> bool { true } + fn label(&self) -> &dyn Value { &self.0[0] } + fn len(&self) -> usize { self.0.len() - 1 } + fn index(&self, i: usize) -> &dyn Value { &self.0[i + 1] } + fn iter(&self) -> Box> + '_> { + Box::new(self.0[1..].iter().map(value)) + } +} + +impl> Value for Vec { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { + (&self[..]).write(w, enc) + } + + fn value_clone(&self) -> Box> where D: 'static { + (&self[..]).value_clone() + } + + fn value_class(&self) -> ValueClass { ValueClass::Compound(CompoundClass::Sequence) } + fn is_sequence(&self) -> bool { true } + fn len(&self) -> usize { self.len() } + fn index(&self, i: usize) -> &dyn Value { &self[i] } + fn iter(&self) -> Box> + '_> { + Box::new(self[..].iter().map(value)) + } +} + +impl> Value for [V] { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { + w.start_sequence()?; + let mut b = B::Type::default(); + for e in self { + b.shift(Some(B::Item::SequenceValue)); + w.boundary(&b)?; + e.write(w, enc)?; + } + b.shift(None); + w.boundary(&b)?; + w.end_sequence() + } + + fn value_clone(&self) -> Box> where D: 'static { + Box::new(self.iter().map(|v| v.value_clone()).collect::>()) + } + + fn value_class(&self) -> ValueClass { ValueClass::Compound(CompoundClass::Sequence) } + fn is_sequence(&self) -> bool { true } + fn len(&self) -> usize { self.len() } + fn index(&self, i: usize) -> &dyn Value { &self[i] } + fn iter(&self) -> Box> + '_> { + Box::new(self[..].iter().map(value)) + } +} + +impl<'e, D: Domain, E: for<'a> Borrow> + Debug + Ord + 'e> Value for Set { + fn 
write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { + w.start_set()?; + let mut b = B::Type::default(); + for e in self { + b.shift(Some(B::Item::SetValue)); + w.boundary(&b)?; + Key::peel_ref(e.borrow()).write(w, enc)?; + } + b.shift(None); + w.boundary(&b)?; + w.end_set() + } + + fn value_clone(&self) -> Box> where D: 'static { + Box::new(self.iter().map(|v| Key::peel_ref(&v.borrow()).value_clone()).collect::>()) + } + + fn value_class(&self) -> ValueClass { ValueClass::Compound(CompoundClass::Set) } + fn is_set(&self) -> bool { true } + fn len(&self) -> usize { self.len() } + fn has(&self, v: &dyn Value) -> bool { self.contains(&Key::wrap_ref(v)) } + fn iter(&self) -> Box> + '_> { + Box::new(self.iter().map(|e| Key::peel_ref(&e.borrow()))) + } +} + +// Many thanks to SkiFire13 and the other participants in +// https://users.rust-lang.org/t/is-the-lifetime-of-a-btreemap-get-result-attached-to-the-key-as-well-as-the-map/83568/7 +// for the idea of using TransparentWrapper here. 
+// +#[derive(PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Key<'a, D: Domain>(pub dyn Value + 'a); +unsafe impl<'a, D: Domain> TransparentWrapper + 'a> for Key<'a, D> {} + +impl<'a, 'b: 'a, D: Domain> Borrow> for Box + 'b> { + fn borrow(&self) -> &Key<'a, D> { + Key::wrap_ref(&**self) + } +} + +impl<'a, 'b: 'a, D: Domain> Borrow> for &'b (dyn Value + 'b) { + fn borrow(&self) -> &Key<'a, D> { + Key::wrap_ref(self) + } +} + +impl<'k, D: Domain, V: Value, K: for<'a> Borrow> + Debug + Ord + 'k> Value + for Map +{ + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { + w.start_dictionary()?; + let mut b = B::Type::default(); + for (k, v) in self { + b.shift(Some(B::Item::DictionaryKey)); + w.boundary(&b)?; + Key::peel_ref(k.borrow()).write(w, enc)?; + b.shift(Some(B::Item::DictionaryValue)); + w.boundary(&b)?; + v.write(w, enc)?; + } + b.shift(None); + w.boundary(&b)?; + w.end_dictionary() + } + + fn value_clone(&self) -> Box> where D: 'static { + Box::new(Value::entries(self).map(|(k, v)| (k.value_clone(), v.value_clone())).collect::>()) + } + + fn value_class(&self) -> ValueClass { ValueClass::Compound(CompoundClass::Dictionary) } + fn is_dictionary(&self) -> bool { true } + fn len(&self) -> usize { self.len() } + fn has(&self, v: &dyn Value) -> bool { self.contains_key(&Key::wrap_ref(v)) } + fn get(&self, k: &dyn Value) -> Option<&dyn Value> { + match Map::get(self, &Key::wrap_ref(k)) { + Some(v) => Some(v), + None => None, + } + } + fn entries(&self) -> Box, &dyn Value)> + '_> { + Box::new(self.iter().map(|(k,v)| (Key::peel_ref(&k.borrow()), value(v)))) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Embedded(D); + +impl Embedded { + pub fn new(d: D) -> Self { + Embedded(d) + } + + pub fn embedded_value(&self) -> &D { + &self.0 + } + + pub fn into_embedded_value(self) -> D { + self.0 + } +} + +impl Value for Embedded { + fn write(&self, w: &mut dyn Writer, 
enc: &mut dyn DomainEncode) -> io::Result<()> { + w.start_embedded()?; + enc.encode_embedded(w, &self.0)?; + w.end_embedded() + } + + fn value_clone(&self) -> Box> where D: 'static { Box::new(self.clone()) } + fn value_class(&self) -> ValueClass { ValueClass::Embedded } + fn is_embedded(&self) -> bool { true } + fn embedded(&self) -> Cow<'_, D> { Cow::Borrowed(&self.0) } +} + +#[derive(Debug)] +pub struct Annotations>(V, Vec, PhantomData); + +impl> Annotations { + pub fn new(value: V, anns: Vec) -> Self { + Annotations(value, anns, PhantomData) + } + + pub fn value(&self) -> &dyn Value { + &self.0 + } +} + +impl> Value for Annotations { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { + if !self.1.is_empty() { + w.start_annotations()?; + let mut b = B::Type::default(); + for ann in &self.1 { + b.shift(Some(B::Item::Annotation)); + w.boundary(&b)?; + ann.write(w, &mut IOValueDomainCodec)?; + } + b.shift(Some(B::Item::AnnotatedValue)); + w.boundary(&b)?; + self.0.write(w, enc)?; + b.shift(None); + w.boundary(&b)?; + w.end_annotations() + } else { + self.0.write(w, enc) + } + } + + fn value_clone(&self) -> Box> where D: 'static { + Box::new(Annotations(self.0.value_clone(), + self.1.iter().map(|v| v.value_clone().into()).collect(), + PhantomData)) + } + + fn value_class(&self) -> ValueClass { self.value().value_class() } + fn as_boolean(&self) -> Option { self.value().as_boolean() } + fn as_float(&self) -> Option { self.value().as_float() } + fn as_double(&self) -> Option { self.value().as_double() } + fn is_signed_integer(&self) -> bool { self.value().is_signed_integer() } + fn as_signed_integer(&self) -> Option { self.value().as_signed_integer() } + fn as_string(&self) -> Option> { self.value().as_string() } + fn as_bytestring(&self) -> Option> { self.value().as_bytestring() } + fn as_symbol(&self) -> Option> { self.value().as_symbol() } + fn is_record(&self) -> bool { self.value().is_record() } + fn label(&self) -> &dyn Value { 
self.value().label() } + fn is_sequence(&self) -> bool { self.value().is_sequence() } + fn len(&self) -> usize { self.value().len() } + fn index(&self, i: usize) -> &dyn Value { self.value().index(i) } + fn iter(&self) -> Box> + '_> { self.value().iter() } + fn is_set(&self) -> bool { self.value().is_set() } + fn has(&self, v: &dyn Value) -> bool { self.value().has(v) } + fn is_dictionary(&self) -> bool { self.value().is_dictionary() } + fn get(&self, k: &dyn Value) -> Option<&dyn Value> { self.value().get(k) } + fn entries(&self) -> Box, &dyn Value)> + '_> { self.value().entries() } + fn is_embedded(&self) -> bool { self.value().is_embedded() } + fn embedded(&self) -> Cow<'_, D> { self.value().embedded() } + fn annotations(&self) -> Option<&[IOValue]> { Some(&self.1) } +} + +impl> PartialEq for Annotations { + fn eq(&self, other: &Self) -> bool { + self.value().eq(&other.value()) + } +} + +impl> Eq for Annotations {} + +impl> Hash for Annotations { + fn hash(&self, state: &mut H) { + self.value().hash(state); + } +} + +impl> PartialOrd for Annotations { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl> Ord for Annotations { + fn cmp(&self, other: &Self) -> Ordering { + self.value().cmp(&other.value()) + } +} + +#[derive(Debug, Clone, Eq, Hash, PartialOrd, Ord)] +pub struct IOValue(Arc>); + +impl PartialEq for IOValue { + fn eq(&self, other: &Self) -> bool { + &self.0 == &other.0 + } +} + +impl From>> for IOValue { + fn from(b: Box>) -> Self { + IOValue(Arc::from(b)) + } +} + +impl<'a> Borrow> for IOValue { + fn borrow(&self) -> &Key<'a, IOValue> { + Key::wrap_ref(&*self.0) + } +} + +impl Value for IOValue { + fn write(&self, w: &mut dyn Writer, enc: &mut dyn DomainEncode) -> io::Result<()> { self.0.write(w, enc) } + fn value_clone(&self) -> Box> { Box::new(self.clone()) } + fn value_class(&self) -> ValueClass { self.0.value_class() } + fn as_boolean(&self) -> Option { self.0.as_boolean() } + fn as_float(&self) -> Option { 
self.0.as_float() } + fn as_double(&self) -> Option { self.0.as_double() } + fn is_signed_integer(&self) -> bool { self.0.is_signed_integer() } + fn as_signed_integer(&self) -> Option { self.0.as_signed_integer() } + fn as_string(&self) -> Option> { self.0.as_string() } + fn as_bytestring(&self) -> Option> { self.0.as_bytestring() } + fn as_symbol(&self) -> Option> { self.0.as_symbol() } + fn is_record(&self) -> bool { self.0.is_record() } + fn label(&self) -> &dyn Value { self.0.label() } + fn is_sequence(&self) -> bool { self.0.is_sequence() } + fn len(&self) -> usize { self.0.len() } + fn index(&self, i: usize) -> &dyn Value { self.0.index(i) } + fn iter(&self) -> Box> + '_> { self.0.iter() } + fn is_set(&self) -> bool { self.0.is_set() } + fn has(&self, v: &dyn Value) -> bool { self.0.has(v) } + fn is_dictionary(&self) -> bool { self.0.is_dictionary() } + fn get<'value>(&'value self, k: &dyn Value) -> Option<&'value dyn Value> { self.0.get(k) } + fn entries(&self) -> Box, &dyn Value)> + '_> { self.0.entries() } + fn is_embedded(&self) -> bool { self.0.is_embedded() } + fn embedded(&self) -> Cow<'_, IOValue> { self.0.embedded() } + fn annotations(&self) -> Option<&[IOValue]> { self.0.annotations() } +} diff --git a/implementations/rust/oo/src/signed_integer.rs b/implementations/rust/oo/src/signed_integer.rs index 9c753e5..2eae359 100644 --- a/implementations/rust/oo/src/signed_integer.rs +++ b/implementations/rust/oo/src/signed_integer.rs @@ -7,6 +7,14 @@ use std::convert::TryFrom; use std::convert::TryInto; use std::fmt; +pub struct OutOfRange(pub BigInt); + +impl From for crate::error::Error { + fn from(e: OutOfRange) -> Self { + crate::error::Error::NumberOutOfRange(e.0) + } +} + // Invariant: if I128 can be used, it will be; otherwise, if U128 can // be used, it will be; otherwise, Big will be used. #[derive(Clone, Debug, PartialEq, Eq, Hash)] @@ -98,16 +106,16 @@ macro_rules! 
map_integral_type_to_signed_integer { } impl TryFrom<&SignedInteger> for $iN { - type Error = (); + type Error = OutOfRange; fn try_from(v: &SignedInteger) -> Result { - i128::try_from(v)?.try_into().map_err(|_| ()) + i128::try_from(v)?.try_into().map_err(|_| OutOfRange(v.into())) } } impl TryFrom<&SignedInteger> for $uN { - type Error = (); + type Error = OutOfRange; fn try_from(v: &SignedInteger) -> Result { - u128::try_from(v)?.try_into().map_err(|_| ()) + u128::try_from(v)?.try_into().map_err(|_| OutOfRange(v.into())) } } } @@ -137,23 +145,23 @@ impl From for SignedInteger { } impl TryFrom<&SignedInteger> for i128 { - type Error = (); + type Error = OutOfRange; fn try_from(v: &SignedInteger) -> Result { match v.repr() { SignedIntegerRepr::I128(i) => Ok(*i), - SignedIntegerRepr::U128(_) => Err(()), - SignedIntegerRepr::Big(_) => Err(()), + SignedIntegerRepr::U128(_) => Err(OutOfRange(v.into())), + SignedIntegerRepr::Big(_) => Err(OutOfRange(v.into())), } } } impl TryFrom<&SignedInteger> for u128 { - type Error = (); + type Error = OutOfRange; fn try_from(v: &SignedInteger) -> Result { match v.repr() { - SignedIntegerRepr::I128(i) => i.to_u128().ok_or(()), + SignedIntegerRepr::I128(i) => i.to_u128().ok_or_else(|| OutOfRange(v.into())), SignedIntegerRepr::U128(u) => Ok(*u), - SignedIntegerRepr::Big(_) => Err(()), + SignedIntegerRepr::Big(_) => Err(OutOfRange(v.into())), } } } @@ -215,15 +223,15 @@ impl From for SignedInteger { } impl TryFrom<&SignedInteger> for isize { - type Error = (); + type Error = OutOfRange; fn try_from(v: &SignedInteger) -> Result { - i128::try_from(v)?.try_into().map_err(|_| ()) + i128::try_from(v)?.try_into().map_err(|_| OutOfRange(v.into())) } } impl TryFrom<&SignedInteger> for usize { - type Error = (); + type Error = OutOfRange; fn try_from(v: &SignedInteger) -> Result { - u128::try_from(v)?.try_into().map_err(|_| ()) + u128::try_from(v)?.try_into().map_err(|_| OutOfRange(v.into())) } } diff --git 
a/implementations/rust/oo/src/source.rs b/implementations/rust/oo/src/source.rs new file mode 100644 index 0000000..1f15561 --- /dev/null +++ b/implementations/rust/oo/src/source.rs @@ -0,0 +1,223 @@ +use crate::error::io_eof; + +use std::borrow::Cow; +use std::io; + +pub trait BinarySource<'de>: Sized { + type Mark; + fn mark(&mut self) -> io::Result; + fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>; + fn input_position(&mut self) -> io::Result>; + + fn skip(&mut self) -> io::Result<()>; + fn peek(&mut self) -> io::Result>; + fn discard(&mut self, count: u64) -> io::Result<()>; + fn readbytes(&mut self, count: u64) -> io::Result>; + fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()>; + fn read_to_end(&mut self) -> io::Result>; + + //--------------------------------------------------------------------------- + + fn syntax_error(&mut self, message: &str) -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, SyntaxError { + position: match self.input_position() { + Ok(p) => p, + Err(_) => None, + }, + message: message.to_owned(), + }) + } +} + +#[derive(Debug)] +pub struct SyntaxError { + position: Option, + message: String, +} + +impl std::fmt::Display for SyntaxError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "position {}: {}", + match self.position { + Some(p) => p.to_string(), + None => "??".to_string(), + }, + self.message) + } +} + +impl std::error::Error for SyntaxError {} + +pub struct IOBinarySource { + pub read: R, + pub buf: Option, +} + +impl IOBinarySource { + #[inline(always)] + pub fn new(read: R) -> Self { + IOBinarySource { read, buf: None } + } +} + +impl<'de, R: io::Read + io::Seek> BinarySource<'de> for IOBinarySource { + type Mark = u64; + + #[inline(always)] + fn mark(&mut self) -> io::Result { + Ok(self.read.stream_position()? 
- (if self.buf.is_some() { 1 } else { 0 })) + } + + #[inline(always)] + fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> { + self.read.seek(io::SeekFrom::Start(*mark))?; + self.buf = None; + Ok(()) + } + + fn input_position(&mut self) -> io::Result> { + Ok(Some(self.mark()? as usize)) + } + + #[inline(always)] + fn skip(&mut self) -> io::Result<()> { + if self.buf.is_none() { unreachable!(); } + self.buf = None; + Ok(()) + } + + #[inline(always)] + fn peek(&mut self) -> io::Result> { + match self.buf { + Some(b) => Ok(Some(b)), + None => { + let b = &mut [0]; + match self.read.read(b)? { + 0 => Ok(None), + 1 => { + self.buf = Some(b[0]); + Ok(Some(b[0])) + } + _ => unreachable!(), + } + } + } + } + + fn discard(&mut self, mut count: u64) -> io::Result<()> { + if self.buf.is_some() { unreachable!(); } + while count > i64::MAX as u64 { + self.read.seek(io::SeekFrom::Current(i64::MAX))?; + count -= i64::MAX as u64; + } + self.read.seek(io::SeekFrom::Current(count as i64))?; + Ok(()) + } + + fn readbytes(&mut self, count: u64) -> io::Result> { + let mut bs = vec![0; count as usize]; + self.readbytes_into(&mut bs)?; + Ok(Cow::Owned(bs)) + } + + fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> { + if self.buf.is_some() { unreachable!(); } + self.read.read_exact(bs) + } + + fn read_to_end(&mut self) -> io::Result> { + if self.buf.is_some() { unreachable!(); } + let mut bs = Vec::new(); + self.read.read_to_end(&mut bs)?; + Ok(Cow::Owned(bs)) + } +} + +pub struct BytesBinarySource<'de> { + pub bytes: &'de [u8], + pub index: u64, +} + +impl<'de> BytesBinarySource<'de> { + #[inline(always)] + pub fn new(bytes: &'de [u8]) -> Self { + BytesBinarySource { bytes, index: 0 } + } +} + +impl<'de> BinarySource<'de> for BytesBinarySource<'de> { + type Mark = u64; + + #[inline(always)] + fn mark(&mut self) -> io::Result { + Ok(self.index) + } + + #[inline(always)] + fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> { + self.index = *mark; + Ok(()) + } + 
+ fn input_position(&mut self) -> io::Result> { + Ok(Some(self.index as usize)) + } + + #[inline(always)] + fn skip(&mut self) -> io::Result<()> { + if self.index as usize >= self.bytes.len() { unreachable!(); } + self.index += 1; + Ok(()) + } + + #[inline(always)] + fn peek(&mut self) -> io::Result> { + if self.index as usize >= self.bytes.len() { + Ok(None) + } else { + Ok(Some(self.bytes[self.index as usize])) + } + } + + #[inline(always)] + fn discard(&mut self, count: u64) -> io::Result<()> { + if (self.index + count) as usize > self.bytes.len() { + Err(io_eof()) + } else { + self.index += count; + Ok(()) + } + } + + #[inline(always)] + fn readbytes(&mut self, count: u64) -> io::Result> { + let base = self.index as usize; + let limit = base + count as usize; + if limit > self.bytes.len() { + Err(io_eof()) + } else { + let bs = &self.bytes[base..limit]; + self.index += count; + Ok(Cow::Borrowed(bs)) + } + } + + #[inline(always)] + fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> { + let base = self.index as usize; + let count = bs.len(); + let limit = base + count; + if limit > self.bytes.len() { + Err(io_eof()) + } else { + bs.copy_from_slice(&self.bytes[base..limit]); + self.index += count as u64; + Ok(()) + } + } + + #[inline(always)] + fn read_to_end(&mut self) -> io::Result> { + self.readbytes(self.bytes.len() as u64 - self.index) + } +} diff --git a/implementations/rust/oo/src/text/mod.rs b/implementations/rust/oo/src/text/mod.rs new file mode 100644 index 0000000..0f7454a --- /dev/null +++ b/implementations/rust/oo/src/text/mod.rs @@ -0,0 +1,33 @@ +pub mod reader; +pub mod writer; + +pub use reader::TextReader; +pub use writer::TextWriter; + +use crate::value::source::BytesBinarySource; + +use std::io; + +use super::{IOValue, IOValueDomainCodec, NestedValue, Reader, DomainDecode}; + +pub fn from_str<'de, N: NestedValue, Dec: DomainDecode>( + s: &'de str, + decode_embedded: &mut Dec, +) -> io::Result { + TextReader::new(&mut 
BytesBinarySource::new(s.as_bytes())).demand_next_domain(false, decode_embedded) +} + +pub fn iovalue_from_str(s: &str) -> io::Result { + from_str(s, &mut IOValueDomainCodec) +} + +pub fn annotated_from_str<'de, N: NestedValue, Dec: DomainDecode>( + s: &'de str, + decode_embedded: &mut Dec, +) -> io::Result { + TextReader::new(&mut BytesBinarySource::new(s.as_bytes())).demand_next_domain(true, decode_embedded) +} + +pub fn annotated_iovalue_from_str(s: &str) -> io::Result { + annotated_from_str(s, &mut IOValueDomainCodec) +} diff --git a/implementations/rust/oo/src/text/reader.rs b/implementations/rust/oo/src/text/reader.rs new file mode 100644 index 0000000..d517ea0 --- /dev/null +++ b/implementations/rust/oo/src/text/reader.rs @@ -0,0 +1,613 @@ +use crate::error::Error; +use crate::error::ExpectedKind; +use crate::error::io_eof; + +use crate::hex; + +use crate::value::CompoundClass; +use crate::value::Domain; +use crate::value::DomainDecode; +use crate::value::Map; +use crate::value::NestedValue; +use crate::value::Reader; +use crate::value::Record; +use crate::value::Set; +use crate::value::Token; +use crate::value::Value; +use crate::value::boundary as B; +use crate::value::reader::ReaderResult; +use crate::value::source::BinarySource; + +use num::bigint::BigInt; + +use std::io; +use std::iter::FromIterator; +use std::marker::PhantomData; + +pub struct TextReader<'de, 'src, S: BinarySource<'de>> { + pub source: &'src mut S, + phantom: PhantomData<&'de ()>, +} + +impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> +{ + pub fn new(source: &'src mut S) -> Self { + TextReader { + source, + phantom: PhantomData, + } + } + + fn syntax_error(&mut self, message: &str) -> io::Error { + self.source.syntax_error(message) + } + + fn peek(&mut self) -> io::Result> { + self.source.peek() + } + + #[inline(always)] + fn peek_noeof(&mut self) -> io::Result { + self.source.peek()?.ok_or_else(io_eof) + } + + fn skip(&mut self) -> io::Result<()> { + self.source.skip() + 
} + + fn next_byte(&mut self) -> io::Result { + let b = self.peek_noeof()?; + self.source.skip()?; + Ok(b) + } + + fn skip_whitespace(&mut self) { + // Deliberately swallows errors. + while let Ok(Some(c)) = self.peek() { + match c { + b' ' | b'\t' | b'\r' | b'\n' | b',' => { + let _ = self.skip(); + () + } + _ => break, + } + } + } + + fn gather_annotations>( + &mut self, + decode_embedded: &mut Dec, + ) -> ReaderResult> { + let mut vs = Vec::new(); + loop { + self.skip_whitespace(); + match self.peek_noeof()? { + b';' => { self.skip()?; vs.push(N::new(self.comment_line()?)) } + b'@' => { self.skip()?; vs.push(self.demand_next_domain(true, decode_embedded)?) } + _ => return Ok(vs), + } + } + } + + fn skip_annotations(&mut self) -> ReaderResult<()> { + loop { + self.skip_whitespace(); + match self.peek_noeof()? { + b';' => { self.skip()?; self.comment_line()?; }, + b'@' => { self.skip()?; self.skip_value()?; }, + _ => return Ok(()), + } + } + } + + fn decode_utf8(&mut self, bs: Vec) -> io::Result { + String::from_utf8(bs).map_err(|_| self.syntax_error("Invalid UTF-8")) + } + + fn comment_line(&mut self) -> io::Result { + let mut bs = Vec::new(); + loop { + let b = self.peek_noeof()?; + self.skip()?; + match b { + b'\r' | b'\n' => return Ok(self.decode_utf8(bs)?), + _ => bs.push(b), + } + } + } + + fn read_intpart(&mut self, mut bs: Vec, c: u8) -> io::Result { + match c { + b'0' => { + bs.push(c); + self.read_fracexp(bs) + } + _ => { + self.read_digit1(&mut bs, c)?; + self.read_fracexp(bs) + } + } + } + + fn read_fracexp(&mut self, mut bs: Vec) -> io::Result { + let mut is_float = false; + match self.peek_noeof() { + Ok(b'.') => { + is_float = true; + bs.push(self.next_byte()?); + let c = self.next_byte()?; + self.read_digit1(&mut bs, c)?; + } + _ => () + } + match self.peek_noeof() { + Ok(b'e') | Ok(b'E') => { + bs.push(self.next_byte()?); + self.read_sign_and_exp(bs) + } + _ => self.finish_number(bs, is_float) + } + } + + fn read_sign_and_exp(&mut self, mut bs: 
Vec) -> io::Result { + match self.peek_noeof()? { + b'+' | b'-' => bs.push(self.next_byte()?), + _ => (), + } + let c = self.next_byte()?; + self.read_digit1(&mut bs, c)?; + self.finish_number(bs, true) + } + + fn finish_number(&mut self, bs: Vec, is_float: bool) -> io::Result { + let s = self.decode_utf8(bs)?; + if is_float { + match self.peek_noeof() { + Ok(b'f') | Ok(b'F') => { + self.skip()?; + Ok(N::new(s.parse::().map_err( + |_| self.syntax_error(&format!( + "Invalid single-precision number: {:?}", s)))?)) + } + _ => + Ok(N::new(s.parse::().map_err( + |_| self.syntax_error(&format!( + "Invalid double-precision number: {:?}", s)))?)) + } + } else { + Ok(N::new(s.parse::().map_err( + |_| self.syntax_error(&format!( + "Invalid signed-integer number: {:?}", s)))?)) + } + } + + fn read_digit1(&mut self, bs: &mut Vec, c: u8) -> io::Result<()> + { + if !(c as char).is_digit(10) { + return Err(self.syntax_error("Incomplete number")); + } + bs.push(c); + while let Ok(Some(c)) = self.peek() { + if !(c as char).is_digit(10) { + break; + } + bs.push(self.next_byte()?); + } + Ok(()) + } + + fn read_stringlike( + &mut self, + mut seed: R, + xform_item: X, + terminator: u8, + hexescape: u8, + hexescaper: H, + ) -> io::Result + where + X: Fn(&mut Self, &mut R, u8) -> io::Result<()>, + H: Fn(&mut Self, &mut R) -> io::Result<()>, + { + loop { + match self.next_byte()? { + c if c == terminator => return Ok(seed), + b'\\' => match self.next_byte()? 
{ + c if c == hexescape => hexescaper(self, &mut seed)?, + c if c == terminator || c == b'\\' || c == b'/' => xform_item(self, &mut seed, c)?, + b'b' => xform_item(self, &mut seed, b'\x08')?, + b'f' => xform_item(self, &mut seed, b'\x0c')?, + b'n' => xform_item(self, &mut seed, b'\x0a')?, + b'r' => xform_item(self, &mut seed, b'\x0d')?, + b't' => xform_item(self, &mut seed, b'\x09')?, + _ => return Err(self.syntax_error("Invalid escape code")), + }, + c => xform_item(self, &mut seed, c)?, + } + } + } + + fn hexnum(&mut self, count: usize) -> io::Result { + let mut v: u32 = 0; + for _ in 0 .. count { + let c = self.next_byte()?; + match (c as char).to_digit(16) { + Some(d) => + v = v << 4 | d, + None => + return Err(self.syntax_error("Bad hex escape")), + } + } + Ok(v) + } + + fn append_codepoint(&mut self, bs: &mut Vec, n: u32) -> io::Result<()> { + let c = char::from_u32(n).ok_or_else(|| self.syntax_error("Bad code point"))?; + let mut buf = [0; 4]; + let _ = c.encode_utf8(&mut buf); + bs.extend(&buf[0 .. c.len_utf8()]); + Ok(()) + } + + fn read_string(&mut self, delimiter: u8) -> io::Result { + let raw = self.read_stringlike( + Vec::new(), + |_r, bs, c| Ok(bs.push(c)), + delimiter, + b'u', + |r, bs| { + let n1 = r.hexnum(4)?; + if (0xd800 ..= 0xdbff).contains(&n1) { + let mut ok = true; + ok = ok && r.next_byte()? == b'\\'; + ok = ok && r.next_byte()? == b'u'; + if !ok { + Err(r.syntax_error("Missing second half of surrogate pair")) + } else { + let n2 = r.hexnum(4)?; + if (0xdc00 ..= 0xdfff).contains(&n2) { + let n = ((n1 - 0xd800) << 10) + (n2 - 0xdc00) + 0x10000; + r.append_codepoint(bs, n) + } else { + Err(r.syntax_error("Bad second half of surrogate pair")) + } + } + } else { + r.append_codepoint(bs, n1) + } + })?; + self.decode_utf8(raw) + } + + fn read_literal_binary(&mut self) -> io::Result { + Ok(N::new(&self.read_stringlike( + Vec::new(), + |_r, bs, b| Ok(bs.push(b)), + b'"', + b'x', + |r, bs| Ok(bs.push(r.hexnum(2)? 
as u8)))?[..])) + } + + fn read_hex_binary(&mut self) -> io::Result { + let mut s = String::new(); + loop { + self.skip_whitespace(); + let c1 = self.next_byte()? as char; + if c1 == '"' { + let bs = hex::HexParser::Strict.decode(&s).unwrap(); + return Ok(N::new(&bs[..])); + } + let c2 = self.next_byte()? as char; + if !(c1.is_digit(16) && c2.is_digit(16)) { + return Err(self.syntax_error("Invalid hex binary")); + } + s.push(c1); + s.push(c2); + } + } + + fn read_base64_binary(&mut self) -> io::Result { + let mut bs = Vec::new(); + loop { + self.skip_whitespace(); + let mut c = self.next_byte()?; + if c == b']' { + let bs = base64::decode_config(&self.decode_utf8(bs)?, base64::STANDARD_NO_PAD) + .map_err(|_| self.syntax_error("Invalid base64 character"))?; + return Ok(N::new(&bs[..])); + } + if c == b'-' { c = b'+'; } + if c == b'_' { c = b'/'; } + if c == b'=' { continue; } + bs.push(c); + } + } + + fn upto>( + &mut self, + delimiter: u8, + read_annotations: bool, + decode_embedded: &mut Dec, + ) -> io::Result> { + let mut vs = Vec::new(); + loop { + self.skip_whitespace(); + if self.peek()? == Some(delimiter) { + self.skip()?; + return Ok(vs); + } + vs.push(self.demand_next_domain(read_annotations, decode_embedded)?); + } + } + + fn read_dictionary>( + &mut self, + read_annotations: bool, + decode_embedded: &mut Dec, + ) -> io::Result { + let mut d = Map::new(); + loop { + self.skip_whitespace(); + if self.peek_noeof()? == b'}' { + self.skip()?; + return Ok(N::new(d)); + } + let k = self.demand_next_domain(read_annotations, decode_embedded)?; + self.skip_whitespace(); + if self.next_byte()? != b':' { + return Err(self.syntax_error("Missing expected key/value separator")); + } + let v = self.demand_next_domain(read_annotations, decode_embedded)?; + d.insert(k, v); + } + } + + fn read_raw_symbol(&mut self, mut bs: Vec) -> io::Result { + loop { + let c = match self.peek()? 
{ + None => b' ', + Some(c) if (c as char).is_whitespace() => b' ', + Some(c) => c + }; + match c { + b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | + b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => + return Ok(N::symbol(&self.decode_utf8(bs)?)), + c => { + self.skip()?; + bs.push(c) + } + } + } + } +} + +impl<'de, 'src, S: BinarySource<'de>> Reader<'de> for TextReader<'de, 'src, S> +{ + fn next_domain>( + &mut self, + read_annotations: bool, + decode_embedded: &mut Dec, + ) -> io::Result> { + self.skip_whitespace(); + let c = match self.peek()? { + None => return Ok(None), + Some(c) => c, + }; + Ok(Some(match c { + b'-' => { + self.skip()?; + let c1 = self.next_byte()?; + self.read_intpart(vec![b'-'], c1)? + } + b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => { + self.skip()?; + self.read_intpart(Vec::new(), c)? + } + b'"' => { + self.skip()?; + N::new(self.read_string(b'"')?) + } + b'|' => { + self.skip()?; + N::symbol(&self.read_string(b'|')?) + } + b';' | b'@' => { + if read_annotations { + let mut annotations = self.gather_annotations(decode_embedded)?; + let av: N = self.demand_next_domain(read_annotations, decode_embedded)?; + let (existing_annotations, v) = av.pieces(); + if let Some(vs) = existing_annotations { + annotations.extend_from_slice(&vs[..]); + } + N::wrap(Some(Box::new(annotations)), v) + } else { + self.skip_annotations()?; + self.demand_next_domain(read_annotations, decode_embedded)? + } + } + b':' => { + return Err(self.syntax_error("Unexpected key/value separator between items")); + } + b'#' => { + self.skip()?; + match self.next_byte()? { + b'f' => N::new(false), + b't' => N::new(true), + b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations, decode_embedded)?.into_iter())), + b'"' => self.read_literal_binary()?, + b'x' => if self.next_byte()? == b'"' { + self.read_hex_binary()? 
+ } else { + return Err(self.syntax_error("Expected open-quote at start of hex ByteString")); + }, + b'[' => self.read_base64_binary()?, + b'=' => { + let bs_val = self.next_iovalue(true)?; + if let Some(anns) = bs_val.annotations() { + if anns.len() > 0 { + return Err(self.syntax_error("Annotations not permitted after #=")); + } + } + match bs_val.value().as_bytestring() { + None => + return Err(self.syntax_error("ByteString must follow #=")), + Some(bs) => + crate::value::BytesBinarySource::new(bs) + .packed() + .demand_next_domain(read_annotations, decode_embedded)? + } + } + b'!' => Value::Embedded(decode_embedded.decode_embedded(self, read_annotations)?).wrap(), + other => return Err(self.syntax_error(&format!("Invalid # syntax: {:?}", other))), + } + } + b'<' => { + self.skip()?; + let vs = self.upto(b'>', read_annotations, decode_embedded)?; + if vs.is_empty() { + return Err(self.syntax_error("Missing record label")); + } + Value::Record(Record(vs)).wrap() + } + b'[' => { + self.skip()?; + N::new(self.upto(b']', read_annotations, decode_embedded)?) + } + b'{' => { + self.skip()?; + self.read_dictionary(read_annotations, decode_embedded)? + } + b'>' => return Err(self.syntax_error("Unexpected >")), + b']' => return Err(self.syntax_error("Unexpected ]")), + b'}' => return Err(self.syntax_error("Unexpected }")), + other => { + self.skip()?; + self.read_raw_symbol(vec![other])? + } + })) + } + + fn open_record(&mut self) -> ReaderResult<()> { + self.skip_annotations()?; + if self.peek()? != Some(b'<') { return Err(Error::Expected(ExpectedKind::Record)); } + self.skip()?; + Ok(()) + } + + fn open_sequence(&mut self) -> ReaderResult<()> { + self.skip_annotations()?; + if self.peek()? != Some(b'[') { return Err(Error::Expected(ExpectedKind::Sequence)); } + self.skip()?; + Ok(()) + } + + fn open_set(&mut self) -> ReaderResult<()> { + self.skip_annotations()?; + let mark = self.mark()?; + match self.next_byte()? { + b'#' => match self.next_byte()? 
{ + b'{' => return Ok(()), + _ => (), + }, + _ => (), + } + self.restore(&mark)?; + Err(Error::Expected(ExpectedKind::Set)) + } + + fn open_dictionary(&mut self) -> ReaderResult<()> { + self.skip_annotations()?; + if self.peek()? != Some(b'{') { return Err(Error::Expected(ExpectedKind::Dictionary)); } + self.skip()?; + Ok(()) + } + + #[inline] + fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> { + match b { + B::Type { + closing: Some(B::Item::DictionaryKey), + opening: Some(B::Item::DictionaryValue), + } => { + self.skip_whitespace(); + if self.next_byte()? != b':' { + Err(self.syntax_error("Missing expected key/value separator"))?; + } + }, + _ => (), + } + Ok(()) + } + + fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult { + self.skip_whitespace(); + match self.peek_noeof()? { + b'>' | b']' | b'}' => { + self.skip()?; + Ok(true) + } + _ => { + b.shift(Some(i.clone())); + self.boundary(b)?; + Ok(false) + } + } + } + + fn open_embedded(&mut self) -> ReaderResult<()> { + self.skip_annotations()?; + let mark = self.mark()?; + match self.next_byte()? { + b'#' => match self.next_byte()? { + b'!' => return Ok(()), + _ => (), + }, + _ => (), + } + self.restore(&mark)?; + Err(Error::Expected(ExpectedKind::Embedded)) + } + + fn close_embedded(&mut self) -> ReaderResult<()> { + Ok(()) + } + + type Mark = S::Mark; + + fn mark(&mut self) -> io::Result { + self.source.mark() + } + + fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> { + self.source.restore(mark) + } + + fn next_token>( + &mut self, + read_embedded_annotations: bool, + decode_embedded: &mut Dec, + ) -> io::Result> { + self.skip_annotations()?; + let mark = self.mark()?; + Ok(match self.next_byte()? { + b'<' => Token::Compound(CompoundClass::Record), + b'[' => Token::Compound(CompoundClass::Sequence), + b'{' => Token::Compound(CompoundClass::Dictionary), + b'>' => Token::End, + b']' => Token::End, + b'}' => Token::End, + b'#' => match self.next_byte()? { + b'!' 
=> Token::Embedded(decode_embedded.decode_embedded(self, read_embedded_annotations)?), + b'{' => Token::Compound(CompoundClass::Set), + _ => { + self.restore(&mark)?; + Token::Atom(self.demand_next_domain(false, decode_embedded)?) + } + }, + _ => { + self.restore(&mark)?; + Token::Atom(self.demand_next_domain(false, decode_embedded)?) + } + }) + } +} diff --git a/implementations/rust/oo/src/text/writer.rs b/implementations/rust/oo/src/text/writer.rs new file mode 100644 index 0000000..b3cd2b1 --- /dev/null +++ b/implementations/rust/oo/src/text/writer.rs @@ -0,0 +1,305 @@ +use crate::value::DomainEncode; +use crate::value::IOValue; +use crate::value::IOValueDomainCodec; +use crate::value::NestedValue; +use crate::value::Writer; + +use num::bigint::BigInt; + +use std::io; + +use super::super::boundary as B; + +#[derive(Clone, Copy, Debug)] +pub enum CommaStyle { + None, + Separating, + Terminating, +} + +pub struct TextWriter { + w: W, + pub comma_style: CommaStyle, + pub indentation: usize, + pub escape_spaces: bool, + indent: String, +} + +impl std::default::Default for CommaStyle { + fn default() -> Self { + CommaStyle::Separating + } +} + +impl TextWriter<&mut Vec> { + pub fn fmt_value>( + f: &mut std::fmt::Formatter<'_>, + enc: &mut Enc, + v: &crate::value::Value, + ) -> io::Result<()> { + let mut buf: Vec = Vec::new(); + let mut w = TextWriter::new(&mut buf); + if f.alternate() { w.indentation = 4 } + w.write_value(enc, v)?; + f.write_str(std::str::from_utf8(&buf).expect("valid UTF-8 from TextWriter")).map_err( + |_| io::Error::new(io::ErrorKind::Other, "could not append to Formatter")) + } + + pub fn encode>( + enc: &mut Enc, + v: &N, + ) -> io::Result { + let mut buf: Vec = Vec::new(); + TextWriter::new(&mut buf).write(enc, v)?; + Ok(String::from_utf8(buf).expect("valid UTF-8 from TextWriter")) + } + + pub fn encode_iovalue(v: &IOValue) -> io::Result { + Self::encode(&mut IOValueDomainCodec, v) + } +} + +impl TextWriter { + pub fn new(w: W) -> Self { + 
TextWriter { + w, + comma_style: CommaStyle::default(), + indentation: 0, + escape_spaces: false, + indent: "\n".to_owned(), + } + } + + pub fn set_comma_style(mut self, v: CommaStyle) -> Self { + self.comma_style = v; + self + } + + pub fn set_escape_spaces(mut self, v: bool) -> Self { + self.escape_spaces = v; + self + } + + pub fn write_stringlike_char_fallback( + &mut self, + c: char, + f: F, + ) -> io::Result<()> where + F: FnOnce(&mut W, char) -> io::Result<()> + { + match c { + '\\' => write!(self.w, "\\\\"), + '\x08' => write!(self.w, "\\b"), + '\x0c' => write!(self.w, "\\f"), + '\x0a' => write!(self.w, "\\n"), + '\x0d' => write!(self.w, "\\r"), + '\x09' => write!(self.w, "\\t"), + _ => f(&mut self.w, c), + } + } + + pub fn write_stringlike_char(&mut self, c: char) -> io::Result<()> { + self.write_stringlike_char_fallback(c, |w, c| write!(w, "{}", c)) + } + + pub fn add_indent(&mut self) { + for _ in 0 .. self.indentation { + self.indent.push(' ') + } + } + + pub fn del_indent(&mut self) { + if self.indentation > 0 { + self.indent.truncate(self.indent.len() - self.indentation) + } + } + + pub fn indent(&mut self) -> io::Result<()> { + if self.indentation > 0 { + write!(self.w, "{}", &self.indent) + } else { + Ok(()) + } + } + + pub fn indent_sp(&mut self) -> io::Result<()> { + if self.indentation > 0 { + write!(self.w, "{}", &self.indent) + } else { + write!(self.w, " ") + } + } + + pub fn borrow_write(&mut self) -> &mut W { + &mut self.w + } +} + +impl Writer for TextWriter { + #[inline] + fn boundary(&mut self, b: &B::Type) -> io::Result<()> { + match (b.closing.as_ref(), b.opening.as_ref()) { + (None, Some(B::Item::RecordLabel)) | + (Some(B::Item::RecordLabel), None) | + (Some(B::Item::RecordField), None) => + return Ok(()), + (_, Some(B::Item::RecordField)) => + return write!(self.w, " "), + + (Some(B::Item::DictionaryKey), Some(B::Item::DictionaryValue)) => { + return write!(self.w, ": ") + } + + (None, Some(B::Item::Annotation)) => { + return 
write!(self.w, "@") + } + (Some(_), Some(B::Item::Annotation)) => { + return write!(self.w, " @") + } + (Some(B::Item::Annotation), Some(B::Item::AnnotatedValue)) => { + return write!(self.w, " ") + } + (Some(B::Item::AnnotatedValue), None) => + return Ok(()), + + _ => (), + } + + match (b.closing.as_ref(), b.opening.as_ref()) { + (None, None) => (), + (None, Some(_)) => { + self.add_indent(); + self.indent()? + }, + (Some(_), Some(_)) => { + match self.comma_style { + CommaStyle::Separating | CommaStyle::Terminating => write!(self.w, ",")?, + CommaStyle::None => (), + } + self.indent_sp()? + } + (Some(_), None) => { + match self.comma_style { + CommaStyle::Terminating => write!(self.w, ",")?, + CommaStyle::Separating | CommaStyle::None => (), + } + self.del_indent(); + self.indent()? + } + } + + Ok(()) + } + + fn start_annotations(&mut self) -> io::Result<()> { + Ok(()) + } + + fn end_annotations(&mut self) -> io::Result<()> { + Ok(()) + } + + fn write_bool(&mut self, v: bool) -> io::Result<()> { + write!(self.w, "{}", if v { "#t" } else { "#f" }) + } + + fn write_f32(&mut self, v: f32) -> io::Result<()> { + dtoa::write(&mut self.w, v)?; + write!(self.w, "f") + } + + fn write_f64(&mut self, v: f64) -> io::Result<()> { + dtoa::write(&mut self.w, v)?; + Ok(()) + } + + fn write_i128(&mut self, v: i128) -> io::Result<()> { + write!(self.w, "{}", v) + } + + fn write_u128(&mut self, v: u128) -> io::Result<()> { + write!(self.w, "{}", v) + } + + fn write_int(&mut self, v: &BigInt) -> io::Result<()> { + write!(self.w, "{}", v) + } + + fn write_string(&mut self, v: &str) -> io::Result<()> { + write!(self.w, "\"")?; + for c in v.chars() { + match c { + '"' => write!(self.w, "\\\"")?, + ' ' if self.escape_spaces => write!(self.w, "\\u0020")?, + _ => self.write_stringlike_char(c)?, + } + } + write!(self.w, "\"") + } + + fn write_bytes(&mut self, v: &[u8]) -> io::Result<()> { + write!(self.w, "#[{}]", base64::encode_config(v, base64::STANDARD_NO_PAD)) + } + + fn 
write_symbol(&mut self, v: &str) -> io::Result<()> { + // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. + let re = regex::Regex::new("^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$").unwrap(); + if re.is_match(v) { + write!(self.w, "{}", v) + } else { + write!(self.w, "|")?; + for c in v.chars() { + match c { + '|' => write!(self.w, "\\|")?, + ' ' if self.escape_spaces => write!(self.w, "\\u0020")?, + _ => self.write_stringlike_char(c)?, + } + } + write!(self.w, "|") + } + } + + fn start_record(&mut self) -> io::Result<()> { + write!(self.w, "<") + } + + fn end_record(&mut self) -> io::Result<()> { + write!(self.w, ">") + } + + fn start_sequence(&mut self) -> io::Result<()> { + write!(self.w, "[") + } + + fn end_sequence(&mut self) -> io::Result<()> { + write!(self.w, "]") + } + + fn start_set(&mut self) -> io::Result<()> { + write!(self.w, "#{{") + } + + fn end_set(&mut self) -> io::Result<()> { + write!(self.w, "}}") + } + + fn start_dictionary(&mut self) -> io::Result<()> { + write!(self.w, "{{") + } + + fn end_dictionary(&mut self) -> io::Result<()> { + write!(self.w, "}}") + } + + fn start_embedded(&mut self) -> io::Result<()> { + write!(self.w, "#!") + } + + fn end_embedded(&mut self) -> io::Result<()> { + Ok(()) + } + + fn flush(&mut self) -> io::Result<()> { + self.w.flush() + } +} diff --git a/implementations/rust/oo/src/types.rs b/implementations/rust/oo/src/types.rs new file mode 100644 index 0000000..5a1e024 --- /dev/null +++ b/implementations/rust/oo/src/types.rs @@ -0,0 +1,28 @@ +/// The kinds of `Value` from the specification. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum ValueClass { + Atomic(AtomClass), + Compound(CompoundClass), + Embedded, +} + +/// The kinds of `Atom` from the specification. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum AtomClass { + Boolean, + Float, + Double, + SignedInteger, + String, + ByteString, + Symbol, +} + +/// The kinds of `Compound` from the specification. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum CompoundClass { + Record, + Sequence, + Set, + Dictionary, +} diff --git a/implementations/rust/oo/src/writer.rs b/implementations/rust/oo/src/writer.rs new file mode 100644 index 0000000..653da87 --- /dev/null +++ b/implementations/rust/oo/src/writer.rs @@ -0,0 +1,62 @@ +use num::bigint::BigInt; +use std::io; + +use crate::SignedInteger; +use crate::boundary as B; +use crate::signed_integer::SignedIntegerRepr; + +pub trait Writer { + fn start_annotations(&mut self) -> io::Result<()>; + fn end_annotations(&mut self) -> io::Result<()>; + + fn write_bool(&mut self, v: bool) -> io::Result<()>; + + fn write_f32(&mut self, v: f32) -> io::Result<()>; + fn write_f64(&mut self, v: f64) -> io::Result<()>; + + fn write_i128(&mut self, v: i128) -> io::Result<()>; + fn write_u128(&mut self, v: u128) -> io::Result<()>; + fn write_int(&mut self, v: &BigInt) -> io::Result<()>; + + fn write_string(&mut self, v: &str) -> io::Result<()>; + fn write_bytes(&mut self, v: &[u8]) -> io::Result<()>; + fn write_symbol(&mut self, v: &str) -> io::Result<()>; + + fn boundary(&mut self, b: &B::Type) -> io::Result<()>; + + fn start_record(&mut self) -> io::Result<()>; + fn end_record(&mut self) -> io::Result<()>; + + fn start_sequence(&mut self) -> io::Result<()>; + fn end_sequence(&mut self) -> io::Result<()>; + + fn start_set(&mut self) -> io::Result<()>; + fn end_set(&mut self) -> io::Result<()>; + + fn start_dictionary(&mut self) -> io::Result<()>; + fn end_dictionary(&mut self) -> io::Result<()>; + + fn start_embedded(&mut self) -> io::Result<()>; + fn end_embedded(&mut self) -> io::Result<()>; + + fn flush(&mut self) -> io::Result<()>; + + 
//--------------------------------------------------------------------------- + + fn write_i8(&mut self, v: i8) -> io::Result<()> { self.write_i128(v as i128) } + fn write_u8(&mut self, v: u8) -> io::Result<()> { self.write_u128(v as u128) } + fn write_i16(&mut self, v: i16) -> io::Result<()> { self.write_i128(v as i128) } + fn write_u16(&mut self, v: u16) -> io::Result<()> { self.write_u128(v as u128) } + fn write_i32(&mut self, v: i32) -> io::Result<()> { self.write_i128(v as i128) } + fn write_u32(&mut self, v: u32) -> io::Result<()> { self.write_u128(v as u128) } + fn write_i64(&mut self, v: i64) -> io::Result<()> { self.write_i128(v as i128) } + fn write_u64(&mut self, v: u64) -> io::Result<()> { self.write_u128(v as u128) } + + fn write_signed_integer(&mut self, v: &SignedInteger) -> io::Result<()> { + match v.repr() { + SignedIntegerRepr::I128(i) => self.write_i128(*i), + SignedIntegerRepr::U128(u) => self.write_u128(*u), + SignedIntegerRepr::Big(n) => self.write_int(n), + } + } +}