From 9d4e6998f2c05598864a73ddfa2bfe211e3bacf0 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Mon, 2 Aug 2021 11:42:48 +0200 Subject: [PATCH] TextReader/TextWriter --- implementations/rust/preserves/Cargo.toml | 2 + implementations/rust/preserves/src/de.rs | 77 ++- implementations/rust/preserves/src/hex.rs | 21 +- implementations/rust/preserves/src/ser.rs | 163 +++-- .../rust/preserves/src/value/boundary.rs | 45 ++ .../rust/preserves/src/value/domain.rs | 82 ++- .../rust/preserves/src/value/mod.rs | 9 + .../rust/preserves/src/value/packed/reader.rs | 26 +- .../rust/preserves/src/value/packed/writer.rs | 151 ++--- .../rust/preserves/src/value/reader.rs | 61 +- .../rust/preserves/src/value/repr.rs | 41 ++ .../preserves/src/value/signed_integer.rs | 6 + .../rust/preserves/src/value/suspendable.rs | 60 ++ .../rust/preserves/src/value/text/mod.rs | 31 + .../rust/preserves/src/value/text/reader.rs | 614 ++++++++++++++++++ .../rust/preserves/src/value/text/writer.rs | 234 +++++++ .../rust/preserves/src/value/writer.rs | 73 ++- .../rust/preserves/tests/samples/mod.rs | 4 +- .../rust/preserves/tests/samples_tests.rs | 75 ++- 19 files changed, 1493 insertions(+), 282 deletions(-) create mode 100644 implementations/rust/preserves/src/value/boundary.rs create mode 100644 implementations/rust/preserves/src/value/suspendable.rs create mode 100644 implementations/rust/preserves/src/value/text/mod.rs create mode 100644 implementations/rust/preserves/src/value/text/reader.rs create mode 100644 implementations/rust/preserves/src/value/text/writer.rs diff --git a/implementations/rust/preserves/Cargo.toml b/implementations/rust/preserves/Cargo.toml index 99dc85c..7f23e9b 100644 --- a/implementations/rust/preserves/Cargo.toml +++ b/implementations/rust/preserves/Cargo.toml @@ -12,7 +12,9 @@ license = "Apache-2.0" gitlab = { repository = "preserves/preserves" } [dependencies] +base64 = "0.13" num = "0.2" +regex = "1.5" serde = { version = "1.0", features = ["derive"] } serde_bytes = "0.11" diff --git a/implementations/rust/preserves/src/de.rs b/implementations/rust/preserves/src/de.rs index f2702d0..d035417 100644 --- a/implementations/rust/preserves/src/de.rs +++ b/implementations/rust/preserves/src/de.rs @@ -5,7 +5,8 @@ use std::borrow::Cow; use std::io; use std::marker::PhantomData; -use super::value::{IOValue, IOValueDomainCodec, PackedReader}; +use super::value::boundary as B; +use super::value::{IOValue, IOValueDomainCodec, PackedReader, TextReader, ViaCodec}; use super::value::reader::{Reader, IOBinarySource, BytesBinarySource}; pub use super::error::Error; @@ -25,6 +26,10 @@ where from_reader(&mut PackedReader::new(&mut BytesBinarySource::new(bytes), IOValueDomainCodec)) } +pub fn from_text<'de, T>(text: &str) -> Result where T: Deserialize<'de> { + from_reader(&mut TextReader::new(text, ViaCodec::new(IOValueDomainCodec))) +} + pub fn from_read<'de, 'r, IOR: io::Read + io::Seek, T>(read: &'r mut IOR) -> Result where @@ -147,31 +152,30 @@ impl<'r, 'de, 'a, R: Reader<'de, IOValue, IOValue>> serde::de::Deserializer<'de> fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de> { - let is_some = self.read.open_option()?; - let result = if is_some { - self.read.ensure_more_expected()?; - visitor.visit_some(&mut *self)? + if let Some(mut b) = self.read.open_option()? { + self.read.ensure_more_expected(&mut b, &B::Item::RecordField)?; + let result = visitor.visit_some(&mut *self)?; + self.read.ensure_complete(b, &B::Item::RecordField)?; + Ok(result) } else { - visitor.visit_none::()? - }; - self.read.ensure_complete()?; - Ok(result) + Ok(visitor.visit_none::()?) + } } fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de> { - self.read.open_simple_record("tuple", Some(0))?; + let b = self.read.open_simple_record("tuple", Some(0))?; let result = visitor.visit_unit::()?; - self.read.ensure_complete()?; + self.read.ensure_complete(b, &B::Item::RecordField)?; Ok(result) } fn deserialize_unit_struct(self, name: &'static str, visitor: V) -> Result where V: Visitor<'de> { - self.read.open_simple_record(name, Some(0))?; + let b = self.read.open_simple_record(name, Some(0))?; let result = visitor.visit_unit::()?; - self.read.ensure_complete()?; + self.read.ensure_complete(b, &B::Item::RecordField)?; Ok(result) } @@ -179,14 +183,14 @@ impl<'r, 'de, 'a, R: Reader<'de, IOValue, IOValue>> serde::de::Deserializer<'de> -> Result where V: Visitor<'de> { match super::value::magic::transmit_input_value( - name, || Ok(self.read.demand_next(false)?))? + name, || Ok(self.read.demand_next(true)?))? { Some(v) => visitor.visit_u64(v), None => { - self.read.open_simple_record(name, Some(1))?; - self.read.ensure_more_expected()?; + let mut b = self.read.open_simple_record(name, Some(1))?; + self.read.ensure_more_expected(&mut b, &B::Item::RecordField)?; let result = visitor.visit_newtype_struct(&mut *self)?; - self.read.ensure_complete()?; + self.read.ensure_complete(b, &B::Item::RecordField)?; Ok(result) } } @@ -195,14 +199,14 @@ impl<'r, 'de, 'a, R: Reader<'de, IOValue, IOValue>> serde::de::Deserializer<'de> fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de> { // Hack around serde's model: Deserialize *sets* as sequences, // too, and reconstruct them as Rust Sets on the visitor side. - self.read.open_sequence_or_set()?; - visitor.visit_seq(Seq::new(self)) + let i = self.read.open_sequence_or_set()?; + visitor.visit_seq(Seq::new(self, B::Type::default(), i)) } fn deserialize_tuple(self, len: usize, visitor: V) -> Result where V: Visitor<'de> { - self.read.open_simple_record("tuple", Some(len))?; - let mut seq = Seq::new(self); + let b = self.read.open_simple_record("tuple", Some(len))?; + let mut seq = Seq::new(self, b, B::Item::RecordField); let result = visitor.visit_seq(&mut seq)?; seq.skip_remainder()?; Ok(result) @@ -211,8 +215,8 @@ impl<'r, 'de, 'a, R: Reader<'de, IOValue, IOValue>> serde::de::Deserializer<'de> fn deserialize_tuple_struct(self, name: &'static str, len: usize, visitor: V) -> Result where V: Visitor<'de> { - self.read.open_simple_record(name, Some(len))?; - let mut seq = Seq::new(self); + let b = self.read.open_simple_record(name, Some(len))?; + let mut seq = Seq::new(self, b, B::Item::RecordField); let result = visitor.visit_seq(&mut seq)?; seq.skip_remainder()?; Ok(result) @@ -220,7 +224,7 @@ impl<'r, 'de, 'a, R: Reader<'de, IOValue, IOValue>> serde::de::Deserializer<'de> fn deserialize_map(self, visitor: V) -> Result where V: Visitor<'de> { self.read.open_dictionary()?; - let mut seq = Seq::new(self); + let mut seq = Seq::new(self, B::Type::default(), B::Item::DictionaryKey); let result = visitor.visit_map(&mut seq)?; Ok(result) } @@ -231,8 +235,8 @@ impl<'r, 'de, 'a, R: Reader<'de, IOValue, IOValue>> serde::de::Deserializer<'de> visitor: V) -> Result where V: Visitor<'de> { - self.read.open_simple_record(name, Some(fields.len()))?; - let mut seq = Seq::new(self); + let b = self.read.open_simple_record(name, Some(fields.len()))?; + let mut seq = Seq::new(self, b, B::Item::RecordField); let result = visitor.visit_seq(&mut seq)?; seq.skip_remainder()?; Ok(result) @@ -262,22 +266,27 @@ impl<'r, 'de, 'a, R: Reader<'de, IOValue, IOValue>> serde::de::Deserializer<'de> } pub struct Seq<'de, 'r, 'a, R: Reader<'de, IOValue, IOValue>> { + b: B::Type, + i: B::Item, de: &'a mut Deserializer<'de, 'r, R>, } impl<'de, 'r, 'a, R: Reader<'de, IOValue, IOValue>> Seq<'de, 'r, 'a, R> { - fn new(de: &'a mut Deserializer<'de, 'r, R>) -> Self { - Seq { de } + fn new(de: &'a mut Deserializer<'de, 'r, R>, b: B::Type, i: B::Item) -> Self { + Seq { b, i, de } } fn skip_remainder(&mut self) -> Result<()> { - self.de.read.skip_remainder() + while !self.de.read.close_compound(&mut self.b, &self.i)? { + self.de.read.skip_value()?; + } + Ok(()) } fn next_item(&mut self, seed: T) -> Result> where T: DeserializeSeed<'de> { - match self.de.read.close_compound()? { + match self.de.read.close_compound(&mut self.b, &self.i)? { true => Ok(None), false => Ok(Some(seed.deserialize(&mut *self.de)?)), } @@ -290,7 +299,7 @@ impl<'de, 'r, 'a, R: Reader<'de, IOValue, IOValue>> SeqAccess<'de> for Seq<'de, fn next_element_seed(&mut self, seed: T) -> Result> where T: DeserializeSeed<'de> { - self.next_item(seed) + Ok(self.next_item(seed)?) } } @@ -300,12 +309,14 @@ impl<'de, 'r, 'a, R: Reader<'de, IOValue, IOValue>> MapAccess<'de> for Seq<'de, fn next_key_seed(&mut self, seed: K) -> Result> where K: DeserializeSeed<'de> { + self.i = B::Item::DictionaryKey; self.next_item(seed) } fn next_value_seed(&mut self, seed: V) -> Result where V: DeserializeSeed<'de> { + self.i = B::Item::DictionaryValue; match self.next_item(seed)? { Some(item) => Ok(item), None => Err(Error::MissingItem), @@ -320,9 +331,9 @@ impl<'de, 'r, 'a, R: Reader<'de, IOValue, IOValue>> EnumAccess<'de> for &'a mut fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> where V: DeserializeSeed<'de> { - self.read.open_record(None)?; + let b = self.read.open_record(None)?; let variant = seed.deserialize(&mut *self)?; - Ok((variant, Seq::new(self))) + Ok((variant, Seq::new(self, b, B::Item::RecordField))) } } diff --git a/implementations/rust/preserves/src/hex.rs b/implementations/rust/preserves/src/hex.rs index 2d0f1d3..7adb7cf 100644 --- a/implementations/rust/preserves/src/hex.rs +++ b/implementations/rust/preserves/src/hex.rs @@ -10,20 +10,7 @@ pub enum HexFormatter { } pub fn hexdigit(v: u8) -> char { - (if v < 10 {('0' as u8) + v} else {('a' as u8) + v - 10}) as char -} - -pub fn unhex(c: char) -> Option { - if c >= '0' && c <= '9' { - return Some((c as u8) - ('0' as u8)); - } - if c >= 'a' && c <= 'f' { - return Some((c as u8) - ('a' as u8) + 10); - } - if c >= 'A' && c <= 'F' { - return Some((c as u8) - ('A' as u8) + 10); - } - None + char::from_digit(v as u32, 16).expect("hexadecimal digit value") } impl HexParser { @@ -32,7 +19,7 @@ impl HexParser { let mut buf: u8 = 0; let mut buf_full = false; for c in s.chars() { - match unhex(c) { + match c.to_digit(16) { None => match self { HexParser::Liberal => (), @@ -41,10 +28,10 @@ impl HexParser { }, Some(nibble) => if buf_full { - result.push(buf << 4 | nibble); + result.push(buf << 4 | (nibble as u8)); buf_full = false; } else { - buf = nibble; + buf = nibble as u8; buf_full = true; }, } diff --git a/implementations/rust/preserves/src/ser.rs b/implementations/rust/preserves/src/ser.rs index 15d95c2..e28abe1 100644 --- a/implementations/rust/preserves/src/ser.rs +++ b/implementations/rust/preserves/src/ser.rs @@ -1,5 +1,6 @@ use serde::Serialize; use super::value::IOValueDomainCodec; +use super::value::boundary as B; use super::value::writer::{Writer, CompoundWriter}; pub use super::error::Error; @@ -16,16 +17,22 @@ impl<'w, W: Writer> Serializer<'w, W> { } } -#[derive(Debug)] -pub struct SerializeCompound<'a, 'w, W: Writer> { - ser: &'a mut Serializer<'w, W>, - c: W::SeqWriter, +enum SequenceVariant { + Sequence(W::SeqWriter), + Record(W::RecWriter), } -#[derive(Debug)] -pub struct SerializeDictionary<'a, 'w, W: Writer> { +pub struct SerializeCompound<'a, 'w, W: Writer> { + b: B::Type, + i: B::Item, ser: &'a mut Serializer<'w, W>, - d: W::SetWriter, + c: SequenceVariant, +} + +pub struct SerializeDictionary<'a, 'w, W: Writer> { + b: B::Type, + ser: &'a mut Serializer<'w, W>, + d: W::DictWriter, } impl<'a, 'w, W: Writer> serde::Serializer for &'a mut Serializer<'w, W> { @@ -85,13 +92,12 @@ impl<'a, 'w, W: Writer> serde::Serializer for &'a mut Serializer<'w, W> { fn serialize_char(self, v: char) -> Result { let mut c = self.write.start_record(Some(1))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol("UnicodeScalar")?; - c.delimit()?; - c.extend()?; + c.boundary(&B::mid(B::Item::RecordLabel, B::Item::RecordField))?; c.write_u32(v as u32)?; - c.delimit()?; - Ok(self.write.end_seq(c)?) + c.boundary(&B::end(B::Item::RecordField))?; + Ok(self.write.end_record(c)?) } fn serialize_str(self, v: &str) -> Result { @@ -104,37 +110,36 @@ impl<'a, 'w, W: Writer> serde::Serializer for &'a mut Serializer<'w, W> { fn serialize_none(self) -> Result { let mut c = self.write.start_record(Some(0))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol("None")?; - c.delimit()?; - Ok(self.write.end_seq(c)?) + c.boundary(&B::end(B::Item::RecordLabel))?; + Ok(self.write.end_record(c)?) } fn serialize_some(self, v: &T) -> Result where T: Serialize { let mut c = self.write.start_record(Some(1))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol("Some")?; - c.delimit()?; - c.extend()?; + c.boundary(&B::mid(B::Item::RecordLabel, B::Item::RecordField))?; to_writer(&mut c, v)?; - c.delimit()?; - Ok(self.write.end_seq(c)?) + c.boundary(&B::end(B::Item::RecordField))?; + Ok(self.write.end_record(c)?) } fn serialize_unit(self) -> Result { let mut c = self.write.start_record(Some(0))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol("tuple")?; - c.delimit()?; - Ok(self.write.end_seq(c)?) + c.boundary(&B::end(B::Item::RecordLabel))?; + Ok(self.write.end_record(c)?) } fn serialize_unit_struct(self, name: &'static str) -> Result { let mut c = self.write.start_record(Some(0))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol(name)?; - c.delimit()?; - Ok(self.write.end_seq(c)?) + c.boundary(&B::end(B::Item::RecordLabel))?; + Ok(self.write.end_record(c)?) } fn serialize_unit_variant(self, @@ -144,10 +149,10 @@ impl<'a, 'w, W: Writer> serde::Serializer for &'a mut Serializer<'w, W> { Result { let mut c = self.write.start_record(Some(0))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol(variant_name)?; - c.delimit()?; - Ok(self.write.end_seq(c)?) + c.boundary(&B::end(B::Item::RecordLabel))?; + Ok(self.write.end_record(c)?) } fn serialize_newtype_struct(self, name: &'static str, value: &T) -> @@ -158,13 +163,12 @@ impl<'a, 'w, W: Writer> serde::Serializer for &'a mut Serializer<'w, W> { None => { // TODO: This is apparently discouraged, and we should apparently just serialize `value`? let mut c = self.write.start_record(Some(1))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol(name)?; - c.delimit()?; - c.extend()?; + c.boundary(&B::mid(B::Item::RecordLabel, B::Item::RecordField))?; to_writer(&mut c, value)?; - c.delimit()?; - Ok(self.write.end_seq(c)?) + c.boundary(&B::end(B::Item::RecordField))?; + Ok(self.write.end_record(c)?) } } } @@ -177,36 +181,33 @@ impl<'a, 'w, W: Writer> serde::Serializer for &'a mut Serializer<'w, W> { Result where T: Serialize { let mut c = self.write.start_record(Some(1))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol(variant_name)?; - c.delimit()?; - c.extend()?; + c.boundary(&B::mid(B::Item::RecordLabel, B::Item::RecordField))?; to_writer(&mut c, value)?; - c.delimit()?; - Ok(self.write.end_seq(c)?) + c.boundary(&B::end(B::Item::RecordField))?; + Ok(self.write.end_record(c)?) } fn serialize_seq(self, count: Option) -> Result { let c = self.write.start_sequence(count)?; - Ok(SerializeCompound { ser: self, c }) + Ok(SerializeCompound::seq(self, c)) } fn serialize_tuple(self, count: usize) -> Result { let mut c = self.write.start_record(Some(count))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol("tuple")?; - c.delimit()?; - Ok(SerializeCompound { ser: self, c }) + Ok(SerializeCompound::rec(self, c)) } fn serialize_tuple_struct(self, name: &'static str, count: usize) -> Result { let mut c = self.write.start_record(Some(count))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol(name)?; - c.delimit()?; - Ok(SerializeCompound { ser: self, c }) + Ok(SerializeCompound::rec(self, c)) } fn serialize_tuple_variant(self, @@ -217,23 +218,21 @@ impl<'a, 'w, W: Writer> serde::Serializer for &'a mut Serializer<'w, W> { Result { let mut c = self.write.start_record(Some(count))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol(variant_name)?; - c.delimit()?; - Ok(SerializeCompound { ser: self, c }) + Ok(SerializeCompound::rec(self, c)) } fn serialize_map(self, count: Option) -> Result { let d = self.write.start_dictionary(count)?; - Ok(SerializeDictionary { ser: self, d }) + Ok(SerializeDictionary { b: B::Type::default(), ser: self, d }) } fn serialize_struct(self, name: &'static str, count: usize) -> Result { let mut c = self.write.start_record(Some(count))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol(name)?; - c.delimit()?; - Ok(SerializeCompound { ser: self, c }) + Ok(SerializeCompound::rec(self, c)) } fn serialize_struct_variant(self, @@ -244,10 +243,9 @@ impl<'a, 'w, W: Writer> serde::Serializer for &'a mut Serializer<'w, W> { Result { let mut c = self.write.start_record(Some(count))?; - c.extend()?; + c.boundary(&B::start(B::Item::RecordLabel))?; c.write_symbol(variant_name)?; - c.delimit()?; - Ok(SerializeCompound { ser: self, c }) + Ok(SerializeCompound::rec(self, c)) } } @@ -256,32 +254,69 @@ impl<'a, 'w, W: Writer> serde::ser::SerializeMap for SerializeDictionary<'a, 'w, type Error = Error; fn serialize_key(&mut self, key: &T) -> Result<()> where T: Serialize { - self.d.extend()?; + self.b.opening = Some(B::Item::DictionaryKey); + self.d.boundary(&self.b)?; to_writer(&mut self.d, key)?; + self.b.shift(None); Ok(()) } fn serialize_value(&mut self, value: &T) -> Result<()> where T: Serialize { + self.b.opening = Some(B::Item::DictionaryValue); + self.d.boundary(&self.b)?; to_writer(&mut self.d, value)?; - Ok(self.d.delimit()?) + self.b.shift(None); + Ok(()) } - fn end(self) -> Result { - Ok(self.ser.write.end_set(self.d)?) + fn end(mut self) -> Result { + self.d.boundary(&self.b)?; + Ok(self.ser.write.end_dictionary(self.d)?) } } impl<'a, 'w, W: Writer> SerializeCompound<'a, 'w, W> { + fn seq(ser: &'a mut Serializer<'w, W>, c: W::SeqWriter) -> Self { + SerializeCompound { + b: B::Type::default(), + i: B::Item::SequenceValue, + ser, + c: SequenceVariant::Sequence(c), + } + } + + fn rec(ser: &'a mut Serializer<'w, W>, c: W::RecWriter) -> Self { + SerializeCompound { + b: B::end(B::Item::RecordLabel), + i: B::Item::RecordField, + ser, + c: SequenceVariant::Record(c), + } + } + fn extend(&mut self, value: &T) -> Result<()> where T: Serialize { - self.c.extend()?; - to_writer(&mut self.c, value)?; - Ok(self.c.delimit()?) + self.b.opening = Some(self.i.clone()); + match &mut self.c { + SequenceVariant::Sequence(w) => { w.boundary(&self.b)?; to_writer(w, value)?; } + SequenceVariant::Record(w) => { w.boundary(&self.b)?; to_writer(w, value)?; } + } + self.b.shift(None); + Ok(()) } fn complete(self) -> Result<()> { - Ok(self.ser.write.end_seq(self.c)?) + match self.c { + SequenceVariant::Sequence(mut w) => { + w.boundary(&self.b)?; + Ok(self.ser.write.end_sequence(w)?) + } + SequenceVariant::Record(mut w) => { + w.boundary(&self.b)?; + Ok(self.ser.write.end_record(w)?) + } + } } } diff --git a/implementations/rust/preserves/src/value/boundary.rs b/implementations/rust/preserves/src/value/boundary.rs new file mode 100644 index 0000000..b6ebd69 --- /dev/null +++ b/implementations/rust/preserves/src/value/boundary.rs @@ -0,0 +1,45 @@ +#[derive(Default, Clone, Debug)] +pub struct Type { + pub closing: Option, + pub opening: Option, +} + +#[derive(Clone, Debug)] +pub enum Item { + Annotation, + AnnotatedValue, + DictionaryKey, + DictionaryValue, + RecordField, + RecordLabel, + SequenceValue, + SetValue, +} + +impl Type { + pub fn shift(&mut self, i: Option) { + let tmp = std::mem::replace(&mut self.opening, i); + self.closing = tmp; + } +} + +pub fn start(i: Item) -> Type { + Type { + closing: None, + opening: Some(i), + } +} + +pub fn mid(c: Item, o: Item) -> Type { + Type { + closing: Some(c), + opening: Some(o), + } +} + +pub fn end(i: Item) -> Type { + Type { + closing: Some(i), + opening: None, + } +} diff --git a/implementations/rust/preserves/src/value/domain.rs b/implementations/rust/preserves/src/value/domain.rs index d6e86c6..f1042e0 100644 --- a/implementations/rust/preserves/src/value/domain.rs +++ b/implementations/rust/preserves/src/value/domain.rs @@ -1,11 +1,26 @@ use std::io; use super::BinarySource; +use super::BytesBinarySource; use super::Embeddable; use super::IOValue; use super::Reader; use super::Writer; -use super::packed::PackedReader; +use super::packed; + +pub trait DomainParse { + fn parse_embedded( + &mut self, + v: &IOValue, + ) -> io::Result; +} + +pub trait DomainUnparse { + fn unparse_embedded( + &mut self, + d: &D, + ) -> io::Result; +} pub trait DomainDecode { fn decode_embedded<'de, 'src, S: BinarySource<'de>>( @@ -23,7 +38,16 @@ pub trait DomainEncode { ) -> io::Result<()>; } -impl <'a, D: Embeddable, T: DomainDecode> DomainDecode for &'a mut T { +impl<'a, D: Embeddable, T: DomainParse> DomainParse for &'a mut T { + fn parse_embedded( + &mut self, + v: &IOValue, + ) -> io::Result { + (**self).parse_embedded(v) + } +} + +impl<'a, D: Embeddable, T: DomainDecode> DomainDecode for &'a mut T { fn decode_embedded<'de, 'src, S: BinarySource<'de>>( &mut self, src: &'src mut S, @@ -41,7 +65,7 @@ impl DomainDecode for IOValueDomainCodec { src: &'src mut S, read_annotations: bool, ) -> io::Result { - PackedReader::new(src, IOValueDomainCodec).demand_next(read_annotations) + packed::PackedReader::new(src, IOValueDomainCodec).demand_next(read_annotations) } } @@ -76,3 +100,55 @@ impl DomainEncode for NoEmbeddedDomainCodec { Err(io::Error::new(io::ErrorKind::Unsupported, "Embedded values not supported here")) } } + +pub struct ViaCodec(C); + +impl ViaCodec { + pub fn new(c: C) -> Self { + ViaCodec(c) + } +} + +impl> DomainParse for ViaCodec { + fn parse_embedded( + &mut self, + v: &IOValue, + ) -> io::Result { + let bs = packed::PackedWriter::encode_iovalue(v)?; + self.0.decode_embedded(&mut BytesBinarySource::new(&bs), true) + } +} + +impl> DomainUnparse for ViaCodec { + fn unparse_embedded( + &mut self, + d: &D, + ) -> io::Result { + let mut bs = Vec::new(); + let w = &mut packed::PackedWriter::new(&mut bs); + self.0.encode_embedded(w, d)?; + packed::annotated_iovalue_from_bytes(&bs) + } +} + +impl> DomainDecode for ViaCodec { + fn decode_embedded<'de, 'src, S: BinarySource<'de>>( + &mut self, + src: &'src mut S, + read_annotations: bool, + ) -> io::Result { + let v = src.packed(IOValueDomainCodec).demand_next(read_annotations)?; + self.0.parse_embedded(&v) + } +} + +impl> DomainEncode for ViaCodec { + fn encode_embedded( + &mut self, + w: &mut W, + d: &D, + ) -> io::Result<()> { + let v = self.0.unparse_embedded(d)?; + w.write(&mut IOValueDomainCodec, &v) + } +} diff --git a/implementations/rust/preserves/src/value/mod.rs b/implementations/rust/preserves/src/value/mod.rs index f21bd8e..b89dbb0 100644 --- a/implementations/rust/preserves/src/value/mod.rs +++ b/implementations/rust/preserves/src/value/mod.rs @@ -1,3 +1,4 @@ +pub mod boundary; pub mod de; pub mod domain; pub mod magic; @@ -6,14 +7,19 @@ pub mod reader; pub mod repr; pub mod ser; pub mod signed_integer; +pub mod suspendable; +pub mod text; pub mod writer; pub use de::Deserializer; pub use de::from_value; pub use domain::DomainDecode; pub use domain::DomainEncode; +pub use domain::DomainParse; +pub use domain::DomainUnparse; pub use domain::IOValueDomainCodec; pub use domain::NoEmbeddedDomainCodec; +pub use domain::ViaCodec; pub use packed::PackedReader; pub use packed::PackedWriter; pub use reader::BinarySource; @@ -28,6 +34,7 @@ pub use repr::AtomClass; pub use repr::CompoundClass; pub use repr::Domain; pub use repr::Double; +pub use repr::DummyValue; pub use repr::Embeddable; pub use repr::Float; pub use repr::IOValue; @@ -42,6 +49,8 @@ pub use repr::Value; pub use repr::ValueClass; pub use ser::Serializer; pub use ser::to_value; +pub use text::TextReader; +pub use text::TextWriter; pub use writer::Writer; pub fn invert_map(m: &Map) -> Map diff --git a/implementations/rust/preserves/src/value/packed/reader.rs b/implementations/rust/preserves/src/value/packed/reader.rs index 6523663..9a43c7d 100644 --- a/implementations/rust/preserves/src/value/packed/reader.rs +++ b/implementations/rust/preserves/src/value/packed/reader.rs @@ -20,6 +20,7 @@ use super::super::{ Set, Value, + boundary as B, reader::{ Token, BinarySource, @@ -353,18 +354,25 @@ impl<'de, 'src, D: Embeddable, N: NestedValue, Dec: DomainDecode, S: Binar })) } - fn open_record(&mut self, arity: Option) -> ReaderResult<()> { + fn open_record(&mut self, arity: Option) -> ReaderResult { self.next_compound(Tag::Record, ExpectedKind::Record(arity))?; - self.ensure_more_expected() + let mut b = B::Type::default(); + self.ensure_more_expected(&mut b, &B::Item::RecordLabel)?; + Ok(b) } - fn open_sequence_or_set(&mut self) -> ReaderResult<()> { + fn open_sequence_or_set(&mut self) -> ReaderResult { match self.peek_next_nonannotation_tag()? { - Tag::Sequence | Tag::Set => { + Tag::Sequence => { self.skip()?; - Ok(()) + Ok(B::Item::SequenceValue) } - _ => Err(self.expected(ExpectedKind::SequenceOrSet)), + Tag::Set => { + self.skip()?; + Ok(B::Item::SetValue) + } + _ => + Err(self.expected(ExpectedKind::SequenceOrSet)), } } @@ -380,7 +388,11 @@ impl<'de, 'src, D: Embeddable, N: NestedValue, Dec: DomainDecode, S: Binar self.next_compound(Tag::Dictionary, ExpectedKind::Dictionary) } - fn close_compound(&mut self) -> ReaderResult { + fn boundary(&mut self, _b: &B::Type) -> ReaderResult<()> { + Ok(()) + } + + fn close_compound(&mut self, _b: &mut B::Type, _i: &B::Item) -> ReaderResult { Ok(self.peekend()?) } diff --git a/implementations/rust/preserves/src/value/packed/writer.rs b/implementations/rust/preserves/src/value/packed/writer.rs index 2b73347..855bcce 100644 --- a/implementations/rust/preserves/src/value/packed/writer.rs +++ b/implementations/rust/preserves/src/value/packed/writer.rs @@ -2,75 +2,17 @@ use num::bigint::BigInt; use num::cast::ToPrimitive; use std::convert::TryInto; use std::io; -use std::mem; -use std::ops::{Deref, DerefMut}; +use std::ops::DerefMut; use super::constants::Tag; use super::super::DomainEncode; use super::super::Embeddable; use super::super::IOValue; use super::super::IOValueDomainCodec; use super::super::NestedValue; +use super::super::boundary as B; +use super::super::suspendable::Suspendable; -use super::super::writer::{Writer, AnnotationWriter, CompoundWriter, varint}; - -pub enum Suspendable { - Active(T), - Suspended, -} - -impl Suspendable { - pub fn new(t: T) -> Self { - Suspendable::Active(t) - } - - pub fn suspend(&mut self) -> Self { - match self { - Suspendable::Active(_) => mem::replace(self, Suspendable::Suspended), - Suspendable::Suspended => - panic!("Attempt to suspend suspended Suspendable"), - } - } - - pub fn resume(&mut self, other: Self) { - match self { - Suspendable::Suspended => - match other { - Suspendable::Active(_) => *self = other, - Suspendable::Suspended => - panic!("Attempt to resume from suspended Suspendable"), - }, - Suspendable::Active(_) => - panic!("Attempt to resume non-suspended Suspendable"), - } - } - - pub fn take(self) -> T { - match self { - Suspendable::Active(t) => t, - Suspendable::Suspended => - panic!("Attempt to take from suspended Suspendable"), - } - } -} - -impl Deref for Suspendable { - type Target = T; - fn deref(&self) -> &Self::Target { - match self { - Suspendable::Suspended => panic!("Suspended Suspendable at deref"), - Suspendable::Active(t) => t - } - } -} - -impl DerefMut for Suspendable { - fn deref_mut(&mut self) -> &mut Self::Target { - match self { - Suspendable::Suspended => panic!("Empty Suspendable at deref_mut"), - Suspendable::Active(t) => t - } - } -} +use super::super::writer::{Writer, CompoundWriter, varint}; pub struct PackedWriter(Suspendable); @@ -153,7 +95,7 @@ impl BinaryOrderWriter { } fn finish(mut self, w: &mut W) -> io::Result<()> { - if !self.buffer().is_empty() { panic!("Missing final delimit()"); } + if !self.buffer().is_empty() { panic!("Missing final boundary()"); } self.items_mut().pop(); self.items_mut().sort(); for bs in self.items() { @@ -184,33 +126,26 @@ impl WriteWriter for BinaryOrderWriter { } } -impl AnnotationWriter for T { - fn start_annotation(&mut self) -> io::Result<()> { - Ok(self.write_tag(Tag::Annotation)?) - } - - fn start_value(&mut self) -> io::Result<()> { - Ok(()) - } -} - impl CompoundWriter for PackedWriter { - fn extend(&mut self) -> io::Result<()> { - Ok(()) - } - - fn delimit(&mut self) -> io::Result<()> { + fn boundary(&mut self, b: &B::Type) -> io::Result<()> { + if let Some(B::Item::Annotation) = b.opening { + self.write_tag(Tag::Annotation)?; + } Ok(()) } } impl CompoundWriter for BinaryOrderWriter { - fn extend(&mut self) -> io::Result<()> { - Ok(()) - } - - fn delimit(&mut self) -> io::Result<()> { - self.items_mut().push(vec![]); + fn boundary(&mut self, b: &B::Type) -> io::Result<()> { + match b.closing { + Some(B::Item::DictionaryValue) | + Some(B::Item::RecordField) | + Some(B::Item::SequenceValue) | + Some(B::Item::SetValue) => + self.items_mut().push(vec![]), + _ => + () + } Ok(()) } } @@ -224,12 +159,12 @@ macro_rules! binary_order_writer_method { impl Writer for BinaryOrderWriter { type AnnWriter = PackedWriter>; + type RecWriter = PackedWriter>; type SeqWriter = PackedWriter>; type SetWriter = BinaryOrderWriter; + type DictWriter = BinaryOrderWriter; type EmbeddedWriter = PackedWriter>; - binary_order_writer_method!(mut align(natural_chunksize: u64) -> io::Result<()>); - fn start_annotations(&mut self) -> io::Result { Ok(self.pop()) } @@ -259,15 +194,20 @@ impl Writer for BinaryOrderWriter { binary_order_writer_method!(mut write_bytes(v: &[u8]) -> io::Result<()>); binary_order_writer_method!(mut write_symbol(v: &str) -> io::Result<()>); - fn start_record(&mut self, _field_count: Option) -> io::Result { + fn start_record(&mut self, _field_count: Option) -> io::Result { self.write_tag(Tag::Record)?; Ok(self.pop()) } + fn end_record(&mut self, rec: Self::RecWriter) -> io::Result<()> { + self.push(rec); + self.write_tag(Tag::End) + } + fn start_sequence(&mut self, _item_count: Option) -> io::Result { self.write_tag(Tag::Sequence)?; Ok(self.pop()) } - fn end_seq(&mut self, seq: Self::SeqWriter) -> io::Result<()> { + fn end_sequence(&mut self, seq: Self::SeqWriter) -> io::Result<()> { self.push(seq); self.write_tag(Tag::End) } @@ -276,12 +216,16 @@ impl Writer for BinaryOrderWriter { self.write_tag(Tag::Set)?; Ok(BinaryOrderWriter::new()) } - fn start_dictionary(&mut self, _entry_count: Option) -> io::Result { + fn end_set(&mut self, set: Self::SetWriter) -> io::Result<()> { + set.finish(self) + } + + fn start_dictionary(&mut self, _entry_count: Option) -> io::Result { self.write_tag(Tag::Dictionary)?; Ok(BinaryOrderWriter::new()) } - fn end_set(&mut self, set: Self::SetWriter) -> io::Result<()> { - set.finish(self) + fn end_dictionary(&mut self, dict: Self::DictWriter) -> io::Result<()> { + dict.finish(self) } fn start_embedded(&mut self) -> io::Result { @@ -304,8 +248,10 @@ macro_rules! fits_in_bytes { impl Writer for PackedWriter { type AnnWriter = Self; + type RecWriter = Self; type SeqWriter = Self; type SetWriter = BinaryOrderWriter; + type DictWriter = BinaryOrderWriter; type EmbeddedWriter = Self; fn start_annotations(&mut self) -> io::Result { @@ -317,10 +263,6 @@ impl Writer for PackedWriter Ok(()) } - fn align(&mut self, _natural_chunksize: u64) -> io::Result<()> { - Ok(()) - } - fn write_bool(&mut self, v: bool) -> io::Result<()> { self.write_tag(if v { Tag::True } else { Tag::False }) } @@ -472,17 +414,22 @@ impl Writer for PackedWriter self.write_atom(Tag::Symbol, v.as_bytes()) } - fn start_record(&mut self, _field_count: Option) -> io::Result { + fn start_record(&mut self, _field_count: Option) -> io::Result { self.write_tag(Tag::Record)?; Ok(self.suspend()) } + fn end_record(&mut self, rec: Self::RecWriter) -> io::Result<()> { + self.resume(rec); + self.write_tag(Tag::End) + } + fn start_sequence(&mut self, _item_count: Option) -> io::Result { self.write_tag(Tag::Sequence)?; Ok(self.suspend()) } - fn end_seq(&mut self, seq: Self::SeqWriter) -> io::Result<()> { + fn end_sequence(&mut self, seq: Self::SeqWriter) -> io::Result<()> { self.resume(seq); self.write_tag(Tag::End) } @@ -492,13 +439,17 @@ impl Writer for PackedWriter Ok(BinaryOrderWriter::new()) } - fn start_dictionary(&mut self, _entry_count: Option) -> io::Result { + fn end_set(&mut self, set: Self::SetWriter) -> io::Result<()> { + set.finish(self) + } + + fn start_dictionary(&mut self, _entry_count: Option) -> io::Result { self.write_tag(Tag::Dictionary)?; Ok(BinaryOrderWriter::new()) } - fn end_set(&mut self, set: Self::SetWriter) -> io::Result<()> { - set.finish(self) + fn end_dictionary(&mut self, dict: Self::DictWriter) -> io::Result<()> { + dict.finish(self) } fn start_embedded(&mut self) -> io::Result { diff --git a/implementations/rust/preserves/src/value/reader.rs b/implementations/rust/preserves/src/value/reader.rs index b0d659e..ac7cd51 100644 --- a/implementations/rust/preserves/src/value/reader.rs +++ b/implementations/rust/preserves/src/value/reader.rs @@ -12,6 +12,7 @@ use super::IOValue; use super::IOValueDomainCodec; use super::NestedValue; use super::CompoundClass; +use super::boundary as B; use super::signed_integer::SignedInteger; pub type ReaderResult = std::result::Result; @@ -25,12 +26,16 @@ pub enum Token> { pub trait Reader<'de, D: Embeddable, N: NestedValue> { fn next(&mut self, read_annotations: bool) -> io::Result>; - fn open_record(&mut self, arity: Option) -> ReaderResult<()>; - fn open_sequence_or_set(&mut self) -> ReaderResult<()>; + fn open_record(&mut self, arity: Option) -> ReaderResult; + fn open_sequence_or_set(&mut self) -> ReaderResult; fn open_sequence(&mut self) -> ReaderResult<()>; fn open_set(&mut self) -> ReaderResult<()>; fn open_dictionary(&mut self) -> ReaderResult<()>; - fn close_compound(&mut self) -> ReaderResult; + fn boundary(&mut self, b: &B::Type) -> ReaderResult<()>; + + // close_compound implies a b.shift(...) and a self.boundary(b). + fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult; + fn open_embedded(&mut self) -> ReaderResult<()>; fn close_embedded(&mut self) -> ReaderResult<()>; @@ -95,24 +100,29 @@ pub trait Reader<'de, D: Embeddable, N: NestedValue> { Ok(Cow::Owned(self.demand_next(false)?.value().to_symbol()?.to_owned())) } - fn open_option(&mut self) -> ReaderResult + fn open_option(&mut self) -> ReaderResult> { - self.open_record(None)?; + let b = self.open_record(None)?; let label: &str = &self.next_symbol()?; match label { - "None" => Ok(false), - "Some" => Ok(true), - _ => Err(error::Error::Expected(ExpectedKind::Option, - Received::ReceivedRecordWithLabel(label.to_owned()))), + "None" => { + self.ensure_complete(b, &B::Item::RecordField)?; + Ok(None) + } + "Some" => + Ok(Some(b)), + _ => + Err(error::Error::Expected(ExpectedKind::Option, + Received::ReceivedRecordWithLabel(label.to_owned()))), } } - fn open_simple_record(&mut self, name: &str, arity: Option) -> ReaderResult<()> + fn open_simple_record(&mut self, name: &str, arity: Option) -> ReaderResult { - self.open_record(arity)?; + let b = self.open_record(arity)?; let label: &str = &self.next_symbol()?; if label == name { - Ok(()) + Ok(b) } else { Err(error::Error::Expected(ExpectedKind::SimpleRecord(name.to_owned(), arity), Received::ReceivedRecordWithLabel(label.to_owned()))) @@ -130,23 +140,16 @@ pub trait Reader<'de, D: Embeddable, N: NestedValue> { } } - fn skip_remainder(&mut self) -> ReaderResult<()> { - while !self.close_compound()? { - self.skip_value()?; - } - Ok(()) - } - - fn ensure_more_expected(&mut self) -> ReaderResult<()> { - if !self.close_compound()? { + fn ensure_more_expected(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<()> { + if !self.close_compound(b, i)? { Ok(()) } else { Err(error::Error::MissingItem) } } - fn ensure_complete(&mut self) -> ReaderResult<()> { - if !self.close_compound()? { + fn ensure_complete(&mut self, mut b: B::Type, i: &B::Item) -> ReaderResult<()> { + if !self.close_compound(&mut b, i)? { Err(error::Error::MissingCloseDelimiter) } else { Ok(()) @@ -162,11 +165,11 @@ impl<'r, 'de, D: Embeddable, N: NestedValue, R: Reader<'de, D, N>> (*self).next(read_annotations) } - fn open_record(&mut self, arity: Option) -> ReaderResult<()> { + fn open_record(&mut self, arity: Option) -> ReaderResult { (*self).open_record(arity) } - fn open_sequence_or_set(&mut self) -> ReaderResult<()> { + fn open_sequence_or_set(&mut self) -> ReaderResult { (*self).open_sequence_or_set() } @@ -182,8 +185,12 @@ impl<'r, 'de, D: Embeddable, N: NestedValue, R: Reader<'de, D, N>> (*self).open_dictionary() } - fn close_compound(&mut self) -> ReaderResult { - (*self).close_compound() + fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> { + (*self).boundary(b) + } + + fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult { + (*self).close_compound(b, i) } fn open_embedded(&mut self) -> ReaderResult<()> { diff --git a/implementations/rust/preserves/src/value/repr.rs b/implementations/rust/preserves/src/value/repr.rs index a1a5b19..b827cc0 100644 --- a/implementations/rust/preserves/src/value/repr.rs +++ b/implementations/rust/preserves/src/value/repr.rs @@ -268,6 +268,7 @@ impl, D: Embeddable> From for Value { fn from(v: impl, D: Embeddable> From for Value { fn from(v: u128) -> Self { Value::SignedInteger(SignedInteger::from(v)) } } impl, D: Embeddable> From for Value { fn from(v: i128) -> Self { Value::SignedInteger(SignedInteger::from(v)) } } impl, D: Embeddable> From<&BigInt> for Value { fn from(v: &BigInt) -> Self { Value::SignedInteger(SignedInteger::from(Cow::Borrowed(v))) } } +impl, D: Embeddable> From for Value { fn from(v: BigInt) -> Self { Value::SignedInteger(SignedInteger::from(Cow::Owned(v))) } } impl, D: Embeddable> From<&SignedInteger> for Value { fn from(v: &SignedInteger) -> Self { Value::SignedInteger(v.clone()) } } impl, D: Embeddable> From<&str> for Value { fn from(v: &str) -> Self { Value::String(String::from(v)) } } @@ -1183,3 +1184,43 @@ impl<'de> serde::Deserialize<'de> for IOValue { super::magic::input_value(deserializer) } } + +//--------------------------------------------------------------------------- + +#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct DummyValue(AnnotatedValue, D>); + +impl Debug for DummyValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("<>") + } +} + + +impl DummyValue { + pub fn new() -> Self { + DummyValue(AnnotatedValue::new(Annotations::empty(), Value::Boolean(false))) + } +} + +impl NestedValue for DummyValue { + fn wrap(_anns: Annotations, _v: Value) -> Self { + DummyValue::new() + } + + fn annotations(&self) -> &Annotations { + &self.0.0 + } + + fn value(&self) -> &Value { + &self.0.1 + } + + fn pieces(self) -> (Annotations, Value) { + (self.0.0, self.0.1) + } + + fn value_owned(self) -> Value { + self.0.1 + } +} diff --git a/implementations/rust/preserves/src/value/signed_integer.rs b/implementations/rust/preserves/src/value/signed_integer.rs index 732feeb..8b5e45f 100644 --- a/implementations/rust/preserves/src/value/signed_integer.rs +++ b/implementations/rust/preserves/src/value/signed_integer.rs @@ -121,6 +121,12 @@ impl From for SignedInteger { } } +impl From for SignedInteger { + fn from(v: BigInt) -> Self { + Self::from(Cow::Owned(v)) + } +} + impl<'a> From> for SignedInteger { fn from(v: Cow<'a, BigInt>) -> Self { if let Some(w) = v.to_i128() { diff --git a/implementations/rust/preserves/src/value/suspendable.rs b/implementations/rust/preserves/src/value/suspendable.rs new file mode 100644 index 0000000..23dfdd0 --- /dev/null +++ b/implementations/rust/preserves/src/value/suspendable.rs @@ -0,0 +1,60 @@ +use std::ops::{Deref, DerefMut}; + +pub enum Suspendable { + Active(T), + Suspended, +} + +impl Suspendable { + pub fn new(t: T) -> Self { + Suspendable::Active(t) + } + + pub fn suspend(&mut self) -> Self { + match self { + Suspendable::Active(_) => std::mem::replace(self, Suspendable::Suspended), + Suspendable::Suspended => + panic!("Attempt to suspend suspended Suspendable"), + } + } + + pub fn resume(&mut self, other: Self) { + match self { + Suspendable::Suspended => + match other { + Suspendable::Active(_) => *self = other, + Suspendable::Suspended => + panic!("Attempt to resume from suspended Suspendable"), + }, + Suspendable::Active(_) => + panic!("Attempt to resume non-suspended Suspendable"), + } + } + + pub fn take(self) -> T { + match self { + Suspendable::Active(t) => t, + Suspendable::Suspended => + panic!("Attempt to take from suspended Suspendable"), + } + } +} + +impl Deref for Suspendable { + type Target = T; + fn deref(&self) -> &Self::Target { + match self { + Suspendable::Suspended => panic!("Suspended Suspendable at deref"), + Suspendable::Active(t) => t + } + } +} + +impl DerefMut for Suspendable { + fn deref_mut(&mut self) -> &mut Self::Target { + match self { + Suspendable::Suspended => panic!("Empty Suspendable at deref_mut"), + Suspendable::Active(t) => t + } + } +} diff --git a/implementations/rust/preserves/src/value/text/mod.rs b/implementations/rust/preserves/src/value/text/mod.rs new file mode 100644 index 0000000..8949567 --- /dev/null +++ b/implementations/rust/preserves/src/value/text/mod.rs @@ -0,0 +1,31 @@ +pub mod reader; +pub mod writer; + +pub use reader::TextReader; +pub use writer::TextWriter; + +use std::io; + +use super::{DomainParse, Embeddable, IOValue, IOValueDomainCodec, NestedValue, Reader, ViaCodec}; + +pub fn from_str, Dec: DomainParse>( + s: &str, + decode_embedded: Dec, +) -> io::Result { + TextReader::new(s, decode_embedded).demand_next(false) +} + +pub fn iovalue_from_str(s: &str) -> io::Result { + from_str(s, ViaCodec::new(IOValueDomainCodec)) +} + +pub fn annotated_from_str, Dec: DomainParse>( + s: &str, + decode_embedded: Dec, +) -> io::Result { + TextReader::new(s, decode_embedded).demand_next(true) +} + +pub fn annotated_iovalue_from_str(s: &str) -> io::Result { + annotated_from_str(s, ViaCodec::new(IOValueDomainCodec)) +} diff --git a/implementations/rust/preserves/src/value/text/reader.rs b/implementations/rust/preserves/src/value/text/reader.rs new file mode 100644 index 0000000..e732edf --- /dev/null +++ b/implementations/rust/preserves/src/value/text/reader.rs @@ -0,0 +1,614 @@ +use crate::error::Error; +use crate::error::ExpectedKind; +use crate::error::Received; +use crate::error::eof; +use crate::error::io_syntax_error; +use crate::error::is_eof_error; +use crate::error::syntax_error; + +use crate::hex; + +use crate::value::CompoundClass; +use crate::value::DomainParse; +use crate::value::DummyValue; +use crate::value::Embeddable; +use crate::value::IOValue; +use crate::value::IOValueDomainCodec; +use crate::value::Map; +use crate::value::NestedValue; +use crate::value::Reader; +use crate::value::Record; +use crate::value::Set; +use crate::value::Token; +use crate::value::Value; +use crate::value::ViaCodec; +use crate::value::boundary as B; +use crate::value::reader::BinarySource; +use crate::value::reader::ReaderResult; +use crate::value::repr::Annotations; + +use num::bigint::BigInt; + +use std::io; +use std::iter::FromIterator; +use std::marker::PhantomData; + +pub struct TextReader<'a, D: Embeddable, Dec: DomainParse> { + buf: &'a str, + pos: usize, + dec: Dec, + phantom: PhantomData, +} + +impl<'a, D: Embeddable, Dec: DomainParse> TextReader<'a, D, Dec> { + pub fn new(buf: &'a str, dec: Dec) -> Self { + TextReader { + buf, + pos: 0, + dec, + phantom: PhantomData, + } + } + + fn remaining_input(&self) -> &str { + &self.buf[self.pos ..] + } + + fn peek(&self) -> ReaderResult { + if self.pos >= self.buf.len() { + Err(eof()) + } else { + Ok(self.buf[self.pos ..].chars().next().unwrap()) + } + } + + fn drop(&mut self, count: usize) { + self.pos += count; + } + + fn undrop(&mut self, count: usize) { + self.pos -= count; + } + + fn next_char(&mut self) -> ReaderResult { + let c = self.peek()?; + self.drop(c.len_utf8()); + Ok(c) + } + + fn skip_whitespace(&mut self) { + while let Ok(c) = self.peek() { + if !c.is_whitespace() && c != ',' { + break; + } + self.drop(c.len_utf8()) + } + } + + // TODO: This is a duplicate of fn expected in PackedReader. + fn expected>(&mut self, k: ExpectedKind) -> Error { + match Reader::::demand_next(self, true) { + Ok(v) => Error::Expected(k, Received::ReceivedOtherValue(format!("{:?}", v))), + Err(e) => e.into() + } + } + + fn gather_annotations>(&mut self) -> ReaderResult> { + let mut vs = Vec::new(); + loop { + self.skip_whitespace(); + match self.peek()? { + ';' => { self.drop(1); vs.push(N::new(self.comment_line()?)) } + '@' => { self.drop(1); vs.push(self.demand_next(true)?) } + _ => return Ok(vs), + } + } + } + + fn skip_annotations(&mut self) -> ReaderResult<()> { + loop { + self.skip_whitespace(); + match self.peek()? { + ';' => { self.drop(1); self.comment_line()?; }, + '@' => { self.drop(1); Reader::>::skip_value(self)?; }, + _ => return Ok(()), + } + } + } + + pub fn next_iovalue(&mut self, read_annotations: bool) -> io::Result { + let mut r = TextReader::new(self.remaining_input(), ViaCodec::new(IOValueDomainCodec)); + let v = r.demand_next(read_annotations)?; + self.pos += r.pos; + Ok(v) + } + + fn comment_line(&mut self) -> io::Result { + let mut s = String::new(); + loop { + match self.next_char()? { + '\r' | '\n' => return Ok(s), + c => s.push(c), + } + } + } + + fn read_intpart>(&mut self, mut s: String, c: char) -> io::Result { + match c { + '0' => { + s.push(c); + self.read_fracexp(s) + } + _ => { + self.read_digit1(&mut s, c)?; + self.read_fracexp(s) + } + } + } + + fn read_fracexp>(&mut self, mut s: String) -> io::Result { + match self.peek()? { + '.' => { + s.push(self.next_char()?); + let c = self.next_char()?; + self.read_digit1(&mut s, c)?; + } + _ => () + } + self.read_exp(s) + } + + fn read_exp>(&mut self, mut s: String) -> io::Result { + match self.peek()? { + 'e' | 'E' => { + s.push(self.next_char()?); + self.read_sign_and_exp(s) + } + _ => self.finish_number(s) + } + } + + fn read_sign_and_exp>(&mut self, mut s: String) -> io::Result { + match self.peek()? { + '+' | '-' => s.push(self.next_char()?), + _ => (), + } + let c = self.next_char()?; + self.read_digit1(&mut s, c)?; + self.finish_number(s) + } + + fn finish_number>(&mut self, s: String) -> io::Result { + if let Ok(n) = s.parse::() { + return Ok(N::new(n)); + } + match self.peek()? { + 'f' | 'F' => { + self.drop(1); + Ok(N::new(s.parse::().map_err( + |_| io_syntax_error(&format!( + "Invalid single-precision number: {:?}", s)))?)) + } + _ => + Ok(N::new(s.parse::().map_err( + |_| io_syntax_error(&format!( + "Invalid double-precision number: {:?}", s)))?)) + } + } + + fn read_digit1(&mut self, s: &mut String, c: char) -> io::Result<()> + { + if !c.is_digit(10) { + return Err(io_syntax_error("Incomplete number")); + } + s.push(c); + while self.peek()?.is_digit(10) { + s.push(self.next_char()?); + } + Ok(()) + } + + fn read_stringlike( + &mut self, + mut seed: R, + acc: Acc, + xform_item: X, + terminator: char, + hexescape: char, + hexescaper: H, + ) -> io::Result + where + X: Fn(char) -> Element, + H: Fn(&mut Self) -> io::Result, + Acc: Fn(&mut R, Element) -> (), + { + loop { + match self.next_char()? { + c if c == terminator => return Ok(seed), + '\\' => match self.next_char()? { + c if c == hexescape => + acc(&mut seed, hexescaper(self)?), + c if c == terminator || c == '\\' || c == '/' => + acc(&mut seed, xform_item(c)), + 'b' => acc(&mut seed, xform_item('\x08')), + 'f' => acc(&mut seed, xform_item('\x0c')), + 'n' => acc(&mut seed, xform_item('\x0a')), + 'r' => acc(&mut seed, xform_item('\x0d')), + 't' => acc(&mut seed, xform_item('\x09')), + _ => return Err(io_syntax_error("Invalid escape code")), + }, + c => acc(&mut seed, xform_item(c)), + } + } + } + + fn hexnum(&mut self, count: usize) -> io::Result { + let mut v: u32 = 0; + for _ in 0 .. count { + let c = self.next_char()?; + match c.to_digit(16) { + Some(d) => + v = v << 4 | d, + None => + return Err(io_syntax_error("Bad hex escape")), + } + } + Ok(v) + } + + fn read_string(&mut self, delimiter: char) -> io::Result { + self.read_stringlike( + String::new(), + |s, c| s.push(c), + |c| c, + delimiter, + 'u', + |r| { + let n1 = r.hexnum(4)?; + if (0xd800 ..= 0xdbff).contains(&n1) { + let mut ok = true; + ok = ok && r.next_char()? == '\\'; + ok = ok && r.next_char()? == 'u'; + if !ok { + Err(io_syntax_error("Missing second half of surrogate pair")) + } else { + let n2 = r.hexnum(4)?; + if (0xdc00 ..= 0xdfff).contains(&n2) { + let n = ((n1 - 0xd800) << 10) + (n2 - 0xdc00) + 0x10000; + char::from_u32(n).ok_or_else( + || io_syntax_error("Bad code point from surrogate pair")) + } else { + Err(io_syntax_error("Bad second half of surrogate pair")) + } + } + } else { + char::from_u32(n1).ok_or_else( + || io_syntax_error("Bad code point")) + } + }) + } + + fn read_literal_binary>(&mut self) -> io::Result { + Ok(N::new(&self.read_stringlike( + Vec::new(), + |bs, b| bs.push(b), + |c| c as u8, + '"', + 'x', + |r| Ok(r.hexnum(2)? as u8))?[..])) + } + + fn read_hex_binary>(&mut self) -> io::Result { + let mut s = String::new(); + loop { + self.skip_whitespace(); + let c1 = self.next_char()?; + if c1 == '"' { + let bs = hex::HexParser::Strict.decode(&s).unwrap(); + return Ok(N::new(&bs[..])); + } + let c2 = self.next_char()?; + if !(c1.is_digit(16) && c2.is_digit(16)) { + return Err(io_syntax_error("Invalid hex binary")); + } + s.push(c1); + s.push(c2); + } + } + + fn read_base64_binary>(&mut self) -> io::Result { + let mut s = String::new(); + loop { + self.skip_whitespace(); + let mut c = self.next_char()?; + if c == ']' { + let bs = base64::decode_config(&s, base64::STANDARD_NO_PAD) + .map_err(|_| io_syntax_error("Invalid base64 character"))?; + return Ok(N::new(&bs[..])); + } + if c == '-' { c = '+'; } + if c == '_' { c = '/'; } + if c == '=' { continue; } + s.push(c); + } + } + + fn upto>(&mut self, delimiter: char, read_annotations: bool) -> io::Result> { + let mut vs = Vec::new(); + loop { + self.skip_whitespace(); + if self.peek()? == delimiter { + self.drop(delimiter.len_utf8()); + return Ok(vs); + } + vs.push(Reader::::demand_next(self, read_annotations)?); + } + } + + fn read_dictionary>(&mut self, read_annotations: bool) -> io::Result { + let mut d = Map::new(); + loop { + self.skip_whitespace(); + if self.peek()? == '}' { + self.drop(1); + return Ok(N::new(d)); + } + let k = Reader::::demand_next(self, read_annotations)?; + self.skip_whitespace(); + if self.next_char()? != ':' { + return Err(io_syntax_error("Missing expected key/value separator")); + } + let v = Reader::::demand_next(self, read_annotations)?; + d.insert(k, v); + } + } + + fn read_raw_symbol>(&mut self, mut s: String) -> io::Result { + loop { + let c = match self.peek() { + Err(e) if is_eof_error(&e) => ' ', + Err(e) => return Err(e)?, + Ok(c) if c.is_whitespace() => ' ', + Ok(c) => c + }; + match c { + '(' | ')' | '{' | '}' | '[' | ']' | '<' | '>' | + '"' | ';' | ',' | '@' | '#' | ':' | '|' | ' ' => + return Ok(Value::symbol(&s).wrap()), + c => { + self.drop(c.len_utf8()); + s.push(c) + } + } + } + } +} + +impl<'a, 'de, D: Embeddable, N: NestedValue, Dec: DomainParse> Reader<'de, D, N> for TextReader<'a, D, Dec> { + fn next(&mut self, read_annotations: bool) -> io::Result> { + self.skip_whitespace(); + let c = match self.next_char() { + Ok(c) => c, + Err(e) if is_eof_error(&e) => return Ok(None), + Err(e) => return Err(e.into()), + }; + Ok(Some(match c { + '-' => { + let c1 = self.next_char()?; + self.read_intpart("-".to_owned(), c1)? + } + '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => + self.read_intpart(String::new(), c)?, + '"' => + N::new(self.read_string('"')?), + '|' => + Value::symbol(&self.read_string('|')?).wrap(), + ';' | '@' => { + self.undrop(1); + if read_annotations { + let mut annotations = self.gather_annotations()?; + let (existing_annotations, v) = + Reader::::demand_next(self, read_annotations)?.pieces(); + annotations.extend_from_slice(existing_annotations.slice()); + N::wrap(Annotations::new(Some(annotations)), v) + } else { + self.skip_annotations()?; + self.demand_next(read_annotations)? + } + } + ':' => { + // return Err(io_syntax_error("Unexpected key/value separator between items")), + return Err(io_syntax_error(&format!("Unexpected key/value separator between items (pos {:?})", self.pos))); + } + '#' => match self.next_char()? { + 'f' => N::new(false), + 't' => N::new(true), + '{' => N::new(Set::from_iter(self.upto('}', read_annotations)?.into_iter())), + '"' => self.read_literal_binary()?, + 'x' => if self.next_char()? == '"' { + self.read_hex_binary()? + } else { + return Err(io_syntax_error("Expected open-quote at start of hex ByteString")); + }, + '[' => self.read_base64_binary()?, + '=' => { + let bs_val: N = self.demand_next(true)?; + if bs_val.annotations().slice().len() > 0 { + return Err(io_syntax_error("Annotations not permitted after #=")); + } + match bs_val.value().as_bytestring() { + None => + return Err(io_syntax_error("ByteString must follow #=")), + Some(bs) => + crate::value::BytesBinarySource::new(bs) + .packed(ViaCodec::new(&mut self.dec)) + .demand_next(read_annotations)? + } + } + '!' => { + let v = self.next_iovalue(read_annotations)?; + Value::Embedded(self.dec.parse_embedded(&v)?).wrap() + } + other => return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other))), + }, + '<' => { + let vs = self.upto('>', read_annotations)?; + if vs.is_empty() { + return Err(io_syntax_error("Missing record label")); + } + Value::Record(Record(vs)).wrap() + } + '[' => N::new(self.upto(']', read_annotations)?), + '{' => self.read_dictionary(read_annotations)?, + '>' => return Err(io_syntax_error("Unexpected >")), + ']' => return Err(io_syntax_error("Unexpected ]")), + '}' => return Err(io_syntax_error("Unexpected }")), + other => self.read_raw_symbol(other.to_string())?, + })) + } + + fn open_record(&mut self, arity: Option) -> ReaderResult { + self.skip_annotations()?; + if self.peek()? != '<' { return Err(self.expected::(ExpectedKind::Record(arity))); } + self.drop(1); + let mut b = B::Type::default(); + Reader::::ensure_more_expected(self, &mut b, &B::Item::RecordLabel)?; + Ok(b) + } + + fn open_sequence_or_set(&mut self) -> ReaderResult { + self.skip_annotations()?; + let mark = Reader::::mark(self)?; + match self.next_char()? { + '#' => match self.next_char()? { + '{' => return Ok(B::Item::SetValue), + _ => (), + }, + '[' => return Ok(B::Item::SequenceValue), + _ => (), + } + Reader::::restore(self, &mark)?; + Err(self.expected::(ExpectedKind::SequenceOrSet)) + } + + fn open_sequence(&mut self) -> ReaderResult<()> { + self.skip_annotations()?; + if self.peek()? != '[' { return Err(self.expected::(ExpectedKind::Sequence)); } + self.drop(1); + Ok(()) + } + + fn open_set(&mut self) -> ReaderResult<()> { + self.skip_annotations()?; + let mark = Reader::::mark(self)?; + match self.next_char()? { + '#' => match self.next_char()? { + '{' => return Ok(()), + _ => (), + }, + _ => (), + } + Reader::::restore(self, &mark)?; + Err(self.expected::(ExpectedKind::Set)) + } + + fn open_dictionary(&mut self) -> ReaderResult<()> { + self.skip_annotations()?; + if self.peek()? != '{' { return Err(self.expected::(ExpectedKind::Dictionary)); } + self.drop(1); + Ok(()) + } + + fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> { + match b { + B::Type { + closing: Some(B::Item::DictionaryKey), + opening: Some(B::Item::DictionaryValue), + } => { + self.skip_whitespace(); + if self.next_char()? != ':' { + return Err(syntax_error("Missing expected key/value separator")); + } + }, + _ => (), + } + Ok(()) + } + + fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult { + self.skip_whitespace(); + match self.peek()? { + '>' | ']' | '}' => { + self.drop(1); + Ok(true) + } + _ => { + b.shift(Some(i.clone())); + Reader::::boundary(self, b)?; + Ok(false) + } + } + } + + fn open_embedded(&mut self) -> ReaderResult<()> { + self.skip_annotations()?; + let mark = Reader::::mark(self)?; + match self.next_char()? { + '#' => match self.next_char()? { + '!' => return Ok(()), + _ => (), + }, + _ => (), + } + Reader::::restore(self, &mark)?; + Err(self.expected::(ExpectedKind::Embedded)) + } + + fn close_embedded(&mut self) -> ReaderResult<()> { + Ok(()) + } + + type Mark = usize; + + fn mark(&mut self) -> io::Result { + Ok(self.pos) + } + + fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> { + self.pos = *mark; + Ok(()) + } + + fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result> { + self.skip_annotations()?; + let mark = Reader::::mark(self)?; + Ok(match self.next_char()? { + '<' => Token::Compound(CompoundClass::Record), + '[' => Token::Compound(CompoundClass::Sequence), + '{' => Token::Compound(CompoundClass::Dictionary), + '>' => Token::End, + ']' => Token::End, + '}' => Token::End, + '#' => match self.next_char()? { + '!' => { + let v = self.next_iovalue(read_embedded_annotations)?; + Token::Embedded(self.dec.parse_embedded(&v)?) + } + '{' => Token::Compound(CompoundClass::Set), + _ => { + Reader::::restore(self, &mark)?; + Token::Atom(self.demand_next(false)?) + } + }, + _ => { + Reader::::restore(self, &mark)?; + Token::Atom(self.demand_next(false)?) + } + }) + } + + fn next_annotations_and_token(&mut self) -> io::Result<(Vec, Token)> { + let annotations = self.gather_annotations()?; + Ok((annotations, self.next_token(true)?)) + } +} diff --git a/implementations/rust/preserves/src/value/text/writer.rs b/implementations/rust/preserves/src/value/text/writer.rs new file mode 100644 index 0000000..4d61164 --- /dev/null +++ b/implementations/rust/preserves/src/value/text/writer.rs @@ -0,0 +1,234 @@ +use crate::value::Writer; +use crate::value::suspendable::Suspendable; +use crate::value::writer::CompoundWriter; + +use num::bigint::BigInt; + +use std::io; + +use super::super::boundary as B; + +#[derive(Clone, Copy)] +pub enum CommaStyle { + None, + Separating, + Terminating, +} + +pub struct TextWriter(Suspendable, CommaStyle); + +impl std::default::Default for CommaStyle { + fn default() -> Self { + CommaStyle::None + } +} + +impl TextWriter { + pub fn new(w: W) -> Self { + TextWriter(Suspendable::new(w), CommaStyle::default()) + } + + pub fn suspend(&mut self) -> Self { + TextWriter(self.0.suspend(), self.1) + } + + pub fn resume(&mut self, other: Self) { + self.0.resume(other.0) + } + + pub fn write_stringlike_char_fallback( + &mut self, + c: char, + f: F, + ) -> io::Result<()> where + F: FnOnce(&mut W, char) -> io::Result<()> + { + match c { + '\\' => write!(self.0, "\\\\"), + '\x08' => write!(self.0, "\\b"), + '\x0c' => write!(self.0, "\\f"), + '\x0a' => write!(self.0, "\\n"), + '\x0d' => write!(self.0, "\\r"), + '\x09' => write!(self.0, "\\t"), + _ => f(&mut self.0, c), + } + } + + pub fn write_stringlike_char(&mut self, c: char) -> io::Result<()> { + self.write_stringlike_char_fallback(c, |w, c| write!(w, "{}", c)) + } +} + +impl CompoundWriter for TextWriter { + fn boundary(&mut self, b: &B::Type) -> io::Result<()> { + match (b.closing.as_ref(), b.opening.as_ref()) { + (None, Some(B::Item::Annotation)) => + write!(self.0, "@"), + (Some(_), Some(B::Item::Annotation)) => + write!(self.0, " @"), + (Some(B::Item::Annotation), Some(B::Item::AnnotatedValue)) => + write!(self.0, " "), + + (Some(B::Item::DictionaryKey), Some(B::Item::DictionaryValue)) => + write!(self.0, ": "), + + (Some(B::Item::RecordLabel), Some(B::Item::RecordField)) | + (Some(B::Item::RecordField), Some(B::Item::RecordField)) => + write!(self.0, " "), + + (Some(B::Item::DictionaryValue), Some(B::Item::DictionaryKey)) | + (Some(B::Item::SequenceValue), Some(B::Item::SequenceValue)) | + (Some(B::Item::SetValue), Some(B::Item::SetValue)) => + match self.1 { + CommaStyle::Separating | CommaStyle::Terminating => + write!(self.0, ", "), + CommaStyle::None => + write!(self.0, " "), + }, + + (Some(B::Item::DictionaryValue), None) | + (Some(B::Item::SequenceValue), None) | + (Some(B::Item::SetValue), None) => + match self.1 { + CommaStyle::Terminating => + write!(self.0, ","), + CommaStyle::Separating | CommaStyle::None => + Ok(()), + }, + + _ => + Ok(()) + } + } +} + +macro_rules! simple_writer_method { + ($n:ident, $argty:ty) => + (fn $n (&mut self, v: $argty) -> io::Result<()> { + write!(self.0, "{}", v) + }); +} + +impl Writer for TextWriter { + type AnnWriter = Self; + type RecWriter = Self; + type SeqWriter = Self; + type SetWriter = Self; + type DictWriter = Self; + type EmbeddedWriter = Self; + + fn start_annotations(&mut self) -> io::Result { + Ok(self.suspend()) + } + + fn end_annotations(&mut self, ann: Self::AnnWriter) -> io::Result<()> { + self.resume(ann); + Ok(()) + } + + fn write_bool(&mut self, v: bool) -> io::Result<()> { + write!(self.0, "{}", if v { "#t" } else { "#f" }) + } + + fn write_f32(&mut self, v: f32) -> io::Result<()> { + write!(self.0, "{:e}f", v) + } + + fn write_f64(&mut self, v: f64) -> io::Result<()> { + write!(self.0, "{:e}", v) + } + + simple_writer_method!(write_i8, i8); + simple_writer_method!(write_u8, u8); + simple_writer_method!(write_i16, i16); + simple_writer_method!(write_u16, u16); + simple_writer_method!(write_i32, i32); + simple_writer_method!(write_u32, u32); + simple_writer_method!(write_i64, i64); + simple_writer_method!(write_u64, u64); + simple_writer_method!(write_i128, i128); + simple_writer_method!(write_u128, u128); + simple_writer_method!(write_int, &BigInt); + + fn write_string(&mut self, v: &str) -> io::Result<()> { + write!(self.0, "\"")?; + for c in v.chars() { + match c { + '"' => write!(self.0, "\\\"")?, + _ => self.write_stringlike_char(c)?, + } + } + write!(self.0, "\"") + } + + fn write_bytes(&mut self, v: &[u8]) -> io::Result<()> { + write!(self.0, "#[{}]", base64::encode_config(v, base64::STANDARD_NO_PAD)) + } + + fn write_symbol(&mut self, v: &str) -> io::Result<()> { + // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. + let re = regex::Regex::new("^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$").unwrap(); + if re.is_match(v) { + write!(self.0, "{}", v) + } else { + write!(self.0, "|")?; + for c in v.chars() { + match c { + '|' => write!(self.0, "\\|")?, + _ => self.write_stringlike_char(c)?, + } + } + write!(self.0, "|") + } + } + + fn start_record(&mut self, _field_count: Option) -> io::Result { + write!(self.0, "<")?; + Ok(self.suspend()) + } + + fn end_record(&mut self, rec: Self::RecWriter) -> io::Result<()> { + self.resume(rec); + write!(self.0, ">") + } + + fn start_sequence(&mut self, _item_count: Option) -> io::Result { + write!(self.0, "[")?; + Ok(self.suspend()) + } + + fn end_sequence(&mut self, seq: Self::SeqWriter) -> io::Result<()> { + self.resume(seq); + write!(self.0, "]") + } + + fn start_set(&mut self, _item_count: Option) -> io::Result { + write!(self.0, "#{{")?; + Ok(self.suspend()) + } + + fn end_set(&mut self, set: Self::SetWriter) -> io::Result<()> { + self.resume(set); + write!(self.0, "}}") + } + + fn start_dictionary(&mut self, _entry_count: Option) -> io::Result { + write!(self.0, "{{")?; + Ok(self.suspend()) + } + + fn end_dictionary(&mut self, dict: Self::DictWriter) -> io::Result<()> { + self.resume(dict); + write!(self.0, "}}") + } + + fn start_embedded(&mut self) -> io::Result { + write!(self.0, "#!")?; + Ok(self.suspend()) + } + + fn end_embedded(&mut self, ptr: Self::EmbeddedWriter) -> io::Result<()> { + self.resume(ptr); + Ok(()) + } +} diff --git a/implementations/rust/preserves/src/value/writer.rs b/implementations/rust/preserves/src/value/writer.rs index f6e8dee..6746721 100644 --- a/implementations/rust/preserves/src/value/writer.rs +++ b/implementations/rust/preserves/src/value/writer.rs @@ -1,27 +1,22 @@ use num::bigint::BigInt; use std::io; use super::DomainEncode; +use super::boundary as B; use super::signed_integer::SignedIntegerRepr; use super::repr::{Embeddable, Value, NestedValue, Float, Double}; -pub trait AnnotationWriter: Writer { - fn start_annotation(&mut self) -> io::Result<()>; - fn start_value(&mut self) -> io::Result<()>; -} - pub trait CompoundWriter: Writer { - fn extend(&mut self) -> io::Result<()>; - fn delimit(&mut self) -> io::Result<()>; + fn boundary(&mut self, b: &B::Type) -> io::Result<()>; } pub trait Writer: Sized { - type AnnWriter: AnnotationWriter; + type AnnWriter: CompoundWriter; + type RecWriter: CompoundWriter; type SeqWriter: CompoundWriter; type SetWriter: CompoundWriter; + type DictWriter: CompoundWriter; type EmbeddedWriter: Writer; - fn align(&mut self, natural_chunksize: u64) -> io::Result<()>; - fn start_annotations(&mut self) -> io::Result; fn end_annotations(&mut self, ann: Self::AnnWriter) -> io::Result<()>; @@ -46,14 +41,18 @@ pub trait Writer: Sized { fn write_bytes(&mut self, v: &[u8]) -> io::Result<()>; fn write_symbol(&mut self, v: &str) -> io::Result<()>; - fn start_record(&mut self, field_count: Option) -> io::Result; + fn start_record(&mut self, field_count: Option) -> io::Result; + fn end_record(&mut self, rec: Self::RecWriter) -> io::Result<()>; + fn start_sequence(&mut self, item_count: Option) -> io::Result; - fn end_seq(&mut self, seq: Self::SeqWriter) -> io::Result<()>; + fn end_sequence(&mut self, seq: Self::SeqWriter) -> io::Result<()>; fn start_set(&mut self, item_count: Option) -> io::Result; - fn start_dictionary(&mut self, entry_count: Option) -> io::Result; fn end_set(&mut self, set: Self::SetWriter) -> io::Result<()>; + fn start_dictionary(&mut self, entry_count: Option) -> io::Result; + fn end_dictionary(&mut self, dict: Self::DictWriter) -> io::Result<()>; + fn start_embedded(&mut self) -> io::Result; fn end_embedded(&mut self, ptr: Self::EmbeddedWriter) -> io::Result<()>; @@ -70,12 +69,17 @@ pub trait Writer: Sized { } Some(anns) => { let mut a = self.start_annotations()?; + let mut b = B::Type::default(); for ann in anns { - a.start_annotation()?; + b.shift(Some(B::Item::Annotation)); + a.boundary(&b)?; a.write(enc, ann)?; } - a.start_value()?; + b.shift(Some(B::Item::AnnotatedValue)); + a.boundary(&b)?; a.write_value(enc, v.value())?; + b.shift(None); + a.boundary(&b)?; self.end_annotations(a)?; } } @@ -101,43 +105,56 @@ pub trait Writer: Sized { Value::Symbol(s) => self.write_symbol(s), Value::Record(r) => { let mut c = self.start_record(Some(r.arity()))?; - c.extend()?; + let mut b = B::start(B::Item::RecordLabel); + c.boundary(&b)?; c.write(enc, r.label())?; - c.delimit()?; for f in r.fields() { - c.extend()?; + b.shift(Some(B::Item::RecordField)); + c.boundary(&b)?; c.write(enc, f)?; - c.delimit()?; } - self.end_seq(c) + b.shift(None); + c.boundary(&b)?; + self.end_record(c) } Value::Sequence(vs) => { let mut c = self.start_sequence(Some(vs.len()))?; + let mut b = B::Type::default(); for v in vs { - c.extend()?; + b.shift(Some(B::Item::SequenceValue)); + c.boundary(&b)?; c.write(enc, v)?; - c.delimit()?; } - self.end_seq(c) + b.shift(None); + c.boundary(&b)?; + self.end_sequence(c) } Value::Set(vs) => { let mut c = self.start_set(Some(vs.len()))?; + let mut b = B::Type::default(); for v in vs { - c.extend()?; + b.shift(Some(B::Item::SetValue)); + c.boundary(&b)?; c.write(enc, v)?; - c.delimit()?; } + b.shift(None); + c.boundary(&b)?; self.end_set(c) } Value::Dictionary(vs) => { let mut c = self.start_dictionary(Some(vs.len()))?; + let mut b = B::Type::default(); for (k, v) in vs { - c.extend()?; + b.shift(Some(B::Item::DictionaryKey)); + c.boundary(&b)?; c.write(enc, k)?; + b.shift(Some(B::Item::DictionaryValue)); + c.boundary(&b)?; c.write(enc, v)?; - c.delimit()?; } - self.end_set(c) + b.shift(None); + c.boundary(&b)?; + self.end_dictionary(c) } Value::Embedded(d) => { let mut c = self.start_embedded()?; diff --git a/implementations/rust/preserves/tests/samples/mod.rs b/implementations/rust/preserves/tests/samples/mod.rs index 0507d8d..093beef 100644 --- a/implementations/rust/preserves/tests/samples/mod.rs +++ b/implementations/rust/preserves/tests/samples/mod.rs @@ -1,12 +1,12 @@ use preserves::symbol::Symbol; use preserves::value::{IOValue, Map}; -#[derive(Debug, serde::Serialize, serde::Deserialize)] +#[derive(Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub struct TestCases { pub tests: Map } -#[derive(Debug, serde::Serialize, serde::Deserialize)] +#[derive(Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub enum TestCase { Test(#[serde(with = "serde_bytes")] Vec, IOValue), NondeterministicTest(#[serde(with = "serde_bytes")] Vec, IOValue), diff --git a/implementations/rust/preserves/tests/samples_tests.rs b/implementations/rust/preserves/tests/samples_tests.rs index 78fa6b1..0320b42 100644 --- a/implementations/rust/preserves/tests/samples_tests.rs +++ b/implementations/rust/preserves/tests/samples_tests.rs @@ -17,12 +17,85 @@ fn decode_all(bytes: &'_ [u8]) -> io::Result> { BytesBinarySource::new(bytes).packed_iovalues().configured(true).collect() } +#[test] fn compare_text_with_packed() -> io::Result<()> { + use io::prelude::*; + let from_text = { + let mut fh = std::fs::File::open("../../../tests/samples.pr").unwrap(); + let mut contents = String::new(); + fh.read_to_string(&mut contents)?; + let mut d = preserves::value::TextReader::new(&contents, preserves::value::ViaCodec::new(preserves::value::IOValueDomainCodec)); + d.next_iovalue(true)? + }; + let from_packed = { + let mut fh = std::fs::File::open("../../../tests/samples.bin").unwrap(); + IOBinarySource::new(&mut fh).packed_iovalues().demand_next(true)? + }; + assert_eq!(from_text, from_packed); + Ok(()) +} + +#[test] fn compare_deserialize_text_with_packed() -> io::Result<()> { + use io::prelude::*; + let from_text = { + let mut fh = std::fs::File::open("../../../tests/samples.pr").unwrap(); + let mut contents = String::new(); + fh.read_to_string(&mut contents)?; + let tests: TestCases = preserves::de::from_text(&contents)?; + tests + }; + let from_packed = { + let mut fh = std::fs::File::open("../../../tests/samples.bin").unwrap(); + let tests: TestCases = preserves::de::from_read(&mut fh)?; + tests + }; + assert_eq!(from_text, from_packed); + Ok(()) +} + +#[test] fn read_write_read_text() -> io::Result<()> { + use io::prelude::*; + let from_text = { + let mut fh = std::fs::File::open("../../../tests/samples.pr").unwrap(); + let mut contents = String::new(); + fh.read_to_string(&mut contents)?; + preserves::value::text::annotated_iovalue_from_str(&contents)? + }; + let roundtripped = { + let mut bs = Vec::new(); + let mut w = preserves::value::TextWriter::new(&mut bs); + preserves::ser::to_writer(&mut w, &from_text)?; + let s = String::from_utf8(bs).unwrap(); + preserves::value::text::annotated_iovalue_from_str(&s)? + }; + assert_eq!(from_text, roundtripped); + Ok(()) +} + +#[test] fn deserialize_serialize_deserialize_text() -> io::Result<()> { + use io::prelude::*; + let from_text = { + let mut fh = std::fs::File::open("../../../tests/samples.pr").unwrap(); + let mut contents = String::new(); + fh.read_to_string(&mut contents)?; + let tests: TestCases = preserves::de::from_text(&contents)?; + tests + }; + let roundtripped = { + let mut bs = Vec::new(); + let mut w = preserves::value::TextWriter::new(&mut bs); + preserves::ser::to_writer(&mut w, &from_text)?; + let s = String::from_utf8(bs).unwrap(); + preserves::de::from_text(&s)? + }; + assert_eq!(from_text, roundtripped); + Ok(()) +} + #[test] fn run() -> io::Result<()> { let mut fh = std::fs::File::open("../../../tests/samples.bin").unwrap(); let mut src = IOBinarySource::new(&mut fh); let mut d = src.packed_iovalues().configured(true); let tests: TestCases = deserialize_from_value(&d.next().unwrap().unwrap()).unwrap(); - // println!("{:#?}", tests); for (Symbol(ref name), ref case) in tests.tests { println!("{:?} ==> {:?}", name, case);