From bddb4331aa73e9c290ed24b31003a31e1201efbc Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Sat, 23 May 2020 22:33:02 +0200 Subject: [PATCH] Direct serialization --- implementations/rust/Cargo.toml | 2 +- implementations/rust/src/de.rs | 0 implementations/rust/src/lib.rs | 47 ++- implementations/rust/src/ser.rs | 400 ++++++++++++++++++++++ implementations/rust/src/symbol.rs | 8 +- implementations/rust/src/value/encoder.rs | 110 ++---- implementations/rust/src/value/mod.rs | 5 + implementations/rust/src/value/value.rs | 6 +- implementations/rust/src/value/writer.rs | 274 +++++++++++++++ 9 files changed, 736 insertions(+), 116 deletions(-) create mode 100644 implementations/rust/src/de.rs create mode 100644 implementations/rust/src/ser.rs create mode 100644 implementations/rust/src/value/writer.rs diff --git a/implementations/rust/Cargo.toml b/implementations/rust/Cargo.toml index 74236de..5c45cbe 100644 --- a/implementations/rust/Cargo.toml +++ b/implementations/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "preserves" -version = "0.5.1" +version = "0.6.0" authors = ["Tony Garnock-Jones "] edition = "2018" description = "Implementation of the Preserves serialization format via serde." diff --git a/implementations/rust/src/de.rs b/implementations/rust/src/de.rs new file mode 100644 index 0000000..e69de29 diff --git a/implementations/rust/src/lib.rs b/implementations/rust/src/lib.rs index 67565ba..ad92362 100644 --- a/implementations/rust/src/lib.rs +++ b/implementations/rust/src/lib.rs @@ -1,9 +1,11 @@ -pub mod value; +pub mod de; +pub mod ser; pub mod symbol; +pub mod value; #[cfg(test)] mod dom { - use super::value::{Value, PlainValue, NestedValue, Domain, Encoder, Codec}; + use super::value::{Value, PlainValue, NestedValue, Domain, Encoder, Codec, writer::Writer}; #[derive(Debug, Hash, Clone, Ord, PartialEq, Eq, PartialOrd)] pub enum Dom { @@ -12,12 +14,16 @@ mod dom { } impl Domain for Dom { - fn encode<'a, 'b, W: std::io::Write, N: NestedValue>( + fn encode<'a, 'b, W: Writer, N: NestedValue>( &self, enc: &mut Encoder<'a, 'b, W, N, Self>) -> super::value::encoder::Result { match self { - Dom::One => enc.write_all(&[255, 255, 255, 255]), + Dom::One => { + enc.write.stream_bytes()?; + enc.write.write_bytes(&[255, 255, 255, 255])?; + enc.write.close_stream() + } Dom::Two => enc.write(&self.as_preserves()?) } } @@ -33,7 +39,7 @@ mod dom { Value::from(2).wrap()]) .wrap(); assert_eq!(Codec::without_placeholders().encode_bytes(&v).unwrap(), - [147, 49, 255, 255, 255, 255, 50]); + [147, 49, 38, 100, 255, 255, 255, 255, 4, 50]); } #[test] fn test_two() { @@ -44,16 +50,6 @@ mod dom { assert_eq!(Codec::without_placeholders().encode_bytes(&v).unwrap(), [147, 49, 120, 68, 111, 109, 58, 58, 84, 119, 111, 50]); } - - #[test] fn test_unit() { - let v: PlainValue<_> = Value::from(vec![Value::from(1).wrap(), - Value::Domain(()).wrap(), - Value::from(2).wrap()]) - .wrap(); - let e = Codec::without_placeholders().encode_bytes(&v).err().unwrap(); - assert_eq!(e.kind(), std::io::ErrorKind::InvalidData); - assert_eq!(e.to_string(), "Cannot Preserves-encode domain-specific value ()"); - } } #[cfg(test)] @@ -372,8 +368,11 @@ mod samples_tests { } #[test] fn simple_to_value() { + use serde::Serialize; #[derive(Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] struct SimpleValue<'a>(String, + #[serde(with = "crate::symbol")] String, + Symbol, #[serde(with = "crate::symbol")] String, Symbol, &'a str, @@ -384,16 +383,28 @@ mod samples_tests { let v = SimpleValue("hello".to_string(), "sym1".to_string(), Symbol("sym2".to_string()), + "sym3".to_string(), + Symbol("sym4".to_string()), "world", &b"slice"[..], b"vec".to_vec(), 12345, Value::from("hi").wrap()); - println!("{:#?}", v); + println!("== v: {:#?}", v); let w: PlainValue = to_value(&v).unwrap(); - println!("{:#?}", w); + println!("== w: {:#?}", w); let x = from_value(&w).unwrap(); - println!("{:#?}", &x); + println!("== x: {:#?}", &x); assert_eq!(v, x); + let mut placeholders = Map::new(); + placeholders.insert(5, Value::symbol("sym1")); + placeholders.insert(500, Value::symbol("sym2")); + placeholders.insert(0, Value::symbol("SimpleValue")); + let v_bytes_1 = Codec::new(placeholders.clone()).encode_bytes(&w).unwrap(); + let mut v_bytes_2 = Vec::new(); + v.serialize(&mut crate::ser::Serializer::new(&mut v_bytes_2, placeholders)).unwrap(); + println!("== w bytes = {:?}", v_bytes_1); + println!("== v bytes = {:?}", v_bytes_2); + assert_eq!(v_bytes_1, v_bytes_2); } } diff --git a/implementations/rust/src/ser.rs b/implementations/rust/src/ser.rs new file mode 100644 index 0000000..95c0a9a --- /dev/null +++ b/implementations/rust/src/ser.rs @@ -0,0 +1,400 @@ +use serde::Serialize; +use super::value::writer::{self, Writer}; +use super::value::{ + Value, NestedValue, PlainValue, + Domain, NullDomain, + DecodePlaceholderMap, EncodePlaceholderMap, invert_map, + Decoder, Encoder, to_value, +}; + +type Result = std::result::Result; + +#[derive(Debug)] +pub struct Error { + inner: writer::Error, +} + +impl std::convert::From for Error { + fn from(inner: writer::Error) -> Self { + Error { inner } + } +} + +impl std::convert::From> for Error { + fn from(_inner: crate::value::error::Error) -> Self { + magic_error() + } +} + +impl serde::ser::Error for Error { + fn custom(msg: T) -> Self { + Error { inner: writer::Error::new(std::io::ErrorKind::Other, msg.to_string()) } + } +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::result::Result<(), std::fmt::Error> { + self.inner.fmt(f) + } +} + +impl std::error::Error for Error {} + +#[derive(Debug)] +pub struct Serializer, D: Domain> { + pub write: W, + placeholders: Option>, +} + +impl, D: Domain> Serializer { + pub fn new(write: W, placeholders: DecodePlaceholderMap) -> Self { + Serializer { write, placeholders: Some(invert_map(&placeholders)) } + } + + fn write_symbol(&mut self, s: &str) -> Result<()> { + match self.placeholders.as_ref().and_then(|m| m.get(&Value::symbol(s))) { + Some(&n) => Ok(self.write.write_placeholder_ref(n)?), + None => Ok(self.write.write_symbol(s)?), + } + } +} + +impl Serializer, NullDomain> { + pub fn without_placeholders(write: W) -> Self { + Serializer { write, placeholders: None } + } +} + +#[derive(Debug)] +pub struct SerializeCompound<'a, W: Writer, N: NestedValue, D: Domain> { + ser: &'a mut Serializer, + count: Option, +} + +fn magic_error() -> Error { + Error { inner: writer::Error::new(std::io::ErrorKind::InvalidData, + "Internal error handling magic Value encoding") } +} + +impl<'a, W: Writer, N: NestedValue, D: Domain> serde::Serializer for &'a mut Serializer { + type Ok = (); + type Error = Error; + type SerializeSeq = SerializeCompound<'a, W, N, D>; + type SerializeTuple = SerializeCompound<'a, W, N, D>; + type SerializeTupleStruct = SerializeCompound<'a, W, N, D>; + type SerializeTupleVariant = SerializeCompound<'a, W, N, D>; + type SerializeMap = SerializeCompound<'a, W, N, D>; + type SerializeStruct = SerializeCompound<'a, W, N, D>; + type SerializeStructVariant = SerializeCompound<'a, W, N, D>; + + fn serialize_bool(self, v: bool) -> Result<()> { + Ok(self.write.write_bool(v)?) + } + + fn serialize_i8(self, v: i8) -> Result<()> { + Ok(self.write.write_i8(v)?) + } + + fn serialize_i16(self, v: i16) -> Result<()> { + Ok(self.write.write_i16(v)?) + } + + fn serialize_i32(self, v: i32) -> Result<()> { + Ok(self.write.write_i32(v)?) + } + + fn serialize_i64(self, v: i64) -> Result<()> { + Ok(self.write.write_i64(v)?) + } + + fn serialize_u8(self, v: u8) -> Result<()> { + Ok(self.write.write_u8(v)?) + } + + fn serialize_u16(self, v: u16) -> Result<()> { + Ok(self.write.write_u16(v)?) + } + + fn serialize_u32(self, v: u32) -> Result<()> { + Ok(self.write.write_u32(v)?) + } + + fn serialize_u64(self, v: u64) -> Result<()> { + Ok(self.write.write_u64(v)?) + } + + fn serialize_f32(self, v: f32) -> Result<()> { + Ok(self.write.write_f32(v)?) + } + + fn serialize_f64(self, v: f64) -> Result<()> { + Ok(self.write.write_f64(v)?) + } + + fn serialize_char(self, v: char) -> Result<()> { + self.write.open_record(1)?; + self.write_symbol("UnicodeScalar")?; + self.write.write_u32(v as u32)?; + Ok(self.write.close_record()?) + } + + fn serialize_str(self, v: &str) -> Result<()> { + Ok(self.write.write_string(v)?) + } + + fn serialize_bytes(self, v: &[u8]) -> Result<()> { + Ok(self.write.write_bytes(v)?) + } + + fn serialize_none(self) -> Result<()> { + self.write.open_record(0)?; + self.write_symbol("None")?; + Ok(self.write.close_record()?) + } + + fn serialize_some(self, v: &T) -> Result<()> where T: Serialize { + self.write.open_record(1)?; + self.write_symbol("Some")?; + v.serialize(&mut *self)?; + Ok(self.write.close_record()?) + } + + fn serialize_unit(self) -> Result<()> { + self.write.open_record(0)?; + self.write_symbol("tuple")?; + Ok(self.write.close_record()?) + } + + fn serialize_unit_struct(self, name: &'static str) -> Result<()> { + self.write.open_record(0)?; + self.write_symbol(name)?; + Ok(self.write.close_record()?) + } + + fn serialize_unit_variant(self, + _name: &'static str, + _variant: u32, + variant_name: &'static str) -> + Result<()> + { + self.write.open_record(0)?; + self.write_symbol(variant_name)?; + Ok(self.write.close_record()?) + } + + fn serialize_newtype_struct(self, name: &'static str, value: &T) -> + Result<()> where T: Serialize + { + if name == crate::value::value::MAGIC { + let v0: N = to_value(value)?; + let mut buf: &[u8] = v0.value().as_bytestring().ok_or_else(magic_error)?; + let v1: N = Decoder::new(&mut buf, None).next().or_else(|_| Err(magic_error()))?; + Encoder::new(&mut self.write, self.placeholders.as_ref()).write(&v1)?; + Ok(()) + } else { + // TODO: This is apparently discouraged, and we should apparently just serialize `value`? + self.write.open_record(1)?; + self.write_symbol(name)?; + value.serialize(&mut *self)?; + Ok(self.write.close_record()?) + } + } + + fn serialize_newtype_variant(self, + _name: &'static str, + _variant: u32, + variant_name: &'static str, + value: &T) -> + Result<()> where T: Serialize + { + self.write.open_record(1)?; + self.write_symbol(variant_name)?; + value.serialize(&mut *self)?; + Ok(self.write.close_record()?) + } + + fn serialize_seq(self, count: Option) -> Result { + match count { + Some(n) => self.write.open_sequence(n)?, + None => self.write.stream_sequence()?, + } + Ok(SerializeCompound { ser: self, count }) + } + + fn serialize_tuple(self, count: usize) -> Result { + self.write.open_record(count)?; + self.write_symbol("tuple")?; + Ok(SerializeCompound { ser: self, count: Some(count) }) + } + + fn serialize_tuple_struct(self, name: &'static str, count: usize) -> + Result + { + self.write.open_record(count)?; + self.write_symbol(name)?; + Ok(SerializeCompound { ser: self, count: Some(count) }) + } + + fn serialize_tuple_variant(self, + _name: &'static str, + _variant: u32, + variant_name: &'static str, + count: usize) -> + Result + { + self.write.open_record(count)?; + self.write_symbol(variant_name)?; + Ok(SerializeCompound { ser: self, count: Some(count) }) + } + + fn serialize_map(self, count: Option) -> Result { + match count { + Some(n) => self.write.open_dictionary(n)?, + None => self.write.stream_dictionary()?, + } + Ok(SerializeCompound { ser: self, count }) + } + + fn serialize_struct(self, name: &'static str, count: usize) -> Result { + self.write.open_record(count)?; + self.write_symbol(name)?; + Ok(SerializeCompound { ser: self, count: Some(count) }) + } + + fn serialize_struct_variant(self, + _name: &'static str, + _variant: u32, + variant_name: &'static str, + count: usize) -> + Result + { + self.write.open_record(count)?; + self.write_symbol(variant_name)?; + Ok(SerializeCompound { ser: self, count: Some(count) }) + } +} + +impl<'a, W: Writer, N: NestedValue, D: Domain> serde::ser::SerializeMap for SerializeCompound<'a, W, N, D> { + type Ok = (); + type Error = Error; + + fn serialize_key(&mut self, key: &T) -> Result<()> where T: Serialize { + Ok(key.serialize(&mut *self.ser)?) + } + + fn serialize_value(&mut self, value: &T) -> Result<()> where T: Serialize { + Ok(value.serialize(&mut *self.ser)?) + } + + fn end(self) -> Result<()> { + match self.count { + None => Ok(self.ser.write.close_stream()?), + Some(_) => Ok(self.ser.write.close_dictionary()?), + } + } +} + +impl<'a, W: Writer, N: NestedValue, D: Domain> serde::ser::SerializeStruct for SerializeCompound<'a, W, N, D> { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, _name: &'static str, value: &T) -> Result<()> + where T: Serialize + { + Ok(value.serialize(&mut *self.ser)?) + } + + fn end(self) -> Result<()> { + match self.count { + None => Ok(self.ser.write.close_stream()?), + Some(_) => Ok(self.ser.write.close_record()?), + } + } +} + +impl<'a, W: Writer, N: NestedValue, D: Domain> serde::ser::SerializeStructVariant for SerializeCompound<'a, W, N, D> { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, _name: &'static str, value: &T) -> Result<()> + where T: Serialize + { + Ok(value.serialize(&mut *self.ser)?) + } + + fn end(self) -> Result<()> { + match self.count { + None => Ok(self.ser.write.close_stream()?), + Some(_) => Ok(self.ser.write.close_record()?), + } + } +} + +impl<'a, W: Writer, N: NestedValue, D: Domain> serde::ser::SerializeTuple for SerializeCompound<'a, W, N, D> { + type Ok = (); + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> where T: Serialize { + Ok(value.serialize(&mut *self.ser)?) + } + + fn end(self) -> Result<()> { + match self.count { + None => Ok(self.ser.write.close_stream()?), + Some(_) => Ok(self.ser.write.close_record()?), + } + } +} + +impl<'a, W: Writer, N: NestedValue, D: Domain> serde::ser::SerializeTupleStruct for SerializeCompound<'a, W, N, D> { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, value: &T) -> Result<()> where T: Serialize { + Ok(value.serialize(&mut *self.ser)?) + } + + fn end(self) -> Result<()> { + match self.count { + None => Ok(self.ser.write.close_stream()?), + Some(_) => Ok(self.ser.write.close_record()?), + } + } +} + +impl<'a, W: Writer, N: NestedValue, D: Domain> serde::ser::SerializeTupleVariant for SerializeCompound<'a, W, N, D> { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, value: &T) -> Result<()> where T: Serialize { + Ok(value.serialize(&mut *self.ser)?) + } + + fn end(self) -> Result<()> { + match self.count { + None => Ok(self.ser.write.close_stream()?), + Some(_) => Ok(self.ser.write.close_record()?), + } + } +} + +impl<'a, W: Writer, N: NestedValue, D: Domain> serde::ser::SerializeSeq for SerializeCompound<'a, W, N, D> { + type Ok = (); + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> where T: Serialize { + Ok(value.serialize(&mut *self.ser)?) + } + + fn end(self) -> Result<()> { + match self.count { + None => Ok(self.ser.write.close_stream()?), + Some(_) => Ok(self.ser.write.close_sequence()?), + } + } +} + +pub fn to_writer(write: W, value: &T) -> Result<()> { + let mut ser = Serializer::without_placeholders(write); + value.serialize(&mut ser) +} diff --git a/implementations/rust/src/symbol.rs b/implementations/rust/src/symbol.rs index fb6b4ac..ea7d14e 100644 --- a/implementations/rust/src/symbol.rs +++ b/implementations/rust/src/symbol.rs @@ -1,17 +1,17 @@ -use crate::value::{Value, PlainValue, NestedValue}; +use crate::value::{Value, PlainValue, NestedValue, NullDomain}; -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct Symbol(pub String); impl serde::Serialize for Symbol { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer { - Value::, ()>::symbol(&self.0).wrap().serialize(serializer) + Value::, NullDomain>::symbol(&self.0).wrap().serialize(serializer) } } impl<'de> serde::Deserialize<'de> for Symbol { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de> { - let v = PlainValue::<()>::deserialize(deserializer)?; + let v = PlainValue::::deserialize(deserializer)?; let s = v.value().as_symbol().ok_or_else(|| serde::de::Error::custom("Expected symbol"))?; Ok(Symbol(s.clone())) } diff --git a/implementations/rust/src/value/encoder.rs b/implementations/rust/src/value/encoder.rs index 6dd82d9..c9d5eb0 100644 --- a/implementations/rust/src/value/encoder.rs +++ b/implementations/rust/src/value/encoder.rs @@ -1,82 +1,23 @@ -use std::io::Write; -use crate::value::value::{Value, NestedValue, Float, Double, Map, Domain}; -use num::bigint::BigInt; -use num::cast::ToPrimitive; -use crate::value::constants::{Op, AtomMinor, CompoundMinor}; +use super::value::{Value, NestedValue, Float, Double, Map, Domain}; +use super::writer::Writer; -pub type Error = std::io::Error; -pub type Result = std::result::Result<(), Error>; +pub use super::writer::{Error, Result}; pub type EncodePlaceholderMap = Map, usize>; -pub struct Encoder<'a, 'b, W: Write, N: NestedValue, D: Domain> { +pub struct Encoder<'a, 'b, W: Writer, N: NestedValue, D: Domain> { pub write: &'a mut W, pub placeholders: Option<&'b EncodePlaceholderMap>, } -impl<'a, 'b, W: Write, N: NestedValue, D: Domain> Encoder<'a, 'b, W, N, D> { +impl<'a, 'b, W: Writer, N: NestedValue, D: Domain> Encoder<'a, 'b, W, N, D> { pub fn new(write: &'a mut W, placeholders: Option<&'b EncodePlaceholderMap>) -> Self { Encoder{ write, placeholders } } - pub fn _write(&mut self, v: u8) -> Result { - self.write_all(&[v]) - } - - pub fn write_all(&mut self, vs: &[u8]) -> Result { - self.write.write_all(vs) - } - - pub fn varint(&mut self, v: usize) -> Result { - if v < 128 { - self._write(v as u8) - } else { - self._write(((v & 0x7f) + 128) as u8)?; - self.varint(v >> 7) - } - } - - pub fn write_op(&mut self, op: Op, arg: u8) -> Result { - self._write((u8::from(op) << 4) | (arg & 15)) - } - - pub fn write_header(&mut self, op: Op, wirelength: usize) -> Result { - if wirelength < 15 { - self.write_op(op, wirelength as u8) - } else { - self.write_op(op, 15)?; - self.varint(wirelength) - } - } - - pub fn write_int(&mut self, v: &BigInt) -> Result { - match v.to_i8() { - Some(n) if n >= 0 && n <= 12 => self.write_op(Op::Misc(3), n as u8), - Some(n) if n >= -3 && n < 0 => self.write_op(Op::Misc(3), (n + 16) as u8), - _ => self.write_atom(AtomMinor::SignedInteger, &v.to_signed_bytes_be()), - } - } - - pub fn write_stream_header(&mut self, op: Op) -> Result { - self.write_op(Op::Misc(2), u8::from(op)) - } - - pub fn write_stream_footer(&mut self) -> Result { - self.write_op(Op::Misc(0), 4) - } - - pub fn write_atom(&mut self, minor: AtomMinor, bs: &[u8]) -> Result { - self.write_header(Op::Atom(minor), bs.len())?; - self.write_all(bs) - } - - pub fn write_noop(&mut self) -> Result { - self.write_op(Op::Reserved(3), 15) - } - pub fn write(&mut self, v: &N) -> Result { for ann in v.annotations() { - self.write_header(Op::Misc(0), 5)?; + self.write.write_annotation_prefix()?; self.write(ann)?; } self.write_value(v.value()) @@ -84,42 +25,35 @@ impl<'a, 'b, W: Write, N: NestedValue, D: Domain> Encoder<'a, 'b, W, N, D> { pub fn write_value(&mut self, v: &Value) -> Result { match self.placeholders.and_then(|m| m.get(v)) { - Some(&n) => self.write_header(Op::Misc(1), n), + Some(&n) => self.write.write_placeholder_ref(n), None => match v { - Value::Boolean(false) => self.write_op(Op::Misc(0), 0), - Value::Boolean(true) => self.write_op(Op::Misc(0), 1), - Value::Float(Float(f)) => { - self.write_op(Op::Misc(0), 2)?; - self.write_all(&u32::to_be_bytes(f32::to_bits(*f))) - } - Value::Double(Double(d)) => { - self.write_op(Op::Misc(0), 3)?; - self.write_all(&u64::to_be_bytes(f64::to_bits(*d))) - } - Value::SignedInteger(ref b) => self.write_int(b), - Value::String(ref s) => self.write_atom(AtomMinor::String, s.as_bytes()), - Value::ByteString(ref bs) => self.write_atom(AtomMinor::ByteString, bs), - Value::Symbol(ref s) => self.write_atom(AtomMinor::Symbol, s.as_bytes()), + Value::Boolean(b) => self.write.write_bool(*b), + Value::Float(Float(f)) => self.write.write_f32(*f), + Value::Double(Double(d)) => self.write.write_f64(*d), + Value::SignedInteger(ref b) => self.write.write_int(b), + Value::String(ref s) => self.write.write_string(s), + Value::ByteString(ref bs) => self.write.write_bytes(bs), + Value::Symbol(ref s) => self.write.write_symbol(s), Value::Record((ref l, ref fs)) => { - self.write_header(Op::Compound(CompoundMinor::Record), fs.len() + 1)?; + self.write.open_record(fs.len())?; self.write(N::boxunwrap(l))?; for f in fs { self.write(f)?; } - Ok(()) + self.write.close_record() } Value::Sequence(ref vs) => { - self.write_header(Op::Compound(CompoundMinor::Sequence), vs.len())?; + self.write.open_sequence(vs.len())?; for v in vs { self.write(v)?; } - Ok(()) + self.write.close_sequence() } Value::Set(ref vs) => { - self.write_header(Op::Compound(CompoundMinor::Set), vs.len())?; + self.write.open_set(vs.len())?; for v in vs { self.write(v)?; } - Ok(()) + self.write.close_set() } Value::Dictionary(ref vs) => { - self.write_header(Op::Compound(CompoundMinor::Dictionary), vs.len() << 1)?; + self.write.open_dictionary(vs.len())?; for (k, v) in vs { self.write(k)?; self.write(v)?; } - Ok(()) + self.write.close_dictionary() } Value::Domain(ref d) => d.encode(self), } diff --git a/implementations/rust/src/value/mod.rs b/implementations/rust/src/value/mod.rs index cd65c31..0501055 100644 --- a/implementations/rust/src/value/mod.rs +++ b/implementations/rust/src/value/mod.rs @@ -6,6 +6,7 @@ pub mod encoder; pub mod error; pub mod ser; pub mod value; +pub mod writer; pub use codec::Codec; pub use de::Deserializer; @@ -32,3 +33,7 @@ pub fn invert_map(m: &Map) -> Map use std::iter::FromIterator; Map::from_iter(m.iter().map(|(a, b)| (b.clone(), a.clone()))) } + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum NullDomain {} +impl Domain for NullDomain {} diff --git a/implementations/rust/src/value/value.rs b/implementations/rust/src/value/value.rs index c228f6c..23d767e 100644 --- a/implementations/rust/src/value/value.rs +++ b/implementations/rust/src/value/value.rs @@ -12,7 +12,7 @@ pub use std::collections::BTreeSet as Set; pub use std::collections::BTreeMap as Map; pub trait Domain: Sized + Debug + Clone + Eq + Hash + Ord { - fn encode<'a, 'b, W: std::io::Write, N: NestedValue>( + fn encode<'a, 'b, W: super::writer::Writer, N: NestedValue>( &self, enc: &mut super::encoder::Encoder<'a, 'b, W, N, Self>) -> super::encoder::Result { @@ -823,7 +823,3 @@ impl<'de, Dom: Domain> serde::Deserialize<'de> for ArcValue { deserialize_nested_value(deserializer) } } - -//--------------------------------------------------------------------------- - -impl Domain for () {} diff --git a/implementations/rust/src/value/writer.rs b/implementations/rust/src/value/writer.rs new file mode 100644 index 0000000..9e2191d --- /dev/null +++ b/implementations/rust/src/value/writer.rs @@ -0,0 +1,274 @@ +use num::bigint::BigInt; +use num::cast::ToPrimitive; +use std::convert::TryInto; +use super::constants::{Op, AtomMinor, CompoundMinor}; + +pub type Error = std::io::Error; +pub type Result = std::result::Result<(), Error>; + +pub trait Writer { + fn write_annotation_prefix(&mut self) -> Result; + fn write_placeholder_ref(&mut self, v: usize) -> Result; + fn write_noop(&mut self) -> Result; + + fn write_bool(&mut self, v: bool) -> Result; + + fn write_f32(&mut self, v: f32) -> Result; + fn write_f64(&mut self, v: f64) -> Result; + + fn write_i8(&mut self, v: i8) -> Result; + fn write_u8(&mut self, v: u8) -> Result; + fn write_i16(&mut self, v: i16) -> Result; + fn write_u16(&mut self, v: u16) -> Result; + fn write_i32(&mut self, v: i32) -> Result; + fn write_u32(&mut self, v: u32) -> Result; + fn write_i64(&mut self, v: i64) -> Result; + fn write_u64(&mut self, v: u64) -> Result; + fn write_int(&mut self, v: &BigInt) -> Result; + + fn write_string(&mut self, v: &str) -> Result; + fn write_bytes(&mut self, v: &[u8]) -> Result; + fn write_symbol(&mut self, v: &str) -> Result; + + fn open_record(&mut self, field_count: usize) -> Result; + fn open_sequence(&mut self, item_count: usize) -> Result; + fn open_set(&mut self, item_count: usize) -> Result; + fn open_dictionary(&mut self, entry_count: usize) -> Result; + + fn close_record(&mut self) -> Result; + fn close_sequence(&mut self) -> Result; + fn close_set(&mut self) -> Result; + fn close_dictionary(&mut self) -> Result; + + fn stream_string(&mut self) -> Result; + fn stream_bytes(&mut self) -> Result; + fn stream_symbol(&mut self) -> Result; + + fn stream_record(&mut self) -> Result; + fn stream_sequence(&mut self) -> Result; + fn stream_set(&mut self) -> Result; + fn stream_dictionary(&mut self) -> Result; + + fn close_stream(&mut self) -> Result; +} + +pub fn varint(w: &mut W, mut v: usize) -> Result { + loop { + if v < 128 { + return w.write_all(&[v as u8]) + } else { + w.write_all(&[((v & 0x7f) + 128) as u8])?; + v = v >> 7; + } + } +} + +pub fn write_op(w: &mut W, op: Op, arg: u8) -> Result { + w.write_all(&[(u8::from(op) << 4) | (arg & 15)]) +} + +pub fn write_header(w: &mut W, op: Op, wirelength: usize) -> Result { + if wirelength < 15 { + write_op(w, op, wirelength as u8) + } else { + write_op(w, op, 15)?; + varint(w, wirelength) + } +} + +pub fn write_atom(w: &mut W, minor: AtomMinor, bs: &[u8]) -> Result { + write_header(w, Op::Atom(minor), bs.len())?; + w.write_all(bs) +} + +impl Writer for W { + fn write_annotation_prefix(&mut self) -> Result { + write_header(self, Op::Misc(0), 5) + } + + fn write_placeholder_ref(&mut self, v: usize) -> Result { + write_header(self, Op::Misc(1), v) + } + + fn write_noop(&mut self) -> Result { + write_op(self, Op::Reserved(3), 15) + } + + fn write_bool(&mut self, v: bool) -> Result { + write_op(self, Op::Misc(0), if v { 1 } else { 0 }) + } + + fn write_f32(&mut self, v: f32) -> Result { + write_op(self, Op::Misc(0), 2)?; + self.write_all(&u32::to_be_bytes(f32::to_bits(v))) + } + + fn write_f64(&mut self, v: f64) -> Result { + write_op(self, Op::Misc(0), 3)?; + self.write_all(&u64::to_be_bytes(f64::to_bits(v))) + } + + fn write_i8(&mut self, v: i8) -> Result { + if v >= 0 && v <= 12 { return write_op(self, Op::Misc(3), v as u8) } + if v >= -3 && v < 0 { return write_op(self, Op::Misc(3), (v + 16) as u8) } + write_atom(self, AtomMinor::SignedInteger, &[v as u8]) + } + + fn write_u8(&mut self, v: u8) -> Result { + if let Ok(w) = v.try_into() { return self.write_i8(w) } + write_atom(self, AtomMinor::SignedInteger, &[0, v]) + } + + fn write_i16(&mut self, v: i16) -> Result { + if let Ok(w) = v.try_into() { return self.write_i8(w) } + write_atom(self, AtomMinor::SignedInteger, &[(v >> 8) as u8, (v & 255) as u8]) + } + + fn write_u16(&mut self, v: u16) -> Result { + if let Ok(w) = v.try_into() { return self.write_i16(w) } + write_atom(self, AtomMinor::SignedInteger, &[0, (v >> 8) as u8, (v & 255) as u8]) + } + + fn write_i32(&mut self, v: i32) -> Result { + if let Ok(w) = v.try_into() { return self.write_i16(w) } + if v >= -(2 << 23) && v < (2 << 23) { + return write_atom(self, AtomMinor::SignedInteger, &[(v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + return write_atom(self, AtomMinor::SignedInteger, &[(v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + + fn write_u32(&mut self, v: u32) -> Result { + if let Ok(w) = v.try_into() { return self.write_i32(w) } + return write_atom(self, AtomMinor::SignedInteger, &[0, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + + fn write_i64(&mut self, v: i64) -> Result { + if let Ok(w) = v.try_into() { return self.write_i32(w) } + if v >= -(2 << 39) && v < (2 << 39) { + return write_atom(self, AtomMinor::SignedInteger, &[(v >> 32) as u8, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + if v >= -(2 << 47) && v < (2 << 47) { + return write_atom(self, AtomMinor::SignedInteger, &[(v >> 40) as u8, + (v >> 32) as u8, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + if v >= -(2 << 55) && v < (2 << 55) { + return write_atom(self, AtomMinor::SignedInteger, &[(v >> 48) as u8, + (v >> 40) as u8, + (v >> 32) as u8, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + return write_atom(self, AtomMinor::SignedInteger, &[(v >> 56) as u8, + (v >> 48) as u8, + (v >> 40) as u8, + (v >> 32) as u8, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + + fn write_u64(&mut self, v: u64) -> Result { + if let Ok(w) = v.try_into() { return self.write_i64(w) } + return write_atom(self, AtomMinor::SignedInteger, &[0, + (v >> 56) as u8, + (v >> 48) as u8, + (v >> 40) as u8, + (v >> 32) as u8, + (v >> 24) as u8, + (v >> 16) as u8, + (v >> 8) as u8, + (v & 255) as u8]); + } + + fn write_int(&mut self, v: &BigInt) -> Result { + match v.to_i8() { + Some(n) => self.write_i8(n), + None => write_atom(self, AtomMinor::SignedInteger, &v.to_signed_bytes_be()), + } + } + + fn write_string(&mut self, v: &str) -> Result { + write_atom(self, AtomMinor::String, v.as_bytes()) + } + + fn write_bytes(&mut self, v: &[u8]) -> Result { + write_atom(self, AtomMinor::ByteString, v) + } + + fn write_symbol(&mut self, v: &str) -> Result { + write_atom(self, AtomMinor::Symbol, v.as_bytes()) + } + + fn open_record(&mut self, field_count: usize) -> Result { + write_header(self, Op::Compound(CompoundMinor::Record), field_count + 1) + } + + fn open_sequence(&mut self, item_count: usize) -> Result { + write_header(self, Op::Compound(CompoundMinor::Sequence), item_count) + } + + fn open_set(&mut self, item_count: usize) -> Result { + write_header(self, Op::Compound(CompoundMinor::Set), item_count) + } + + fn open_dictionary(&mut self, entry_count: usize) -> Result { + write_header(self, Op::Compound(CompoundMinor::Dictionary), entry_count << 1) + } + + fn close_record(&mut self) -> Result { Ok(()) } + fn close_sequence(&mut self) -> Result { Ok(()) } + fn close_set(&mut self) -> Result { Ok(()) } + fn close_dictionary(&mut self) -> Result { Ok(()) } + + fn stream_string(&mut self) -> Result { + write_op(self, Op::Misc(2), Op::Atom(AtomMinor::String).into()) + } + + fn stream_bytes(&mut self) -> Result { + write_op(self, Op::Misc(2), Op::Atom(AtomMinor::ByteString).into()) + } + + fn stream_symbol(&mut self) -> Result { + write_op(self, Op::Misc(2), Op::Atom(AtomMinor::Symbol).into()) + } + + fn stream_record(&mut self) -> Result { + write_op(self, Op::Misc(2), Op::Compound(CompoundMinor::Record).into()) + } + + fn stream_sequence(&mut self) -> Result { + write_op(self, Op::Misc(2), Op::Compound(CompoundMinor::Sequence).into()) + } + + fn stream_set(&mut self) -> Result { + write_op(self, Op::Misc(2), Op::Compound(CompoundMinor::Set).into()) + } + + fn stream_dictionary(&mut self) -> Result { + write_op(self, Op::Misc(2), Op::Compound(CompoundMinor::Dictionary).into()) + } + + fn close_stream(&mut self) -> Result { + write_op(self, Op::Misc(0), 4) + } +}