Rust Encoder
This commit is contained in:
parent
bec3a10d64
commit
2349e80b30
|
@ -6,5 +6,6 @@ edition = "2018"
|
|||
|
||||
[dependencies]
|
||||
num = "0.2"
|
||||
num_enum = "0.4.1"
|
||||
serde = "1.0"
|
||||
serde_bytes = "0.11"
|
||||
|
|
|
@ -175,7 +175,8 @@ mod value_tests {
|
|||
|
||||
#[cfg(test)]
|
||||
mod decoder_tests {
|
||||
use crate::value::codec::Decoder;
|
||||
use crate::value::decoder::Decoder;
|
||||
use crate::value::encoder::Encoder;
|
||||
use crate::value::value::Value;
|
||||
|
||||
#[test] fn read_123() {
|
||||
|
@ -204,6 +205,11 @@ mod decoder_tests {
|
|||
|
||||
#[test] fn read_samples() {
|
||||
let mut d = Decoder::new(std::fs::File::open("../../tests/samples.bin").unwrap(), None);
|
||||
println!("{:#?}", d.next().ok().unwrap());
|
||||
let v = d.next().ok().unwrap();
|
||||
println!("{:#?}", v);
|
||||
// let mut buf = Vec::new();
|
||||
// let mut e = Encoder::new(&mut buf, None);
|
||||
// e.write(&v).ok().unwrap();
|
||||
// println!("{:?}", Value::from(buf))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
use std::convert::{TryFrom, From};
|
||||
use num_enum::TryFromPrimitive;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum Op {
|
||||
Misc(u8),
|
||||
Atom(AtomMinor),
|
||||
Compound(CompoundMinor),
|
||||
}
|
||||
|
||||
/// Error produced when a byte cannot be converted into an [`Op`].
#[derive(Debug, Eq, PartialEq)]
pub struct InvalidOp;
|
||||
|
||||
impl TryFrom<u8> for Op {
|
||||
type Error = InvalidOp;
|
||||
fn try_from(v: u8) -> Result<Self, Self::Error> {
|
||||
match v >> 2 {
|
||||
0 => Ok(Self::Misc(v & 3)),
|
||||
1 => Ok(Self::Atom(AtomMinor::try_from(v & 3).unwrap())),
|
||||
2 => Ok(Self::Compound(CompoundMinor::try_from(v & 3).unwrap())),
|
||||
_ => Err(InvalidOp),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Op> for u8 {
|
||||
fn from(v: Op) -> Self {
|
||||
match v {
|
||||
Op::Misc(minor) => minor & 3,
|
||||
Op::Atom(minor) => (1 << 2) | ((minor as u8) & 3),
|
||||
Op::Compound(minor) => (2 << 2) | ((minor as u8) & 3),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, TryFromPrimitive, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
pub enum AtomMinor {
|
||||
SignedInteger = 0,
|
||||
String = 1,
|
||||
ByteString = 2,
|
||||
Symbol = 3,
|
||||
}
|
||||
|
||||
#[derive(Debug, TryFromPrimitive, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
pub enum CompoundMinor {
|
||||
Record = 0,
|
||||
Sequence = 1,
|
||||
Set = 2,
|
||||
Dictionary = 3,
|
||||
}
|
|
@ -1,7 +1,9 @@
|
|||
use std::io::Read;
|
||||
use std::convert::TryInto;
|
||||
use std::convert::TryFrom;
|
||||
use crate::value::value::{Value, AValue, Set, Dictionary};
|
||||
use num::bigint::BigInt;
|
||||
use crate::value::constants::{Op, InvalidOp, AtomMinor, CompoundMinor};
|
||||
|
||||
/// Shorthand for `std::result::Result` with the error side fixed to the `Error` type in scope here.
pub type Result<V> = std::result::Result<V, Error>;
|
||||
|
||||
|
@ -24,29 +26,35 @@ impl From<std::str::Utf8Error> for Error {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<InvalidOp> for Error {
|
||||
fn from(_v: InvalidOp) -> Self {
|
||||
Error::Syntax("Invalid lead byte major value")
|
||||
}
|
||||
}
|
||||
|
||||
impl Error {
|
||||
pub fn is_io(&self) -> bool { if let Error::Io(_) = *self { true } else { false } }
|
||||
pub fn is_syntax(&self) -> bool { if let Error::Syntax(_) = *self { true } else { false } }
|
||||
pub fn is_eof(&self) -> bool { if let Error::Eof = *self { true } else { false } }
|
||||
}
|
||||
|
||||
pub type PlaceholderMap = std::collections::BTreeMap<usize, Value>;
|
||||
pub type DecodePlaceholderMap = std::collections::BTreeMap<usize, Value>;
|
||||
|
||||
pub struct Decoder<R: Read> {
|
||||
read: R,
|
||||
index: usize,
|
||||
buf: Box<Option<Option<u8>>>,
|
||||
placeholders: PlaceholderMap,
|
||||
placeholders: DecodePlaceholderMap,
|
||||
read_annotations: bool,
|
||||
}
|
||||
|
||||
impl<R: Read> Decoder<R> {
|
||||
pub fn new(read: R, placeholders: Option<PlaceholderMap>) -> Self {
|
||||
pub fn new(read: R, placeholders: Option<DecodePlaceholderMap>) -> Self {
|
||||
Decoder{
|
||||
read,
|
||||
index: 0,
|
||||
buf: Box::new(None),
|
||||
placeholders: placeholders.unwrap_or(PlaceholderMap::new()),
|
||||
placeholders: placeholders.unwrap_or(DecodePlaceholderMap::new()),
|
||||
read_annotations: true,
|
||||
}
|
||||
}
|
||||
|
@ -119,15 +127,12 @@ impl<R: Read> Decoder<R> {
|
|||
Ok(pieces)
|
||||
}
|
||||
|
||||
pub fn decodeop(b: u8) -> (u8, u8, u8) {
|
||||
let major = b >> 6;
|
||||
let minor = (b >> 4) & 3;
|
||||
let arg = b & 15;
|
||||
(major, minor, arg)
|
||||
pub fn decodeop(b: u8) -> Result<(Op, u8)> {
|
||||
Ok((Op::try_from(b >> 4)?, b & 15))
|
||||
}
|
||||
|
||||
pub fn nextop(&mut self) -> Result<(u8, u8, u8)> {
|
||||
Ok(Self::decodeop(self.read()?))
|
||||
pub fn nextop(&mut self) -> Result<(Op, u8)> {
|
||||
Self::decodeop(self.read()?)
|
||||
}
|
||||
|
||||
pub fn varint(&mut self) -> Result<usize> {
|
||||
|
@ -160,34 +165,33 @@ impl<R: Read> Decoder<R> {
|
|||
BigInt::from_signed_bytes_be(bs)
|
||||
}
|
||||
|
||||
pub fn decodebinary(minor: u8, bs: Vec<u8>) -> Result<AValue> {
|
||||
pub fn decodebinary(minor: AtomMinor, bs: Vec<u8>) -> Result<AValue> {
|
||||
Ok(match minor {
|
||||
0 => Value::from(Self::decodeint(&bs)).wrap(),
|
||||
1 => Value::from(std::str::from_utf8(&bs)?).wrap(),
|
||||
2 => Value::from(bs).wrap(),
|
||||
3 => Value::symbol(std::str::from_utf8(&bs)?).wrap(),
|
||||
_ => panic!()
|
||||
AtomMinor::SignedInteger => Value::from(Self::decodeint(&bs)).wrap(),
|
||||
AtomMinor::String => Value::from(std::str::from_utf8(&bs)?).wrap(),
|
||||
AtomMinor::ByteString => Value::from(bs).wrap(),
|
||||
AtomMinor::Symbol => Value::symbol(std::str::from_utf8(&bs)?).wrap(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn decodecompound(minor: u8, mut pieces: Vec<AValue>) -> Result<AValue> {
|
||||
pub fn decodecompound(minor: CompoundMinor, mut pieces: Vec<AValue>) -> Result<AValue> {
|
||||
match minor {
|
||||
0 =>
|
||||
CompoundMinor::Record =>
|
||||
if pieces.len() == 0 {
|
||||
Err(Error::Syntax("Too few elements in encoded record"))
|
||||
} else {
|
||||
let label = pieces.remove(0).rc();
|
||||
Ok(Value::record(&label, pieces).wrap())
|
||||
},
|
||||
1 => Ok(Value::from(pieces).wrap()),
|
||||
2 => {
|
||||
CompoundMinor::Sequence => Ok(Value::from(pieces).wrap()),
|
||||
CompoundMinor::Set => {
|
||||
let mut s = Set::new();
|
||||
while let Some(v) = pieces.pop() {
|
||||
s.insert(v);
|
||||
}
|
||||
Ok(Value::Set(s).wrap())
|
||||
}
|
||||
3 =>
|
||||
CompoundMinor::Dictionary =>
|
||||
if pieces.len() % 2 != 0 {
|
||||
Err(Error::Syntax("Missing dictionary value"))
|
||||
} else {
|
||||
|
@ -198,11 +202,10 @@ impl<R: Read> Decoder<R> {
|
|||
}
|
||||
Ok(Value::Dictionary(d).wrap())
|
||||
},
|
||||
_ => panic!()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn binarystream(&mut self, minor: u8) -> Result<AValue> {
|
||||
pub fn binarystream(&mut self, minor: AtomMinor) -> Result<AValue> {
|
||||
let mut bs: Vec<u8> = Vec::new();
|
||||
while !self.peekend()? {
|
||||
match self.next()?.1.as_bytestring() {
|
||||
|
@ -213,7 +216,7 @@ impl<R: Read> Decoder<R> {
|
|||
Self::decodebinary(minor, bs)
|
||||
}
|
||||
|
||||
pub fn valuestream(&mut self, minor: u8) -> Result<AValue> {
|
||||
pub fn valuestream(&mut self, minor: CompoundMinor) -> Result<AValue> {
|
||||
let mut pieces: Vec<AValue> = Vec::new();
|
||||
while !self.peekend()? {
|
||||
pieces.push(self.next()?);
|
||||
|
@ -223,20 +226,20 @@ impl<R: Read> Decoder<R> {
|
|||
|
||||
pub fn next(&mut self) -> Result<AValue> {
|
||||
match self.nextop()? {
|
||||
(0, 0, 0) => Ok(Value::from(false).wrap()),
|
||||
(0, 0, 1) => Ok(Value::from(true).wrap()),
|
||||
(0, 0, 2) => {
|
||||
(Op::Misc(0), 0) => Ok(Value::from(false).wrap()),
|
||||
(Op::Misc(0), 1) => Ok(Value::from(true).wrap()),
|
||||
(Op::Misc(0), 2) => {
|
||||
let bs: &[u8] = &self.readbytes(4)?;
|
||||
Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap())
|
||||
}
|
||||
(0, 0, 3) => {
|
||||
(Op::Misc(0), 3) => {
|
||||
let bs: &[u8] = &self.readbytes(8)?;
|
||||
Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
|
||||
}
|
||||
(0, 0, 5) => {
|
||||
(Op::Misc(0), 5) => {
|
||||
if self.read_annotations {
|
||||
let mut annotations = vec![self.next()?];
|
||||
while Self::decodeop(self.peek()?) == (0, 0, 5) {
|
||||
while Self::decodeop(self.peek()?).ok() == Some((Op::Misc(0), 5)) {
|
||||
self.skip()?;
|
||||
annotations.push(self.next()?);
|
||||
}
|
||||
|
@ -248,38 +251,34 @@ impl<R: Read> Decoder<R> {
|
|||
self.next()
|
||||
}
|
||||
}
|
||||
(0, 0, _) => Err(Error::Syntax("Invalid format A encoding")),
|
||||
(0, 1, arg) => {
|
||||
(Op::Misc(0), _) => Err(Error::Syntax("Invalid format A encoding")),
|
||||
(Op::Misc(1), arg) => {
|
||||
let n = self.wirelength(arg)?;
|
||||
match self.placeholders.get(&n) {
|
||||
Some(v) => Ok(v.clone().wrap()),
|
||||
None => Err(Error::Syntax("Invalid Preserves placeholder")),
|
||||
}
|
||||
}
|
||||
(0, 2, arg) => {
|
||||
let t = arg >> 2;
|
||||
let n = arg & 3;
|
||||
match t {
|
||||
1 => self.binarystream(n),
|
||||
2 => self.valuestream(n),
|
||||
(Op::Misc(2), arg) => {
|
||||
match Op::try_from(arg)? {
|
||||
Op::Atom(minor) => self.binarystream(minor),
|
||||
Op::Compound(minor) => self.valuestream(minor),
|
||||
_ => Err(Error::Syntax("Invalid format C start byte")),
|
||||
}
|
||||
}
|
||||
(0, 3, arg) => {
|
||||
(Op::Misc(3), arg) => {
|
||||
let n = if arg > 12 { i32::from(arg) - 16 } else { i32::from(arg) };
|
||||
Ok(Value::from(n).wrap())
|
||||
}
|
||||
(0, _, _) => panic!(),
|
||||
(1, minor, arg) => {
|
||||
let n = self.wirelength(arg)?;
|
||||
Self::decodebinary(minor, self.readbytes(n)?)
|
||||
(Op::Misc(_), _) => panic!(),
|
||||
(Op::Atom(minor), arg) => {
|
||||
let count = self.wirelength(arg)?;
|
||||
Self::decodebinary(minor, self.readbytes(count)?)
|
||||
}
|
||||
(2, minor, arg) => {
|
||||
let n = self.wirelength(arg)?;
|
||||
Self::decodecompound(minor, self.readvalues(n)?)
|
||||
(Op::Compound(minor), arg) => {
|
||||
let count = self.wirelength(arg)?;
|
||||
Self::decodecompound(minor, self.readvalues(count)?)
|
||||
}
|
||||
(3, _, _) => Err(Error::Syntax("Invalid lead byte (major 3)")),
|
||||
(_, _, _) => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,118 @@
|
|||
use std::io::Write;
|
||||
use crate::value::value::{Value, AValue, Float, Double};
|
||||
use num::bigint::BigInt;
|
||||
use num::cast::ToPrimitive;
|
||||
use std::io::Error;
|
||||
use crate::value::constants::{Op, AtomMinor, CompoundMinor};
|
||||
|
||||
/// Outcome of an encoder operation: `Ok(())` on success, otherwise the I/O error raised by the writer.
pub type Result = std::io::Result<()>;
|
||||
|
||||
/// Maps a value to the placeholder number the encoder writes in place of that value.
pub type EncodePlaceholderMap = std::collections::BTreeMap<Value, usize>;
|
||||
|
||||
pub struct Encoder<'a, W: Write> {
|
||||
write: &'a mut W,
|
||||
placeholders: EncodePlaceholderMap,
|
||||
}
|
||||
|
||||
impl<'a, W: Write> Encoder<'a, W> {
|
||||
pub fn new(write: &'a mut W, placeholders: Option<EncodePlaceholderMap>) -> Self {
|
||||
Encoder{
|
||||
write,
|
||||
placeholders: placeholders.unwrap_or(EncodePlaceholderMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn _write(&mut self, v: u8) -> Result {
|
||||
self.write_all(&[v])
|
||||
}
|
||||
|
||||
pub fn write_all(&mut self, vs: &[u8]) -> Result {
|
||||
self.write.write_all(vs)
|
||||
}
|
||||
|
||||
pub fn varint(&mut self, v: usize) -> Result {
|
||||
if v < 128 {
|
||||
self._write(v as u8)
|
||||
} else {
|
||||
self._write(((v & 0x7f) + 128) as u8)?;
|
||||
self.varint(v >> 7)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write_op(&mut self, op: Op, arg: u8) -> Result {
|
||||
self._write((u8::from(op) << 4) | (arg & 15))
|
||||
}
|
||||
|
||||
pub fn write_header(&mut self, op: Op, wirelength: usize) -> Result {
|
||||
if wirelength < 15 {
|
||||
self.write_op(op, wirelength as u8)
|
||||
} else {
|
||||
self.write_op(op, 15)?;
|
||||
self.varint(wirelength)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write_int(&mut self, v: &BigInt) -> Result {
|
||||
match v.to_i8() {
|
||||
Some(n) if n >= 0 && n <= 12 => self.write_op(Op::Misc(3), n as u8),
|
||||
Some(n) if n >= -3 && n < 0 => self.write_op(Op::Misc(3), (n + 16) as u8),
|
||||
_ => self.write_atom(AtomMinor::SignedInteger, &v.to_signed_bytes_be()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write_stream_header(&mut self, op: Op) -> Result {
|
||||
self.write_op(Op::Misc(2), u8::from(op))
|
||||
}
|
||||
|
||||
pub fn write_stream_footer(&mut self) -> Result {
|
||||
self.write_op(Op::Misc(0), 4)
|
||||
}
|
||||
|
||||
pub fn write_atom(&mut self, minor: AtomMinor, bs: &[u8]) -> Result {
|
||||
self.write_header(Op::Atom(minor), bs.len())?;
|
||||
self.write_all(bs)
|
||||
}
|
||||
|
||||
pub fn write(&mut self, v: &AValue) -> Result {
|
||||
for ann in v.annotations() {
|
||||
self.write_header(Op::Misc(0), 5)?;
|
||||
self.write(ann)?;
|
||||
}
|
||||
match self.placeholders.get(&v.value()) {
|
||||
Some(&n) => self.write_header(Op::Misc(1), n),
|
||||
None => match v.value() {
|
||||
Value::Boolean(false) => self.write_op(Op::Misc(0), 0),
|
||||
Value::Boolean(true) => self.write_op(Op::Misc(0), 1),
|
||||
Value::Float(Float(f)) => {
|
||||
self.write_op(Op::Misc(0), 2)?;
|
||||
self.write_all(&u32::to_be_bytes(f32::to_bits(*f)))
|
||||
}
|
||||
Value::Double(Double(d)) => {
|
||||
self.write_op(Op::Misc(0), 3)?;
|
||||
self.write_all(&u64::to_be_bytes(f64::to_bits(*d)))
|
||||
}
|
||||
Value::SignedInteger(ref b) => self.write_int(b),
|
||||
Value::String(ref s) => self.write_atom(AtomMinor::String, s.as_bytes()),
|
||||
Value::ByteString(ref bs) => self.write_atom(AtomMinor::ByteString, bs),
|
||||
Value::Symbol(ref s) => self.write_atom(AtomMinor::Symbol, s.as_bytes()),
|
||||
Value::Record((ref l, ref fs)) => {
|
||||
self.write_header(Op::Compound(CompoundMinor::Record), fs.len() + 1)?;
|
||||
self.write(l)?;
|
||||
Ok(for f in fs { self.write(f)?; })
|
||||
}
|
||||
Value::Sequence(ref vs) => {
|
||||
self.write_header(Op::Compound(CompoundMinor::Sequence), vs.len())?;
|
||||
Ok(for v in vs { self.write(v)?; })
|
||||
}
|
||||
Value::Set(ref vs) => {
|
||||
self.write_header(Op::Compound(CompoundMinor::Set), vs.len())?;
|
||||
Ok(for v in vs { self.write(v)?; })
|
||||
}
|
||||
Value::Dictionary(ref vs) => {
|
||||
self.write_header(Op::Compound(CompoundMinor::Dictionary), vs.len() << 1)?;
|
||||
Ok(for (k, v) in vs { self.write(k)?; self.write(v)?; })
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,2 +1,4 @@
|
|||
pub mod value;
|
||||
pub mod codec;
|
||||
pub mod decoder;
|
||||
pub mod encoder;
|
||||
pub mod constants;
|
||||
|
|
|
@ -185,8 +185,8 @@ impl std::fmt::Debug for Value {
|
|||
// doesn't escape strings/symbols properly.
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Value::Boolean(true) => f.write_str("#true"),
|
||||
Value::Boolean(false) => f.write_str("#false"),
|
||||
Value::Boolean(true) => f.write_str("#true"),
|
||||
Value::Float(Float(v)) => write!(f, "{:?}f", v),
|
||||
Value::Double(Double(v)) => write!(f, "{:?}", v),
|
||||
Value::SignedInteger(v) => write!(f, "{}", v),
|
||||
|
|
Loading…
Reference in New Issue