Update Rust implementation
This commit is contained in:
parent
67613877ce
commit
c2116841da
|
@ -15,6 +15,7 @@ gitlab = { repository = "preserves/preserves" }
|
||||||
base64 = "0.13"
|
base64 = "0.13"
|
||||||
dtoa = "0.4"
|
dtoa = "0.4"
|
||||||
num = "0.4"
|
num = "0.4"
|
||||||
|
lazy_static = "1.4.0"
|
||||||
regex = "1.5"
|
regex = "1.5"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_bytes = "0.11"
|
serde_bytes = "0.11"
|
||||||
|
|
|
@ -26,8 +26,11 @@ use crate::value::reader::BinarySource;
|
||||||
use crate::value::reader::ReaderResult;
|
use crate::value::reader::ReaderResult;
|
||||||
use crate::value::repr::Annotations;
|
use crate::value::repr::Annotations;
|
||||||
|
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
|
||||||
use num::bigint::BigInt;
|
use num::bigint::BigInt;
|
||||||
|
|
||||||
|
use std::convert::TryInto;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::iter::FromIterator;
|
use std::iter::FromIterator;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
@ -137,86 +140,21 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_intpart<N: NestedValue>(&mut self, mut bs: Vec<u8>, c: u8) -> io::Result<N> {
|
fn read_hex_float<N: NestedValue>(&mut self, bytecount: usize) -> io::Result<N> {
|
||||||
match c {
|
if self.next_byte()? != b'"' {
|
||||||
b'0' => {
|
return Err(io_syntax_error("Missing open-double-quote in hex-encoded floating-point number"));
|
||||||
bs.push(c);
|
|
||||||
self.read_fracexp(bs)
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
self.read_digit1(&mut bs, c)?;
|
|
||||||
self.read_fracexp(bs)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
let bs = self.read_hex_binary()?;
|
||||||
|
if bs.len() != bytecount {
|
||||||
fn read_fracexp<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
|
return Err(io_syntax_error("Incorrect number of bytes in hex-encoded floating-point number"));
|
||||||
let mut is_float = false;
|
|
||||||
match self.peek() {
|
|
||||||
Ok(b'.') => {
|
|
||||||
is_float = true;
|
|
||||||
bs.push(self.next_byte()?);
|
|
||||||
let c = self.next_byte()?;
|
|
||||||
self.read_digit1(&mut bs, c)?;
|
|
||||||
}
|
|
||||||
_ => ()
|
|
||||||
}
|
}
|
||||||
match self.peek() {
|
match bytecount {
|
||||||
Ok(b'e') | Ok(b'E') => {
|
4 => Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()),
|
||||||
bs.push(self.next_byte()?);
|
8 => Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()),
|
||||||
self.read_sign_and_exp(bs)
|
_ => Err(io_syntax_error("Unsupported byte count in hex-encoded floating-point number")),
|
||||||
}
|
|
||||||
_ => self.finish_number(bs, is_float)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_sign_and_exp<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
|
|
||||||
match self.peek()? {
|
|
||||||
b'+' | b'-' => bs.push(self.next_byte()?),
|
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
let c = self.next_byte()?;
|
|
||||||
self.read_digit1(&mut bs, c)?;
|
|
||||||
self.finish_number(bs, true)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn finish_number<N: NestedValue>(&mut self, bs: Vec<u8>, is_float: bool) -> io::Result<N> {
|
|
||||||
let s = decode_utf8(bs)?;
|
|
||||||
if is_float {
|
|
||||||
match self.peek() {
|
|
||||||
Ok(b'f') | Ok(b'F') => {
|
|
||||||
self.skip()?;
|
|
||||||
Ok(N::new(s.parse::<f32>().map_err(
|
|
||||||
|_| io_syntax_error(&format!(
|
|
||||||
"Invalid single-precision number: {:?}", s)))?))
|
|
||||||
}
|
|
||||||
_ =>
|
|
||||||
Ok(N::new(s.parse::<f64>().map_err(
|
|
||||||
|_| io_syntax_error(&format!(
|
|
||||||
"Invalid double-precision number: {:?}", s)))?))
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Ok(N::new(s.parse::<BigInt>().map_err(
|
|
||||||
|_| io_syntax_error(&format!(
|
|
||||||
"Invalid signed-integer number: {:?}", s)))?))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_digit1(&mut self, bs: &mut Vec<u8>, c: u8) -> io::Result<()>
|
|
||||||
{
|
|
||||||
if !(c as char).is_digit(10) {
|
|
||||||
return Err(io_syntax_error("Incomplete number"));
|
|
||||||
}
|
|
||||||
bs.push(c);
|
|
||||||
while let Ok(c) = self.peek() {
|
|
||||||
if !(c as char).is_digit(10) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
bs.push(self.next_byte()?);
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_stringlike<X, H, R>(
|
fn read_stringlike<X, H, R>(
|
||||||
&mut self,
|
&mut self,
|
||||||
mut seed: R,
|
mut seed: R,
|
||||||
|
@ -299,14 +237,13 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
|
||||||
|bs, r| Ok(bs.push(r.hexnum(2)? as u8)))?[..]))
|
|bs, r| Ok(bs.push(r.hexnum(2)? as u8)))?[..]))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_hex_binary<N: NestedValue>(&mut self) -> io::Result<N> {
|
fn read_hex_binary(&mut self) -> io::Result<Vec<u8>> {
|
||||||
let mut s = String::new();
|
let mut s = String::new();
|
||||||
loop {
|
loop {
|
||||||
self.skip_whitespace();
|
self.skip_whitespace();
|
||||||
let c1 = self.next_byte()? as char;
|
let c1 = self.next_byte()? as char;
|
||||||
if c1 == '"' {
|
if c1 == '"' {
|
||||||
let bs = hex::HexParser::Strict.decode(&s).unwrap();
|
return Ok(hex::HexParser::Strict.decode(&s).unwrap());
|
||||||
return Ok(N::new(&bs[..]));
|
|
||||||
}
|
}
|
||||||
let c2 = self.next_byte()? as char;
|
let c2 = self.next_byte()? as char;
|
||||||
if !(c1.is_digit(16) && c2.is_digit(16)) {
|
if !(c1.is_digit(16) && c2.is_digit(16)) {
|
||||||
|
@ -364,7 +301,11 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_raw_symbol<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
|
fn read_raw_symbol_or_number<N: NestedValue>(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
|
||||||
|
lazy_static! {
|
||||||
|
static ref NUMBER_RE: regex::Regex = regex::Regex::new(
|
||||||
|
r"^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$").unwrap();
|
||||||
|
}
|
||||||
loop {
|
loop {
|
||||||
let c = match self.peek() {
|
let c = match self.peek() {
|
||||||
Err(e) if is_eof_io_error(&e) => b' ',
|
Err(e) if is_eof_io_error(&e) => b' ',
|
||||||
|
@ -374,8 +315,33 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse<D>, S: BinarySource<'de>>
|
||||||
};
|
};
|
||||||
match c {
|
match c {
|
||||||
b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' |
|
b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' |
|
||||||
b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' =>
|
b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => {
|
||||||
return Ok(N::symbol(&decode_utf8(bs)?)),
|
let s = decode_utf8(bs)?;
|
||||||
|
return match NUMBER_RE.captures(&s) {
|
||||||
|
None => Ok(N::symbol(&s)),
|
||||||
|
Some(m) => match m.get(2) {
|
||||||
|
None => Ok(N::new(s.parse::<BigInt>().map_err(
|
||||||
|
|_| io_syntax_error(&format!(
|
||||||
|
"Invalid signed-integer number: {:?}", s)))?)),
|
||||||
|
Some(_) => {
|
||||||
|
if let Some(maybe_f) = m.get(7) {
|
||||||
|
let s = m[1].to_owned() + &m[3];
|
||||||
|
if maybe_f.range().is_empty() {
|
||||||
|
Ok(N::new(s.parse::<f64>().map_err(
|
||||||
|
|_| io_syntax_error(&format!(
|
||||||
|
"Invalid double-precision number: {:?}", s)))?))
|
||||||
|
} else {
|
||||||
|
Ok(N::new(s.parse::<f32>().map_err(
|
||||||
|
|_| io_syntax_error(&format!(
|
||||||
|
"Invalid single-precision number: {:?}", s)))?))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
panic!("Internal error: cannot analyze number {:?}", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
c => {
|
c => {
|
||||||
self.skip()?;
|
self.skip()?;
|
||||||
bs.push(c)
|
bs.push(c)
|
||||||
|
@ -396,15 +362,6 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
|
||||||
Err(e) => return Err(e.into()),
|
Err(e) => return Err(e.into()),
|
||||||
};
|
};
|
||||||
Ok(Some(match c {
|
Ok(Some(match c {
|
||||||
b'-' => {
|
|
||||||
self.skip()?;
|
|
||||||
let c1 = self.next_byte()?;
|
|
||||||
self.read_intpart(vec![b'-'], c1)?
|
|
||||||
}
|
|
||||||
b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => {
|
|
||||||
self.skip()?;
|
|
||||||
self.read_intpart(Vec::new(), c)?
|
|
||||||
}
|
|
||||||
b'"' => {
|
b'"' => {
|
||||||
self.skip()?;
|
self.skip()?;
|
||||||
N::new(self.read_string(b'"')?)
|
N::new(self.read_string(b'"')?)
|
||||||
|
@ -435,26 +392,13 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
|
||||||
b't' => N::new(true),
|
b't' => N::new(true),
|
||||||
b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations)?.into_iter())),
|
b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations)?.into_iter())),
|
||||||
b'"' => self.read_literal_binary()?,
|
b'"' => self.read_literal_binary()?,
|
||||||
b'x' => if self.next_byte()? == b'"' {
|
b'x' => match self.next_byte()? {
|
||||||
self.read_hex_binary()?
|
b'"' => N::new(&self.read_hex_binary()?[..]),
|
||||||
} else {
|
b'f' => self.read_hex_float(4)?,
|
||||||
return Err(io_syntax_error("Expected open-quote at start of hex ByteString"));
|
b'd' => self.read_hex_float(8)?,
|
||||||
|
_ => return Err(io_syntax_error("Invalid #x syntax")),
|
||||||
},
|
},
|
||||||
b'[' => self.read_base64_binary()?,
|
b'[' => self.read_base64_binary()?,
|
||||||
b'=' => {
|
|
||||||
let bs_val: N = self.demand_next(true)?;
|
|
||||||
if bs_val.annotations().slice().len() > 0 {
|
|
||||||
return Err(io_syntax_error("Annotations not permitted after #="));
|
|
||||||
}
|
|
||||||
match bs_val.value().as_bytestring() {
|
|
||||||
None =>
|
|
||||||
return Err(io_syntax_error("ByteString must follow #=")),
|
|
||||||
Some(bs) =>
|
|
||||||
crate::value::BytesBinarySource::new(bs)
|
|
||||||
.packed(ViaCodec::new(&mut self.dec))
|
|
||||||
.demand_next(read_annotations)?
|
|
||||||
}
|
|
||||||
}
|
|
||||||
b'!' => {
|
b'!' => {
|
||||||
let v = self.next_iovalue(read_annotations)?;
|
let v = self.next_iovalue(read_annotations)?;
|
||||||
Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
|
Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
|
||||||
|
@ -483,7 +427,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'
|
||||||
b'}' => return Err(io_syntax_error("Unexpected }")),
|
b'}' => return Err(io_syntax_error("Unexpected }")),
|
||||||
other => {
|
other => {
|
||||||
self.skip()?;
|
self.skip()?;
|
||||||
self.read_raw_symbol(vec![other])?
|
self.read_raw_symbol_or_number(vec![other])?
|
||||||
}
|
}
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use crate::hex::HexFormatter;
|
||||||
use crate::value::DomainEncode;
|
use crate::value::DomainEncode;
|
||||||
use crate::value::IOValue;
|
use crate::value::IOValue;
|
||||||
use crate::value::IOValueDomainCodec;
|
use crate::value::IOValueDomainCodec;
|
||||||
|
@ -6,6 +7,8 @@ use crate::value::Writer;
|
||||||
use crate::value::suspendable::Suspendable;
|
use crate::value::suspendable::Suspendable;
|
||||||
use crate::value::writer::CompoundWriter;
|
use crate::value::writer::CompoundWriter;
|
||||||
|
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
|
||||||
use num::bigint::BigInt;
|
use num::bigint::BigInt;
|
||||||
|
|
||||||
use std::io;
|
use std::io;
|
||||||
|
@ -231,13 +234,23 @@ impl<W: io::Write> Writer for TextWriter<W> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_f32(&mut self, v: f32) -> io::Result<()> {
|
fn write_f32(&mut self, v: f32) -> io::Result<()> {
|
||||||
dtoa::write(&mut *self.w, v)?;
|
if v.is_nan() || v.is_infinite() {
|
||||||
write!(self.w, "f")
|
write!(self.w, "#xf\"{}\"",
|
||||||
|
HexFormatter::Packed.encode(&u32::to_be_bytes(f32::to_bits(v))))
|
||||||
|
} else {
|
||||||
|
dtoa::write(&mut *self.w, v)?;
|
||||||
|
write!(self.w, "f")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_f64(&mut self, v: f64) -> io::Result<()> {
|
fn write_f64(&mut self, v: f64) -> io::Result<()> {
|
||||||
dtoa::write(&mut *self.w, v)?;
|
if v.is_nan() || v.is_infinite() {
|
||||||
Ok(())
|
write!(self.w, "#xd\"{}\"",
|
||||||
|
HexFormatter::Packed.encode(&u64::to_be_bytes(f64::to_bits(v))))
|
||||||
|
} else {
|
||||||
|
dtoa::write(&mut *self.w, v)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
simple_writer_method!(write_i8, i8);
|
simple_writer_method!(write_i8, i8);
|
||||||
|
@ -269,9 +282,12 @@ impl<W: io::Write> Writer for TextWriter<W> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_symbol(&mut self, v: &str) -> io::Result<()> {
|
fn write_symbol(&mut self, v: &str) -> io::Result<()> {
|
||||||
// FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
|
lazy_static! {
|
||||||
let re = regex::Regex::new("^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$").unwrap();
|
// FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
|
||||||
if re.is_match(v) {
|
static ref RE: regex::Regex =
|
||||||
|
regex::Regex::new("^[-a-zA-Z0-9~!$%^&*?_=+/.]+$").unwrap();
|
||||||
|
}
|
||||||
|
if RE.is_match(v) {
|
||||||
write!(self.w, "{}", v)
|
write!(self.w, "{}", v)
|
||||||
} else {
|
} else {
|
||||||
write!(self.w, "|")?;
|
write!(self.w, "|")?;
|
||||||
|
|
Loading…
Reference in New Issue