diff --git a/implementations/rust/preserves/Cargo.toml b/implementations/rust/preserves/Cargo.toml index 0d9bd3d..fc7bb85 100644 --- a/implementations/rust/preserves/Cargo.toml +++ b/implementations/rust/preserves/Cargo.toml @@ -15,6 +15,7 @@ gitlab = { repository = "preserves/preserves" } base64 = "0.13" dtoa = "0.4" num = "0.4" +lazy_static = "1.4.0" regex = "1.5" serde = { version = "1.0", features = ["derive"] } serde_bytes = "0.11" diff --git a/implementations/rust/preserves/src/value/text/reader.rs b/implementations/rust/preserves/src/value/text/reader.rs index 446d3e8..8673ce3 100644 --- a/implementations/rust/preserves/src/value/text/reader.rs +++ b/implementations/rust/preserves/src/value/text/reader.rs @@ -26,8 +26,11 @@ use crate::value::reader::BinarySource; use crate::value::reader::ReaderResult; use crate::value::repr::Annotations; +use lazy_static::lazy_static; + use num::bigint::BigInt; +use std::convert::TryInto; use std::io; use std::iter::FromIterator; use std::marker::PhantomData; @@ -137,86 +140,21 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse, S: BinarySource<'de>> } } - fn read_intpart(&mut self, mut bs: Vec, c: u8) -> io::Result { - match c { - b'0' => { - bs.push(c); - self.read_fracexp(bs) - } - _ => { - self.read_digit1(&mut bs, c)?; - self.read_fracexp(bs) - } + fn read_hex_float(&mut self, bytecount: usize) -> io::Result { + if self.next_byte()? != b'"' { + return Err(io_syntax_error("Missing open-double-quote in hex-encoded floating-point number")); } - } - - fn read_fracexp(&mut self, mut bs: Vec) -> io::Result { - let mut is_float = false; - match self.peek() { - Ok(b'.') => { - is_float = true; - bs.push(self.next_byte()?); - let c = self.next_byte()?; - self.read_digit1(&mut bs, c)?; - } - _ => () + let bs = self.read_hex_binary()?; + if bs.len() != bytecount { + return Err(io_syntax_error("Incorrect number of bytes in hex-encoded floating-point number")); } - match self.peek() { - Ok(b'e') | Ok(b'E') => { - bs.push(self.next_byte()?); - self.read_sign_and_exp(bs) - } - _ => self.finish_number(bs, is_float) + match bytecount { + 4 => Ok(Value::from(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap()))).wrap()), + 8 => Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap()), + _ => Err(io_syntax_error("Unsupported byte count in hex-encoded floating-point number")), } } - fn read_sign_and_exp(&mut self, mut bs: Vec) -> io::Result { - match self.peek()? { - b'+' | b'-' => bs.push(self.next_byte()?), - _ => (), - } - let c = self.next_byte()?; - self.read_digit1(&mut bs, c)?; - self.finish_number(bs, true) - } - - fn finish_number(&mut self, bs: Vec, is_float: bool) -> io::Result { - let s = decode_utf8(bs)?; - if is_float { - match self.peek() { - Ok(b'f') | Ok(b'F') => { - self.skip()?; - Ok(N::new(s.parse::().map_err( - |_| io_syntax_error(&format!( - "Invalid single-precision number: {:?}", s)))?)) - } - _ => - Ok(N::new(s.parse::().map_err( - |_| io_syntax_error(&format!( - "Invalid double-precision number: {:?}", s)))?)) - } - } else { - Ok(N::new(s.parse::().map_err( - |_| io_syntax_error(&format!( - "Invalid signed-integer number: {:?}", s)))?)) - } - } - - fn read_digit1(&mut self, bs: &mut Vec, c: u8) -> io::Result<()> - { - if !(c as char).is_digit(10) { - return Err(io_syntax_error("Incomplete number")); - } - bs.push(c); - while let Ok(c) = self.peek() { - if !(c as char).is_digit(10) { - break; - } - bs.push(self.next_byte()?); - } - Ok(()) - } - fn read_stringlike( &mut self, mut seed: R, @@ -299,14 +237,13 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse, S: BinarySource<'de>> |bs, r| Ok(bs.push(r.hexnum(2)? as u8)))?[..])) } - fn read_hex_binary(&mut self) -> io::Result { + fn read_hex_binary(&mut self) -> io::Result> { let mut s = String::new(); loop { self.skip_whitespace(); let c1 = self.next_byte()? as char; if c1 == '"' { - let bs = hex::HexParser::Strict.decode(&s).unwrap(); - return Ok(N::new(&bs[..])); + return Ok(hex::HexParser::Strict.decode(&s).unwrap()); } let c2 = self.next_byte()? as char; if !(c1.is_digit(16) && c2.is_digit(16)) { @@ -364,7 +301,11 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse, S: BinarySource<'de>> } } - fn read_raw_symbol(&mut self, mut bs: Vec) -> io::Result { + fn read_raw_symbol_or_number(&mut self, mut bs: Vec) -> io::Result { + lazy_static! { + static ref NUMBER_RE: regex::Regex = regex::Regex::new( + r"^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$").unwrap(); + } loop { let c = match self.peek() { Err(e) if is_eof_io_error(&e) => b' ', @@ -374,8 +315,33 @@ impl<'de, 'src, D: Embeddable, Dec: DomainParse, S: BinarySource<'de>> }; match c { b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | - b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => - return Ok(N::symbol(&decode_utf8(bs)?)), + b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => { + let s = decode_utf8(bs)?; + return match NUMBER_RE.captures(&s) { + None => Ok(N::symbol(&s)), + Some(m) => match m.get(2) { + None => Ok(N::new(s.parse::().map_err( + |_| io_syntax_error(&format!( + "Invalid signed-integer number: {:?}", s)))?)), + Some(_) => { + if let Some(maybe_f) = m.get(7) { + let s = m[1].to_owned() + &m[3]; + if maybe_f.range().is_empty() { + Ok(N::new(s.parse::().map_err( + |_| io_syntax_error(&format!( + "Invalid double-precision number: {:?}", s)))?)) + } else { + Ok(N::new(s.parse::().map_err( + |_| io_syntax_error(&format!( + "Invalid single-precision number: {:?}", s)))?)) + } + } else { + panic!("Internal error: cannot analyze number {:?}", s) + } + } + } + } + } c => { self.skip()?; bs.push(c) @@ -396,15 +362,6 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' Err(e) => return Err(e.into()), }; Ok(Some(match c { - b'-' => { - self.skip()?; - let c1 = self.next_byte()?; - self.read_intpart(vec![b'-'], c1)? - } - b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => { - self.skip()?; - self.read_intpart(Vec::new(), c)? - } b'"' => { self.skip()?; N::new(self.read_string(b'"')?) @@ -435,26 +392,13 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' b't' => N::new(true), b'{' => N::new(Set::from_iter(self.upto(b'}', read_annotations)?.into_iter())), b'"' => self.read_literal_binary()?, - b'x' => if self.next_byte()? == b'"' { - self.read_hex_binary()? - } else { - return Err(io_syntax_error("Expected open-quote at start of hex ByteString")); + b'x' => match self.next_byte()? { + b'"' => N::new(&self.read_hex_binary()?[..]), + b'f' => self.read_hex_float(4)?, + b'd' => self.read_hex_float(8)?, + _ => return Err(io_syntax_error("Invalid #x syntax")), }, b'[' => self.read_base64_binary()?, - b'=' => { - let bs_val: N = self.demand_next(true)?; - if bs_val.annotations().slice().len() > 0 { - return Err(io_syntax_error("Annotations not permitted after #=")); - } - match bs_val.value().as_bytestring() { - None => - return Err(io_syntax_error("ByteString must follow #=")), - Some(bs) => - crate::value::BytesBinarySource::new(bs) - .packed(ViaCodec::new(&mut self.dec)) - .demand_next(read_annotations)? - } - } b'!' => { let v = self.next_iovalue(read_annotations)?; Value::Embedded(self.dec.parse_embedded(&v)?).wrap() @@ -483,7 +427,7 @@ impl<'de, 'src, N: NestedValue, Dec: DomainParse, S: BinarySource<' b'}' => return Err(io_syntax_error("Unexpected }")), other => { self.skip()?; - self.read_raw_symbol(vec![other])? + self.read_raw_symbol_or_number(vec![other])? } })) } diff --git a/implementations/rust/preserves/src/value/text/writer.rs b/implementations/rust/preserves/src/value/text/writer.rs index 3eb8446..8589d10 100644 --- a/implementations/rust/preserves/src/value/text/writer.rs +++ b/implementations/rust/preserves/src/value/text/writer.rs @@ -1,3 +1,4 @@ +use crate::hex::HexFormatter; use crate::value::DomainEncode; use crate::value::IOValue; use crate::value::IOValueDomainCodec; @@ -6,6 +7,8 @@ use crate::value::Writer; use crate::value::suspendable::Suspendable; use crate::value::writer::CompoundWriter; +use lazy_static::lazy_static; + use num::bigint::BigInt; use std::io; @@ -231,13 +234,23 @@ impl Writer for TextWriter { } fn write_f32(&mut self, v: f32) -> io::Result<()> { - dtoa::write(&mut *self.w, v)?; - write!(self.w, "f") + if v.is_nan() || v.is_infinite() { + write!(self.w, "#xf\"{}\"", + HexFormatter::Packed.encode(&u32::to_be_bytes(f32::to_bits(v)))) + } else { + dtoa::write(&mut *self.w, v)?; + write!(self.w, "f") + } } fn write_f64(&mut self, v: f64) -> io::Result<()> { - dtoa::write(&mut *self.w, v)?; - Ok(()) + if v.is_nan() || v.is_infinite() { + write!(self.w, "#xd\"{}\"", + HexFormatter::Packed.encode(&u64::to_be_bytes(f64::to_bits(v)))) + } else { + dtoa::write(&mut *self.w, v)?; + Ok(()) + } } simple_writer_method!(write_i8, i8); @@ -269,9 +282,12 @@ impl Writer for TextWriter { } fn write_symbol(&mut self, v: &str) -> io::Result<()> { - // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. - let re = regex::Regex::new("^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$").unwrap(); - if re.is_match(v) { + lazy_static! { + // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. + static ref RE: regex::Regex = + regex::Regex::new("^[-a-zA-Z0-9~!$%^&*?_=+/.]+$").unwrap(); + } + if RE.is_match(v) { write!(self.w, "{}", v) } else { write!(self.w, "|")?;