From 071a559511ca6c188b396d579ed74928e3934f07 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 8 Nov 2022 20:34:47 +0100 Subject: [PATCH] Adapt to latest spec changes from 269ed23. --- implementations/rust/oo/Cargo.toml | 1 + implementations/rust/oo/src/text/reader.rs | 148 ++++++++------------- implementations/rust/oo/src/text/writer.rs | 15 ++- 3 files changed, 63 insertions(+), 101 deletions(-) diff --git a/implementations/rust/oo/Cargo.toml b/implementations/rust/oo/Cargo.toml index 2b62e8d..82ce8ce 100644 --- a/implementations/rust/oo/Cargo.toml +++ b/implementations/rust/oo/Cargo.toml @@ -9,6 +9,7 @@ edition = "2021" base64 = "0.13" bytemuck = "1.12" dtoa = "0.4" +lazy_static = "1.4.0" num-bigint = "0.4" num-traits = "0.2" regex = "1.5" diff --git a/implementations/rust/oo/src/text/reader.rs b/implementations/rust/oo/src/text/reader.rs index cbdef36..0ea299a 100644 --- a/implementations/rust/oo/src/text/reader.rs +++ b/implementations/rust/oo/src/text/reader.rs @@ -13,6 +13,8 @@ use crate::reader::NextToken; use crate::reader::ReaderResult; use crate::source::BinarySource; +use lazy_static::lazy_static; + use num_bigint::BigInt; use std::borrow::Cow; @@ -107,86 +109,21 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> } } - fn read_intpart(&mut self, mut bs: Vec, c: u8) -> io::Result> { - match c { - b'0' => { - bs.push(c); - self.read_fracexp(bs) - } - _ => { - self.read_digit1(&mut bs, c)?; - self.read_fracexp(bs) - } + fn read_hex_float(&mut self, bytecount: usize) -> io::Result> { + if self.next_byte()? != b'"' { + return Err(self.syntax_error("Missing open-double-quote in hex-encoded floating-point number")); } - } - - fn read_fracexp(&mut self, mut bs: Vec) -> io::Result> { - let mut is_float = false; - match self.peek_noeof() { - Ok(b'.') => { - is_float = true; - bs.push(self.next_byte()?); - let c = self.next_byte()?; - self.read_digit1(&mut bs, c)?; - } - _ => () + let bs = self.read_hex_binary()?; + if bs.len() != bytecount { + return Err(self.syntax_error("Incorrect number of bytes in hex-encoded floating-point number")); } - match self.peek_noeof() { - Ok(b'e') | Ok(b'E') => { - bs.push(self.next_byte()?); - self.read_sign_and_exp(bs) - } - _ => self.finish_number(bs, is_float) + match bytecount { + 4 => Ok(Atom::Float(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap())))), + 8 => Ok(Atom::Double(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap())))), + _ => Err(self.syntax_error("Unsupported byte count in hex-encoded floating-point number")), } } - fn read_sign_and_exp(&mut self, mut bs: Vec) -> io::Result> { - match self.peek_noeof()? { - b'+' | b'-' => bs.push(self.next_byte()?), - _ => (), - } - let c = self.next_byte()?; - self.read_digit1(&mut bs, c)?; - self.finish_number(bs, true) - } - - fn finish_number(&mut self, bs: Vec, is_float: bool) -> io::Result> { - let s = self.decode_utf8(bs)?; - if is_float { - match self.peek_noeof() { - Ok(b'f') | Ok(b'F') => { - self.skip()?; - Ok(Atom::Float(s.parse::().map_err( - |_| self.syntax_error(&format!( - "Invalid single-precision number: {:?}", s)))?)) - } - _ => - Ok(Atom::Double(s.parse::().map_err( - |_| self.syntax_error(&format!( - "Invalid double-precision number: {:?}", s)))?)) - } - } else { - Ok(Atom::SignedInteger(s.parse::().map_err( - |_| self.syntax_error(&format!( - "Invalid signed-integer number: {:?}", s)))?.into())) - } - } - - fn read_digit1(&mut self, bs: &mut Vec, c: u8) -> io::Result<()> - { - if !(c as char).is_digit(10) { - return Err(self.syntax_error("Incomplete number")); - } - bs.push(c); - while let Ok(Some(c)) = self.peek() { - if !(c as char).is_digit(10) { - break; - } - bs.push(self.next_byte()?); - } - Ok(()) - } - fn read_stringlike( &mut self, mut seed: R, @@ -278,14 +215,13 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> |r, bs| Ok(bs.push(r.hexnum(2)? as u8)))?))) } - fn read_hex_binary(&mut self) -> io::Result> { + fn read_hex_binary(&mut self) -> io::Result> { let mut s = String::new(); loop { self.skip_whitespace(); let c1 = self.next_byte()? as char; if c1 == '"' { - let bs = hex::HexParser::Strict.decode(&s).unwrap(); - return Ok(Atom::ByteString(Cow::Owned(bs))); + return Ok(hex::HexParser::Strict.decode(&s).unwrap()); } let c2 = self.next_byte()? as char; if !(c1.is_digit(16) && c2.is_digit(16)) { @@ -313,7 +249,11 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> } } - fn read_raw_symbol(&mut self, mut bs: Vec) -> io::Result> { + fn read_raw_symbol_or_number(&mut self, mut bs: Vec) -> io::Result> { + lazy_static! { + static ref NUMBER_RE: regex::Regex = regex::Regex::new( + r"^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$").unwrap(); + } loop { let c = match self.peek()? { None => b' ', @@ -322,8 +262,33 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> }; match c { b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | - b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => - return Ok(Atom::Symbol(Cow::Owned(self.decode_utf8(bs)?))), + b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => { + let s = self.decode_utf8(bs)?; + return match NUMBER_RE.captures(&s) { + None => Ok(Atom::Symbol(s.into())), + Some(m) => match m.get(2) { + None => Ok(Atom::SignedInteger(s.parse::().map_err( + |_| self.syntax_error(&format!( + "Invalid signed-integer number: {:?}", s)))?.into())), + Some(_) => { + if let Some(maybe_f) = m.get(7) { + let s = m[1].to_owned() + &m[3]; + if maybe_f.range().is_empty() { + Ok(Atom::Double(s.parse::().map_err( + |_| self.syntax_error(&format!( + "Invalid double-precision number: {:?}", s)))?)) + } else { + Ok(Atom::Float(s.parse::().map_err( + |_| self.syntax_error(&format!( + "Invalid single-precision number: {:?}", s)))?)) + } + } else { + panic!("Internal error: cannot analyze number {:?}", s) + } + } + } + } + } c => { self.skip()?; bs.push(c) @@ -341,13 +306,6 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> self.skip()?; Ok(match c { - b'-' => { - let c1 = self.next_byte()?; - Classification::Atom(self.read_intpart(vec![b'-'], c1)?) - } - b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => { - Classification::Atom(self.read_intpart(Vec::new(), c)?) - } b'"' => Classification::Atom(Atom::String(Cow::Owned(self.read_string(b'"')?))), b'|' => Classification::Atom(Atom::Symbol(Cow::Owned(self.read_string(b'|')?))), b':' => Err(self.syntax_error("Unexpected key/value separator between items"))?, @@ -359,15 +317,13 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> b't' => Classification::Atom(Atom::Boolean(true)), b'{' => Classification::Compound(CompoundClass::Set), b'"' => Classification::Atom(self.read_literal_binary()?), - b'x' => if self.next_byte()? == b'"' { - Classification::Atom(self.read_hex_binary()?) - } else { - Err(self.syntax_error("Expected open-quote at start of hex ByteString"))? + b'x' => match self.next_byte()? { + b'"' => Classification::Atom(Atom::ByteString(self.read_hex_binary()?.into())), + b'f' => Classification::Atom(self.read_hex_float(4)?), + b'd' => Classification::Atom(self.read_hex_float(8)?), + _ => Err(self.syntax_error("Invalid #x syntax"))?, }, b'[' => Classification::Atom(self.read_base64_binary()?), - b'=' => { - todo!("Remove machine text syntax") - } b'!' => Classification::Embedded, other => Err(self.syntax_error(&format!("Invalid # syntax: {:?}", other)))?, } @@ -378,7 +334,7 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S> b'>' => Err(self.syntax_error("Unexpected >"))?, b']' => Err(self.syntax_error("Unexpected ]"))?, b'}' => Err(self.syntax_error("Unexpected }"))?, - other => Classification::Atom(self.read_raw_symbol(vec![other])?), + other => Classification::Atom(self.read_raw_symbol_or_number(vec![other])?), }) } } diff --git a/implementations/rust/oo/src/text/writer.rs b/implementations/rust/oo/src/text/writer.rs index 652ebb0..2e549fc 100644 --- a/implementations/rust/oo/src/text/writer.rs +++ b/implementations/rust/oo/src/text/writer.rs @@ -6,6 +6,8 @@ use crate::Value; use crate::Writer; use crate::hex::HexFormatter; +use lazy_static::lazy_static; + use num_bigint::BigInt; use std::io; @@ -211,7 +213,7 @@ impl Writer for TextWriter { write!(self.w, "f") } else { let bs = v.to_be_bytes(); - write!(self.w, "#p#x\"{}\"", HexFormatter::Packed.encode(&bs)) + write!(self.w, "#xf\"{}\"", HexFormatter::Packed.encode(&bs)) } } @@ -221,7 +223,7 @@ impl Writer for TextWriter { Ok(()) } else { let bs = v.to_be_bytes(); - write!(self.w, "#p#x\"{}\"", HexFormatter::Packed.encode(&bs)) + write!(self.w, "#xd\"{}\"", HexFormatter::Packed.encode(&bs)) } } @@ -254,9 +256,12 @@ impl Writer for TextWriter { } fn write_symbol(&mut self, v: &str) -> io::Result<()> { - // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. - let re = regex::Regex::new("^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$").unwrap(); - if re.is_match(v) { + lazy_static! { + // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic. + static ref RE: regex::Regex = + regex::Regex::new("^[-a-zA-Z0-9~!$%^&*?_=+/.]+$").unwrap(); + } + if RE.is_match(v) { write!(self.w, "{}", v) } else { write!(self.w, "|")?;