Adapt to latest spec changes from 269ed23
.
This commit is contained in:
parent
d8079f0dd4
commit
071a559511
|
@ -9,6 +9,7 @@ edition = "2021"
|
|||
base64 = "0.13"
|
||||
bytemuck = "1.12"
|
||||
dtoa = "0.4"
|
||||
lazy_static = "1.4.0"
|
||||
num-bigint = "0.4"
|
||||
num-traits = "0.2"
|
||||
regex = "1.5"
|
||||
|
|
|
@ -13,6 +13,8 @@ use crate::reader::NextToken;
|
|||
use crate::reader::ReaderResult;
|
||||
use crate::source::BinarySource;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
use num_bigint::BigInt;
|
||||
|
||||
use std::borrow::Cow;
|
||||
|
@ -107,86 +109,21 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S>
|
|||
}
|
||||
}
|
||||
|
||||
fn read_intpart(&mut self, mut bs: Vec<u8>, c: u8) -> io::Result<Atom<'static>> {
|
||||
match c {
|
||||
b'0' => {
|
||||
bs.push(c);
|
||||
self.read_fracexp(bs)
|
||||
}
|
||||
_ => {
|
||||
self.read_digit1(&mut bs, c)?;
|
||||
self.read_fracexp(bs)
|
||||
}
|
||||
fn read_hex_float(&mut self, bytecount: usize) -> io::Result<Atom<'static>> {
|
||||
if self.next_byte()? != b'"' {
|
||||
return Err(self.syntax_error("Missing open-double-quote in hex-encoded floating-point number"));
|
||||
}
|
||||
}
|
||||
|
||||
fn read_fracexp(&mut self, mut bs: Vec<u8>) -> io::Result<Atom<'static>> {
|
||||
let mut is_float = false;
|
||||
match self.peek_noeof() {
|
||||
Ok(b'.') => {
|
||||
is_float = true;
|
||||
bs.push(self.next_byte()?);
|
||||
let c = self.next_byte()?;
|
||||
self.read_digit1(&mut bs, c)?;
|
||||
}
|
||||
_ => ()
|
||||
let bs = self.read_hex_binary()?;
|
||||
if bs.len() != bytecount {
|
||||
return Err(self.syntax_error("Incorrect number of bytes in hex-encoded floating-point number"));
|
||||
}
|
||||
match self.peek_noeof() {
|
||||
Ok(b'e') | Ok(b'E') => {
|
||||
bs.push(self.next_byte()?);
|
||||
self.read_sign_and_exp(bs)
|
||||
}
|
||||
_ => self.finish_number(bs, is_float)
|
||||
match bytecount {
|
||||
4 => Ok(Atom::Float(f32::from_bits(u32::from_be_bytes(bs.try_into().unwrap())))),
|
||||
8 => Ok(Atom::Double(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap())))),
|
||||
_ => Err(self.syntax_error("Unsupported byte count in hex-encoded floating-point number")),
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads the remainder of an exponent and finishes the number as a float.
///
/// `bs` holds the bytes of the number accumulated so far. An optional
/// leading `+` or `-` is consumed and appended, followed by a mandatory run
/// of one or more decimal digits (see `read_digit1`). The accumulated text
/// is then handed to `finish_number` with `is_float` set to `true`.
///
/// # Errors
///
/// Propagates any error from peeking/reading the input, from `read_digit1`
/// when the exponent has no digits, and from `finish_number`.
fn read_sign_and_exp(&mut self, mut bs: Vec<u8>) -> io::Result<Atom<'static>> {
    // The sign is optional; only consume it when it is actually present.
    if matches!(self.peek_noeof()?, b'+' | b'-') {
        let sign = self.next_byte()?;
        bs.push(sign);
    }

    // At least one exponent digit is required.
    let first_digit = self.next_byte()?;
    self.read_digit1(&mut bs, first_digit)?;

    self.finish_number(bs, true)
}
|
||||
|
||||
fn finish_number(&mut self, bs: Vec<u8>, is_float: bool) -> io::Result<Atom<'static>> {
|
||||
let s = self.decode_utf8(bs)?;
|
||||
if is_float {
|
||||
match self.peek_noeof() {
|
||||
Ok(b'f') | Ok(b'F') => {
|
||||
self.skip()?;
|
||||
Ok(Atom::Float(s.parse::<f32>().map_err(
|
||||
|_| self.syntax_error(&format!(
|
||||
"Invalid single-precision number: {:?}", s)))?))
|
||||
}
|
||||
_ =>
|
||||
Ok(Atom::Double(s.parse::<f64>().map_err(
|
||||
|_| self.syntax_error(&format!(
|
||||
"Invalid double-precision number: {:?}", s)))?))
|
||||
}
|
||||
} else {
|
||||
Ok(Atom::SignedInteger(s.parse::<BigInt>().map_err(
|
||||
|_| self.syntax_error(&format!(
|
||||
"Invalid signed-integer number: {:?}", s)))?.into()))
|
||||
}
|
||||
}
|
||||
|
||||
fn read_digit1(&mut self, bs: &mut Vec<u8>, c: u8) -> io::Result<()>
|
||||
{
|
||||
if !(c as char).is_digit(10) {
|
||||
return Err(self.syntax_error("Incomplete number"));
|
||||
}
|
||||
bs.push(c);
|
||||
while let Ok(Some(c)) = self.peek() {
|
||||
if !(c as char).is_digit(10) {
|
||||
break;
|
||||
}
|
||||
bs.push(self.next_byte()?);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn read_stringlike<X, H, R>(
|
||||
&mut self,
|
||||
mut seed: R,
|
||||
|
@ -278,14 +215,13 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S>
|
|||
|r, bs| Ok(bs.push(r.hexnum(2)? as u8)))?)))
|
||||
}
|
||||
|
||||
fn read_hex_binary(&mut self) -> io::Result<Atom<'static>> {
|
||||
fn read_hex_binary(&mut self) -> io::Result<Vec<u8>> {
|
||||
let mut s = String::new();
|
||||
loop {
|
||||
self.skip_whitespace();
|
||||
let c1 = self.next_byte()? as char;
|
||||
if c1 == '"' {
|
||||
let bs = hex::HexParser::Strict.decode(&s).unwrap();
|
||||
return Ok(Atom::ByteString(Cow::Owned(bs)));
|
||||
return Ok(hex::HexParser::Strict.decode(&s).unwrap());
|
||||
}
|
||||
let c2 = self.next_byte()? as char;
|
||||
if !(c1.is_digit(16) && c2.is_digit(16)) {
|
||||
|
@ -313,7 +249,11 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S>
|
|||
}
|
||||
}
|
||||
|
||||
fn read_raw_symbol(&mut self, mut bs: Vec<u8>) -> io::Result<Atom<'static>> {
|
||||
fn read_raw_symbol_or_number(&mut self, mut bs: Vec<u8>) -> io::Result<Atom<'static>> {
|
||||
lazy_static! {
|
||||
static ref NUMBER_RE: regex::Regex = regex::Regex::new(
|
||||
r"^([-+]?\d+)(((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))([fF]?))?$").unwrap();
|
||||
}
|
||||
loop {
|
||||
let c = match self.peek()? {
|
||||
None => b' ',
|
||||
|
@ -322,8 +262,33 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S>
|
|||
};
|
||||
match c {
|
||||
b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' |
|
||||
b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' =>
|
||||
return Ok(Atom::Symbol(Cow::Owned(self.decode_utf8(bs)?))),
|
||||
b'"' | b';' | b',' | b'@' | b'#' | b':' | b'|' | b' ' => {
|
||||
let s = self.decode_utf8(bs)?;
|
||||
return match NUMBER_RE.captures(&s) {
|
||||
None => Ok(Atom::Symbol(s.into())),
|
||||
Some(m) => match m.get(2) {
|
||||
None => Ok(Atom::SignedInteger(s.parse::<BigInt>().map_err(
|
||||
|_| self.syntax_error(&format!(
|
||||
"Invalid signed-integer number: {:?}", s)))?.into())),
|
||||
Some(_) => {
|
||||
if let Some(maybe_f) = m.get(7) {
|
||||
let s = m[1].to_owned() + &m[3];
|
||||
if maybe_f.range().is_empty() {
|
||||
Ok(Atom::Double(s.parse::<f64>().map_err(
|
||||
|_| self.syntax_error(&format!(
|
||||
"Invalid double-precision number: {:?}", s)))?))
|
||||
} else {
|
||||
Ok(Atom::Float(s.parse::<f32>().map_err(
|
||||
|_| self.syntax_error(&format!(
|
||||
"Invalid single-precision number: {:?}", s)))?))
|
||||
}
|
||||
} else {
|
||||
panic!("Internal error: cannot analyze number {:?}", s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
c => {
|
||||
self.skip()?;
|
||||
bs.push(c)
|
||||
|
@ -341,13 +306,6 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S>
|
|||
self.skip()?;
|
||||
|
||||
Ok(match c {
|
||||
b'-' => {
|
||||
let c1 = self.next_byte()?;
|
||||
Classification::Atom(self.read_intpart(vec![b'-'], c1)?)
|
||||
}
|
||||
b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => {
|
||||
Classification::Atom(self.read_intpart(Vec::new(), c)?)
|
||||
}
|
||||
b'"' => Classification::Atom(Atom::String(Cow::Owned(self.read_string(b'"')?))),
|
||||
b'|' => Classification::Atom(Atom::Symbol(Cow::Owned(self.read_string(b'|')?))),
|
||||
b':' => Err(self.syntax_error("Unexpected key/value separator between items"))?,
|
||||
|
@ -359,15 +317,13 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S>
|
|||
b't' => Classification::Atom(Atom::Boolean(true)),
|
||||
b'{' => Classification::Compound(CompoundClass::Set),
|
||||
b'"' => Classification::Atom(self.read_literal_binary()?),
|
||||
b'x' => if self.next_byte()? == b'"' {
|
||||
Classification::Atom(self.read_hex_binary()?)
|
||||
} else {
|
||||
Err(self.syntax_error("Expected open-quote at start of hex ByteString"))?
|
||||
b'x' => match self.next_byte()? {
|
||||
b'"' => Classification::Atom(Atom::ByteString(self.read_hex_binary()?.into())),
|
||||
b'f' => Classification::Atom(self.read_hex_float(4)?),
|
||||
b'd' => Classification::Atom(self.read_hex_float(8)?),
|
||||
_ => Err(self.syntax_error("Invalid #x syntax"))?,
|
||||
},
|
||||
b'[' => Classification::Atom(self.read_base64_binary()?),
|
||||
b'=' => {
|
||||
todo!("Remove machine text syntax")
|
||||
}
|
||||
b'!' => Classification::Embedded,
|
||||
other => Err(self.syntax_error(&format!("Invalid # syntax: {:?}", other)))?,
|
||||
}
|
||||
|
@ -378,7 +334,7 @@ impl<'de, 'src, S: BinarySource<'de>> TextReader<'de, 'src, S>
|
|||
b'>' => Err(self.syntax_error("Unexpected >"))?,
|
||||
b']' => Err(self.syntax_error("Unexpected ]"))?,
|
||||
b'}' => Err(self.syntax_error("Unexpected }"))?,
|
||||
other => Classification::Atom(self.read_raw_symbol(vec![other])?),
|
||||
other => Classification::Atom(self.read_raw_symbol_or_number(vec![other])?),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,6 +6,8 @@ use crate::Value;
|
|||
use crate::Writer;
|
||||
use crate::hex::HexFormatter;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
use num_bigint::BigInt;
|
||||
|
||||
use std::io;
|
||||
|
@ -211,7 +213,7 @@ impl<W: io::Write> Writer for TextWriter<W> {
|
|||
write!(self.w, "f")
|
||||
} else {
|
||||
let bs = v.to_be_bytes();
|
||||
write!(self.w, "#p#x\"{}\"", HexFormatter::Packed.encode(&bs))
|
||||
write!(self.w, "#xf\"{}\"", HexFormatter::Packed.encode(&bs))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -221,7 +223,7 @@ impl<W: io::Write> Writer for TextWriter<W> {
|
|||
Ok(())
|
||||
} else {
|
||||
let bs = v.to_be_bytes();
|
||||
write!(self.w, "#p#x\"{}\"", HexFormatter::Packed.encode(&bs))
|
||||
write!(self.w, "#xd\"{}\"", HexFormatter::Packed.encode(&bs))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -254,9 +256,12 @@ impl<W: io::Write> Writer for TextWriter<W> {
|
|||
}
|
||||
|
||||
fn write_symbol(&mut self, v: &str) -> io::Result<()> {
|
||||
// FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
|
||||
let re = regex::Regex::new("^[a-zA-Z~!$%^&*?_=+/.][-a-zA-Z~!$%^&*?_=+/.0-9]*$").unwrap();
|
||||
if re.is_match(v) {
|
||||
lazy_static! {
|
||||
// FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
|
||||
static ref RE: regex::Regex =
|
||||
regex::Regex::new("^[-a-zA-Z0-9~!$%^&*?_=+/.]+$").unwrap();
|
||||
}
|
||||
if RE.is_match(v) {
|
||||
write!(self.w, "{}", v)
|
||||
} else {
|
||||
write!(self.w, "|")?;
|
||||
|
|
Loading…
Reference in New Issue