From 96707352e66a93ecd8c9e6fb46a0a121b345c863 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Thu, 5 Aug 2021 15:55:48 +0200 Subject: [PATCH] Quotation --- .../preserves-tools/src/bin/preserves-tool.rs | 227 +++++++++++++++--- 1 file changed, 189 insertions(+), 38 deletions(-) diff --git a/implementations/rust/preserves-tools/src/bin/preserves-tool.rs b/implementations/rust/preserves-tools/src/bin/preserves-tool.rs index 1c17d84..1f49967 100644 --- a/implementations/rust/preserves-tools/src/bin/preserves-tool.rs +++ b/implementations/rust/preserves-tools/src/bin/preserves-tool.rs @@ -10,15 +10,18 @@ use clap_generate::{generate, generators}; use preserves::value::IOBinarySource; use preserves::value::IOValue; use preserves::value::IOValueDomainCodec; +use preserves::value::NestedValue; use preserves::value::PackedReader; use preserves::value::PackedWriter; use preserves::value::Reader; use preserves::value::TextReader; use preserves::value::TextWriter; +use preserves::value::Value; use preserves::value::ViaCodec; use preserves::value::Writer; use std::io; +use std::io::Read; // #[derive(ArgEnum, Clone, Debug)] // enum Encoding { @@ -77,6 +80,9 @@ struct Convert { #[clap(long, short, arg_enum, default_value = "text")] output_format: OutputFormat, + #[clap(long)] + escape_spaces: bool, + #[clap(long)] limit: Option, @@ -87,13 +93,50 @@ struct Convert { annotations: Boolish, } +#[derive(ArgEnum, Clone, Debug)] +enum StringInputTerminator { + EOF, + Newline, + Nul, +} + +#[derive(Clap, Clone, Debug)] +struct StringQuotation { + #[clap(long, arg_enum, default_value = "eof")] + input_terminator: StringInputTerminator, + + #[clap(long)] + include_terminator: bool, + + #[clap(long)] + escape_spaces: bool, +} + + +#[derive(Clap, Clone, Debug)] +enum QuotationOutput { + String(StringQuotation), + ByteString, + Symbol(StringQuotation), +} + +#[derive(Clap, Clone, Debug)] +struct Quote { + #[clap(long, short, arg_enum, default_value = "text")] + output_format: OutputFormat, + + #[clap(subcommand)] + output: QuotationOutput, +} + #[derive(Clap, Clone, Debug)] enum Subcommand { Convert(Convert), Completions { #[clap(arg_enum, value_name = "dialect")] dialect: CompletionDialect, - } + }, + Quote(Quote), } #[derive(Clap, Clone, Debug)] @@ -117,6 +160,7 @@ fn main() -> io::Result<()> { } }, Subcommand::Convert(c) => convert(c)?, + Subcommand::Quote(q) => quote(q)?, } Ok(()) } @@ -164,6 +208,21 @@ impl RollingBuffer { fn discard_to_pos(&mut self) { self.discard(self.pos - self.discarded) } + + fn read_upto(&mut self, delimiter: u8, inclusive: bool) -> io::Result>> { + let mut result = Vec::new(); + let mut buf = [0; 1]; + while self.read(&mut buf)? == 1 { + if buf[0] == delimiter { + if inclusive { + result.push(delimiter); + } + return Ok(Some(result)); + } + result.push(buf[0]); + } + Ok(if result.is_empty() { None } else { Some(result) }) + } } impl io::Seek for RollingBuffer { @@ -213,13 +272,74 @@ impl io::Read for RollingBuffer { } } +// TODO: extract this and RollingBuffer to some preserves utils module +pub struct ValueStream { + input_format: InputFormat, + read_annotations: bool, + source: IOBinarySource>, + count: usize, +} + +impl ValueStream { + fn new(input_format: InputFormat, read_annotations: bool, r: R) -> Self { + ValueStream { + input_format, + read_annotations, + source: IOBinarySource::new(RollingBuffer::new(r)), + count: 0, + } + } + + fn read(&mut self) -> io::Result> { + let is_text = { + let peek_buf = self.source.read.peek_buf()?; + if peek_buf.is_empty() { + return Ok(None); + } + peek_buf[0] < 128 + }; + + let maybe_value: Option = if is_text { + match self.input_format { + InputFormat::AutoDetect | InputFormat::Text => (), + InputFormat::Binary => return Err(io::Error::new( + io::ErrorKind::InvalidData, "Expected binary input, saw text input")), + } + TextReader::new(&mut self.source, ViaCodec::new(IOValueDomainCodec)) + .next(self.read_annotations)? + } else { + match self.input_format { + InputFormat::AutoDetect | InputFormat::Binary => (), + InputFormat::Text => return Err(io::Error::new( + io::ErrorKind::InvalidData, "Expected text input, saw binary input")), + } + PackedReader::new(&mut self.source, IOValueDomainCodec) + .next(self.read_annotations)? + }; + + match maybe_value { + None => return Ok(None), + Some(value) => { + self.source.read.discard_to_pos(); + self.count += 1; + Ok(Some(value)) + } + } + } +} + +impl std::iter::Iterator for ValueStream { + type Item = io::Result; + fn next(&mut self) -> Option { + self.read().transpose() + } +} + fn convert(c: Convert) -> io::Result<()> { - let mut r = RollingBuffer::new(io::stdin()); - let mut source = IOBinarySource::new(&mut r); - let mut count = 0; + let mut vs = ValueStream::new(c.input_format, c.annotations.into(), io::stdin()); let mut w: Box io::Result<()>> = match c.output_format { OutputFormat::Text => { - let mut t = TextWriter::new(io::stdout()); + let mut t = TextWriter::new(io::stdout()).set_escape_spaces(c.escape_spaces); if c.indent.into() { t.indentation = 2; } @@ -234,45 +354,76 @@ fn convert(c: Convert) -> io::Result<()> { Box::new(move |v| p.write(&mut IOValueDomainCodec, v)) } }; - loop { - let is_text = { - let peek_buf = source.read.peek_buf()?; - if peek_buf.is_empty() { + while let Some(value) = vs.next() { + let value = value?; + w(&value)?; + if let Some(limit) = c.limit { + if vs.count >= limit { return Ok(()); } - peek_buf[0] < 128 - }; + } + } + Ok(()) +} - let maybe_value: Option = if is_text { - match c.input_format { - InputFormat::AutoDetect | InputFormat::Text => (), - InputFormat::Binary => return Err(io::Error::new( - io::ErrorKind::InvalidData, "Expected binary input, saw text input")), - } - TextReader::new(&mut source, ViaCodec::new(IOValueDomainCodec)) - .next(c.annotations.into())? - } else { - match c.input_format { - InputFormat::AutoDetect | InputFormat::Binary => (), - InputFormat::Text => return Err(io::Error::new( - io::ErrorKind::InvalidData, "Expected text input, saw binary input")), - } - PackedReader::new(&mut source, IOValueDomainCodec) - .next(c.annotations.into())? - }; +impl Quote { + fn escape_spaces(&self) -> bool { + match &self.output { + QuotationOutput::ByteString => false, + QuotationOutput::String(s) | QuotationOutput::Symbol(s) => s.escape_spaces, + } + } +} - match maybe_value { - None => return Ok(()), - Some(value) => { - w(&value)?; - source.read.discard_to_pos(); - count += 1; - if let Some(limit) = c.limit { - if count >= limit { - return Ok(()); - } +fn output_one(q: &Quote, v: &IOValue) -> io::Result<()> { + match q.output_format { + OutputFormat::Binary => + PackedWriter::new(io::stdout()).write(&mut IOValueDomainCodec, v), + OutputFormat::Text => { + TextWriter::new(io::stdout()) + .set_escape_spaces(q.escape_spaces()) + .write(&mut IOValueDomainCodec, v)?; + println!(); + Ok(()) + } + } +} + +fn quote(q: Quote) -> io::Result<()> { + match &q.output { + QuotationOutput::ByteString => { + let mut buf = Vec::new(); + io::stdin().read_to_end(&mut buf)?; + output_one(&q, &IOValue::new(&buf[..])) + }, + QuotationOutput::String(s) | QuotationOutput::Symbol(s) => { + match s.input_terminator { + StringInputTerminator::EOF => { + let mut buf = String::new(); + io::stdin().read_to_string(&mut buf)?; + quote_chunk(&q, buf) } + StringInputTerminator::Newline => quote_terminated_strings(b'\n', &q, s), + StringInputTerminator::Nul => quote_terminated_strings(b'\0', &q, s), } } } } + +fn quote_chunk(q: &Quote, buf: String) -> io::Result<()> { + let v = if let QuotationOutput::Symbol(_) = q.output { + Value::symbol(&buf).wrap() + } else { + Value::String(buf).wrap() + }; + output_one(q, &v) +} + +fn quote_terminated_strings(delimiter: u8, q: &Quote, s: &StringQuotation) -> io::Result<()> { + let mut r = RollingBuffer::new(io::stdin()); + while let Some(chunk) = r.read_upto(delimiter, s.include_terminator)? { + quote_chunk(q, String::from_utf8(chunk).map_err( + |_| io::Error::new(io::ErrorKind::InvalidData, "Invalid UTF-8"))?)? + } + Ok(()) +}