Quotation

This commit is contained in:
Tony Garnock-Jones 2021-08-05 15:55:48 +02:00
parent cfd9898b4d
commit 96707352e6
1 changed files with 189 additions and 38 deletions

View File

@ -10,15 +10,18 @@ use clap_generate::{generate, generators};
use preserves::value::IOBinarySource;
use preserves::value::IOValue;
use preserves::value::IOValueDomainCodec;
use preserves::value::NestedValue;
use preserves::value::PackedReader;
use preserves::value::PackedWriter;
use preserves::value::Reader;
use preserves::value::TextReader;
use preserves::value::TextWriter;
use preserves::value::Value;
use preserves::value::ViaCodec;
use preserves::value::Writer;
use std::io;
use std::io::Read;
// #[derive(ArgEnum, Clone, Debug)]
// enum Encoding {
@ -77,6 +80,9 @@ struct Convert {
#[clap(long, short, arg_enum, default_value = "text")]
output_format: OutputFormat,
#[clap(long)]
escape_spaces: bool,
#[clap(long)]
limit: Option<usize>,
@ -87,13 +93,50 @@ struct Convert {
annotations: Boolish,
}
// Selects how the `quote` subcommand splits standard input into separate
// strings/symbols.
#[derive(ArgEnum, Clone, Debug)]
enum StringInputTerminator {
// All of standard input is read and quoted as one single value.
EOF,
// Input is split at b'\n'; each chunk is quoted as its own value.
Newline,
// Input is split at b'\0'; each chunk is quoted as its own value.
Nul,
}
// Options shared by the `String` and `Symbol` quotation outputs.
#[derive(Clap, Clone, Debug)]
struct StringQuotation {
// Where each input string ends; defaults to "eof" (the whole input is one string).
#[clap(long, arg_enum, default_value = "eof")]
input_terminator: StringInputTerminator,
// Passed as the `inclusive` flag of `read_upto`: when set, the terminator
// byte is kept at the end of each chunk. Not consulted for the EOF terminator.
#[clap(long)]
include_terminator: bool,
// Forwarded to `TextWriter::set_escape_spaces` when producing text output.
// NOTE(review): the exact escaping is defined by the preserves crate — confirm there.
#[clap(long)]
escape_spaces: bool,
}
// The kind of value the `quote` subcommand builds from standard input.
#[derive(Clap, Clone, Debug)]
enum QuotationOutput {
// Input text is wrapped as `Value::String`.
String(StringQuotation),
// All of standard input is read as raw bytes and turned into one value.
ByteString,
// Input text is wrapped via `Value::symbol`.
Symbol(StringQuotation),
}
// Command-line arguments of the `quote` subcommand.
#[derive(Clap, Clone, Debug)]
struct Quote {
// Encoding used when writing the quoted value(s) to stdout; defaults to "text".
#[clap(long, short, arg_enum, default_value = "text")]
output_format: OutputFormat,
// Nested subcommand choosing which kind of value to produce.
#[clap(subcommand)]
output: QuotationOutput,
}
#[derive(Clap, Clone, Debug)]
enum Subcommand {
Convert(Convert),
Completions {
#[clap(arg_enum, value_name = "dialect")]
dialect: CompletionDialect,
}
},
Quote(Quote),
}
#[derive(Clap, Clone, Debug)]
@ -117,6 +160,7 @@ fn main() -> io::Result<()> {
}
},
Subcommand::Convert(c) => convert(c)?,
Subcommand::Quote(q) => quote(q)?,
}
Ok(())
}
@ -164,6 +208,21 @@ impl<R: io::Read> RollingBuffer<R> {
/// Discards everything between the current discard mark and the current
/// position, i.e. `pos - discarded` units as counted by `discard`.
fn discard_to_pos(&mut self) {
    let consumed = self.pos - self.discarded;
    self.discard(consumed)
}
/// Reads bytes one at a time until `delimiter` is seen or input runs out.
///
/// Returns `Ok(Some(chunk))` for every delimiter-terminated chunk (the
/// delimiter itself is appended only when `inclusive` is true), and also for
/// a non-empty trailing chunk that ends at end of input without a delimiter.
/// Returns `Ok(None)` when input ends before any byte was collected.
fn read_upto(&mut self, delimiter: u8, inclusive: bool) -> io::Result<Option<Vec<u8>>> {
    let mut chunk = Vec::new();
    let mut byte = [0u8; 1];
    loop {
        if self.read(&mut byte)? != 1 {
            // No more input: report a trailing unterminated chunk, if any.
            return Ok(if chunk.is_empty() { None } else { Some(chunk) });
        }
        let current = byte[0];
        if current != delimiter {
            chunk.push(current);
            continue;
        }
        if inclusive {
            chunk.push(delimiter);
        }
        return Ok(Some(chunk));
    }
}
}
impl<R: io::Read> io::Seek for RollingBuffer<R> {
@ -213,13 +272,74 @@ impl<R: io::Read> io::Read for RollingBuffer<R> {
}
}
// TODO: extract this and RollingBuffer to some preserves utils module
/// Reads successive `IOValue`s from an underlying reader, choosing between
/// the text and binary decoders for each value by peeking at its first byte.
pub struct ValueStream<R: io::Read> {
/// Which encodings are acceptable; a mismatch with the detected encoding
/// makes `read` fail with `InvalidData`.
input_format: InputFormat,
/// Passed to the reader's `next` call for every value.
read_annotations: bool,
/// Binary source layered over a `RollingBuffer` wrapping the input.
source: IOBinarySource<RollingBuffer<R>>,
/// Number of values successfully read so far.
count: usize,
}
impl<R: io::Read> ValueStream<R> {
    /// Creates a stream over `r` that has not yet read any value.
    fn new(input_format: InputFormat, read_annotations: bool, r: R) -> Self {
        let source = IOBinarySource::new(RollingBuffer::new(r));
        ValueStream {
            input_format,
            read_annotations,
            source,
            count: 0,
        }
    }

    /// Reads the next value, or returns `Ok(None)` once no input remains.
    ///
    /// The first pending byte decides the decoder: a byte below 128 selects
    /// the text reader, anything else the packed (binary) reader. If that
    /// choice contradicts `input_format`, an `InvalidData` error is returned.
    /// After each successfully read value the consumed input is discarded
    /// and `count` is incremented.
    fn read(&mut self) -> io::Result<Option<IOValue>> {
        let looks_like_text = {
            let pending = self.source.read.peek_buf()?;
            if pending.is_empty() {
                return Ok(None);
            }
            pending[0] < 128
        };

        // Reject input whose detected encoding contradicts the requested one.
        match (looks_like_text, &self.input_format) {
            (true, InputFormat::Binary) => {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData, "Expected binary input, saw text input"));
            }
            (false, InputFormat::Text) => {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData, "Expected text input, saw binary input"));
            }
            (true, InputFormat::AutoDetect)
            | (true, InputFormat::Text)
            | (false, InputFormat::AutoDetect)
            | (false, InputFormat::Binary) => (),
        }

        let with_annotations = self.read_annotations;
        let parsed: Option<IOValue> = if looks_like_text {
            TextReader::new(&mut self.source, ViaCodec::new(IOValueDomainCodec))
                .next(with_annotations)?
        } else {
            PackedReader::new(&mut self.source, IOValueDomainCodec)
                .next(with_annotations)?
        };

        let value = match parsed {
            Some(value) => value,
            None => return Ok(None),
        };
        self.source.read.discard_to_pos();
        self.count += 1;
        Ok(Some(value))
    }
}
/// Iterating a `ValueStream` yields one `io::Result<IOValue>` per call to
/// `read`, and stops when `read` reports that no value remains.
impl<R: io::Read> std::iter::Iterator for ValueStream<R> {
    type Item = io::Result<IOValue>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.read() {
            Ok(Some(value)) => Some(Ok(value)),
            Ok(None) => None,
            Err(error) => Some(Err(error)),
        }
    }
}
fn convert(c: Convert) -> io::Result<()> {
let mut r = RollingBuffer::new(io::stdin());
let mut source = IOBinarySource::new(&mut r);
let mut count = 0;
let mut vs = ValueStream::new(c.input_format, c.annotations.into(), io::stdin());
let mut w: Box<dyn FnMut(&IOValue) -> io::Result<()>> = match c.output_format {
OutputFormat::Text => {
let mut t = TextWriter::new(io::stdout());
let mut t = TextWriter::new(io::stdout()).set_escape_spaces(c.escape_spaces);
if c.indent.into() {
t.indentation = 2;
}
@ -234,45 +354,76 @@ fn convert(c: Convert) -> io::Result<()> {
Box::new(move |v| p.write(&mut IOValueDomainCodec, v))
}
};
loop {
let is_text = {
let peek_buf = source.read.peek_buf()?;
if peek_buf.is_empty() {
while let Some(value) = vs.next() {
let value = value?;
w(&value)?;
if let Some(limit) = c.limit {
if vs.count >= limit {
return Ok(());
}
peek_buf[0] < 128
};
}
}
Ok(())
}
let maybe_value: Option<IOValue> = if is_text {
match c.input_format {
InputFormat::AutoDetect | InputFormat::Text => (),
InputFormat::Binary => return Err(io::Error::new(
io::ErrorKind::InvalidData, "Expected binary input, saw text input")),
}
TextReader::new(&mut source, ViaCodec::new(IOValueDomainCodec))
.next(c.annotations.into())?
} else {
match c.input_format {
InputFormat::AutoDetect | InputFormat::Binary => (),
InputFormat::Text => return Err(io::Error::new(
io::ErrorKind::InvalidData, "Expected text input, saw binary input")),
}
PackedReader::new(&mut source, IOValueDomainCodec)
.next(c.annotations.into())?
};
impl Quote {
    /// Returns the `escape_spaces` option of the selected string or symbol
    /// quotation; byte-string output carries no such option and yields `false`.
    fn escape_spaces(&self) -> bool {
        match self.output {
            QuotationOutput::String(ref options) | QuotationOutput::Symbol(ref options) => {
                options.escape_spaces
            }
            QuotationOutput::ByteString => false,
        }
    }
}
match maybe_value {
None => return Ok(()),
Some(value) => {
w(&value)?;
source.read.discard_to_pos();
count += 1;
if let Some(limit) = c.limit {
if count >= limit {
return Ok(());
}
/// Writes the single value `v` to standard output in the format chosen by
/// `q.output_format`.
///
/// Binary output is the packed encoding with nothing appended. Text output
/// honours `q.escape_spaces()` and is followed by a single newline.
///
/// # Errors
///
/// Returns any I/O error raised while encoding the value or while writing
/// the trailing newline.
fn output_one(q: &Quote, v: &IOValue) -> io::Result<()> {
    match q.output_format {
        OutputFormat::Binary =>
            PackedWriter::new(io::stdout()).write(&mut IOValueDomainCodec, v),
        OutputFormat::Text => {
            TextWriter::new(io::stdout())
                .set_escape_spaces(q.escape_spaces())
                .write(&mut IOValueDomainCodec, v)?;
            // `println!` panics when stdout cannot be written (for example a
            // closed pipe); write the newline explicitly so the failure is
            // reported through the returned `io::Result` instead.
            io::Write::write_all(&mut io::stdout(), b"\n")
        }
    }
}
/// Entry point of the `quote` subcommand: turns standard input into one or
/// more values and writes them to standard output.
///
/// * `ByteString`: all of stdin becomes one value built from the raw bytes.
/// * `String` / `Symbol` with the EOF terminator: all of stdin is read as
///   UTF-8 text and quoted as one value.
/// * `String` / `Symbol` with the newline or NUL terminator: stdin is split
///   at that byte and every chunk is quoted separately.
fn quote(q: Quote) -> io::Result<()> {
    let string_options = match &q.output {
        QuotationOutput::ByteString => {
            let mut bytes = Vec::new();
            io::stdin().read_to_end(&mut bytes)?;
            return output_one(&q, &IOValue::new(&bytes[..]));
        }
        QuotationOutput::String(s) | QuotationOutput::Symbol(s) => s,
    };

    let delimiter = match string_options.input_terminator {
        StringInputTerminator::EOF => {
            let mut text = String::new();
            io::stdin().read_to_string(&mut text)?;
            return quote_chunk(&q, text);
        }
        StringInputTerminator::Newline => b'\n',
        StringInputTerminator::Nul => b'\0',
    };
    quote_terminated_strings(delimiter, &q, string_options)
}
/// Wraps `buf` as a symbol when `q` requests symbol output, otherwise as a
/// string, and writes the resulting value with `output_one`.
fn quote_chunk(q: &Quote, buf: String) -> io::Result<()> {
    let quoted = match q.output {
        QuotationOutput::Symbol(_) => Value::symbol(&buf).wrap(),
        QuotationOutput::String(_) | QuotationOutput::ByteString => Value::String(buf).wrap(),
    };
    output_one(q, &quoted)
}
/// Splits standard input at `delimiter` and quotes every chunk in turn.
///
/// `s.include_terminator` controls whether the delimiter byte stays at the
/// end of each chunk. A chunk that is not valid UTF-8 stops processing with
/// an `InvalidData` error.
fn quote_terminated_strings(delimiter: u8, q: &Quote, s: &StringQuotation) -> io::Result<()> {
    let mut input = RollingBuffer::new(io::stdin());
    loop {
        let raw = match input.read_upto(delimiter, s.include_terminator)? {
            Some(bytes) => bytes,
            None => return Ok(()),
        };
        let text = String::from_utf8(raw)
            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid UTF-8"))?;
        quote_chunk(q, text)?;
    }
}