preserves/implementations/rust/preserves-tools/src/bin/preserves-tool.rs

483 lines
14 KiB
Rust

use bytes::Buf;
use bytes::BufMut;
use bytes::BytesMut;
use clap::ArgEnum;
use clap::Clap;
use clap::IntoApp;
use clap_generate::{generate, generators};
use preserves::value::IOBinarySource;
use preserves::value::IOValue;
use preserves::value::IOValueDomainCodec;
use preserves::value::NestedValue;
use preserves::value::PackedReader;
use preserves::value::PackedWriter;
use preserves::value::Reader;
use preserves::value::Set;
use preserves::value::TextReader;
use preserves::value::TextWriter;
use preserves::value::Value;
use preserves::value::ViaCodec;
use preserves::value::Writer;
use std::iter::FromIterator;
use std::io;
use std::io::Read;
// #[derive(ArgEnum, Clone, Debug)]
// enum Encoding {
// None,
// Base64,
// Hex,
// }
// Syntax expected on the input stream of the `convert` subcommand.
#[derive(ArgEnum, Clone, Debug)]
enum InputFormat {
// Decide per value: a first byte below 128 is parsed as text, anything else as binary.
AutoDetect,
// Accept text only; a value whose first byte is 128 or above is rejected as binary.
Text,
// Accept binary only; a value whose first byte is below 128 is rejected as text.
Binary,
}
// Syntax used when writing values to standard output.
#[derive(ArgEnum, Clone, Debug)]
enum OutputFormat {
// Written with `TextWriter`, each value followed by a newline.
Text,
// Written with `PackedWriter`.
Binary,
// Strings and symbols are printed as their bare contents plus a newline
// (see `print_unquoted`); values of any other kind produce no output.
Unquoted,
}
// Shell for which the `completions` subcommand emits a completion script.
// Each variant selects the `clap_generate` generator of the same name in `main`.
#[derive(ArgEnum, Clone, Debug)]
enum CompletionDialect {
Bash,
Zsh,
PowerShell,
Fish,
Elvish,
}
// An on/off command-line switch that accepts several spellings.
// Converted to `bool` through the `From<Boolish>` implementation.
#[derive(ArgEnum, Clone, Copy, Debug)]
enum Boolish {
// The "off" state; the aliases below are additional accepted spellings.
#[clap(alias = "no", alias = "n", alias = "off", alias = "0", alias = "false")]
Disabled,
// The "on" state; the aliases below are additional accepted spellings.
#[clap(alias = "yes", alias = "y", alias = "on", alias = "1", alias = "true")]
Enabled,
}
/// Lets a [`Boolish`] switch be used wherever a plain `bool` is wanted.
impl From<Boolish> for bool {
    /// `Enabled` becomes `true`; `Disabled` becomes `false`.
    fn from(b: Boolish) -> Self {
        match b {
            Boolish::Enabled => true,
            Boolish::Disabled => false,
        }
    }
}
// How the matches produced by the `--select` expression are grouped before output.
#[derive(ArgEnum, Clone, Debug)]
enum SelectOutput {
// Keep the matches as the sequence returned by the selector.
Sequence,
// Gather the matches into a `Set` (built with `Set::from_iter`) first.
Set,
}
// Options of the `convert` subcommand, which reads values from standard input,
// applies a selector to each and writes the matches to standard output.
#[derive(Clap, Clone, Debug)]
struct Convert {
// #[clap(long, arg_enum, default_value = "none")]
// input_encoding: Encoding,
// #[clap(long, arg_enum, default_value = "none")]
// output_encoding: Encoding,
// Syntax expected on input.
#[clap(long, short, arg_enum, default_value = "auto-detect")]
input_format: InputFormat,
// Syntax produced on output.
#[clap(long, short, arg_enum, default_value = "text")]
output_format: OutputFormat,
// Forwarded to `TextWriter::set_escape_spaces`; only consulted for text output.
#[clap(long)]
escape_spaces: bool,
// Conversion stops once the number of input values read reaches this figure.
// Absent means no limit.
#[clap(long)]
limit: Option<usize>,
// When enabled, the text writer's `indentation` is set to 2.
#[clap(long, arg_enum, value_name = "on/off", default_value = "on")]
indent: Boolish,
// Selector executed against every input value; its matches are what gets written.
#[clap(long, default_value="*")]
select: preserves_path::Node,
// Grouping applied to the matches of each input value.
#[clap(long, arg_enum, default_value="sequence")]
select_output: SelectOutput,
// When set, the matches for one input value are written as a single value
// (a sequence or a set, per `select_output`) instead of one value per match.
#[clap(long)]
collect: bool,
// Passed to the reader's `next` call for every input value.
#[clap(long, arg_enum, value_name = "on/off", default_value = "on")]
read_annotations: Boolish,
// When disabled, `strip_annotations` is applied to each value before it is written
// as text or binary.
#[clap(long, arg_enum, value_name = "on/off", default_value = "on")]
write_annotations: Boolish,
}
// How the `quote` subcommand splits standard input into separate strings/symbols.
#[derive(ArgEnum, Clone, Debug)]
enum StringInputTerminator {
// The whole input is read as one string.
EOF,
// Input is split at each b'\n' byte.
Newline,
// Input is split at each b'\0' byte.
Nul,
}
// Options shared by the `string` and `symbol` forms of the `quote` subcommand.
#[derive(Clap, Clone, Debug)]
struct StringQuotation {
// Where one input string ends and the next begins.
#[clap(long, arg_enum, default_value = "eof")]
input_terminator: StringInputTerminator,
// When set, the terminating byte is kept at the end of each chunk
// (passed as `inclusive` to `RollingBuffer::read_upto`).
#[clap(long)]
include_terminator: bool,
// Forwarded to `TextWriter::set_escape_spaces` when the output format is text.
#[clap(long)]
escape_spaces: bool,
}
// Kind of value the `quote` subcommand builds from standard input.
#[derive(Clap, Clone, Debug)]
enum QuotationOutput {
// Input is treated as UTF-8 text and emitted as string value(s).
String(StringQuotation),
// The entire input is emitted as one byte string.
ByteString,
// Input is treated as UTF-8 text and emitted as symbol value(s).
Symbol(StringQuotation),
}
// Options of the `quote` subcommand, which wraps raw standard input in a value.
#[derive(Clap, Clone, Debug)]
struct Quote {
// Syntax used to write the resulting value(s).
#[clap(long, short, arg_enum, default_value = "text")]
output_format: OutputFormat,
// Which kind of value to produce, together with its kind-specific options.
#[clap(subcommand)]
output: QuotationOutput,
}
// The operations offered by the tool; `main` dispatches on this.
#[derive(Clap, Clone, Debug)]
enum Subcommand {
// Read values from standard input and rewrite them (handled by `convert`).
Convert(Convert),
// Write a shell completion script for the chosen dialect to standard output.
Completions {
#[clap(arg_enum, value_name = "dialect")]
dialect: CompletionDialect,
},
// Turn raw standard input into string, byte string or symbol values (handled by `quote`).
Quote(Quote),
}
// Top-level command line: nothing but the subcommand to run.
#[derive(Clap, Clone, Debug)]
#[clap(version)]
struct CommandLine {
#[clap(subcommand)]
command: Subcommand,
}
/// Program entry point: parses the command line and runs the chosen subcommand.
///
/// Any I/O error reported by `convert` or `quote` is returned to the runtime.
fn main() -> io::Result<()> {
    let cli = CommandLine::parse();
    match cli.command {
        Subcommand::Convert(c) => convert(c),
        Subcommand::Quote(q) => quote(q),
        Subcommand::Completions { dialect } => {
            // Name under which the generated script registers its completions.
            let bin_name = "preserves-tool";
            let mut app = CommandLine::into_app();
            let mut out = io::stdout();
            match dialect {
                CompletionDialect::Bash => {
                    generate::<generators::Bash, _>(&mut app, bin_name, &mut out)
                }
                CompletionDialect::Zsh => {
                    generate::<generators::Zsh, _>(&mut app, bin_name, &mut out)
                }
                CompletionDialect::PowerShell => {
                    generate::<generators::PowerShell, _>(&mut app, bin_name, &mut out)
                }
                CompletionDialect::Fish => {
                    generate::<generators::Fish, _>(&mut app, bin_name, &mut out)
                }
                CompletionDialect::Elvish => {
                    generate::<generators::Elvish, _>(&mut app, bin_name, &mut out)
                }
            }
            Ok(())
        }
    }
}
/// A reader wrapper that retains the bytes it has pulled from `r`, so that the
/// read cursor can be moved around within them until they are explicitly discarded.
///
/// Positions (`pos`, and the values used with `io::Seek`) are absolute offsets
/// from the start of the stream, not indices into `buf`.
struct RollingBuffer<R: io::Read> {
/// The underlying reader that more bytes are pulled from.
r: R,
/// Total number of bytes dropped from the front of `buf` so far; this is the
/// absolute position of the first byte still held in `buf`.
discarded: usize,
/// Absolute position of the read cursor; `pos - discarded` indexes into `buf`.
pos: usize,
/// Bytes read from `r` that have not been discarded yet.
buf: BytesMut,
}
impl<R: io::Read> RollingBuffer<R> {
    /// Wraps `r` with an empty buffer and the cursor at position 0.
    fn new(r: R) -> Self {
        RollingBuffer {
            r,
            buf: BytesMut::new(),
            pos: 0,
            discarded: 0,
        }
    }

    /// Performs one read of up to 8192 bytes from the underlying reader and
    /// appends whatever arrived to the buffer.
    ///
    /// Returns the number of bytes appended, i.e. what the underlying reader reported.
    fn read_more(&mut self) -> io::Result<usize> {
        let mut scratch = [0u8; 8192];
        let filled = self.r.read(&mut scratch)?;
        self.buf.put(&scratch[..filled]);
        Ok(filled)
    }

    /// Returns the buffered bytes from the cursor onwards without moving the cursor.
    ///
    /// If nothing is buffered at the cursor, one `read_more` is attempted first;
    /// the returned slice is empty when that read produced no bytes.
    fn peek_buf(&mut self) -> io::Result<&[u8]> {
        if self.pos == self.rhs() {
            self.read_more()?;
        }
        let start = self.pos - self.discarded;
        Ok(&self.buf[start..])
    }

    /// Absolute position just past the last buffered byte.
    fn rhs(&self) -> usize {
        self.discarded + self.buf.remaining()
    }

    /// Drops `count` bytes from the front of the buffer and records them as discarded.
    fn discard(&mut self, count: usize) {
        self.buf.advance(count);
        self.discarded += count;
    }

    /// Drops every buffered byte that lies before the cursor.
    fn discard_to_pos(&mut self) {
        let consumed = self.pos - self.discarded;
        self.discard(consumed)
    }

    /// Reads bytes one at a time until `delimiter` is seen or the input ends.
    ///
    /// The delimiter is appended to the returned chunk only when `inclusive` is
    /// set. Returns `None` when the input ends before any byte was collected.
    fn read_upto(&mut self, delimiter: u8, inclusive: bool) -> io::Result<Option<Vec<u8>>> {
        let mut collected = Vec::new();
        let mut byte = [0u8; 1];
        loop {
            if self.read(&mut byte)? != 1 {
                break;
            }
            let current = byte[0];
            if current != delimiter {
                collected.push(current);
                continue;
            }
            if inclusive {
                collected.push(delimiter);
            }
            return Ok(Some(collected));
        }
        if collected.is_empty() {
            Ok(None)
        } else {
            Ok(Some(collected))
        }
    }
}
/// Moves the read cursor within the bytes that are still buffered.
impl<R: io::Read> io::Seek for RollingBuffer<R> {
    /// Repositions the cursor and returns the new absolute position.
    ///
    /// `SeekFrom::Start` and `SeekFrom::Current` are supported; the target must lie
    /// between the first byte still buffered and the end of the buffered data,
    /// inclusive. No bytes are read from the underlying reader.
    ///
    /// # Errors
    ///
    /// * `Unsupported` for `SeekFrom::End`, since the stream has no known end.
    /// * `InvalidInput` when the target lies outside the buffered range, including
    ///   relative seeks whose arithmetic would underflow or overflow.
    fn seek(&mut self, offset: io::SeekFrom) -> io::Result<u64> {
        fn beyond_end() -> io::Error {
            io::Error::new(
                io::ErrorKind::InvalidInput, "Attempt to seek beyond end of buffer")
        }
        fn before_start() -> io::Error {
            io::Error::new(
                io::ErrorKind::InvalidInput, "Attempt to seek before start of buffer")
        }

        // Work in u64 with checked arithmetic so that an out-of-range request is
        // reported as an error instead of panicking or wrapping around.
        let target: u64 = match offset {
            io::SeekFrom::Start(absolute) => absolute,
            io::SeekFrom::Current(delta) => {
                let current = self.pos as u64;
                if delta >= 0 {
                    current.checked_add(delta as u64).ok_or_else(beyond_end)?
                } else {
                    // `unsigned_abs` is well-defined even for `i64::MIN`.
                    current.checked_sub(delta.unsigned_abs()).ok_or_else(before_start)?
                }
            }
            io::SeekFrom::End(_) => return Err(io::Error::new(
                io::ErrorKind::Unsupported, "Cannot seek wrt end on open-ended stream")),
        };
        if target > self.rhs() as u64 {
            return Err(beyond_end());
        }
        if target < self.discarded as u64 {
            return Err(before_start());
        }
        // Lossless: `target` is no larger than `rhs()`, which is itself a usize.
        self.pos = target as usize;
        Ok(target)
    }

    /// Returns the cursor's absolute position without touching the stream.
    fn stream_position(&mut self) -> io::Result<u64> {
        Ok(self.pos as u64)
    }
}
/// Reads from the buffered bytes at the cursor, refilling from the underlying
/// reader only when nothing is buffered there.
impl<R: io::Read> io::Read for RollingBuffer<R> {
    /// Copies as many already-buffered bytes as fit into `buf` and advances the
    /// cursor past them. Consumed bytes stay buffered until they are discarded.
    ///
    /// Returns `Ok(0)` when `buf` is empty, or when nothing is buffered at the
    /// cursor and the underlying reader yields no more bytes.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // A zero-length request must not touch the underlying reader: without
        // this guard the refill below would be retried until end of input.
        if buf.is_empty() {
            return Ok(0);
        }
        let start = self.pos - self.discarded;
        // One refill is enough: a non-zero `read_more` always makes bytes available.
        if self.buf.remaining() == start && self.read_more()? == 0 {
            return Ok(0);
        }
        let n = std::cmp::min(self.buf.remaining() - start, buf.len());
        buf[..n].copy_from_slice(&self.buf[start..start + n]);
        self.pos += n;
        Ok(n)
    }
}
// TODO: extract this and RollingBuffer to some preserves utils module
/// Reads a succession of values from a byte stream, deciding for each one
/// whether it is written in text or binary syntax.
pub struct ValueStream<R: io::Read> {
/// Which syntaxes are acceptable; the other one is reported as an error.
input_format: InputFormat,
/// Passed to the reader's `next` call for every value.
read_annotations: bool,
/// The input, wrapped in a `RollingBuffer` so that its leading byte can be
/// inspected before a reader is chosen.
source: IOBinarySource<RollingBuffer<R>>,
/// Number of values successfully read so far.
count: usize,
}
impl<R: io::Read> ValueStream<R> {
    /// Creates a stream over `r` that has read no values yet.
    fn new(input_format: InputFormat, read_annotations: bool, r: R) -> Self {
        let source = IOBinarySource::new(RollingBuffer::new(r));
        ValueStream {
            input_format,
            read_annotations,
            source,
            count: 0,
        }
    }

    /// Reads the next value, choosing the parser from the first pending byte:
    /// below 128 means text syntax, otherwise binary syntax.
    ///
    /// Returns `Ok(None)` when no input is pending or the chosen reader yields no
    /// value. After a successful read the consumed bytes are discarded from the
    /// buffer and `count` is incremented.
    ///
    /// # Errors
    ///
    /// `InvalidData` when the detected syntax is excluded by `input_format`, plus
    /// whatever the underlying source or reader reports.
    fn read(&mut self) -> io::Result<Option<IOValue>> {
        let looks_like_text = match self.source.read.peek_buf()?.first() {
            Some(&lead) => lead < 128,
            None => return Ok(None),
        };

        let parsed: Option<IOValue> = if looks_like_text {
            if let InputFormat::Binary = self.input_format {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData, "Expected binary input, saw text input"));
            }
            TextReader::new(&mut self.source, ViaCodec::new(IOValueDomainCodec))
                .next(self.read_annotations)?
        } else {
            if let InputFormat::Text = self.input_format {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData, "Expected text input, saw binary input"));
            }
            PackedReader::new(&mut self.source, IOValueDomainCodec)
                .next(self.read_annotations)?
        };

        let value = match parsed {
            Some(value) => value,
            None => return Ok(None),
        };
        // The value is complete, so the bytes it was parsed from can go.
        self.source.read.discard_to_pos();
        self.count += 1;
        Ok(Some(value))
    }
}
/// Iterating a `ValueStream` yields each value (or read error) in turn.
impl<R: io::Read> std::iter::Iterator for ValueStream<R> {
    type Item = io::Result<IOValue>;

    /// Yields the result of one `read`; `None` once `read` reports no further value.
    fn next(&mut self) -> Option<Self::Item> {
        match self.read() {
            Ok(Some(value)) => Some(Ok(value)),
            Ok(None) => None,
            Err(error) => Some(Err(error)),
        }
    }
}
/// Prints the bare contents of a string or symbol value to standard output,
/// followed by a newline. Values of any other kind print nothing.
fn print_unquoted(v: &IOValue) {
    match v.value() {
        Value::String(text) => println!("{}", text),
        Value::Symbol(name) => println!("{}", name),
        _ => {}
    }
}
/// Runs the `convert` subcommand: reads values from standard input, applies the
/// `--select` expression to each and writes the matches to standard output in
/// the requested format.
///
/// At most `c.limit` input values are read when a limit is given. The limit is
/// checked before each read, so a limit of 0 reads and writes nothing.
///
/// # Errors
///
/// Returns the first error raised while reading a value or writing a match.
fn convert(c: Convert) -> io::Result<()> {
    let mut vs = ValueStream::new(c.input_format, c.read_annotations.into(), io::stdin());
    let write_ann: bool = c.write_annotations.into();

    // One writer closure per output format, so the loop below is format-agnostic.
    let mut w: Box<dyn FnMut(&IOValue) -> io::Result<()>> = match c.output_format {
        OutputFormat::Text => {
            let mut t = TextWriter::new(io::stdout()).set_escape_spaces(c.escape_spaces);
            if c.indent.into() {
                t.indentation = 2;
            }
            Box::new(move |v| {
                if write_ann {
                    t.write(&mut IOValueDomainCodec, v)?;
                } else {
                    t.write(&mut IOValueDomainCodec, &v.strip_annotations::<IOValue>())?;
                }
                println!();
                Ok(())
            })
        }
        OutputFormat::Binary => {
            let mut p = PackedWriter::new(io::stdout());
            Box::new(move |v| if write_ann {
                p.write(&mut IOValueDomainCodec, v)
            } else {
                p.write(&mut IOValueDomainCodec, &v.strip_annotations::<IOValue>())
            })
        }
        OutputFormat::Unquoted => Box::new(|v| {
            print_unquoted(v);
            Ok(())
        }),
    };

    loop {
        // Checked before reading so that `--limit 0` consumes no input at all.
        if let Some(limit) = c.limit {
            if vs.count >= limit {
                break;
            }
        }
        let value = match vs.next() {
            Some(item) => item?,
            None => break,
        };
        let matches = c.select.exec(&value);
        if c.collect {
            // All matches for this input value are emitted as one compound value.
            match c.select_output {
                SelectOutput::Sequence => w(&IOValue::new(matches))?,
                SelectOutput::Set => w(&IOValue::new(Set::from_iter(matches)))?,
            }
        } else {
            match c.select_output {
                SelectOutput::Sequence => for v in matches { w(&v)?; },
                SelectOutput::Set => for v in Set::from_iter(matches) { w(&v)?; },
            }
        }
    }
    Ok(())
}
impl Quote {
    /// Whether spaces should be escaped in text output: taken from the string or
    /// symbol options, and always `false` for byte strings.
    fn escape_spaces(&self) -> bool {
        match &self.output {
            QuotationOutput::String(opts) | QuotationOutput::Symbol(opts) => opts.escape_spaces,
            QuotationOutput::ByteString => false,
        }
    }
}
/// Writes a single value to standard output in the format selected by `q`.
///
/// Text output is followed by a newline; unquoted output goes through
/// `print_unquoted`.
fn output_one(q: &Quote, v: &IOValue) -> io::Result<()> {
    match q.output_format {
        OutputFormat::Text => {
            TextWriter::new(io::stdout())
                .set_escape_spaces(q.escape_spaces())
                .write(&mut IOValueDomainCodec, v)?;
            println!();
        }
        OutputFormat::Binary => {
            PackedWriter::new(io::stdout()).write(&mut IOValueDomainCodec, v)?;
        }
        OutputFormat::Unquoted => print_unquoted(v),
    }
    Ok(())
}
/// Runs the `quote` subcommand: turns raw standard input into one byte string,
/// or into one or more strings/symbols, and writes the result.
///
/// For strings and symbols the input is either taken whole (terminator `eof`)
/// or split at newline or NUL bytes.
fn quote(q: Quote) -> io::Result<()> {
    let opts = match &q.output {
        QuotationOutput::ByteString => {
            let mut bytes = Vec::new();
            io::stdin().read_to_end(&mut bytes)?;
            return output_one(&q, &IOValue::new(&bytes[..]));
        }
        QuotationOutput::String(s) | QuotationOutput::Symbol(s) => s,
    };

    let delimiter = match opts.input_terminator {
        StringInputTerminator::EOF => {
            let mut text = String::new();
            io::stdin().read_to_string(&mut text)?;
            return quote_chunk(&q, text);
        }
        StringInputTerminator::Newline => b'\n',
        StringInputTerminator::Nul => b'\0',
    };
    quote_terminated_strings(delimiter, &q, opts)
}
/// Wraps `buf` as a symbol when symbol output was requested, otherwise as a
/// string, and writes it with `output_one`.
fn quote_chunk(q: &Quote, buf: String) -> io::Result<()> {
    let value = if let QuotationOutput::Symbol(_) = q.output {
        IOValue::symbol(&buf)
    } else {
        IOValue::new(buf)
    };
    output_one(q, &value)
}
/// Splits standard input at every `delimiter` byte and quotes each chunk in turn.
///
/// The delimiter is kept at the end of a chunk when `s.include_terminator` is set.
///
/// # Errors
///
/// `InvalidData` when a chunk is not valid UTF-8, plus any I/O error from
/// reading the input or writing the output.
fn quote_terminated_strings(delimiter: u8, q: &Quote, s: &StringQuotation) -> io::Result<()> {
    let mut r = RollingBuffer::new(io::stdin());
    while let Some(chunk) = r.read_upto(delimiter, s.include_terminator)? {
        // The chunk has been copied out, so drop the bytes behind the cursor;
        // otherwise the whole input would stay buffered until the loop ends.
        r.discard_to_pos();
        let text = String::from_utf8(chunk).map_err(
            |_| io::Error::new(io::ErrorKind::InvalidData, "Invalid UTF-8"))?;
        quote_chunk(q, text)?;
    }
    Ok(())
}