use bytes::Buf; use bytes::BufMut; use bytes::BytesMut; use clap::{value_parser, ArgEnum, Command, IntoApp, Parser}; use clap_complete::{generate, Generator, Shell}; use preserves::value::IOBinarySource; use preserves::value::IOValue; use preserves::value::IOValueDomainCodec; use preserves::value::NestedValue; use preserves::value::PackedReader; use preserves::value::PackedWriter; use preserves::value::Reader; use preserves::value::Set; use preserves::value::TextReader; use preserves::value::TextWriter; use preserves::value::Value; use preserves::value::ViaCodec; use preserves::value::Writer; use preserves::value::text::writer::CommaStyle; use std::io; use std::io::Read; use std::iter::FromIterator; // #[derive(ArgEnum, Clone, Debug)] // enum Encoding { // None, // Base64, // Hex, // } #[derive(ArgEnum, Clone, Debug)] enum InputFormat { AutoDetect, Text, Binary, } #[derive(ArgEnum, Clone, Debug)] enum OutputFormat { Text, Binary, Unquoted, } #[derive(ArgEnum, Clone, Debug)] enum CommasFormat { None, Separating, Terminating, } #[derive(ArgEnum, Clone, Copy, Debug)] enum Boolish { #[clap(alias = "no", alias = "n", alias = "off", alias = "0", alias = "false")] Disabled, #[clap(alias = "yes", alias = "y", alias = "on", alias = "1", alias = "true")] Enabled, } impl From for bool { fn from(b: Boolish) -> Self { matches!(b, Boolish::Enabled) } } impl From for CommaStyle { fn from(commas: CommasFormat) -> Self { match commas { CommasFormat::None => CommaStyle::None, CommasFormat::Separating => CommaStyle::Separating, CommasFormat::Terminating => CommaStyle::Terminating, } } } #[derive(ArgEnum, Clone, Debug)] enum SelectOutput { Sequence, Set, } #[derive(Clone, Debug, Parser)] struct Convert { // #[clap(long, arg_enum, default_value = "none")] // input_encoding: Encoding, // #[clap(long, arg_enum, default_value = "none")] // output_encoding: Encoding, #[clap(long, short, arg_enum, default_value = "auto-detect")] input_format: InputFormat, #[clap(long, short, arg_enum, default_value = "text")] output_format: OutputFormat, #[clap(long)] escape_spaces: bool, #[clap(long, short, arg_enum, default_value = "none")] commas: CommasFormat, #[clap(long)] limit: Option, #[clap(long, arg_enum, value_name = "on/off", default_value = "on")] indent: Boolish, #[clap(long = "select", default_value = "*")] select_expr: String, #[clap(long, arg_enum, default_value = "sequence")] select_output: SelectOutput, #[clap(long)] collect: bool, #[clap(long, arg_enum, value_name = "on/off", default_value = "on")] read_annotations: Boolish, #[clap(long, arg_enum, value_name = "on/off", default_value = "on")] write_annotations: Boolish, #[clap(long, value_name = "filename")] bundle: Vec, // #[clap(long)] // schema: Option, } #[derive(ArgEnum, Clone, Debug)] enum StringInputTerminator { EOF, Newline, Nul, } #[derive(Clone, Debug, Parser)] struct StringQuotation { #[clap(long, arg_enum, default_value = "eof")] input_terminator: StringInputTerminator, #[clap(long)] include_terminator: bool, #[clap(long)] escape_spaces: bool, } #[derive(Clone, Debug, Parser)] enum QuotationOutput { String(StringQuotation), ByteString, Symbol(StringQuotation), } #[derive(Clone, Debug, Parser)] struct Quote { #[clap(long, short, arg_enum, default_value = "text")] output_format: OutputFormat, #[clap(subcommand)] output: QuotationOutput, } #[derive(Clone, Debug, Parser)] enum Subcommand { Completions { #[clap(value_parser=value_parser!(Shell))] shell: Shell, }, Convert(Convert), Quote(Quote), } #[derive(Clone, Debug, Parser)] #[clap(name = "preserves-tool")] #[clap(version)] #[clap(args_conflicts_with_subcommands = true)] /// Swiss-army knife tool for working with Preserves data. /// See https://preserves.dev/. /// If no subcommand is specified, the default subcommand will be `convert`. struct CommandLine { #[clap(subcommand)] command: Option, #[clap(flatten, next_help_heading="OPTIONS FOR DEFAULT SUBCOMMAND convert")] convert: Convert, } fn print_completions(gen: G, cmd: &mut Command) { generate(gen, cmd, cmd.get_name().to_string(), &mut io::stdout()); } fn main() -> io::Result<()> { let args = CommandLine::parse(); Ok(match args.command { Some(subcommand) => match subcommand { Subcommand::Completions { shell } => { let mut cmd = CommandLine::into_app(); print_completions(shell, &mut cmd); } Subcommand::Convert(c) => convert(c)?, Subcommand::Quote(q) => quote(q)?, } None => convert(args.convert)?, }) } struct RollingBuffer { r: R, discarded: usize, pos: usize, buf: BytesMut, } impl RollingBuffer { fn new(r: R) -> Self { RollingBuffer { r, discarded: 0, pos: 0, buf: BytesMut::new(), } } fn read_more(&mut self) -> io::Result { let mut buf = [0; 8192]; let n = self.r.read(&mut buf)?; self.buf.put(&buf[..n]); Ok(n) } fn peek_buf(&mut self) -> io::Result<&[u8]> { if self.rhs() == self.pos { let _ = self.read_more()?; } return Ok(&self.buf[self.pos - self.discarded..]); } fn rhs(&self) -> usize { self.buf.remaining() + self.discarded } fn discard(&mut self, count: usize) { self.buf.advance(count); self.discarded += count; } fn discard_to_pos(&mut self) { self.discard(self.pos - self.discarded) } fn read_upto(&mut self, delimiter: u8, inclusive: bool) -> io::Result>> { let mut result = Vec::new(); let mut buf = [0; 1]; while self.read(&mut buf)? == 1 { if buf[0] == delimiter { if inclusive { result.push(delimiter); } return Ok(Some(result)); } result.push(buf[0]); } Ok(if result.is_empty() { None } else { Some(result) }) } } impl io::Seek for RollingBuffer { fn seek(&mut self, offset: io::SeekFrom) -> io::Result { let new_position = match offset { io::SeekFrom::Current(delta) => { if delta >= 0 { self.pos + delta as usize } else { self.pos - (-delta) as usize } } io::SeekFrom::End(_) => Err(io::Error::new( io::ErrorKind::Unsupported, "Cannot seek wrt end on open-ended stream", ))?, io::SeekFrom::Start(new_position) => new_position as usize, }; if new_position > self.rhs() { Err(io::Error::new( io::ErrorKind::InvalidInput, "Attempt to seek beyond end of buffer", ))?; } if new_position < self.discarded { Err(io::Error::new( io::ErrorKind::InvalidInput, "Attempt to seek before start of buffer", ))?; } self.pos = new_position; Ok(new_position as u64) } fn stream_position(&mut self) -> io::Result { Ok(self.pos as u64) } } impl io::Read for RollingBuffer { fn read(&mut self, buf: &mut [u8]) -> io::Result { let i = self.pos - self.discarded; loop { let n = std::cmp::min(self.buf.remaining() - i, buf.len()); if n == 0 { if self.read_more()? == 0 { return Ok(0); } continue; } let _ = &buf[..n].copy_from_slice(&self.buf[i..i + n]); self.pos += n; return Ok(n); } } } // TODO: extract this and RollingBuffer to some preserves utils module pub struct ValueStream { input_format: InputFormat, read_annotations: bool, source: IOBinarySource>, count: usize, } impl ValueStream { fn new(input_format: InputFormat, read_annotations: bool, r: R) -> Self { ValueStream { input_format, read_annotations, source: IOBinarySource::new(RollingBuffer::new(r)), count: 0, } } fn read(&mut self) -> io::Result> { let is_text = { let peek_buf = self.source.read.peek_buf()?; if peek_buf.is_empty() { return Ok(None); } peek_buf[0] < 128 }; let maybe_value: Option = if is_text { match self.input_format { InputFormat::AutoDetect | InputFormat::Text => (), InputFormat::Binary => { return Err(io::Error::new( io::ErrorKind::InvalidData, "Expected binary input, saw text input", )) } } TextReader::new(&mut self.source, ViaCodec::new(IOValueDomainCodec)) .next(self.read_annotations)? } else { match self.input_format { InputFormat::AutoDetect | InputFormat::Binary => (), InputFormat::Text => { return Err(io::Error::new( io::ErrorKind::InvalidData, "Expected text input, saw binary input", )) } } PackedReader::new(&mut self.source, IOValueDomainCodec).next(self.read_annotations)? }; match maybe_value { None => return Ok(None), Some(value) => { self.source.read.discard_to_pos(); self.count += 1; Ok(Some(value)) } } } } impl std::iter::Iterator for ValueStream { type Item = io::Result; fn next(&mut self) -> Option { self.read().transpose() } } fn print_unquoted(v: &IOValue) { match v.value() { Value::String(s) => println!("{}", &s), Value::Symbol(s) => println!("{}", &s), _ => (), } } fn convert(c: Convert) -> io::Result<()> { let mut env = preserves_path::Env::new(); for f in c.bundle.iter() { env.load_bundle(f)?; } let select = preserves_path::Node::from_str(&env, &c.select_expr).map_err(|e| { io::Error::new( io::ErrorKind::InvalidData, format!("Invalid select expression: {}: {:?}", e, c.select_expr), ) })?; let mut vs = ValueStream::new(c.input_format, c.read_annotations.into(), io::stdin()); let write_ann: bool = c.write_annotations.into(); let mut w: Box io::Result<()>> = match c.output_format { OutputFormat::Text => { let mut t = TextWriter::new(io::stdout()) .set_escape_spaces(c.escape_spaces) .set_comma_style(c.commas.into()); if c.indent.into() { t.indentation = 2; } Box::new(move |v| { if write_ann { t.write(&mut IOValueDomainCodec, v)?; } else { t.write(&mut IOValueDomainCodec, &v.strip_annotations::())?; } println!(); Ok(()) }) } OutputFormat::Binary => { let mut p = PackedWriter::new(io::stdout()); Box::new(move |v| { if write_ann { p.write(&mut IOValueDomainCodec, v)?; } else { p.write(&mut IOValueDomainCodec, &v.strip_annotations::())?; } Ok(()) }) } OutputFormat::Unquoted => Box::new(|v| { print_unquoted(v); Ok(()) }), }; while let Some(value) = vs.next() { let value = value?; let matches = select.exec(&mut env.to_context(), &value); if c.collect { match c.select_output { SelectOutput::Sequence => w(&IOValue::new(matches))?, SelectOutput::Set => w(&IOValue::new(Set::from_iter(matches)))?, } } else { match c.select_output { SelectOutput::Sequence => { for v in matches { w(&v)?; } } SelectOutput::Set => { for v in Set::from_iter(matches) { w(&v)?; } } } } if let Some(limit) = c.limit { if vs.count >= limit { break; } } } Ok(()) } impl Quote { fn escape_spaces(&self) -> bool { match &self.output { QuotationOutput::ByteString => false, QuotationOutput::String(s) | QuotationOutput::Symbol(s) => s.escape_spaces, } } } fn output_one(q: &Quote, v: &IOValue) -> io::Result<()> { match q.output_format { OutputFormat::Binary => PackedWriter::new(io::stdout()).write(&mut IOValueDomainCodec, v), OutputFormat::Text => { TextWriter::new(io::stdout()) .set_escape_spaces(q.escape_spaces()) .write(&mut IOValueDomainCodec, v)?; println!(); Ok(()) } OutputFormat::Unquoted => { print_unquoted(v); Ok(()) } } } fn quote(q: Quote) -> io::Result<()> { match &q.output { QuotationOutput::ByteString => { let mut buf = Vec::new(); io::stdin().read_to_end(&mut buf)?; output_one(&q, &IOValue::new(&buf[..])) } QuotationOutput::String(s) | QuotationOutput::Symbol(s) => match s.input_terminator { StringInputTerminator::EOF => { let mut buf = String::new(); io::stdin().read_to_string(&mut buf)?; quote_chunk(&q, buf) } StringInputTerminator::Newline => quote_terminated_strings(b'\n', &q, s), StringInputTerminator::Nul => quote_terminated_strings(b'\0', &q, s), }, } } fn quote_chunk(q: &Quote, buf: String) -> io::Result<()> { match q.output { QuotationOutput::Symbol(_) => output_one(q, &IOValue::symbol(&buf)), _ => output_one(q, &IOValue::new(buf)), } } fn quote_terminated_strings(delimiter: u8, q: &Quote, s: &StringQuotation) -> io::Result<()> { let mut r = RollingBuffer::new(io::stdin()); while let Some(chunk) = r.read_upto(delimiter, s.include_terminator)? { quote_chunk( q, String::from_utf8(chunk) .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid UTF-8"))?, )? } Ok(()) }