diff --git a/implementations/rust/Cargo.toml b/implementations/rust/Cargo.toml
index 433e87a..8961f48 100644
--- a/implementations/rust/Cargo.toml
+++ b/implementations/rust/Cargo.toml
@@ -2,4 +2,5 @@
 members = [
     "preserves",
     "preserves-schema",
+    "preserves-tools",
 ]
diff --git a/implementations/rust/preserves-tools/Cargo.toml b/implementations/rust/preserves-tools/Cargo.toml
new file mode 100644
index 0000000..59132b5
--- /dev/null
+++ b/implementations/rust/preserves-tools/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "preserves-tools"
+version = "0.1.0"
+authors = ["Tony Garnock-Jones "]
+edition = "2018"
+description = "Command-line utilities for working with Preserves documents."
+homepage = "https://preserves.gitlab.io/"
+repository = "https://gitlab.com/preserves/preserves"
+license = "Apache-2.0"
+
+[dependencies]
+preserves = { path = "../preserves", version = "0.16.0" }
+
+bytes = "1.0"
+clap = "3.0.0-beta.2"
+clap_generate = "3.0.0-beta.2"
diff --git a/implementations/rust/preserves-tools/src/bin/preserves-tool.rs b/implementations/rust/preserves-tools/src/bin/preserves-tool.rs
new file mode 100644
index 0000000..4046d4c
--- /dev/null
+++ b/implementations/rust/preserves-tools/src/bin/preserves-tool.rs
@@ -0,0 +1,360 @@
+use bytes::Buf;
+use bytes::BufMut;
+use bytes::BytesMut;
+
+use clap::ArgEnum;
+use clap::Clap;
+use clap::IntoApp;
+use clap_generate::{generate, generators};
+
+use preserves::value::IOBinarySource;
+use preserves::value::IOValue;
+use preserves::value::IOValueDomainCodec;
+use preserves::value::PackedReader;
+use preserves::value::PackedWriter;
+use preserves::value::Reader;
+use preserves::value::TextReader;
+use preserves::value::TextWriter;
+use preserves::value::ViaCodec;
+use preserves::value::Writer;
+
+use std::io;
+
+// NOTE: items that derive `Clap`/`ArgEnum` carry plain `//` comments on
+// purpose: clap's derive macros can turn `///` doc comments into help text,
+// which would change the program's `--help` output.
+
+// Binary name passed to the completion generators.
+const BIN_NAME: &str = "preserves-tool";
+
+// #[derive(ArgEnum, Clone, Debug)]
+// enum Encoding {
+//     None,
+//     Base64,
+//     Hex,
+// }
+
+// Syntax expected on standard input; `AutoDetect` decides value by value.
+#[derive(ArgEnum, Clone, Debug)]
+enum InputFormat {
+    AutoDetect,
+    Text,
+    Binary,
+}
+
+// Syntax written to standard output.
+#[derive(ArgEnum, Clone, Debug)]
+enum OutputFormat {
+    Text,
+    Binary,
+}
+
+// Shells for which a completion script can be generated.
+#[derive(ArgEnum, Clone, Debug)]
+enum CompletionDialect {
+    Bash,
+    Zsh,
+    PowerShell,
+    Fish,
+    Elvish,
+}
+
+// An on/off switch that accepts several alternative spellings as aliases.
+#[derive(ArgEnum, Clone, Copy, Debug)]
+enum Boolish {
+    #[clap(alias = "no", alias = "n", alias = "off", alias = "0", alias = "false")]
+    Disabled,
+    #[clap(alias = "yes", alias = "y", alias = "on", alias = "1", alias = "true")]
+    Enabled,
+}
+
+impl From<Boolish> for bool {
+    fn from(b: Boolish) -> Self {
+        matches!(b, Boolish::Enabled)
+    }
+}
+
+// Options of the `convert` subcommand.
+#[derive(Clap, Clone, Debug)]
+struct Convert {
+    // #[clap(long, arg_enum, default_value = "none")]
+    // input_encoding: Encoding,
+
+    // #[clap(long, arg_enum, default_value = "none")]
+    // output_encoding: Encoding,
+
+    #[clap(long, short, arg_enum, default_value = "auto-detect")]
+    input_format: InputFormat,
+
+    #[clap(long, short, arg_enum, default_value = "text")]
+    output_format: OutputFormat,
+
+    // Maximum number of values to convert; unlimited when absent.
+    #[clap(long)]
+    limit: Option<usize>,
+
+    // When enabled, text output uses an indentation of 2.
+    #[clap(long, arg_enum, value_name = "on/off", default_value = "on")]
+    indent: Boolish,
+
+    // Forwarded to the readers' `next` call.
+    // NOTE(review): presumably selects whether annotations are kept; confirm.
+    #[clap(long, arg_enum, value_name = "on/off", default_value = "on")]
+    annotations: Boolish,
+}
+
+// The available subcommands.
+#[derive(Clap, Clone, Debug)]
+enum Subcommand {
+    Convert(Convert),
+    Completions {
+        #[clap(arg_enum, value_name = "dialect")]
+        dialect: CompletionDialect,
+    },
+}
+
+// Top-level command line: exactly one subcommand.
+#[derive(Clap, Clone, Debug)]
+#[clap(version)]
+struct CommandLine {
+    #[clap(subcommand)]
+    command: Subcommand,
+}
+
+/// Parses the command line and runs the selected subcommand.
+fn main() -> io::Result<()> {
+    let args = CommandLine::parse();
+    match args.command {
+        Subcommand::Completions { dialect } => {
+            let mut app = CommandLine::into_app();
+            let out = &mut io::stdout();
+            match dialect {
+                CompletionDialect::Bash => generate::<generators::Bash, _>(&mut app, BIN_NAME, out),
+                CompletionDialect::Zsh => generate::<generators::Zsh, _>(&mut app, BIN_NAME, out),
+                CompletionDialect::PowerShell => generate::<generators::PowerShell, _>(&mut app, BIN_NAME, out),
+                CompletionDialect::Fish => generate::<generators::Fish, _>(&mut app, BIN_NAME, out),
+                CompletionDialect::Elvish => generate::<generators::Elvish, _>(&mut app, BIN_NAME, out),
+            }
+        }
+        Subcommand::Convert(c) => convert(c)?,
+    }
+    Ok(())
+}
+
+/// A reader wrapper that retains the bytes it has read so that callers can
+/// seek backwards over them until they are explicitly discarded.
+///
+/// All offsets (`discarded`, `pos`, seek targets) are absolute positions in
+/// the underlying stream, counted from the first byte read.
+struct RollingBuffer<R: io::Read> {
+    /// Underlying byte source.
+    r: R,
+    /// Number of bytes dropped from the front; absolute offset of `buf[0]`.
+    discarded: usize,
+    /// Absolute offset of the next byte to hand out.
+    pos: usize,
+    /// Retained bytes, covering absolute offsets `discarded..rhs()`.
+    buf: BytesMut,
+}
+
+impl<R: io::Read> RollingBuffer<R> {
+    /// Wraps `r` with an empty buffer positioned at offset zero.
+    fn new(r: R) -> Self {
+        RollingBuffer {
+            r,
+            discarded: 0,
+            pos: 0,
+            buf: BytesMut::new(),
+        }
+    }
+
+    /// Performs one read of up to 8192 bytes from the underlying reader and
+    /// appends whatever arrived to the buffer, returning the byte count.
+    fn read_more(&mut self) -> io::Result<usize> {
+        let mut chunk = [0; 8192];
+        let n = self.r.read(&mut chunk)?;
+        self.buf.put_slice(&chunk[..n]);
+        Ok(n)
+    }
+
+    /// Returns the buffered bytes from the current position onwards. If there
+    /// are none, one read from the underlying reader is attempted first, so
+    /// an empty result means that read produced no data.
+    fn peek_buf(&mut self) -> io::Result<&[u8]> {
+        if self.rhs() == self.pos {
+            self.read_more()?;
+        }
+        Ok(&self.buf[self.pos - self.discarded..])
+    }
+
+    /// Absolute offset just past the last buffered byte.
+    fn rhs(&self) -> usize {
+        self.buf.remaining() + self.discarded
+    }
+
+    /// Drops `count` bytes from the front of the buffer.
+    fn discard(&mut self, count: usize) {
+        self.buf.advance(count);
+        self.discarded += count;
+    }
+
+    /// Drops everything before the current position; seeking back to an
+    /// earlier offset fails from then on.
+    fn discard_to_pos(&mut self) {
+        self.discard(self.pos - self.discarded)
+    }
+}
+
+/// Builds the error reported for a seek target outside the buffered range.
+fn invalid_seek(message: &'static str) -> io::Error {
+    io::Error::new(io::ErrorKind::InvalidInput, message)
+}
+
+impl<R: io::Read> io::Seek for RollingBuffer<R> {
+    /// Moves the read position within the buffered range
+    /// `discarded..=rhs()` without touching the underlying reader.
+    ///
+    /// # Errors
+    ///
+    /// `Unsupported` for `SeekFrom::End`; `InvalidInput` when the target lies
+    /// outside the buffered range. Offset arithmetic is checked, so an
+    /// out-of-range relative seek yields an error rather than overflowing.
+    fn seek(&mut self, offset: io::SeekFrom) -> io::Result<u64> {
+        let new_position = match offset {
+            io::SeekFrom::Current(delta) => {
+                let pos = self.pos as u64;
+                if delta >= 0 {
+                    pos.checked_add(delta as u64)
+                        .ok_or_else(|| invalid_seek("Attempt to seek beyond end of buffer"))?
+                } else {
+                    pos.checked_sub(delta.unsigned_abs())
+                        .ok_or_else(|| invalid_seek("Attempt to seek before start of buffer"))?
+                }
+            }
+            io::SeekFrom::End(_) => {
+                return Err(io::Error::new(
+                    io::ErrorKind::Unsupported,
+                    "Cannot seek wrt end on open-ended stream",
+                ));
+            }
+            io::SeekFrom::Start(new_position) => new_position,
+        };
+        if new_position > self.rhs() as u64 {
+            return Err(invalid_seek("Attempt to seek beyond end of buffer"));
+        }
+        if new_position < self.discarded as u64 {
+            return Err(invalid_seek("Attempt to seek before start of buffer"));
+        }
+        // Lossless: `new_position` is at most `rhs()`, which is a `usize`.
+        self.pos = new_position as usize;
+        Ok(new_position)
+    }
+
+    /// Reports the current absolute read position.
+    fn stream_position(&mut self) -> io::Result<u64> {
+        Ok(self.pos as u64)
+    }
+}
+
+impl<R: io::Read> io::Read for RollingBuffer<R> {
+    /// Copies buffered bytes at the current position into `buf`, pulling
+    /// more from the underlying reader while none are buffered there.
+    /// Returns zero when no more input arrives or when `buf` is empty.
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        // An empty destination can never make progress; without this guard
+        // the loop below would keep buffering input until end of stream.
+        if buf.is_empty() {
+            return Ok(0);
+        }
+        let i = self.pos - self.discarded;
+        loop {
+            let n = std::cmp::min(self.buf.remaining() - i, buf.len());
+            if n == 0 {
+                if self.read_more()? == 0 {
+                    return Ok(0);
+                }
+                continue;
+            }
+            buf[..n].copy_from_slice(&self.buf[i..i + n]);
+            self.pos += n;
+            return Ok(n);
+        }
+    }
+}
+
+/// Copies values from standard input to standard output, writing each in
+/// the requested output format.
+///
+/// The syntax of each value is judged from its first byte: below 128 is
+/// treated as text, anything else as binary. Input that contradicts an
+/// explicitly requested input format is rejected with `InvalidData`.
+/// Conversion ends at end of input, when a reader yields no value, or once
+/// `limit` values have been written.
+fn convert(c: Convert) -> io::Result<()> {
+    let mut r = RollingBuffer::new(io::stdin());
+    let read_annotations = bool::from(c.annotations);
+    let mut count: usize = 0;
+    let mut w: Box<dyn FnMut(&IOValue) -> io::Result<()>> = match c.output_format {
+        OutputFormat::Text => {
+            let mut t = TextWriter::new(io::stdout());
+            if bool::from(c.indent) {
+                t.indentation = 2;
+            }
+            Box::new(move |v| {
+                t.write(&mut IOValueDomainCodec, v)?;
+                // End each value with a newline.
+                println!();
+                Ok(())
+            })
+        }
+        OutputFormat::Binary => {
+            let mut p = PackedWriter::new(io::stdout());
+            Box::new(move |v| p.write(&mut IOValueDomainCodec, v))
+        }
+    };
+    loop {
+        // Checked before reading so that `--limit 0` converts nothing and no
+        // further input is awaited once the limit has been reached.
+        if let Some(limit) = c.limit {
+            if count >= limit {
+                return Ok(());
+            }
+        }
+
+        let is_text = {
+            let peek_buf = r.peek_buf()?;
+            if peek_buf.is_empty() {
+                return Ok(());
+            }
+            peek_buf[0] < 128
+        };
+
+        let maybe_value: Option<IOValue> = if is_text {
+            match c.input_format {
+                InputFormat::AutoDetect | InputFormat::Text => (),
+                InputFormat::Binary => return Err(io::Error::new(
+                    io::ErrorKind::InvalidData, "Expected binary input, saw text input")),
+            }
+            TextReader::new(&mut IOBinarySource::new(&mut r), ViaCodec::new(IOValueDomainCodec))
+                .next(read_annotations)?
+        } else {
+            match c.input_format {
+                InputFormat::AutoDetect | InputFormat::Binary => (),
+                InputFormat::Text => return Err(io::Error::new(
+                    io::ErrorKind::InvalidData, "Expected text input, saw binary input")),
+            }
+            PackedReader::new(&mut IOBinarySource::new(&mut r), IOValueDomainCodec)
+                .next(read_annotations)?
+        };
+
+        match maybe_value {
+            None => return Ok(()),
+            Some(value) => {
+                w(&value)?;
+                // The value has been written; release the bytes it occupied.
+                r.discard_to_pos();
+                count += 1;
+            }
+        }
+    }
+}