diff --git a/implementations/cpp/main.cpp b/implementations/cpp/main.cpp index bc1e2cc..c6a5e3e 100644 --- a/implementations/cpp/main.cpp +++ b/implementations/cpp/main.cpp @@ -1,4 +1,8 @@ #include "preserves.hpp" +#include "preserves_binary.hpp" + +#include + #include "googletest/gtest/gtest.h" using namespace std; @@ -7,11 +11,21 @@ using namespace Preserves; TEST(Value, Basics) { auto vs = Value<>::from(vector>{ Value<>::from(1), - Value<>::from(2U), - Value<>::from(3), + Value<>::from(2.0), + Value<>::from("three"), }); ASSERT_EQ(3U, vs.size()); ASSERT_EQ(1U, vs[0].to_unsigned()); - ASSERT_EQ(2U, vs[1].to_unsigned()); - ASSERT_EQ(3U, vs[2].to_unsigned()); + /* The integer element built with from(1) must also read back as a double. */ + ASSERT_EQ(1.0, vs[0].to_double()); + ASSERT_EQ(2.0, vs[1].to_double()); + ASSERT_EQ("three", vs[2].to_string()); + ASSERT_EQ(ValueKind::Sequence, vs.value_kind()); + ASSERT_EQ(ValueKind::String, vs[2].value_kind()); } + +/* Decodes the first value of ../../tests/samples.bin and only checks that one was produced. NOTE(review): the stream is never checked for having opened successfully. */ +TEST(BinaryReader, ReadSamples) { + ifstream f("../../tests/samples.bin", ios::binary); + BinaryReader<> r(f, &GenericEmbedded::wrap); + auto v = r.next(); + ASSERT_TRUE(v); +} \ No newline at end of file diff --git a/implementations/cpp/preserves.hpp b/implementations/cpp/preserves.hpp index d875281..84d8e08 100644 --- a/implementations/cpp/preserves.hpp +++ b/implementations/cpp/preserves.hpp @@ -40,6 +40,8 @@ namespace Preserves { Value(std::shared_ptr> const& p) : p(p) {} Value(ValueImpl *p) : p(p) {} + /* Hands out a copy of the held shared pointer p. */ + std::shared_ptr> _impl() const { return p; } + static Value from_bool(bool b); static Value from_float(float f); static Value from_double(double d); @@ -59,6 +61,9 @@ namespace Preserves { static Value from_embedded(std::shared_ptr const& p); + /* Explicitly named forwards to the from_int overloads. */ + static Value from_unsigned(uint64_t i) { return from_int(i); } + static Value from_signed(int64_t i) { return from_int(i); } + static Value from_number(uint64_t i) { return from_int(i); } static Value from_number(int64_t i) { return from_int(i); } static Value from_number(float f) { return from_float(f); } @@ -72,6 +77,7 @@ namespace Preserves { 
static Value from(int64_t i) { return from_int(i); } static Value from(signed i) { return from_int(int64_t(i)); } static Value from(std::string const& s) { return from_string(s); } + /* Takes a C string and forwards it to from_string. */ + static Value from(char const* s) { return from_string(s); } static Value from(std::vector const& v) { return from_bytes(v); } static Value from(std::vector const& v) { return from_bytes(v); } static Value from(void *p, size_t len) { return from_bytes(p, len); } @@ -198,6 +204,7 @@ namespace Preserves { Atom value; public: Atomic(Atom const& value) : value(value) {} + /* Mutable access to the stored atom. */ + Atom& _value() { return value; } ValueKind value_kind() const { return kind; } }; @@ -211,7 +218,10 @@ namespace Preserves { } PRESERVES_ATOMIC_VALUE_CLASS(Boolean, bool, bool, ValueKind::Boolean, as_bool,); - PRESERVES_ATOMIC_VALUE_CLASS(Float, float, float, ValueKind::Float, as_float,); + PRESERVES_ATOMIC_VALUE_CLASS(Float, float, float, ValueKind::Float, as_float, + /* A float can always be read as a double. */ + boost::optional as_double() const override { + return this->value; + }); PRESERVES_ATOMIC_VALUE_CLASS(Double, double, double, ValueKind::Double, as_double,); PRESERVES_ATOMIC_VALUE_CLASS(Uint64, uint64_t, uint64_t, ValueKind::SignedInteger, as_unsigned, boost::optional as_signed() const override { @@ -220,6 +230,20 @@ namespace Preserves { } else { return boost::none; } + } + /* Yields the value as a float only when the conversion is exact. float(value) can round up to 2^64 which does not fit uint64_t; converting that back is undefined behaviour so it is rejected before the round-trip comparison. No top-level commas here: this code is a macro argument. */ + boost::optional as_float() const override { + float const f = float(this->value); + if (f < 18446744073709551616.0f && uint64_t(f) == this->value) { + return f; + } else { + return boost::none; + } + } + /* Same exactness rule as as_float but for double. */ + boost::optional as_double() const override { + double const d = double(this->value); + if (d < 18446744073709551616.0 && uint64_t(d) == this->value) { + return d; + } else { + return boost::none; + } }); PRESERVES_ATOMIC_VALUE_CLASS(Int64, int64_t, int64_t, ValueKind::SignedInteger, as_signed, boost::optional as_unsigned() const override { @@ -228,6 +252,20 @@ namespace Preserves { } else { return boost::none; } + } + /* Yields the value as a float only when the conversion is exact. float(value) can round up to 2^63 which does not fit int64_t; that case is rejected before converting back. The lower bound needs no check because -2^63 is exactly representable. */ + boost::optional as_float() const override { + float const f = float(this->value); + if (f < 9223372036854775808.0f && int64_t(f) == this->value) { + return f; + } else { + 
return boost::none; + } + } + /* Same exactness rule as as_float but for double. */ + boost::optional as_double() const override { + double const d = double(this->value); + if (d < 9223372036854775808.0 && int64_t(d) == this->value) { + return d; + } else { + return boost::none; + } }); PRESERVES_ATOMIC_VALUE_CLASS(String, std::string, std::string const&, ValueKind::String, as_string,); PRESERVES_ATOMIC_VALUE_CLASS(ByteString, std::vector, std::vector const&, ValueKind::ByteString, as_bytes,); @@ -261,6 +299,11 @@ namespace Preserves { } return false; } + /* Orders records by label first and by fields when the labels are equivalent. */ + bool operator<(Record const& other) const { + if (labelValue < other.labelValue) return true; + if (other.labelValue < labelValue) return false; + return fields < other.fields; + } }; template @@ -308,7 +351,7 @@ namespace Preserves { size_t size() const override { return values.size(); } bool contains(Value const& key) const override { return values.count(key) > 0; } bool add(Value const& value) override { - return values.insert(value)->second; + return values.insert(value).second; } bool erase(Value const& value) override { return values.erase(value) > 0; @@ -334,7 +377,7 @@ namespace Preserves { return i->second; } bool set(Value const& key, Value const& value) override { - return values.emplace(key, value)->second; + return values.emplace(key, value).second; } bool erase(Value const& key) override { return values.erase(key) > 0; @@ -346,17 +389,45 @@ namespace Preserves { public: std::shared_ptr value; - Embedded(std::shared_ptr const& value) : value() {} + Embedded(std::shared_ptr const& value) : value(value) {} ValueKind value_kind() const { return ValueKind::Embedded; } boost::optional> as_embedded() const override { return value; } }; - class GenericEmbedded: public Value {}; + class GenericEmbedded: public Value { + public: + GenericEmbedded(std::shared_ptr> p) : + Value(p) + {} + + /* Builds a shared GenericEmbedded around v's implementation pointer. */ + static std::shared_ptr wrap(Value<> v) { + return std::make_shared(v._impl()); + } + }; template - Value Value::from_int(uint64_t i) { + Value Value::from_bool(bool b) + { + return Value(new Boolean(b)); + } + + 
template + Value Value::from_float(float f) + { + return Value(new Float(f)); + } + + template + Value Value::from_double(double d) + { + return Value(new Double(d)); + } + + template + Value Value::from_int(uint64_t i) + { return Value(new Uint64(i)); } @@ -365,12 +436,77 @@ namespace Preserves { return Value(new Int64(i)); } + template + Value Value::from_string(std::string const& s) { + return Value(new String(s)); + } + template Value Value::sequence(std::vector> const& values) { return Value(new Sequence(values)); } - template boost::optional Value::as_unsigned() const { return p->as_unsigned(); } + template + Value Value::from_embedded(std::shared_ptr const& v) { + return Value(new Embedded(v)); + } + +#define PRESERVES_DELEGATE_CAST(t, name) \ + template boost::optional Value::name() const { return p->name(); } + PRESERVES_DELEGATE_CAST(bool, as_bool); + PRESERVES_DELEGATE_CAST(float, as_float); + PRESERVES_DELEGATE_CAST(double, as_double); + PRESERVES_DELEGATE_CAST(uint64_t, as_unsigned); + PRESERVES_DELEGATE_CAST(int64_t, as_signed); + PRESERVES_DELEGATE_CAST(std::string const&, as_string); + PRESERVES_DELEGATE_CAST(std::vector const&, as_bytes); + PRESERVES_DELEGATE_CAST(std::string const&, as_symbol); + PRESERVES_DELEGATE_CAST(Record const&, as_record); + PRESERVES_DELEGATE_CAST(std::vector> const&, as_sequence); + PRESERVES_DELEGATE_CAST(std::set> const&, as_set); + #define COMMA , + PRESERVES_DELEGATE_CAST(std::map COMMA Value> const&, as_dictionary); + #undef COMMA + PRESERVES_DELEGATE_CAST(std::shared_ptr, as_embedded); +#undef PRESERVES_DELEGATE_CAST + template boost::optional> Value::get(size_t index) const { return p->get(index); } template size_t Value::size() const { return p->size(); } + + /* Less-than over Values: compares value_kind() first and the payloads only when both kinds are equal. Throws std::runtime_error for a kind outside the switch. */ + template + bool operator<(Value const& a, Value const &b) { + auto aKind = a.value_kind(); + auto bKind = b.value_kind(); + if (aKind < bKind) return true; + if (bKind < aKind) return false; + switch (aKind) { + case ValueKind::Boolean: return a.to_bool() < 
b.to_bool(); + case ValueKind::Float: return a.to_float() < b.to_float(); + case ValueKind::Double: return a.to_double() < b.to_double(); + case ValueKind::SignedInteger: { + /* Presumably as_signed() is empty only for values above the int64 range, so such a value sorts after any that has a signed reading - confirm against Uint64::as_signed. */ + if (auto av = a.as_signed()) { + if (auto bv = b.as_signed()) { + return *av < *bv; + } else { + return true; + } + } else { + if (auto bv = b.as_signed()) { + return false; + } else { + return a.to_unsigned() < b.to_unsigned(); + } + } + } + case ValueKind::String: return a.to_string() < b.to_string(); + case ValueKind::ByteString: return a.to_bytes() < b.to_bytes(); + case ValueKind::Symbol: return a.to_symbol() < b.to_symbol(); + case ValueKind::Record: return a.to_record() < b.to_record(); + case ValueKind::Sequence: return a.to_sequence() < b.to_sequence(); + case ValueKind::Set: return a.to_set() < b.to_set(); + case ValueKind::Dictionary: return a.to_dictionary() < b.to_dictionary(); + case ValueKind::Embedded: return *a.to_embedded() < *b.to_embedded(); + default: throw std::runtime_error("Invalid ValueKind"); + } + } } \ No newline at end of file diff --git a/implementations/cpp/preserves_binary.hpp b/implementations/cpp/preserves_binary.hpp new file mode 100644 index 0000000..50cbbc4 --- /dev/null +++ b/implementations/cpp/preserves_binary.hpp @@ -0,0 +1,192 @@ +#pragma once + +#include +#include +#include + +#include "preserves.hpp" + +namespace Preserves { + /* Leading tag bytes that BinaryReader dispatches on. */ + enum class BinaryTag { + False = 0x80, + True = 0x81, + Float = 0x82, + Double = 0x83, + End = 0x84, + Annotation = 0x85, + Embedded = 0x86, + SmallInteger_lo = 0x90, + SmallInteger_hi = 0x9f, + MediumInteger_lo = 0xa0, + MediumInteger_hi = 0xaf, + SignedInteger = 0xb0, + String = 0xb1, + ByteString = 0xb2, + Symbol = 0xb3, + Record = 0xb4, + Sequence = 0xb5, + Set = 0xb6, + Dictionary = 0xb7, + }; + + /* Pulls Values out of a std::istream one tag-prefixed item at a time; every failure is reported as boost::none. */ + template + class BinaryReader { + std::istream& i; + std::function(Value<>)> decodeEmbedded; + + /* Reads n bytes into p; false when the stream is not good afterwards. */ + bool next_chunk(void* p, size_t n) { + i.read(static_cast(p), n); + return i.good(); + } + + public: + /* Decodes a base-128 unsigned integer: 7 payload bits per byte with the low group first; bit 0x80 set means another byte follows. Returns none at end of stream or when a tenth byte would be needed. */ + static boost::optional 
varint(std::istream &i) { + uint64_t n = 0; + /* Can read max 9 bytes, each with 7 bits of payload, for 9*7 = 63 bits. */ + for (size_t count = 0; count < 9; count++) { + int b = i.get(); + /* get() signals end of input with traits_type::eof(); i.eof() is only a bool flag and comparing against it wrongly rejected a 0x00 byte (length zero). */ + if (b == std::istream::traits_type::eof()) return boost::none; + /* Widen before shifting: the shift reaches 56 bits which is beyond the width of int. */ + n |= uint64_t(b & 0x7f) << (count * 7); + if ((b & 0x80) == 0) return n; + } + return boost::none; + } + + BinaryReader(std::istream& i, std::function(Value<>)> decodeEmbedded) : + i(i), + decodeEmbedded(decodeEmbedded) + {} + + /* Reads 4 bytes as a big-endian bit pattern and reinterprets it as a float. */ + boost::optional next_float() { + uint8_t buf[4]; + if (!next_chunk(buf, sizeof(buf))) return boost::none; + /* The top byte is widened first so the shift never lands in the sign bit of a promoted int. */ + uint32_t n = uint32_t(buf[0]) << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]; + float f; + memcpy(&f, &n, sizeof(f)); + return f; + } + + /* Reads 8 bytes as a big-endian bit pattern and reinterprets it as a double. */ + boost::optional next_double() { + uint8_t buf[8]; + if (!next_chunk(buf, sizeof(buf))) return boost::none; + uint32_t n1 = uint32_t(buf[0]) << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]; + uint32_t n2 = uint32_t(buf[4]) << 24 | buf[5] << 16 | buf[6] << 8 | buf[7]; + uint64_t n = uint64_t(n1) << 32 | n2; + double d; + memcpy(&d, &n, sizeof(d)); + return d; + } + + /* Reads n bytes as a big-endian unsigned integer. NOTE(review): buf[n] is a variable-length array (a compiler extension); the callers in this file pass 1..8. */ + boost::optional next_unsigned(size_t n) { + uint8_t buf[n]; + if (!next_chunk(buf, n)) return boost::none; + uint64_t v = 0; + for (size_t j = 0; j < n; j++) { + v = (v << 8) | buf[j]; + } + return v; + } + + /* Reads one value; an End tag here simply yields none. */ + boost::optional> next() { + bool end_sentinel; + return _next(end_sentinel); + } + + /* Reads one value. When an End tag is read instead, end_sentinel is set to true and none is returned; on every other path end_sentinel is false. An annotation is read and dropped and decoding then continues with the item after it. */ + boost::optional> _next(bool& end_sentinel) { + more: + end_sentinel = false; + switch (auto tag = BinaryTag(i.get())) { + case BinaryTag::False: return Value::from_bool(false); + case BinaryTag::True: return Value::from_bool(true); + case BinaryTag::Float: return next_float().map(Value::from_float); + case BinaryTag::Double: return next_double().map(Value::from_double); + case BinaryTag::End: end_sentinel = true; return boost::none; + case BinaryTag::Annotation: + if (!next()) return boost::none; + goto more; + case BinaryTag::Embedded: + return BinaryReader(i, &GenericEmbedded::wrap) + .next().map(decodeEmbedded).map(Value::from_embedded); + case 
BinaryTag::SmallInteger_lo ... BinaryTag::SmallInteger_hi: { + int64_t n = int64_t(tag) - int64_t(BinaryTag::SmallInteger_lo); + /* Offsets 0..12 are the values themselves; 13..15 stand for -3..-1. */ + return Value::from_int(n <= 12 ? n : n - 16); + } + case BinaryTag::MediumInteger_lo ... BinaryTag::MediumInteger_hi: { + int n = int(tag) - int(BinaryTag::MediumInteger_lo) + 1; + if (n < 9) { + /* The n payload bytes are treated as a signed two's complement number (see the 9-byte case below, which exists only for unsigned values with a zero lead byte), so a set top bit must be extended into the unused high bytes; without this a negative value decoded as a positive one. */ + return next_unsigned(n).map([n](uint64_t v) { + if (n < 8 && ((v >> (8 * n - 1)) & 1)) v |= ~uint64_t(0) << (8 * n); + return Value::from_int(int64_t(v)); + }); + } + if (n == 9) { + /* We can only handle this if it's unsigned and the first byte is 0. */ + if (i.get() != 0) return boost::none; + return next_unsigned(8).map(Value::from_unsigned); + } + return boost::none; + } + case BinaryTag::SignedInteger: return boost::none; + case BinaryTag::String: return varint(i).flat_map([&](size_t len)-> boost::optional> { + auto s = std::make_shared>(std::string()); + s->_value().resize(len); + if (!next_chunk(&s->_value()[0], len)) return boost::none; + return Value(s); + }); + case BinaryTag::ByteString: return varint(i).flat_map([&](size_t len)-> boost::optional> { + auto s = std::make_shared>(std::vector()); + s->_value().resize(len); + /* data() instead of &v[0]: indexing an empty vector is undefined and len may be zero. */ + if (!next_chunk(s->_value().data(), len)) return boost::none; + return Value(s); + }); + case BinaryTag::Symbol: return varint(i).flat_map([&](size_t len)-> boost::optional> { + auto s = std::make_shared>(std::string()); + s->_value().resize(len); + if (!next_chunk(&s->_value()[0], len)) return boost::none; + return Value(s); + }); + case BinaryTag::Record: return next().flat_map([&](auto label)-> boost::optional> { + auto r = std::make_shared>(label); + /* Fields follow the label until an End tag closes the record. */ + while (true) { + bool end_rec = false; + auto v = _next(end_rec); + if (end_rec) return Value(r); + if (!v) return boost::none; + r->fields.push_back(*v); + } + }); + case BinaryTag::Sequence: { + auto s = std::make_shared>(); + while (true) { + bool end_rec = false; + auto v = _next(end_rec); + if (end_rec) return Value(s); + if (!v) return boost::none; + s->values.push_back(*v); + } + } + case BinaryTag::Set: { + auto s = std::make_shared>(); + 
while (true) { + bool end_rec = false; + auto v = _next(end_rec); + if (end_rec) return Value(s); + if (!v) return boost::none; + s->values.insert(*v); + } + } + case BinaryTag::Dictionary: { + auto s = std::make_shared>(); + /* Entries are key/value pairs; only a key position may hold the closing End tag. */ + while (true) { + bool end_rec = false; + auto k = _next(end_rec); + if (end_rec) return Value(s); + if (!k) return boost::none; + auto v = next(); + if (!v) return boost::none; + s->values.emplace(*k, *v); + } + } + default: + return boost::none; + } + } + }; +} \ No newline at end of file