Binary codec (not yet working)

This commit is contained in:
Tony Garnock-Jones 2023-06-21 00:45:04 +02:00
parent 3c8d9dcf98
commit 8788a72a8d
3 changed files with 353 additions and 11 deletions

View File

@ -1,4 +1,8 @@
#include "preserves.hpp"
#include "preserves_binary.hpp"
#include <fstream>
#include "googletest/gtest/gtest.h"
using namespace std;
@ -7,11 +11,21 @@ using namespace Preserves;
// Builds a three-element sequence (integer, double, string) and checks
// element access, the numeric/string conversions and the reported kinds.
// The element list and the assertions are kept in step: three elements,
// three indices.
TEST(Value, Basics) {
    auto vs = Value<>::from(vector<Value<>>{
        Value<>::from(1),
        Value<>::from(2.0),
        Value<>::from("three"),
    });
    ASSERT_EQ(3U, vs.size());
    // An integer element is readable both as unsigned and as double.
    ASSERT_EQ(1U, vs[0].to_unsigned());
    ASSERT_EQ(1.0, vs[0].to_double());
    ASSERT_EQ(2.0, vs[1].to_double());
    ASSERT_EQ("three", vs[2].to_string());
    ASSERT_EQ(ValueKind::Sequence, vs.value_kind());
    ASSERT_EQ(ValueKind::String, vs[2].value_kind());
}
// Decodes the first value from the binary samples file.
// The path is relative to the working directory the test is run from, so a
// failure to open the file is reported explicitly instead of surfacing as a
// confusing "decode returned nothing" failure.
TEST(BinaryReader, ReadSamples) {
    ifstream f("../../tests/samples.bin", ios::binary);
    ASSERT_TRUE(f.is_open());
    BinaryReader<> r(f, &GenericEmbedded::wrap);
    auto v = r.next();
    ASSERT_TRUE(v);
}

View File

@ -40,6 +40,8 @@ namespace Preserves {
// Wraps an already-shared implementation object.
Value(std::shared_ptr<ValueImpl<T>> const& p) : p(p) {}
// Wraps a raw implementation pointer by initialising the member `p` from it
// (presumably taking ownership, if `p` is a shared_ptr — confirm against the
// member declaration, which is outside this excerpt).
Value(ValueImpl<T> *p) : p(p) {}
// Exposes the underlying implementation pointer (returned by value).
std::shared_ptr<ValueImpl<T>> _impl() const { return p; }
// Factories for atomic values; defined out of line.
static Value from_bool(bool b);
static Value from_float(float f);
static Value from_double(double d);
@ -59,6 +61,9 @@ namespace Preserves {
// Wraps an embedded (domain-specific) object; defined out of line.
static Value from_embedded(std::shared_ptr<T> const& p);
// Explicitly-named integer factories; each forwards to the matching
// from_int overload selected by the parameter type.
static Value from_unsigned(uint64_t i) { return from_int(i); }
static Value from_signed(int64_t i) { return from_int(i); }
// from_number: one overloaded name for numeric inputs, dispatching on the
// argument type to from_int / from_float.
static Value from_number(uint64_t i) { return from_int(i); }
static Value from_number(int64_t i) { return from_int(i); }
static Value from_number(float f) { return from_float(f); }
@ -72,6 +77,7 @@ namespace Preserves {
// Generic `from` overload set: picks the concrete factory from the argument type.
static Value from(int64_t i) { return from_int(i); }
// Plain `signed` is widened to int64_t before being stored.
static Value from(signed i) { return from_int(int64_t(i)); }
static Value from(std::string const& s) { return from_string(s); }
// String literals / C strings become string values (converted to std::string
// when passed to from_string).
static Value from(char const* s) { return from_string(s); }
// Byte containers and raw (pointer, length) buffers become byte strings.
static Value from(std::vector<uint8_t> const& v) { return from_bytes(v); }
static Value from(std::vector<char> const& v) { return from_bytes(v); }
static Value from(void *p, size_t len) { return from_bytes(p, len); }
@ -198,6 +204,7 @@ namespace Preserves {
Atom value;
public:
// Stores a copy of the given atom.
Atomic(Atom const& value) : value(value) {}
// Mutable access to the stored atom; the binary reader uses this to fill
// string/byte buffers in place after construction.
Atom& _value() { return value; }
// Reports the kind this atomic class represents (`kind` is declared outside
// this excerpt).
ValueKind value_kind() const { return kind; }
};
@ -211,7 +218,10 @@ namespace Preserves {
}
// Atomic value classes generated by PRESERVES_ATOMIC_VALUE_CLASS. Each
// invocation names the class, its stored type, the accessor's result type,
// the reported ValueKind, the primary accessor, and optional extra members.
PRESERVES_ATOMIC_VALUE_CLASS(Boolean, bool, bool, ValueKind::Boolean, as_bool,);
// Float is defined exactly once; besides as_float it also answers as_double,
// because every float converts to double without loss.
PRESERVES_ATOMIC_VALUE_CLASS(Float, float, float, ValueKind::Float, as_float,
    boost::optional<double> as_double() const override {
        return this->value;
    });
PRESERVES_ATOMIC_VALUE_CLASS(Double, double, double, ValueKind::Double, as_double,);
PRESERVES_ATOMIC_VALUE_CLASS(Uint64, uint64_t, uint64_t, ValueKind::SignedInteger, as_unsigned,
boost::optional<int64_t> as_signed() const override {
@ -220,6 +230,20 @@ namespace Preserves {
} else {
return boost::none;
}
}
// Returns the value as a float only when the conversion is exact.
// Values close to UINT64_MAX round up to 2^64 as a float; converting that
// back to uint64_t is undefined behaviour, so the range is checked first.
boost::optional<float> as_float() const override {
    float const f = float(this->value);
    if (f < 18446744073709551616.0f && uint64_t(f) == this->value) {
        return f;
    }
    return boost::none;
}
// Returns the value as a double only when the conversion is exact.
// Values close to UINT64_MAX round up to 2^64 as a double; converting that
// back to uint64_t is undefined behaviour, so the range is checked first.
boost::optional<double> as_double() const override {
    double const d = double(this->value);
    if (d < 18446744073709551616.0 && uint64_t(d) == this->value) {
        return d;
    }
    return boost::none;
}
});
PRESERVES_ATOMIC_VALUE_CLASS(Int64, int64_t, int64_t, ValueKind::SignedInteger, as_signed,
boost::optional<uint64_t> as_unsigned() const override {
@ -228,6 +252,20 @@ namespace Preserves {
} else {
return boost::none;
}
}
// Returns the value as a float only when the conversion is exact.
// Values close to INT64_MAX round up to 2^63 as a float; converting that
// back to int64_t is undefined behaviour, so the upper bound is checked
// first. (The lower bound -2^63 is exactly representable and in range.)
boost::optional<float> as_float() const override {
    float const f = float(this->value);
    if (f < 9223372036854775808.0f && int64_t(f) == this->value) {
        return f;
    }
    return boost::none;
}
// Returns the value as a double only when the conversion is exact.
// Values close to INT64_MAX round up to 2^63 as a double; converting that
// back to int64_t is undefined behaviour, so the upper bound is checked
// first. (The lower bound -2^63 is exactly representable and in range.)
boost::optional<double> as_double() const override {
    double const d = double(this->value);
    if (d < 9223372036854775808.0 && int64_t(d) == this->value) {
        return d;
    }
    return boost::none;
}
});
// Text and byte-string atoms: stored by value, handed out by const reference,
// with no extra members beyond the primary accessor.
PRESERVES_ATOMIC_VALUE_CLASS(String, std::string, std::string const&, ValueKind::String, as_string,);
PRESERVES_ATOMIC_VALUE_CLASS(ByteString, std::vector<uint8_t>, std::vector<uint8_t> const&, ValueKind::ByteString, as_bytes,);
@ -261,6 +299,11 @@ namespace Preserves {
}
return false;
}
// Lexicographic ordering: the label decides first; the field vectors are
// only compared when neither label orders before the other.
bool operator<(Record<T> const& other) const {
    return (labelValue < other.labelValue)
        || (!(other.labelValue < labelValue) && (fields < other.fields));
}
};
template <typename T = class GenericEmbedded>
@ -308,7 +351,7 @@ namespace Preserves {
// Number of elements currently held.
size_t size() const override { return values.size(); }
// Membership test via the container's own lookup.
bool contains(Value<T> const& key) const override { return values.count(key) > 0; }
// Inserts `value`; returns true when it was newly added and false when an
// equivalent element was already present. `insert` returns an
// (iterator, bool) pair, so the flag is read with `.second`.
bool add(Value<T> const& value) override {
    return values.insert(value).second;
}
bool erase(Value<T> const& value) override {
return values.erase(value) > 0;
@ -334,7 +377,7 @@ namespace Preserves {
return i->second;
}
// Associates `key` with `value`; returns true when a new entry was created.
// `emplace` returns an (iterator, bool) pair, so the flag is read with
// `.second`.
// NOTE(review): emplace leaves an existing entry untouched, so calling this
// for a key that is already present does not replace its value — confirm
// that this is the intended contract for `set`.
bool set(Value<T> const& key, Value<T> const& value) override {
    return values.emplace(key, value).second;
}
bool erase(Value<T> const& key) override {
return values.erase(key) > 0;
@ -346,17 +389,45 @@ namespace Preserves {
public:
std::shared_ptr<T> value;
// Stores the given embedded object. The member must be initialised from the
// argument (`value(value)`); `value()` would leave it null and silently drop
// the embedded payload.
Embedded(std::shared_ptr<T> const& value) : value(value) {}
// Always reports ValueKind::Embedded.
ValueKind value_kind() const { return ValueKind::Embedded; }
// Hands out the stored shared pointer (copied into the optional).
boost::optional<std::shared_ptr<T>> as_embedded() const override {
return value;
}
};
// Default embedded type: an embedded value that is itself a Preserves value.
// Defined exactly once; it derives from Value<GenericEmbedded> and shares the
// wrapped value's implementation object.
class GenericEmbedded: public Value<GenericEmbedded> {
public:
    // Adopts an existing implementation pointer.
    GenericEmbedded(std::shared_ptr<ValueImpl<GenericEmbedded>> p) :
        Value(p)
    {}

    // Decoder hook for BinaryReader: wraps a decoded value as a
    // GenericEmbedded that shares the same implementation object.
    static std::shared_ptr<GenericEmbedded> wrap(Value<> v) {
        return std::make_shared<GenericEmbedded>(v._impl());
    }
};
// Creates a Boolean value. The template header is followed by a single
// function definition (no stray, unterminated from_int header before it).
template <typename T>
Value<T> Value<T>::from_bool(bool b)
{
    return Value<T>(new Boolean<T>(b));
}
// Creates a single-precision floating-point value.
template <typename T>
Value<T> Value<T>::from_float(float f)
{
    auto impl = std::make_shared<Float<T>>(f);
    return Value<T>(impl);
}
// Creates a double-precision floating-point value.
template <typename T>
Value<T> Value<T>::from_double(double d)
{
    auto impl = std::make_shared<Double<T>>(d);
    return Value<T>(impl);
}
// Creates an integer value backed by unsigned 64-bit storage.
template <typename T>
Value<T> Value<T>::from_int(uint64_t i)
{
    auto impl = std::make_shared<Uint64<T>>(i);
    return Value<T>(impl);
}
@ -365,12 +436,77 @@ namespace Preserves {
return Value<T>(new Int64<T>(i));
}
// Creates a string value holding a copy of `s`.
template <typename T>
Value<T> Value<T>::from_string(std::string const& s) {
    auto impl = std::make_shared<String<T>>(s);
    return Value<T>(impl);
}
// Creates a sequence value holding a copy of `values`.
template <typename T>
Value<T> Value<T>::sequence(std::vector<Value<T>> const& values) {
    ValueImpl<T>* const impl = new Sequence<T>(values);
    return Value<T>(impl);
}
// Forwards as_unsigned to the implementation object.
// NOTE(review): PRESERVES_DELEGATE_CAST(uint64_t, as_unsigned) further down
// expands to this same definition; only one of the two may remain or the
// translation unit will not compile.
template <typename T> boost::optional<uint64_t> Value<T>::as_unsigned() const { return p->as_unsigned(); }
// Creates an embedded value that shares ownership of `v`.
template <typename T>
Value<T> Value<T>::from_embedded(std::shared_ptr<T> const& v) {
    auto impl = std::make_shared<Embedded<T>>(v);
    return Value<T>(impl);
}
// Generates the out-of-line definition of Value<T>::name(), which simply
// forwards to the same-named accessor on the implementation object `p` and
// returns its boost::optional<t> result.
#define PRESERVES_DELEGATE_CAST(t, name) \
template <typename T> boost::optional<t> Value<T>::name() const { return p->name(); }
PRESERVES_DELEGATE_CAST(bool, as_bool);
PRESERVES_DELEGATE_CAST(float, as_float);
PRESERVES_DELEGATE_CAST(double, as_double);
PRESERVES_DELEGATE_CAST(uint64_t, as_unsigned);
PRESERVES_DELEGATE_CAST(int64_t, as_signed);
PRESERVES_DELEGATE_CAST(std::string const&, as_string);
PRESERVES_DELEGATE_CAST(std::vector<uint8_t> const&, as_bytes);
PRESERVES_DELEGATE_CAST(std::string const&, as_symbol);
PRESERVES_DELEGATE_CAST(Record<T> const&, as_record);
PRESERVES_DELEGATE_CAST(std::vector<Value<T>> const&, as_sequence);
PRESERVES_DELEGATE_CAST(std::set<Value<T>> const&, as_set);
// A comma inside a template argument list would be parsed as a macro-argument
// separator, so it is smuggled through a temporary COMMA macro.
#define COMMA ,
PRESERVES_DELEGATE_CAST(std::map<Value<T> COMMA Value<T>> const&, as_dictionary);
#undef COMMA
PRESERVES_DELEGATE_CAST(std::shared_ptr<T>, as_embedded);
#undef PRESERVES_DELEGATE_CAST
// Indexed element access and element count, both forwarded to the
// implementation object.
template <typename T> boost::optional<Value<T>> Value<T>::get(size_t index) const { return p->get(index); }
template <typename T> size_t Value<T>::size() const { return p->size(); }
// Ordering over values: different kinds order by their ValueKind; values of
// the same kind are compared by payload. Throws std::runtime_error for a
// kind that is not handled below.
template <typename T>
bool operator<(Value<T> const& a, Value<T> const &b) {
    auto const lhsKind = a.value_kind();
    auto const rhsKind = b.value_kind();
    if (lhsKind != rhsKind) {
        return lhsKind < rhsKind;
    }

    switch (lhsKind) {
    case ValueKind::Boolean:
        return a.to_bool() < b.to_bool();
    case ValueKind::Float:
        return a.to_float() < b.to_float();
    case ValueKind::Double:
        return a.to_double() < b.to_double();
    case ValueKind::SignedInteger: {
        // An integer that does not fit int64_t can only be a large unsigned
        // one, so it orders after everything that does fit.
        auto const lhsSigned = a.as_signed();
        auto const rhsSigned = b.as_signed();
        if (lhsSigned && rhsSigned) return *lhsSigned < *rhsSigned;
        if (lhsSigned) return true;
        if (rhsSigned) return false;
        return a.to_unsigned() < b.to_unsigned();
    }
    case ValueKind::String:
        return a.to_string() < b.to_string();
    case ValueKind::ByteString:
        return a.to_bytes() < b.to_bytes();
    case ValueKind::Symbol:
        return a.to_symbol() < b.to_symbol();
    case ValueKind::Record:
        return a.to_record() < b.to_record();
    case ValueKind::Sequence:
        return a.to_sequence() < b.to_sequence();
    case ValueKind::Set:
        return a.to_set() < b.to_set();
    case ValueKind::Dictionary:
        return a.to_dictionary() < b.to_dictionary();
    case ValueKind::Embedded:
        // Embedded payloads are compared by pointee, not by pointer.
        return *a.to_embedded() < *b.to_embedded();
    default:
        throw std::runtime_error("Invalid ValueKind");
    }
}
}

View File

@ -0,0 +1,192 @@
#pragma once
#include <cstring>
#include <iostream>
#include <functional>
#include "preserves.hpp"
namespace Preserves {
// Lead-byte tags of the binary encoding, as dispatched on by
// BinaryReader::_next.
enum class BinaryTag {
False = 0x80,
True = 0x81,
Float = 0x82, // followed by 4 payload bytes, read most-significant first
Double = 0x83, // followed by 8 payload bytes, read most-significant first
End = 0x84, // terminates Record / Sequence / Set / Dictionary bodies
Annotation = 0x85, // the reader decodes the annotation value and discards it
Embedded = 0x86, // followed by one encoded value, passed to the embedded decoder
// Integers encoded entirely in the tag byte; the reader maps offsets 0..12
// to themselves and offsets 13..15 to -3..-1.
SmallInteger_lo = 0x90,
SmallInteger_hi = 0x9f,
// Integers with (tag - MediumInteger_lo + 1) payload bytes following.
MediumInteger_lo = 0xa0,
MediumInteger_hi = 0xaf,
SignedInteger = 0xb0, // not decoded by the reader yet (yields no value)
// Length-prefixed atoms: a varint byte count, then the bytes.
String = 0xb1,
ByteString = 0xb2,
Symbol = 0xb3,
// Compound values: members follow until an End tag.
Record = 0xb4,
Sequence = 0xb5,
Set = 0xb6,
Dictionary = 0xb7,
};
// Decoder for the binary encoding described by BinaryTag.
//
// Reads from a caller-owned std::istream (held by reference, so the stream
// must outlive the reader). Embedded values are first decoded as generic
// values and then converted with the caller-supplied `decodeEmbedded`
// function. Every read operation reports failure (end of input, malformed
// or unsupported data) by returning boost::none.
template <typename T = GenericEmbedded>
class BinaryReader {
    std::istream& i;
    std::function<std::shared_ptr<T>(Value<>)> decodeEmbedded;

    // Reads exactly n bytes into p; false when the stream is not in a good
    // state afterwards (e.g. fewer than n bytes were available).
    bool next_chunk(void* p, size_t n) {
        i.read(static_cast<char *>(p), n);
        return i.good();
    }

    // Shared decoder for String / ByteString / Symbol: a varint byte count
    // followed by that many bytes, stored directly into the atom's buffer.
    // NOTE(review): the length comes straight from the input, so hostile
    // data can request a very large allocation here.
    template <typename Impl, typename Storage>
    boost::optional<Value<T>> next_counted() {
        auto const len = varint(i);
        if (!len) return boost::none;
        auto const count = static_cast<size_t>(*len);
        auto s = std::make_shared<Impl>(Storage());
        s->_value().resize(count);
        // Only index element 0 when it exists: `vec[0]` on an empty vector
        // is undefined behaviour.
        if (count > 0 && !next_chunk(&s->_value()[0], count)) return boost::none;
        return Value<T>(s);
    }

    // Decodes the payload of a MediumInteger tag that announces n bytes.
    boost::optional<Value<T>> next_medium_integer(size_t n) {
        if (n <= 8) {
            auto const raw = next_unsigned(n);
            if (!raw) return boost::none;
            uint64_t v = *raw;
            // The payload is a signed big-endian number (the 9-byte case
            // below relies on a leading sign byte): propagate its top bit
            // when fewer than 8 bytes were supplied.
            if (n < 8 && ((v >> (8 * n - 1)) & 1) != 0) {
                v |= ~uint64_t(0) << (8 * n);
            }
            return Value<T>::from_int(int64_t(v));
        }
        if (n == 9) {
            // We can only handle this if it's unsigned and the first byte is 0.
            if (i.get() != 0) return boost::none;
            auto const raw = next_unsigned(8);
            if (!raw) return boost::none;
            return Value<T>::from_unsigned(*raw);
        }
        return boost::none;
    }

public:
    // Reads a little-endian base-128 varint: 7 payload bits per byte, least
    // significant group first, high bit set on every byte except the last.
    // Returns boost::none on end of input or when no terminating byte is
    // seen within 9 bytes.
    static boost::optional<uint64_t> varint(std::istream &i) {
        uint64_t n = 0;
        // Can read max 9 bytes, each with 7 bits of payload, for 9*7 = 63 bits.
        for (size_t count = 0; count < 9; count++) {
            int const b = i.get();
            // get() signals end of input with traits_type::eof(); a plain
            // zero byte is valid data.
            if (b == std::istream::traits_type::eof()) return boost::none;
            // Widen before shifting: the shift reaches 56 bits.
            n |= uint64_t(b & 0x7f) << (count * 7);
            if ((b & 0x80) == 0) return n;
        }
        return boost::none;
    }

    // `i` is the input stream; `decodeEmbedded` converts a decoded generic
    // value into the embedded type T.
    BinaryReader(std::istream& i, std::function<std::shared_ptr<T>(Value<>)> decodeEmbedded) :
        i(i),
        decodeEmbedded(decodeEmbedded)
    {}

    // Reads 4 bytes, most significant first, and reinterprets them as a float.
    boost::optional<float> next_float() {
        auto const bits = next_unsigned(sizeof(uint32_t));
        if (!bits) return boost::none;
        uint32_t const n = uint32_t(*bits);
        float f;
        std::memcpy(&f, &n, sizeof(f));
        return f;
    }

    // Reads 8 bytes, most significant first, and reinterprets them as a double.
    boost::optional<double> next_double() {
        auto const bits = next_unsigned(sizeof(uint64_t));
        if (!bits) return boost::none;
        uint64_t const n = *bits;
        double d;
        std::memcpy(&d, &n, sizeof(d));
        return d;
    }

    // Reads n bytes (n <= 8) as a big-endian unsigned integer. Returns
    // boost::none, without consuming input, when n exceeds 8, and boost::none
    // when the input ends early.
    boost::optional<uint64_t> next_unsigned(size_t n) {
        // Fixed-size buffer instead of a variable-length array, which is not
        // standard C++.
        uint8_t buf[sizeof(uint64_t)];
        if (n > sizeof(buf)) return boost::none;
        if (!next_chunk(buf, n)) return boost::none;
        uint64_t v = 0;
        for (size_t j = 0; j < n; j++) {
            v = (v << 8) | buf[j];
        }
        return v;
    }

    // Decodes the next complete value; boost::none on failure, end of input,
    // or when an End tag is encountered where a value was expected.
    boost::optional<Value<T>> next() {
        bool end_sentinel = false;
        return _next(end_sentinel);
    }

    // Like next(), but additionally reports through `end_sentinel` whether
    // the reason for returning boost::none was an End tag, which lets
    // compound decoders tell "body finished" from "error".
    boost::optional<Value<T>> _next(bool& end_sentinel) {
        while (true) {
            end_sentinel = false;
            int const raw = i.get();
            if (raw == std::istream::traits_type::eof()) return boost::none;

            // Tag ranges are tested explicitly rather than with the
            // non-standard `case lo ... hi` extension.
            if (raw >= int(BinaryTag::SmallInteger_lo) && raw <= int(BinaryTag::SmallInteger_hi)) {
                int64_t const n = int64_t(raw) - int64_t(BinaryTag::SmallInteger_lo);
                // Offsets 0..12 are themselves; 13..15 stand for -3..-1.
                return Value<T>::from_int(n <= 12 ? n : n - 16);
            }
            if (raw >= int(BinaryTag::MediumInteger_lo) && raw <= int(BinaryTag::MediumInteger_hi)) {
                return next_medium_integer(size_t(raw - int(BinaryTag::MediumInteger_lo) + 1));
            }

            switch (BinaryTag(raw)) {
            case BinaryTag::False:
                return Value<T>::from_bool(false);
            case BinaryTag::True:
                return Value<T>::from_bool(true);
            case BinaryTag::Float: {
                auto const f = next_float();
                if (!f) return boost::none;
                return Value<T>::from_float(*f);
            }
            case BinaryTag::Double: {
                auto const d = next_double();
                if (!d) return boost::none;
                return Value<T>::from_double(*d);
            }
            case BinaryTag::End:
                end_sentinel = true;
                return boost::none;
            case BinaryTag::Annotation:
                // Decode and drop the annotation, then read the value it
                // is attached to.
                if (!next()) return boost::none;
                continue;
            case BinaryTag::Embedded: {
                auto const inner = BinaryReader<GenericEmbedded>(i, &GenericEmbedded::wrap).next();
                if (!inner) return boost::none;
                return Value<T>::from_embedded(decodeEmbedded(*inner));
            }
            case BinaryTag::SignedInteger:
                // Not supported yet.
                return boost::none;
            case BinaryTag::String:
                return next_counted<String<T>, std::string>();
            case BinaryTag::ByteString:
                return next_counted<ByteString<T>, std::vector<uint8_t>>();
            case BinaryTag::Symbol:
                return next_counted<Symbol<T>, std::string>();
            case BinaryTag::Record: {
                auto const label = next();
                if (!label) return boost::none;
                auto r = std::make_shared<Record<T>>(*label);
                while (true) {
                    bool end_rec = false;
                    auto v = _next(end_rec);
                    if (end_rec) return Value<T>(r);
                    if (!v) return boost::none;
                    r->fields.push_back(*v);
                }
            }
            case BinaryTag::Sequence: {
                auto s = std::make_shared<Sequence<T>>();
                while (true) {
                    bool end_rec = false;
                    auto v = _next(end_rec);
                    if (end_rec) return Value<T>(s);
                    if (!v) return boost::none;
                    s->values.push_back(*v);
                }
            }
            case BinaryTag::Set: {
                auto s = std::make_shared<Set<T>>();
                while (true) {
                    bool end_rec = false;
                    auto v = _next(end_rec);
                    if (end_rec) return Value<T>(s);
                    if (!v) return boost::none;
                    s->values.insert(*v);
                }
            }
            case BinaryTag::Dictionary: {
                auto s = std::make_shared<Dictionary<T>>();
                while (true) {
                    bool end_rec = false;
                    auto k = _next(end_rec);
                    if (end_rec) return Value<T>(s);
                    if (!k) return boost::none;
                    // A key must be followed by a value; an End tag here
                    // is an error.
                    auto v = next();
                    if (!v) return boost::none;
                    s->values.emplace(*k, *v);
                }
            }
            default:
                return boost::none;
            }
        }
    }
};
}