preserves/implementations/cpp/preserves_binary_reader.hpp

190 lines
7.9 KiB
C++

#pragma once
#include <cstdint>
#include <cstring>
#include <iostream>
#include <functional>
#include <type_traits>
namespace Preserves {
/// Decodes `Value<T>` items from a binary input stream.
///
/// Each item starts with one tag byte (a `BinaryTag`) that selects how the
/// following bytes are interpreted. Every reading function reports failure
/// (truncated input, unknown tag, unsupported length) by returning
/// `boost::none`.
///
/// The reader keeps a reference to the stream it was constructed with, so
/// that stream must outlive the reader.
///
/// @tparam T  Type of embedded values; `decodeEmbedded` produces a
///            `std::shared_ptr<T>` from a generically decoded `Value<>`.
template <typename T = class GenericEmbedded>
class BinaryReader {
    std::istream& i;
    std::function<std::shared_ptr<T>(Value<>)> decodeEmbedded;

    /// Reads exactly `n` bytes from the stream into `p`.
    /// @return true only if the stream is still in a good state afterwards.
    bool next_chunk(void* p, size_t n) {
        i.read(static_cast<char *>(p), n);
        return i.good();
    }

    /// Resizes `storage` to `len` elements and fills it from the stream.
    ///
    /// A zero-length payload never touches `storage[0]`, which would be
    /// undefined behaviour on an empty `std::vector`.
    ///
    /// NOTE(review): `len` comes straight from the input, so a hostile length
    /// prefix makes `resize` attempt a huge allocation (and possibly throw)
    /// before any payload byte is read. Consider bounding it if the input is
    /// untrusted.
    ///
    /// @return true if all `len` bytes were read.
    template <typename Buffer>
    bool read_payload(Buffer& storage, size_t len) {
        storage.resize(len);
        if (len == 0) return i.good();
        return next_chunk(&storage[0], len);
    }

public:
    /// Reads a little-endian base-128 varint: each byte carries 7 payload
    /// bits, and a set top bit means another byte follows.
    ///
    /// @param i  Stream to read from.
    /// @return the decoded value, or `boost::none` if the stream ends first
    ///         or the encoding is longer than 9 bytes.
    static boost::optional<uint64_t> varint(std::istream &i) {
        uint64_t n = 0;
        // Can read max 9 bytes, each with 7 bits of payload, for 9*7 = 63 bits.
        for (size_t count = 0; count < 9; count++) {
            const int b = i.get();
            if (b == std::istream::traits_type::eof()) return boost::none;
            // Widen before shifting: the shift count reaches 56, which would
            // overflow (and be undefined) if performed on a plain int.
            n |= static_cast<uint64_t>(b & 0x7f) << (count * 7);
            if ((b & 0x80) == 0) {
                return n;
            }
        }
        return boost::none;
    }

    /// Constructs a reader whose embedded values are wrapped with
    /// `GenericEmbedded::wrap`.
    ///
    /// This is a constrained template so that it simply drops out of overload
    /// resolution when `T` is not `GenericEmbedded`. Putting the `enable_if`
    /// in the parameter type of a non-template constructor makes every other
    /// instantiation of the class ill-formed.
    template <typename U = T,
              typename = typename std::enable_if<std::is_same<U, GenericEmbedded>::value>::type>
    BinaryReader(std::istream& i) :
        BinaryReader(i, &GenericEmbedded::wrap)
    {}

    /// Constructs a reader over `i` that converts embedded values with
    /// `decodeEmbedded`.
    BinaryReader(std::istream& i, std::function<std::shared_ptr<T>(Value<>)> decodeEmbedded) :
        i(i),
        decodeEmbedded(std::move(decodeEmbedded))
    {}

    /// Reads 8 bytes, most significant byte first, and reinterprets the
    /// resulting 64-bit pattern as a `double`.
    /// @return the value, or `boost::none` if fewer than 8 bytes are available.
    boost::optional<double> next_double() {
        static_assert(sizeof(double) == sizeof(uint64_t),
                      "next_double requires a 64-bit double");
        uint8_t buf[8];
        if (!next_chunk(buf, sizeof(buf))) return boost::none;
        uint64_t bits = 0;
        for (size_t j = 0; j < sizeof(buf); j++) bits = (bits << 8) | buf[j];
        double d;
        memcpy(&d, &bits, sizeof(d));
        return d;
    }

    /// Reads an `n`-byte big-endian integer that fits in a machine word.
    ///
    /// @param n                Number of bytes to read; at most 8.
    /// @param always_unsigned  When true, the top bit of the first byte is
    ///                         treated as data rather than as a sign bit.
    /// @return the integer as a `Value<T>` (built from `int64_t` when
    ///         negative, from `uint64_t` otherwise); zero when `n == 0`;
    ///         `boost::none` when `n > 8` (nothing is consumed) or the stream
    ///         is too short.
    boost::optional<Value<T>> next_machineword(size_t n, bool always_unsigned) {
        uint8_t buf[8];
        if (n > sizeof(buf)) return boost::none;
        if (n == 0) return Value<T>::from_int(uint64_t(0));
        if (!next_chunk(buf, n)) return boost::none;

        const bool negative = (buf[0] & 0x80) && !always_unsigned;
        // Accumulate in an unsigned word; starting from all-ones performs the
        // sign extension without ever left-shifting a negative value.
        uint64_t bits = negative ? ~uint64_t(0) : uint64_t(0);
        for (size_t j = 0; j < n; j++) bits = (bits << 8) | buf[j];

        if (negative) {
            int64_t v;
            memcpy(&v, &bits, sizeof(v));
            return Value<T>::from_int(v);
        }
        return Value<T>::from_int(bits);
    }

    /// Reads `n` raw bytes and stores them, unmodified, in a `BigNum<T>`.
    /// @return the wrapped value, or `boost::none` if the stream is too short.
    boost::optional<Value<T>> next_bignum(size_t n) {
        auto b = std::make_shared<BigNum<T>>(std::vector<uint8_t>());
        if (!read_payload(b->_value(), n)) return boost::none;
        return Value<T>(b);
    }

    /// Reads the next complete value from the stream.
    /// @return the value, or `boost::none` on end of input, malformed input,
    ///         or a stray end marker.
    boost::optional<Value<T>> next() {
        bool end_sentinel;
        return _next(end_sentinel);
    }

    /// Reads the next item, distinguishing an end marker from a failure.
    ///
    /// @param[out] end_sentinel  Set to true when the item read was
    ///                           `BinaryTag::End` (the result is then
    ///                           `boost::none`); false in every other case.
    /// @return the decoded value, or `boost::none`.
    boost::optional<Value<T>> _next(bool& end_sentinel) {
        end_sentinel = false;
        const int raw_tag = i.get();
        if (raw_tag == std::istream::traits_type::eof()) return boost::none;

        switch (static_cast<BinaryTag>(raw_tag)) {
            case BinaryTag::False: return Value<T>::from_bool(false);
            case BinaryTag::True: return Value<T>::from_bool(true);
            case BinaryTag::End: end_sentinel = true; return boost::none;

            case BinaryTag::Annotation: {
                // Gather a run of annotations: after each one, peek for a
                // further Annotation tag and consume it if present. The value
                // that follows the run is the annotated one.
                std::vector<Value<T>> annotations;
                while (true) {
                    auto ann = next();
                    if (!ann) return boost::none;
                    annotations.push_back(*ann);
                    if (BinaryTag(i.peek()) != BinaryTag::Annotation) break;
                    i.get();
                }
                auto underlying = next();
                if (!underlying) return boost::none;
                return Value<T>(new AnnotatedValue<T>(std::move(annotations), *underlying));
            }

            case BinaryTag::Embedded:
                // The payload is decoded generically by a fresh reader on the
                // same stream, then converted by the user-supplied decoder.
                return BinaryReader<>(i).next().map(decodeEmbedded).map(Value<T>::from_embedded);

            case BinaryTag::Ieee754: return varint(i).flat_map([&](size_t len)-> boost::optional<Value<T>> {
                // Only 8-byte floats are supported.
                switch (len) {
                    case 8: return next_double().map(Value<T>::from_double);
                    default: return boost::none;
                }
            });

            case BinaryTag::SignedInteger: return varint(i).flat_map([&](size_t n)-> boost::optional<Value<T>> {
                if (n == 0) return Value<T>::from_int(uint64_t(0));
                if (n < 9) return next_machineword(n, false);
                if (n == 9) {
                    // We can handle this with uint64_t if it's unsigned and the first byte is 0.
                    if (i.peek() == 0) {
                        i.get();
                        return next_machineword(8, true);
                    }
                }
                return next_bignum(n);
            });

            case BinaryTag::String: return varint(i).flat_map([&](size_t len)-> boost::optional<Value<T>> {
                auto s = std::make_shared<String<T>>(std::string());
                if (!read_payload(s->_value(), len)) return boost::none;
                return Value<T>(s);
            });

            case BinaryTag::ByteString: return varint(i).flat_map([&](size_t len)-> boost::optional<Value<T>> {
                auto s = std::make_shared<ByteString<T>>(std::vector<uint8_t>());
                if (!read_payload(s->_value(), len)) return boost::none;
                return Value<T>(s);
            });

            case BinaryTag::Symbol: return varint(i).flat_map([&](size_t len)-> boost::optional<Value<T>> {
                auto s = std::make_shared<Symbol<T>>(std::string());
                if (!read_payload(s->_value(), len)) return boost::none;
                return Value<T>(s);
            });

            case BinaryTag::Record: return next().flat_map([&](auto label)-> boost::optional<Value<T>> {
                // A label value, then fields until the end marker.
                auto r = std::make_shared<Record<T>>(label);
                while (true) {
                    bool end_rec = false;
                    auto v = _next(end_rec);
                    if (end_rec) return Value<T>(r);
                    if (!v) return boost::none;
                    r->fields.push_back(*v);
                }
            });

            case BinaryTag::Sequence: {
                auto s = std::make_shared<Sequence<T>>();
                while (true) {
                    bool end_rec = false;
                    auto v = _next(end_rec);
                    if (end_rec) return Value<T>(s);
                    if (!v) return boost::none;
                    s->values.push_back(*v);
                }
            }

            case BinaryTag::Set: {
                auto s = std::make_shared<Set<T>>();
                while (true) {
                    bool end_rec = false;
                    auto v = _next(end_rec);
                    if (end_rec) return Value<T>(s);
                    if (!v) return boost::none;
                    s->values.insert(*v);
                }
            }

            case BinaryTag::Dictionary: {
                // Alternating keys and values; the end marker is only valid
                // where a key is expected.
                auto s = std::make_shared<Dictionary<T>>();
                while (true) {
                    bool end_rec = false;
                    auto k = _next(end_rec);
                    if (end_rec) return Value<T>(s);
                    if (!k) return boost::none;
                    auto v = next();
                    if (!v) return boost::none;
                    s->values.emplace(*k, *v);
                }
            }

            default:
                return boost::none;
        }
    }
};
}