1086 lines
38 KiB
C
1086 lines
38 KiB
C
|
/// SPDX-License-Identifier: Apache-2.0
|
||
|
/// SPDX-FileCopyrightText: Copyright © 2022 Tony Garnock-Jones <tonyg@leastfixedpoint.com>
|
||
|
|
||
|
#ifndef libpreserves_26109214_f3bd_44c8_95ba_8c650c954965
|
||
|
#define libpreserves_26109214_f3bd_44c8_95ba_8c650c954965
|
||
|
|
||
|
// Single file header. #define PRESERVES_IMPLEMENTATION to get the implementations.
|
||
|
|
||
|
// Build configuration.
//  - With PRESERVES_IMPLEMENTATION defined, PRESERVES_INLINE expands to
//    nothing and PRESERVES_IMPLEMENTATION_CHUNK(...) passes its arguments
//    through unchanged.
//  - Without it, PRESERVES_INLINE is `static inline` and
//    PRESERVES_IMPLEMENTATION_CHUNK(...) expands to nothing.
#ifndef PRESERVES_IMPLEMENTATION
#define PRESERVES_INLINE static inline
#define PRESERVES_IMPLEMENTATION_CHUNK(...)
#else
#define PRESERVES_INLINE
#define PRESERVES_IMPLEMENTATION_CHUNK(...) __VA_ARGS__
#endif

// PRESERVES_OUTOFLINE(declaration, body...) always emits
// `extern declaration;` and, only when PRESERVES_IMPLEMENTATION is defined,
// additionally emits `inline declaration body`.
#define PRESERVES_OUTOFLINE(declaration, ...)                           \
  extern declaration;                                                   \
  PRESERVES_IMPLEMENTATION_CHUNK(inline declaration __VA_ARGS__)
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
#include <stdint.h>
|
||
|
#include <stdbool.h>
|
||
|
#include <string.h>
|
||
|
#include <arpa/inet.h> // for ntohl, htonl
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////
|
||
|
// General-purpose fat pointer, for e.g. strings, binary blobs, etc.
|
||
|
|
||
|
// Fat pointer: a base address plus a byte length.
typedef struct preserves_bytes {
  // When set, the resize/extend helpers abort() and preserves_free_bytes
  // does not free `ptr`.
  bool borrowed:1;
  // Length in bytes; one bit narrower than size_t to leave room for `borrowed`.
  size_t len:(sizeof(size_t) * 8 - 1);
  // Start of the data; NULL for a freshly created (empty) value.
  void *ptr;
} preserves_bytes_t;
|
||
|
|
||
|
// Number of whole `element_type` items that fit in `bytes_ptr->len` bytes.
#define PRESERVES_ARRAY_ELEMENT_COUNT(bytes_ptr, element_type)  \
  ((bytes_ptr)->len / sizeof(element_type))

// Lvalue for item `index` when `bytes_ptr->ptr` is viewed as an array of
// `element_type`. No bounds checking is performed.
#define PRESERVES_ARRAY_ELEMENT(bytes_ptr, element_type, index) \
  (((element_type *) (bytes_ptr)->ptr)[index])

// Resize the underlying buffer to hold `size` items of `element_type`;
// evaluates to the result of preserves_resize_bytes (0 or -1).
#define PRESERVES_RESIZE_ARRAY(bytes_ptr, element_type, size)   \
  preserves_resize_bytes(bytes_ptr, sizeof(element_type) * (size))

// Declares two locals: `length_var` (the item count) and `base_ptr_var`
// (pointer to item 0). Expands to two declarations, so it must be used
// where declarations are permitted.
#define PRESERVES_ARRAY_ACCESS(bytes_ptr, element_type, length_var, base_ptr_var) \
  size_t length_var = PRESERVES_ARRAY_ELEMENT_COUNT(bytes_ptr, element_type); \
  element_type *base_ptr_var = &PRESERVES_ARRAY_ELEMENT(bytes_ptr, element_type, 0)
|
||
|
|
||
|
// Returns an empty, owned (non-borrowed) byte buffer: length 0, NULL pointer.
PRESERVES_INLINE preserves_bytes_t preserves_create_bytes(void) {
  preserves_bytes_t empty = { .borrowed = 0, .len = 0, .ptr = NULL };
  return empty;
}
|
||
|
|
||
|
// Resizes an owned buffer to exactly `size` bytes.
//  - Aborts if `bs` is borrowed.
//  - size == 0 frees the storage and resets `bs` to empty.
//  - When growing, the newly added tail is zero-filled.
// Returns 0 on success, or -1 if realloc fails (`bs` is then left unchanged).
PRESERVES_INLINE int preserves_resize_bytes(preserves_bytes_t *bs, size_t size) {
  if (bs->borrowed) abort();

  if (size != 0) {
    void *grown = realloc(bs->ptr, size);
    if (grown == NULL) return -1;

    size_t old_len = bs->len;
    bs->ptr = grown;
    if (old_len < size) {
      memset((uint8_t *) grown + old_len, 0, size - old_len);
    }
    bs->len = size;
    return 0;
  }

  free(bs->ptr);
  bs->ptr = NULL;
  bs->len = 0;
  return 0;
}
|
||
|
|
||
|
// Releases the storage of an owned buffer (borrowed buffers are not freed)
// and resets `*bs` to the empty, non-borrowed state either way.
PRESERVES_INLINE void preserves_free_bytes(preserves_bytes_t *bs) {
  if (bs->borrowed == 0) {
    preserves_resize_bytes(bs, 0);
  }
  bs->borrowed = 0;
  bs->len = 0;
  bs->ptr = NULL;
}
|
||
|
|
||
|
// Moves `*src` into `*dest`: whatever `dest` held is freed first, and `src`
// is left empty afterwards.
PRESERVES_INLINE void preserves_bytes_move(preserves_bytes_t *dest, preserves_bytes_t *src) {
  preserves_free_bytes(dest);
  preserves_bytes_t taken = *src;
  *src = preserves_create_bytes();
  *dest = taken;
}
|
||
|
|
||
|
// Appends a copy of `src`'s bytes to the end of the owned buffer `dest`.
//  - Aborts if `dest` is borrowed.
//  - Appending an empty `src` is a successful no-op. This avoids
//    realloc(ptr, 0), which may legitimately return NULL, and avoids handing
//    memcpy a NULL source pointer.
// Returns 0 on success, or -1 if realloc fails (`dest` is then unchanged).
// NOTE(review): `src` must not point into `dest`'s own storage, since realloc
// may move that storage before the copy happens.
PRESERVES_INLINE int preserves_extend_bytes(preserves_bytes_t *dest, preserves_bytes_t src) {
  if (dest->borrowed) abort();
  if (src.len == 0) return 0;

  size_t old_len = dest->len;
  size_t add_len = src.len;

  void *ptr = realloc(dest->ptr, old_len + add_len);
  if (ptr == NULL) return -1;

  dest->ptr = ptr;
  memcpy(((uint8_t *) ptr) + old_len, src.ptr, add_len);
  dest->len = old_len + add_len;
  return 0;
}
|
||
|
|
||
|
// Returns a borrowed view of up to `len` bytes of `bs` starting at `offset`.
//  - An empty buffer is returned when `offset` is at or past the end, or
//    when `len` exceeds the total length of `bs`.
//  - Otherwise the view is clamped so that it ends at the end of `bs`.
PRESERVES_INLINE preserves_bytes_t preserves_bytes_subsequence(preserves_bytes_t *bs,
                                                               size_t offset,
                                                               size_t len) {
  size_t total = bs->len;
  if ((offset >= total) || (len > total)) return preserves_create_bytes();

  size_t available = total - offset;
  if (len > available) len = available;

  preserves_bytes_t view = {
    .borrowed = 1,
    .len = len,
    .ptr = ((uint8_t *) bs->ptr) + offset,
  };
  return view;
}
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////
|
||
|
// Memory arenas
|
||
|
|
||
|
typedef struct preserves_pool {
|
||
|
size_t pagesize;
|
||
|
preserves_bytes_t page_pointers; // for allocations smaller than pagesize
|
||
|
preserves_bytes_t large_block_pointers; // for allocations larger than or equal to pagesize
|
||
|
size_t next_page;
|
||
|
uint8_t *alloc_block_base;
|
||
|
size_t alloc_block_used;
|
||
|
} preserves_pool_t;
|
||
|
|
||
|
// Creates an empty pool with the given page size. No memory is allocated
// here; the current page is marked as fully used so the first allocation
// selects a page.
PRESERVES_INLINE preserves_pool_t preserves_create_pool(size_t pagesize) {
  preserves_pool_t pool = {
    .pagesize = pagesize,
    .page_pointers = preserves_create_bytes(),
    .large_block_pointers = preserves_create_bytes(),
    .next_page = 0,
    .alloc_block_base = NULL,
    .alloc_block_used = pagesize,
  };
  return pool;
}
|
||
|
|
||
|
// Frees every block recorded in `bl` (an array of `void *`), then frees the
// array itself, leaving `bl` empty.
PRESERVES_INLINE void preserves_free_blocklist(preserves_bytes_t *bl) {
  size_t total = PRESERVES_ARRAY_ELEMENT_COUNT(bl, void *);
  size_t i = 0;
  while (i < total) {
    free(PRESERVES_ARRAY_ELEMENT(bl, void *, i));
    i++;
  }
  preserves_free_bytes(bl);
}
|
||
|
|
||
|
// Resets the pool for reuse: large blocks are freed, while pages stay
// allocated and will be handed out again starting from page 0.
PRESERVES_INLINE void preserves_recycle_pool(preserves_pool_t *pool) {
  pool->alloc_block_used = pool->pagesize;
  pool->alloc_block_base = NULL;
  pool->next_page = 0;
  preserves_free_blocklist(&pool->large_block_pointers);
}
|
||
|
|
||
|
// Releases everything owned by the pool: first the large blocks (via
// preserves_recycle_pool), then the pages themselves.
PRESERVES_INLINE void preserves_free_pool(preserves_pool_t *pool) {
  preserves_recycle_pool(pool);
  preserves_bytes_t *pages = &pool->page_pointers;
  preserves_free_blocklist(pages);
}
|
||
|
|
||
|
// Allocates a zeroed block of `blocksize` bytes and appends its address to
// the block list `bl`. Returns the block, or NULL if either the block or
// the list growth could not be allocated (nothing is leaked in that case).
PRESERVES_OUTOFLINE(void *_preserves_pool_record_block(preserves_bytes_t *bl, size_t blocksize), {
  void *block = calloc(1, blocksize);
  if (block == NULL) return NULL;

  if (preserves_resize_bytes(bl, bl->len + sizeof(void *)) == -1) {
    free(block);
    return NULL;
  }

  // The freshly added (zeroed) slot is the last one in the list.
  size_t last = PRESERVES_ARRAY_ELEMENT_COUNT(bl, void *) - 1;
  PRESERVES_ARRAY_ELEMENT(bl, void *, last) = block;
  return block;
});
|
||
|
|
||
|
PRESERVES_OUTOFLINE
|
||
|
(
|
||
|
int _preserves_pool_add_page_and_alloc(preserves_pool_t *pool, preserves_bytes_t *bs, size_t count), {
|
||
|
if (pool->next_page >= PRESERVES_ARRAY_ELEMENT_COUNT(&pool->page_pointers, void *)) {
|
||
|
void *ptr = _preserves_pool_record_block(&pool->page_pointers, pool->pagesize);
|
||
|
if (ptr == NULL) return -1;
|
||
|
pool->alloc_block_base = ptr;
|
||
|
pool->next_page = PRESERVES_ARRAY_ELEMENT_COUNT(&pool->page_pointers, void *);
|
||
|
} else {
|
||
|
pool->alloc_block_base = PRESERVES_ARRAY_ELEMENT(&pool->page_pointers, void *, pool->next_page);
|
||
|
pool->next_page++;
|
||
|
}
|
||
|
|
||
|
pool->alloc_block_used = count;
|
||
|
*bs = (preserves_bytes_t) { .borrowed = 1, .len = count, .ptr = pool->alloc_block_base };
|
||
|
return 0;
|
||
|
});
|
||
|
|
||
|
PRESERVES_INLINE int preserves_pool_alloc_bytes_align(preserves_pool_t *pool,
|
||
|
preserves_bytes_t *bs,
|
||
|
size_t count,
|
||
|
size_t alignment) {
|
||
|
preserves_free_bytes(bs);
|
||
|
if (count == 0) return 0;
|
||
|
|
||
|
count = (count + alignment - 1) & (~(alignment - 1));
|
||
|
// ^ round up to nearest `alignment`-byte boundary
|
||
|
|
||
|
if (count > pool->pagesize) {
|
||
|
void *ptr = _preserves_pool_record_block(&pool->large_block_pointers, count);
|
||
|
if (ptr == NULL) return -1;
|
||
|
*bs = (preserves_bytes_t) { .borrowed = 1, .len = count, .ptr = ptr };
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
if (pool->alloc_block_used + count <= pool->pagesize) {
|
||
|
*bs = (preserves_bytes_t) {
|
||
|
.borrowed = 1,
|
||
|
.len = count,
|
||
|
.ptr = pool->alloc_block_base + pool->alloc_block_used,
|
||
|
};
|
||
|
pool->alloc_block_used += count;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
return _preserves_pool_add_page_and_alloc(pool, bs, count);
|
||
|
}
|
||
|
|
||
|
PRESERVES_INLINE int preserves_pool_alloc_bytes(preserves_pool_t *pool,
|
||
|
preserves_bytes_t *bs,
|
||
|
size_t count) {
|
||
|
return preserves_pool_alloc_bytes_align(pool, bs, count, 16);
|
||
|
}
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////
|
||
|
// Binary codec details
|
||
|
|
||
|
// Tag byte values recognised by the binary reader.
typedef enum preserves_binary_format_tag {
  // Atoms and markers
  PRESERVES_BINARY_FORMAT_TAG_FALSE = 0x80,
  PRESERVES_BINARY_FORMAT_TAG_TRUE = 0x81,
  PRESERVES_BINARY_FORMAT_TAG_FLOAT = 0x82,
  PRESERVES_BINARY_FORMAT_TAG_DOUBLE = 0x83,
  PRESERVES_BINARY_FORMAT_TAG_END = 0x84,
  PRESERVES_BINARY_FORMAT_TAG_ANNOTATION = 0x85,
  PRESERVES_BINARY_FORMAT_TAG_EMBEDDED = 0x86,

  // Inclusive bounds of the "small integer" tag range
  PRESERVES_BINARY_FORMAT_TAG_SMALL_INTEGER_LO = 0x90,
  PRESERVES_BINARY_FORMAT_TAG_SMALL_INTEGER_HI = 0x9F,

  // Inclusive bounds of the "medium integer" tag range
  PRESERVES_BINARY_FORMAT_TAG_MEDIUM_INTEGER_LO = 0xA0,
  PRESERVES_BINARY_FORMAT_TAG_MEDIUM_INTEGER_HI = 0xAF,

  // Integer and string-like atoms
  PRESERVES_BINARY_FORMAT_TAG_LARGE_INTEGER = 0xB0,
  PRESERVES_BINARY_FORMAT_TAG_STRING = 0xB1,
  PRESERVES_BINARY_FORMAT_TAG_BYTE_STRING = 0xB2,
  PRESERVES_BINARY_FORMAT_TAG_SYMBOL = 0xB3,

  // Compound values
  PRESERVES_BINARY_FORMAT_TAG_RECORD = 0xB4,
  PRESERVES_BINARY_FORMAT_TAG_SEQUENCE = 0xB5,
  PRESERVES_BINARY_FORMAT_TAG_SET = 0xB6,
  PRESERVES_BINARY_FORMAT_TAG_DICTIONARY = 0xB7,
} preserves_binary_format_tag_t;
|
||
|
|
||
|
PRESERVES_OUTOFLINE
|
||
|
(
|
||
|
char const *preserves_binary_format_tag_name(preserves_binary_format_tag_t tag), {
|
||
|
switch (tag) {
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_FALSE: return "FALSE";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_TRUE: return "TRUE";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_FLOAT: return "FLOAT";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_DOUBLE: return "DOUBLE";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_END: return "END";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_ANNOTATION: return "ANNOTATION";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_EMBEDDED: return "EMBEDDED";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_LARGE_INTEGER: return "LARGE_INTEGER";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_STRING: return "STRING";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_BYTE_STRING: return "BYTE_STRING";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_SYMBOL: return "SYMBOL";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_RECORD: return "RECORD";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_SEQUENCE: return "SEQUENCE";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_SET: return "SET";
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_DICTIONARY: return "DICTIONARY";
|
||
|
default:
|
||
|
if ((tag >= PRESERVES_BINARY_FORMAT_TAG_SMALL_INTEGER_LO) &&
|
||
|
(tag <= PRESERVES_BINARY_FORMAT_TAG_SMALL_INTEGER_HI)) {
|
||
|
return "SMALL_INTEGER";
|
||
|
} else if ((tag >= PRESERVES_BINARY_FORMAT_TAG_MEDIUM_INTEGER_LO) &&
|
||
|
(tag <= PRESERVES_BINARY_FORMAT_TAG_MEDIUM_INTEGER_HI)) {
|
||
|
return "MEDIUM_INTEGER";
|
||
|
} else {
|
||
|
return "UNKNOWN";
|
||
|
}
|
||
|
}
|
||
|
});
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////
|
||
|
// Index representation
|
||
|
|
||
|
// Kind of value described by an index entry (stored in a 4-bit field of
// preserves_index_entry_t).
typedef enum preserves_type_tag {
  // Atoms with an immediate payload
  PRESERVES_BOOLEAN = 0,
  PRESERVES_FLOAT,
  PRESERVES_DOUBLE,

  // Integers and string-like atoms
  PRESERVES_SIGNED_INTEGER,
  PRESERVES_STRING,
  PRESERVES_BYTE_STRING,
  PRESERVES_SYMBOL,

  // Compound values
  PRESERVES_RECORD,
  PRESERVES_SEQUENCE,
  PRESERVES_SET,
  PRESERVES_DICTIONARY,

  // Wrappers and the terminating marker
  PRESERVES_EMBEDDED,
  PRESERVES_ANNOTATION,
  PRESERVES_END_MARKER,
} preserves_type_tag_t;
|
||
|
|
||
|
// Returns a static, human-readable name for a type tag ("UNKNOWN" for
// values outside the enumeration).
PRESERVES_OUTOFLINE(char const *preserves_type_tag_name(preserves_type_tag_t type), {
  char const *name = "UNKNOWN";
  switch (type) {
    case PRESERVES_BOOLEAN: name = "BOOLEAN"; break;
    case PRESERVES_FLOAT: name = "FLOAT"; break;
    case PRESERVES_DOUBLE: name = "DOUBLE"; break;
    case PRESERVES_SIGNED_INTEGER: name = "SIGNED_INTEGER"; break;
    case PRESERVES_STRING: name = "STRING"; break;
    case PRESERVES_BYTE_STRING: name = "BYTE_STRING"; break;
    case PRESERVES_SYMBOL: name = "SYMBOL"; break;
    case PRESERVES_RECORD: name = "RECORD"; break;
    case PRESERVES_SEQUENCE: name = "SEQUENCE"; break;
    case PRESERVES_SET: name = "SET"; break;
    case PRESERVES_DICTIONARY: name = "DICTIONARY"; break;
    case PRESERVES_EMBEDDED: name = "EMBEDDED"; break;
    case PRESERVES_ANNOTATION: name = "ANNOTATION"; break;
    case PRESERVES_END_MARKER: name = "END_MARKER"; break;
    default: break;
  }
  return name;
});
|
||
|
|
||
|
// Reason a read finished; stored in the `_err` payload of an END_MARKER
// index entry.
typedef enum preserves_error_code {
  // Negative codes: SYSTEM_ERROR makes the reader return an error result
  // without an end marker.
  PRESERVES_END_SYSTEM_ERROR = -2,
  PRESERVES_END_NO_ERROR = -1,

  // Non-negative codes, numbered consecutively from zero.
  PRESERVES_END_EOF = 0,
  PRESERVES_END_MORE_INPUT_REMAINING,
  PRESERVES_END_INCOMPLETE_INPUT,
  PRESERVES_END_UNEXPECTED_END,
  PRESERVES_END_DICTIONARY_MISSING_VALUE,
  PRESERVES_END_RECORD_MISSING_LABEL,
  PRESERVES_END_VARINT_TOO_BIG,
  PRESERVES_END_INVALID_UTF8,
  PRESERVES_END_INVALID_TAG,
} preserves_error_code_t;
|
||
|
|
||
|
// Returns a static, human-readable name for an error code ("UNKNOWN" for
// values outside the enumeration).
PRESERVES_OUTOFLINE(char const *preserves_error_code_name(preserves_error_code_t code), {
  char const *name = "UNKNOWN";
  switch (code) {
    case PRESERVES_END_SYSTEM_ERROR: name = "SYSTEM_ERROR"; break;
    case PRESERVES_END_NO_ERROR: name = "NO_ERROR"; break;
    case PRESERVES_END_EOF: name = "EOF"; break;
    case PRESERVES_END_MORE_INPUT_REMAINING: name = "MORE_INPUT_REMAINING"; break;
    case PRESERVES_END_INCOMPLETE_INPUT: name = "INCOMPLETE_INPUT"; break;
    case PRESERVES_END_UNEXPECTED_END: name = "UNEXPECTED_END"; break;
    case PRESERVES_END_DICTIONARY_MISSING_VALUE: name = "DICTIONARY_MISSING_VALUE"; break;
    case PRESERVES_END_RECORD_MISSING_LABEL: name = "RECORD_MISSING_LABEL"; break;
    case PRESERVES_END_VARINT_TOO_BIG: name = "VARINT_TOO_BIG"; break;
    case PRESERVES_END_INVALID_UTF8: name = "INVALID_UTF8"; break;
    case PRESERVES_END_INVALID_TAG: name = "INVALID_TAG"; break;
    default: break;
  }
  return name;
});
|
||
|
|
||
|
// How the payload of an index entry is represented (stored in a 4-bit field
// of preserves_index_entry_t).
typedef enum preserves_index_entry_representation {
  PRESERVES_REPR_NONE = 0,

  // Integer representations
  PRESERVES_INT_SIGNED,
  PRESERVES_INT_UNSIGNED,
  PRESERVES_INT_LARGE_BINARY,
  PRESERVES_INT_LARGE_TEXT,

  // String-like representations
  PRESERVES_LITERAL,
  PRESERVES_ESCAPED,
  PRESERVES_HEX,
  PRESERVES_BASE64,
} preserves_index_entry_representation_t;
|
||
|
|
||
|
PRESERVES_OUTOFLINE
|
||
|
(
|
||
|
char const *preserves_index_entry_representation_name(preserves_index_entry_representation_t repr), {
|
||
|
switch (repr) {
|
||
|
case PRESERVES_REPR_NONE: return "REPR_NONE";
|
||
|
case PRESERVES_INT_SIGNED: return "INT_SIGNED";
|
||
|
case PRESERVES_INT_UNSIGNED: return "INT_UNSIGNED";
|
||
|
case PRESERVES_INT_LARGE_BINARY: return "INT_LARGE_BINARY";
|
||
|
case PRESERVES_INT_LARGE_TEXT: return "INT_LARGE_TEXT";
|
||
|
case PRESERVES_LITERAL: return "LITERAL";
|
||
|
case PRESERVES_ESCAPED: return "ESCAPED";
|
||
|
case PRESERVES_HEX: return "HEX";
|
||
|
case PRESERVES_BASE64: return "BASE64";
|
||
|
default: return "UNKNOWN";
|
||
|
}
|
||
|
});
|
||
|
|
||
|
/*
|
||
|
PRESERVES_BOOLEAN: repr==PRESERVES_REPR_NONE, len=0, data._boolean
|
||
|
PRESERVES_FLOAT: repr=PRESERVES_REPR_NONE, len=0, data._float
|
||
|
PRESERVES_DOUBLE: repr=PRESERVES_REPR_NONE, len=0, data._double
|
||
|
|
||
|
PRESERVES_SIGNED_INTEGER:
|
||
|
- repr==PRESERVES_INT_SIGNED -> len=0, data._signed
|
||
|
- repr==PRESERVES_INT_UNSIGNED -> len=0, data._unsigned
|
||
|
- repr==PRESERVES_INT_LARGE_BINARY -> len, data._unsigned as absolute offset within input
|
||
|
- repr==PRESERVES_INT_LARGE_TEXT -> len, data._unsigned as absolute offset within input
|
||
|
PRESERVES_STRING:
|
||
|
- repr=PRESERVES_LITERAL -> len, data._unsigned as absolute offset within input to utf-8 bytes
|
||
|
- repr=PRESERVES_ESCAPED -> len, data._unsigned as absolute offset within input to utf-8 bytes
|
||
|
that need String-style backslash-escapes interpreted
|
||
|
PRESERVES_BYTE_STRING:
|
||
|
- repr=PRESERVES_LITERAL -> len, data._unsigned as absolute offset within input to utf-8 bytes
|
||
|
- repr=PRESERVES_ESCAPED -> len, data._unsigned as absolute offset within input to utf-8 bytes
|
||
|
that need ByteString-style backslash-escapes interpreted
|
||
|
- repr=PRESERVES_HEX -> len, data._unsigned as absolute offset within input to ASCII bytes of hex
|
||
|
- repr=PRESERVES_BASE64 -> len, data._unsigned as absolute offset within input to ASCII bytes of base64
|
||
|
PRESERVES_SYMBOL:
|
||
|
- repr=PRESERVES_LITERAL -> len, data._unsigned as absolute offset within input to utf-8 bytes
|
||
|
- repr=PRESERVES_ESCAPED -> len, data._unsigned as absolute offset within input to utf-8 bytes
|
||
|
that need Symbol-style backslash-escapes interpreted
|
||
|
|
||
|
PRESERVES_RECORD, PRESERVES_SEQUENCE, PRESERVES_SET, PRESERVES_DICTIONARY:
|
||
|
- repr==PRESERVES_REPR_NONE,
|
||
|
- len counts number of items:
|
||
|
- PRESERVES_RECORD -> number of fields plus one (for the label)
|
||
|
- PRESERVES_SEQUENCE -> number of items
|
||
|
- PRESERVES_SET -> number of items
|
||
|
- PRESERVES_DICTIONARY -> twice the number of key-value pairs
|
||
|
- data._unsigned as relative offset within index to next item,
|
||
|
starting from this entry; zero means "no end known"
|
||
|
|
||
|
PRESERVES_EMBEDDED: repr==PRESERVES_REPR_NONE, len==0, following item is the embedded value
|
||
|
PRESERVES_ANNOTATION:
|
||
|
- repr=PRESERVES_REPR_NONE,
|
||
|
- len counts number of annotations,
|
||
|
- data._unsigned as relative offset within index to annotated
|
||
|
item, starting from this entry; zero means "no end known"
|
||
|
- the annotated item will not be a PRESERVES_ANNOTATION
|
||
|
|
||
|
PRESERVES_END_MARKER: repr==PRESERVES_REPR_NONE, len==0, data._err
|
||
|
*/
|
||
|
typedef struct preserves_index_entry {
|
||
|
preserves_type_tag_t type:4;
|
||
|
preserves_index_entry_representation_t repr:4;
|
||
|
uint64_t len:56;
|
||
|
|
||
|
union {
|
||
|
bool _boolean;
|
||
|
float _float;
|
||
|
double _double;
|
||
|
int64_t _signed;
|
||
|
uint64_t _unsigned;
|
||
|
preserves_error_code_t _err;
|
||
|
} data;
|
||
|
} preserves_index_entry_t;
|
||
|
|
||
|
#ifndef NDEBUG
|
||
|
extern void preserves_dump_index_entry(FILE* f, preserves_bytes_t *input, preserves_index_entry_t *i, bool add_newline);
|
||
|
#endif
|
||
|
|
||
|
typedef struct preserves_reader {
|
||
|
preserves_bytes_t input;
|
||
|
preserves_bytes_t index;
|
||
|
preserves_bytes_t stack;
|
||
|
size_t stack_top; /* ascending empty */
|
||
|
size_t input_pos; /* ascending full */
|
||
|
size_t index_pos; /* ascending empty */
|
||
|
bool annotation_tag_seen;
|
||
|
} preserves_reader_t;
|
||
|
|
||
|
typedef struct preserves_reader_result {
|
||
|
preserves_index_entry_t *index;
|
||
|
preserves_index_entry_t *end_marker;
|
||
|
} preserves_reader_result_t;
|
||
|
|
||
|
// Builds the result that signals a system error: both pointers are NULL.
PRESERVES_INLINE preserves_reader_result_t preserves_reader_error_result(void) {
  preserves_reader_result_t failed = { .index = NULL, .end_marker = NULL };
  return failed;
}
|
||
|
|
||
|
// Creates a reader with empty input, index and stack buffers and all
// positions at zero. Nothing is allocated here.
PRESERVES_INLINE preserves_reader_t preserves_create_reader(void) {
  preserves_reader_t reader = {
    .input = preserves_create_bytes(),
    .index = preserves_create_bytes(),
    .stack = preserves_create_bytes(),
    .stack_top = 0,
    .input_pos = 0,
    .index_pos = 0,
    .annotation_tag_seen = false,
  };
  return reader;
}
|
||
|
|
||
|
// Releases the reader's buffers and returns it to its freshly created state.
PRESERVES_OUTOFLINE(void preserves_free_reader(preserves_reader_t *r), {
  r->annotation_tag_seen = false;
  r->index_pos = 0;
  r->input_pos = 0;
  r->stack_top = 0;
  preserves_free_bytes(&r->input);
  preserves_free_bytes(&r->index);
  preserves_free_bytes(&r->stack);
});
|
||
|
|
||
|
PRESERVES_IMPLEMENTATION_CHUNK
|
||
|
(
|
||
|
#define MINIMUM_PRESERVES_READER_STACK_SIZE 32
|
||
|
typedef uint64_t preserves_index_offset_t;
|
||
|
|
||
|
static inline bool _preserves_reader_ateof(preserves_reader_t *r) {
|
||
|
return (r->input_pos >= r->input.len);
|
||
|
}
|
||
|
|
||
|
static inline int _preserves_reader_peek(preserves_reader_t *r) {
|
||
|
if (_preserves_reader_ateof(r)) return -1;
|
||
|
return PRESERVES_ARRAY_ELEMENT(&r->input, uint8_t, r->input_pos);
|
||
|
}
|
||
|
|
||
|
static inline int _preserves_reader_next(preserves_reader_t *r) {
|
||
|
if (r->input_pos >= r->input.len) return -1;
|
||
|
int result = PRESERVES_ARRAY_ELEMENT(&r->input, uint8_t, r->input_pos);
|
||
|
r->input_pos++;
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
static inline void *_preserves_reader_next_bytes(preserves_reader_t *r, size_t count) {
|
||
|
preserves_bytes_t bs = preserves_bytes_subsequence(&r->input, r->input_pos, count);
|
||
|
if (bs.len != count) return NULL;
|
||
|
r->input_pos += count;
|
||
|
return bs.ptr;
|
||
|
}
|
||
|
|
||
|
static inline preserves_index_entry_t *_preserves_reader_index_entry(preserves_reader_t *r,
|
||
|
size_t i) {
|
||
|
size_t limit = PRESERVES_ARRAY_ELEMENT_COUNT(&r->index, preserves_index_entry_t);
|
||
|
while (i >= limit) {
|
||
|
limit = limit * 2;
|
||
|
if (limit < 16) limit = 16;
|
||
|
if (PRESERVES_RESIZE_ARRAY(&r->index, preserves_index_entry_t, limit) == -1) {
|
||
|
return NULL;
|
||
|
}
|
||
|
}
|
||
|
return &PRESERVES_ARRAY_ELEMENT(&r->index, preserves_index_entry_t, i);
|
||
|
}
|
||
|
|
||
|
static inline size_t _preserves_reader_stack_peek(preserves_reader_t *r) {
|
||
|
if (r->stack_top >= PRESERVES_ARRAY_ELEMENT_COUNT(&r->stack, size_t)) {
|
||
|
abort();
|
||
|
}
|
||
|
return PRESERVES_ARRAY_ELEMENT(&r->stack, size_t, r->stack_top - 1);
|
||
|
}
|
||
|
|
||
|
static inline preserves_index_entry_t *_preserves_reader_stack_top_entry(preserves_reader_t *r) {
|
||
|
return _preserves_reader_index_entry(r, _preserves_reader_stack_peek(r));
|
||
|
}
|
||
|
|
||
|
static inline void _preserves_reader_stack_drop(preserves_reader_t *r) {
|
||
|
if (r->stack_top == 0) abort();
|
||
|
/* printf("popping "); */
|
||
|
/* preserves_dump_index_entry(stdout, &r->input, _preserves_reader_stack_top_entry(r), true); */
|
||
|
r->stack_top--;
|
||
|
}
|
||
|
|
||
|
static inline preserves_index_entry_t *_preserves_reader_finish_seq(preserves_reader_t *r) {
|
||
|
size_t base_index = _preserves_reader_stack_peek(r);
|
||
|
preserves_index_entry_t *base = _preserves_reader_stack_top_entry(r);
|
||
|
base->data._unsigned = r->index_pos - base_index;
|
||
|
_preserves_reader_stack_drop(r);
|
||
|
return base;
|
||
|
}
|
||
|
|
||
|
static inline bool _preserves_reader_in_annotations(preserves_reader_t *r) {
|
||
|
return (r->stack_top > 0) &&
|
||
|
(_preserves_reader_stack_top_entry(r)->type == PRESERVES_ANNOTATION);
|
||
|
}
|
||
|
|
||
|
static inline void _preserves_reader_inc_collection_len(preserves_reader_t *r, size_t *count_ptr) {
|
||
|
if (r->stack_top > 0) {
|
||
|
check_for_embedded:
|
||
|
preserves_index_entry_t *base = _preserves_reader_stack_top_entry(r);
|
||
|
if (base->type == PRESERVES_EMBEDDED) {
|
||
|
_preserves_reader_stack_drop(r);
|
||
|
goto check_for_embedded;
|
||
|
} else {
|
||
|
base->len++;
|
||
|
}
|
||
|
/* printf("added to base, which is now "); */
|
||
|
/* preserves_dump_index_entry(stdout, &r->input, base, true); */
|
||
|
} else {
|
||
|
(*count_ptr)--;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static inline preserves_index_entry_t *_preserves_reader_emit_entry(preserves_reader_t *r,
|
||
|
size_t *count_ptr,
|
||
|
preserves_index_entry_t e) {
|
||
|
if (!r->annotation_tag_seen && _preserves_reader_in_annotations(r)) {
|
||
|
/* printf("(popping annotation collector)\n"); */
|
||
|
_preserves_reader_finish_seq(r);
|
||
|
}
|
||
|
if (count_ptr != NULL) {
|
||
|
_preserves_reader_inc_collection_len(r, count_ptr);
|
||
|
}
|
||
|
|
||
|
/* printf("-- emitting: "); */
|
||
|
/* preserves_dump_index_entry(stdout, &r->input, &e, true); */
|
||
|
|
||
|
preserves_index_entry_t *ix = _preserves_reader_index_entry(r, r->index_pos);
|
||
|
if (ix == NULL) return NULL;
|
||
|
*ix = e;
|
||
|
r->index_pos++;
|
||
|
|
||
|
r->annotation_tag_seen = false;
|
||
|
|
||
|
return ix;
|
||
|
}
|
||
|
|
||
|
static inline preserves_reader_result_t _preserves_reader_finish(preserves_reader_t *r,
|
||
|
preserves_error_code_t code) {
|
||
|
if (code == PRESERVES_END_SYSTEM_ERROR) {
|
||
|
return preserves_reader_error_result();
|
||
|
} else {
|
||
|
preserves_index_entry_t *index = _preserves_reader_index_entry(r, 0);
|
||
|
if (index == NULL) return preserves_reader_error_result();
|
||
|
preserves_index_entry_t *end_marker =
|
||
|
_preserves_reader_emit_entry(r, NULL, (preserves_index_entry_t) {
|
||
|
.type = PRESERVES_END_MARKER,
|
||
|
.repr = PRESERVES_REPR_NONE,
|
||
|
.len = 0,
|
||
|
.data = { ._err = code },
|
||
|
});
|
||
|
if (end_marker == NULL) return preserves_reader_error_result();
|
||
|
return (preserves_reader_result_t) { .index = index, .end_marker = end_marker };
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static inline int _preserves_reader_varint(preserves_reader_t *r, size_t *v) {
|
||
|
unsigned int shift_amount = 0;
|
||
|
size_t result = 0;
|
||
|
while (true) {
|
||
|
int b = _preserves_reader_next(r);
|
||
|
if (b == -1) return -1;
|
||
|
result |= (b & 0x7f) << shift_amount;
|
||
|
if (b & 0x80) {
|
||
|
shift_amount += 7;
|
||
|
if (shift_amount > ((sizeof(size_t) * 8) - 7)) return -2;
|
||
|
} else {
|
||
|
*v = result;
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static inline preserves_index_entry_t *_preserves_emit_small_int(preserves_reader_t *r,
|
||
|
size_t *count_ptr,
|
||
|
bool is_unsigned,
|
||
|
int64_t value) {
|
||
|
return _preserves_reader_emit_entry(r, count_ptr, (preserves_index_entry_t) {
|
||
|
.type = PRESERVES_SIGNED_INTEGER,
|
||
|
.repr = is_unsigned ? PRESERVES_INT_UNSIGNED : PRESERVES_INT_SIGNED,
|
||
|
.len = 0,
|
||
|
.data = { ._signed = value },
|
||
|
});
|
||
|
}
|
||
|
|
||
|
static inline int _preserves_reader_decode_intbytes(preserves_reader_t *r,
|
||
|
size_t *count_ptr,
|
||
|
size_t len) {
|
||
|
size_t starting_pos = r->input_pos;
|
||
|
uint8_t *bs = _preserves_reader_next_bytes(r, len);
|
||
|
if (bs == NULL) return -1;
|
||
|
|
||
|
bool is_unsigned = false;
|
||
|
size_t remaining = len;
|
||
|
while ((remaining > 0) && (*bs == 0)) {
|
||
|
is_unsigned = true;
|
||
|
bs++;
|
||
|
remaining--;
|
||
|
}
|
||
|
|
||
|
if (remaining == 0) {
|
||
|
// This shouldn't happen, but it does have a denotation.
|
||
|
return (_preserves_emit_small_int(r, count_ptr, is_unsigned, 0) == NULL) ? -1 : 0;
|
||
|
}
|
||
|
|
||
|
if (remaining > 8) {
|
||
|
if (is_unsigned && (*bs & 0x80)) {
|
||
|
remaining++;
|
||
|
bs--;
|
||
|
}
|
||
|
return (_preserves_reader_emit_entry(r, count_ptr, (preserves_index_entry_t) {
|
||
|
.type = PRESERVES_SIGNED_INTEGER,
|
||
|
.repr = PRESERVES_INT_LARGE_BINARY,
|
||
|
.len = remaining,
|
||
|
.data = { ._unsigned = starting_pos + (len - remaining) },
|
||
|
}) == NULL) ? -1 : 0;
|
||
|
}
|
||
|
|
||
|
uint64_t buf = 0;
|
||
|
while (remaining > 0) {
|
||
|
remaining--;
|
||
|
buf = buf | ((*bs) << (remaining << 3));
|
||
|
bs++;
|
||
|
}
|
||
|
|
||
|
int64_t value = *(int64_t *)&buf;
|
||
|
return (_preserves_emit_small_int(r, count_ptr, is_unsigned, value) == NULL) ? -1 : 0;
|
||
|
}
|
||
|
|
||
|
static inline bool utf8_tail(uint8_t b) {
|
||
|
return (b >= 0x80 && b <= 0xbf);
|
||
|
}
|
||
|
|
||
|
static inline int check_utf8(uint8_t *bs, size_t len) {
|
||
|
// https://datatracker.ietf.org/doc/html/rfc3629#section-4
|
||
|
while (len > 0) {
|
||
|
uint8_t b0 = *bs++;
|
||
|
len--;
|
||
|
if (b0 >= 0x80) {
|
||
|
if (len < 1) return -1;
|
||
|
uint8_t b1 = *bs++;
|
||
|
len--;
|
||
|
if (b0 >= 0xc2 && b0 <= 0xdf) {
|
||
|
if (!utf8_tail(b1)) return -1;
|
||
|
} else {
|
||
|
if (len < 1) return -1;
|
||
|
uint8_t b2 = *bs++;
|
||
|
len--;
|
||
|
if (b0 == 0xe0) {
|
||
|
if (!(b1 >= 0xa0 && b1 <= 0xbf && utf8_tail(b2))) return -1;
|
||
|
} else if (b0 >= 0xe1 && b0 <= 0xec) {
|
||
|
if (!(utf8_tail(b1) && utf8_tail(b2))) return -1;
|
||
|
} else if (b0 == 0xed) {
|
||
|
if (!(b1 >= 0x80 && b1 <= 0x9f && utf8_tail(b2))) return -1;
|
||
|
} else if (b0 >= 0xee && b0 <= 0xef) {
|
||
|
if (!(utf8_tail(b1) && utf8_tail(b2))) return -1;
|
||
|
} else {
|
||
|
if (len < 1) return -1;
|
||
|
uint8_t b3 = *bs++;
|
||
|
len--;
|
||
|
if (b0 == 0xf0) {
|
||
|
if (!(b1 >= 0x90 && b1 <= 0xbf && utf8_tail(b2) && utf8_tail(b3))) return -1;
|
||
|
} else if (b0 >= 0xf1 && b0 <= 0xf3) {
|
||
|
if (!(utf8_tail(b1) && utf8_tail(b2) && utf8_tail(b3))) return -1;
|
||
|
} else if (b0 == 0xf4) {
|
||
|
if (!(b1 >= 0x80 && b1 <= 0x8f && utf8_tail(b2) && utf8_tail(b3))) return -1;
|
||
|
} else {
|
||
|
// ok!
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static inline preserves_error_code_t _preserves_reader_read_stringlike(preserves_reader_t *r,
|
||
|
size_t *count_ptr,
|
||
|
preserves_type_tag_t type) {
|
||
|
size_t len = 0;
|
||
|
switch (_preserves_reader_varint(r, &len)) {
|
||
|
case -1: return PRESERVES_END_INCOMPLETE_INPUT;
|
||
|
case -2: return PRESERVES_END_VARINT_TOO_BIG;
|
||
|
default: break;
|
||
|
}
|
||
|
size_t starting_pos = r->input_pos;
|
||
|
uint8_t *maybe_utf = _preserves_reader_next_bytes(r, len);
|
||
|
if ((type != PRESERVES_BYTE_STRING) && (check_utf8(maybe_utf, len) == -1)) {
|
||
|
return PRESERVES_END_INVALID_UTF8;
|
||
|
}
|
||
|
if (_preserves_reader_emit_entry(r, count_ptr, (preserves_index_entry_t) {
|
||
|
.type = type,
|
||
|
.repr = PRESERVES_LITERAL,
|
||
|
.len = len,
|
||
|
.data = { ._unsigned = starting_pos },
|
||
|
}) == NULL) {
|
||
|
return PRESERVES_END_SYSTEM_ERROR;
|
||
|
}
|
||
|
return PRESERVES_END_NO_ERROR;
|
||
|
}
|
||
|
|
||
|
static inline preserves_index_entry_t *_preserves_reader_push(preserves_reader_t *r,
|
||
|
preserves_type_tag_t type) {
|
||
|
preserves_index_entry_t *ix = _preserves_reader_emit_entry(r, NULL, (preserves_index_entry_t) {
|
||
|
.type = type, .repr = PRESERVES_REPR_NONE, .len = 0, .data = { ._unsigned = 0 }});
|
||
|
if (ix == NULL) return NULL;
|
||
|
|
||
|
size_t limit = PRESERVES_ARRAY_ELEMENT_COUNT(&r->stack, size_t);
|
||
|
if (r->stack_top >= limit) {
|
||
|
limit += 32;
|
||
|
if (PRESERVES_RESIZE_ARRAY(&r->stack, size_t, limit) == -1) return NULL;
|
||
|
}
|
||
|
PRESERVES_ARRAY_ELEMENT(&r->stack, size_t, r->stack_top) = r->index_pos - 1;
|
||
|
r->stack_top++;
|
||
|
return ix;
|
||
|
}
|
||
|
)
|
||
|
|
||
|
// Given an index entry, returns the value it ultimately denotes: if `ix` is
// an ANNOTATION entry, the annotated item it points to (via its relative
// offset); otherwise `ix` itself. NULL is passed through. Aborts if the
// annotated item is itself an ANNOTATION.
PRESERVES_INLINE preserves_index_entry_t *preserves_skip_annotations(preserves_index_entry_t *ix) {
  if ((ix != NULL) && (ix->type == PRESERVES_ANNOTATION)) {
    preserves_index_entry_t *annotated = ix + ix->data._unsigned;
    if (annotated->type == PRESERVES_ANNOTATION) abort();
    return annotated;
  }
  return ix;
}
|
||
|
|
||
|
#define RETURN_ON_FAIL(e) if ((e) == NULL) return preserves_reader_error_result()
|
||
|
PRESERVES_OUTOFLINE
|
||
|
(
|
||
|
preserves_reader_result_t preserves_read_binary_continue(preserves_reader_t *r, size_t count), {
|
||
|
while (count) {
|
||
|
/* for (int i = r->stack_top - 1; i >= 0; i--) { */
|
||
|
/* size_t ip = PRESERVES_ARRAY_ELEMENT(&r->stack, size_t, i); */
|
||
|
/* printf(" %02d: (%5lu) ", i, ip); */
|
||
|
/* preserves_dump_index_entry(stdout, &r->input, _preserves_reader_index_entry(r, ip), true); */
|
||
|
/* } */
|
||
|
/* printf("pos %lu (%05lx), count %lu, annotation tag seen %d: ", */
|
||
|
/* r->input_pos, */
|
||
|
/* r->input_pos, */
|
||
|
/* count, */
|
||
|
/* r->annotation_tag_seen); */
|
||
|
int b = _preserves_reader_next(r);
|
||
|
/* printf("tag 0x%02x %s\n", b, preserves_binary_format_tag_name(b)); */
|
||
|
if (b == -1) return _preserves_reader_finish(r, PRESERVES_END_INCOMPLETE_INPUT);
|
||
|
|
||
|
switch (b) {
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_FALSE:
|
||
|
RETURN_ON_FAIL(_preserves_reader_emit_entry(r, &count, (preserves_index_entry_t) {
|
||
|
.type = PRESERVES_BOOLEAN, .repr = PRESERVES_REPR_NONE, .len = 0, .data = {
|
||
|
._boolean = false
|
||
|
}}));
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_TRUE:
|
||
|
RETURN_ON_FAIL(_preserves_reader_emit_entry(r, &count, (preserves_index_entry_t) {
|
||
|
.type = PRESERVES_BOOLEAN, .repr = PRESERVES_REPR_NONE, .len = 0, .data = {
|
||
|
._boolean = true
|
||
|
}}));
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_FLOAT: {
|
||
|
uint8_t *bs = _preserves_reader_next_bytes(r, 4);
|
||
|
if (bs == NULL) return _preserves_reader_finish(r, PRESERVES_END_INCOMPLETE_INPUT);
|
||
|
uint32_t i;
|
||
|
memcpy(&i, bs, 4);
|
||
|
i = ntohl(i);
|
||
|
float f;
|
||
|
memcpy(&f, &i, 4);
|
||
|
RETURN_ON_FAIL(_preserves_reader_emit_entry(r, &count, (preserves_index_entry_t) {
|
||
|
.type = PRESERVES_FLOAT, .repr = PRESERVES_REPR_NONE, .len = 0, .data = {
|
||
|
._float = f
|
||
|
}}));
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_DOUBLE: {
|
||
|
uint8_t *bs = _preserves_reader_next_bytes(r, 8);
|
||
|
if (bs == NULL) return _preserves_reader_finish(r, PRESERVES_END_INCOMPLETE_INPUT);
|
||
|
uint32_t lo, hi;
|
||
|
memcpy(&hi, bs, 4);
|
||
|
memcpy(&lo, bs + 4, 4);
|
||
|
lo = ntohl(lo);
|
||
|
hi = ntohl(hi);
|
||
|
uint64_t i = (((uint64_t) hi) << 32) | ((uint64_t) lo);
|
||
|
double f;
|
||
|
memcpy(&f, &i, 8);
|
||
|
RETURN_ON_FAIL(_preserves_reader_emit_entry(r, &count, (preserves_index_entry_t) {
|
||
|
.type = PRESERVES_DOUBLE, .repr = PRESERVES_REPR_NONE, .len = 0, .data = {
|
||
|
._double = f
|
||
|
}}));
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_END:
|
||
|
if (r->stack_top == 0) {
|
||
|
return _preserves_reader_finish(r, PRESERVES_END_UNEXPECTED_END);
|
||
|
}
|
||
|
preserves_index_entry_t *base = _preserves_reader_finish_seq(r);
|
||
|
_preserves_reader_inc_collection_len(r, &count);
|
||
|
if ((base->type == PRESERVES_DICTIONARY) && ((base->len % 2) != 0)) {
|
||
|
return _preserves_reader_finish(r, PRESERVES_END_DICTIONARY_MISSING_VALUE);
|
||
|
}
|
||
|
if ((base->type == PRESERVES_RECORD) && (base->len == 0)) {
|
||
|
return _preserves_reader_finish(r, PRESERVES_END_RECORD_MISSING_LABEL);
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_ANNOTATION:
|
||
|
if (r->annotation_tag_seen || !_preserves_reader_in_annotations(r)) {
|
||
|
RETURN_ON_FAIL(_preserves_reader_push(r, PRESERVES_ANNOTATION));
|
||
|
}
|
||
|
r->annotation_tag_seen = true;
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_EMBEDDED:
|
||
|
RETURN_ON_FAIL(_preserves_reader_push(r, PRESERVES_EMBEDDED));
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_LARGE_INTEGER: {
|
||
|
size_t len = 0;
|
||
|
switch (_preserves_reader_varint(r, &len)) {
|
||
|
case -1: return _preserves_reader_finish(r, PRESERVES_END_INCOMPLETE_INPUT);
|
||
|
case -2: return _preserves_reader_finish(r, PRESERVES_END_VARINT_TOO_BIG);
|
||
|
default: break;
|
||
|
}
|
||
|
if (_preserves_reader_decode_intbytes(r, &count, len) == -1) {
|
||
|
return _preserves_reader_finish(r, PRESERVES_END_INCOMPLETE_INPUT);
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_STRING: {
|
||
|
preserves_error_code_t code =
|
||
|
_preserves_reader_read_stringlike(r, &count, PRESERVES_STRING);
|
||
|
if (code != PRESERVES_END_NO_ERROR) return _preserves_reader_finish(r, code);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_BYTE_STRING: {
|
||
|
preserves_error_code_t code =
|
||
|
_preserves_reader_read_stringlike(r, &count, PRESERVES_BYTE_STRING);
|
||
|
if (code != PRESERVES_END_NO_ERROR) return _preserves_reader_finish(r, code);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_SYMBOL: {
|
||
|
preserves_error_code_t code =
|
||
|
_preserves_reader_read_stringlike(r, &count, PRESERVES_SYMBOL);
|
||
|
if (code != PRESERVES_END_NO_ERROR) return _preserves_reader_finish(r, code);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_RECORD:
|
||
|
RETURN_ON_FAIL(_preserves_reader_push(r, PRESERVES_RECORD));
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_SEQUENCE:
|
||
|
RETURN_ON_FAIL(_preserves_reader_push(r, PRESERVES_SEQUENCE));
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_SET:
|
||
|
RETURN_ON_FAIL(_preserves_reader_push(r, PRESERVES_SET));
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_BINARY_FORMAT_TAG_DICTIONARY:
|
||
|
RETURN_ON_FAIL(_preserves_reader_push(r, PRESERVES_DICTIONARY));
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
if ((b >= PRESERVES_BINARY_FORMAT_TAG_SMALL_INTEGER_LO) &&
|
||
|
(b <= PRESERVES_BINARY_FORMAT_TAG_SMALL_INTEGER_HI)) {
|
||
|
int64_t value = b - PRESERVES_BINARY_FORMAT_TAG_SMALL_INTEGER_LO;
|
||
|
if (value > 12) value -= 16;
|
||
|
RETURN_ON_FAIL(_preserves_emit_small_int(r, &count, false, value));
|
||
|
} else if ((b >= PRESERVES_BINARY_FORMAT_TAG_MEDIUM_INTEGER_LO) &&
|
||
|
(b <= PRESERVES_BINARY_FORMAT_TAG_MEDIUM_INTEGER_HI)) {
|
||
|
size_t len = (b - PRESERVES_BINARY_FORMAT_TAG_MEDIUM_INTEGER_LO) + 1;
|
||
|
if (_preserves_reader_decode_intbytes(r, &count, len) == -1) {
|
||
|
return _preserves_reader_finish(r, PRESERVES_END_INCOMPLETE_INPUT);
|
||
|
}
|
||
|
} else {
|
||
|
return _preserves_reader_finish(r, PRESERVES_END_INVALID_TAG);
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return _preserves_reader_finish(r,
|
||
|
(_preserves_reader_ateof(r) ? PRESERVES_END_EOF :
|
||
|
(r->stack_top > 0) ? PRESERVES_END_INCOMPLETE_INPUT :
|
||
|
PRESERVES_END_MORE_INPUT_REMAINING));
|
||
|
}
|
||
|
);
|
||
|
#undef RETURN_ON_FAIL
|
||
|
|
||
|
PRESERVES_OUTOFLINE
|
||
|
(
|
||
|
preserves_reader_result_t preserves_read_binary(preserves_reader_t *r,
|
||
|
preserves_bytes_t *input,
|
||
|
size_t count), {
|
||
|
{
|
||
|
size_t required_stack_bytes = MINIMUM_PRESERVES_READER_STACK_SIZE * sizeof(size_t);
|
||
|
if (r->stack.len < required_stack_bytes) {
|
||
|
if (preserves_resize_bytes(&r->stack, required_stack_bytes) == -1) {
|
||
|
return preserves_reader_error_result();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
r->stack_top = 0;
|
||
|
r->input_pos = 0;
|
||
|
r->index_pos = 0;
|
||
|
r->annotation_tag_seen = false;
|
||
|
preserves_bytes_move(&r->input, input);
|
||
|
|
||
|
return preserves_read_binary_continue(r, count);
|
||
|
}
|
||
|
);
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////
|
||
|
// Debug utilities
|
||
|
|
||
|
#ifndef NDEBUG
|
||
|
|
||
|
PRESERVES_IMPLEMENTATION_CHUNK
|
||
|
(
|
||
|
static void preserves_dump_bytes(FILE *f,
|
||
|
preserves_bytes_t *data) {
|
||
|
fprintf(f, ">>>");
|
||
|
for (size_t i = 0; i < data->len; i++) {
|
||
|
uint8_t c = PRESERVES_ARRAY_ELEMENT(data, uint8_t, i);
|
||
|
if (c < 0x20 || c >= 0x80) {
|
||
|
fprintf(f, "\\x%02x", c);
|
||
|
} else {
|
||
|
fprintf(f, "%c", c);
|
||
|
}
|
||
|
}
|
||
|
fprintf(f, "<<<");
|
||
|
}
|
||
|
|
||
|
void preserves_dump_index_entry(FILE *f,
|
||
|
preserves_bytes_t *input,
|
||
|
preserves_index_entry_t *i,
|
||
|
bool add_newline) {
|
||
|
fprintf(f,
|
||
|
"%s %s length %lu",
|
||
|
preserves_type_tag_name(i->type),
|
||
|
i->repr == PRESERVES_REPR_NONE ? "-" : preserves_index_entry_representation_name(i->repr),
|
||
|
(size_t) i->len);
|
||
|
switch (i->type) {
|
||
|
case PRESERVES_BOOLEAN:
|
||
|
fprintf(f, i->data._boolean ? " #t" : " #f");
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_FLOAT:
|
||
|
fprintf(f, " %f", i->data._float);
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_DOUBLE:
|
||
|
fprintf(f, " %f", i->data._double);
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_STRING:
|
||
|
case PRESERVES_BYTE_STRING:
|
||
|
case PRESERVES_SYMBOL: {
|
||
|
fprintf(f, " offset %lu ", i->data._unsigned);
|
||
|
preserves_bytes_t data = preserves_bytes_subsequence(input, i->data._unsigned, i->len);
|
||
|
preserves_dump_bytes(f, &data);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case PRESERVES_RECORD:
|
||
|
case PRESERVES_SEQUENCE:
|
||
|
case PRESERVES_SET:
|
||
|
case PRESERVES_DICTIONARY:
|
||
|
fprintf(f, " skip %lu", i->data._unsigned - 1);
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_EMBEDDED:
|
||
|
case PRESERVES_ANNOTATION:
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_END_MARKER:
|
||
|
fprintf(f, ": %s", preserves_error_code_name(i->data._err));
|
||
|
break;
|
||
|
|
||
|
case PRESERVES_SIGNED_INTEGER:
|
||
|
switch (i->repr) {
|
||
|
case PRESERVES_INT_SIGNED:
|
||
|
fprintf(f, ": %ld", i->data._signed);
|
||
|
break;
|
||
|
case PRESERVES_INT_UNSIGNED:
|
||
|
default:
|
||
|
fprintf(f, ": %lu", i->data._unsigned);
|
||
|
break;
|
||
|
case PRESERVES_INT_LARGE_BINARY:
|
||
|
case PRESERVES_INT_LARGE_TEXT: {
|
||
|
fprintf(f, " offset %lu ", i->data._unsigned);
|
||
|
preserves_bytes_t data = preserves_bytes_subsequence(input, i->data._unsigned, i->len);
|
||
|
preserves_dump_bytes(f, &data);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
fprintf(f, ": %lu (%ld)", i->data._unsigned, i->data._signed);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (add_newline) {
|
||
|
fprintf(f, "\n");
|
||
|
}
|
||
|
}
|
||
|
)
|
||
|
|
||
|
#endif
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
#undef PRESERVES_INLINE
|
||
|
#undef PRESERVES_IMPLEMENTATION_CHUNK
|
||
|
#undef PRESERVES_OUTOFLINE
|
||
|
|
||
|
#endif
|