PRESERVES_COMPACT

This commit is contained in:
Tony Garnock-Jones 2022-03-04 09:18:59 +01:00
parent 4a41fac6ec
commit 9c71df9abb
1 changed file with 11 additions and 5 deletions

View File

@ -284,6 +284,7 @@ typedef enum preserves_type_tag {
PRESERVES_SIGNED_INTEGER,
PRESERVES_STRING,
PRESERVES_BYTE_STRING,
PRESERVES_COMPACT,
PRESERVES_SYMBOL,
PRESERVES_RECORD,
@ -304,6 +305,7 @@ PRESERVES_OUTOFLINE(char const *preserves_type_tag_name(preserves_type_tag_t typ
case PRESERVES_SIGNED_INTEGER: return "SIGNED_INTEGER";
case PRESERVES_STRING: return "STRING";
case PRESERVES_BYTE_STRING: return "BYTE_STRING";
case PRESERVES_COMPACT: return "COMPACT";
case PRESERVES_SYMBOL: return "SYMBOL";
case PRESERVES_RECORD: return "RECORD";
case PRESERVES_SEQUENCE: return "SEQUENCE";
@ -397,6 +399,8 @@ PRESERVES_OUTOFLINE
that need ByteString-style backslash-escapes interpreted
- repr=PRESERVES_HEX -> len, data._unsigned as absolute offset within input to ASCII bytes of hex
- repr=PRESERVES_BASE64 -> len, data._unsigned as absolute offset within input to ASCII bytes of base64
PRESERVES_COMPACT:
- repr as for PRESERVES_BYTE_STRING, but the bytes denote a nested binary-encoded value.
PRESERVES_SYMBOL:
- repr=PRESERVES_LITERAL -> len, data._unsigned as absolute offset within input to utf-8 bytes
- repr=PRESERVES_ESCAPED -> len, data._unsigned as absolute offset within input to utf-8 bytes
@ -736,7 +740,8 @@ PRESERVES_IMPLEMENTATION_CHUNK
static inline preserves_error_code_t _preserves_reader_read_stringlike(preserves_reader_t *r,
size_t *count_ptr,
preserves_type_tag_t type) {
preserves_type_tag_t type,
bool should_check_utf8) {
size_t len = 0;
switch (_preserves_reader_varint(r, &len)) {
case -1: return PRESERVES_END_INCOMPLETE_INPUT;
@ -745,7 +750,7 @@ PRESERVES_IMPLEMENTATION_CHUNK
}
size_t starting_pos = r->input_pos;
uint8_t *maybe_utf = _preserves_reader_next_bytes(r, len);
if ((type != PRESERVES_BYTE_STRING) && (check_utf8(maybe_utf, len) == -1)) {
if (should_check_utf8 && (check_utf8(maybe_utf, len) == -1)) {
return PRESERVES_END_INVALID_UTF8;
}
if (_preserves_reader_emit_entry(r, count_ptr, (preserves_index_entry_t) {
@ -891,21 +896,21 @@ PRESERVES_OUTOFLINE
case PRESERVES_BINARY_FORMAT_TAG_STRING: {
preserves_error_code_t code =
_preserves_reader_read_stringlike(r, &count, PRESERVES_STRING);
_preserves_reader_read_stringlike(r, &count, PRESERVES_STRING, true);
if (code != PRESERVES_END_NO_ERROR) return _preserves_reader_finish(r, code);
break;
}
case PRESERVES_BINARY_FORMAT_TAG_BYTE_STRING: {
preserves_error_code_t code =
_preserves_reader_read_stringlike(r, &count, PRESERVES_BYTE_STRING);
_preserves_reader_read_stringlike(r, &count, PRESERVES_BYTE_STRING, false);
if (code != PRESERVES_END_NO_ERROR) return _preserves_reader_finish(r, code);
break;
}
case PRESERVES_BINARY_FORMAT_TAG_SYMBOL: {
preserves_error_code_t code =
_preserves_reader_read_stringlike(r, &count, PRESERVES_SYMBOL);
_preserves_reader_read_stringlike(r, &count, PRESERVES_SYMBOL, true);
if (code != PRESERVES_END_NO_ERROR) return _preserves_reader_finish(r, code);
break;
}
@ -1022,6 +1027,7 @@ PRESERVES_IMPLEMENTATION_CHUNK
case PRESERVES_STRING:
case PRESERVES_BYTE_STRING:
case PRESERVES_COMPACT:
case PRESERVES_SYMBOL: {
fprintf(f, " offset %lu ", i->data._unsigned);
preserves_bytes_t data = preserves_bytes_subsequence(input, i->data._unsigned, i->len);