diff --git a/implementations/c/preserves.h b/implementations/c/preserves.h index c80d71a..446b21d 100644 --- a/implementations/c/preserves.h +++ b/implementations/c/preserves.h @@ -284,6 +284,7 @@ typedef enum preserves_type_tag { PRESERVES_SIGNED_INTEGER, PRESERVES_STRING, PRESERVES_BYTE_STRING, + PRESERVES_COMPACT, PRESERVES_SYMBOL, PRESERVES_RECORD, @@ -304,6 +305,7 @@ PRESERVES_OUTOFLINE(char const *preserves_type_tag_name(preserves_type_tag_t typ case PRESERVES_SIGNED_INTEGER: return "SIGNED_INTEGER"; case PRESERVES_STRING: return "STRING"; case PRESERVES_BYTE_STRING: return "BYTE_STRING"; + case PRESERVES_COMPACT: return "COMPACT"; case PRESERVES_SYMBOL: return "SYMBOL"; case PRESERVES_RECORD: return "RECORD"; case PRESERVES_SEQUENCE: return "SEQUENCE"; @@ -397,6 +399,8 @@ PRESERVES_OUTOFLINE that need ByteString-style backslash-escapes interpreted - repr=PRESERVES_HEX -> len, data._unsigned as absolute offset within input to ASCII bytes of hex - repr=PRESERVES_BASE64 -> len, data._unsigned as absolute offset within input to ASCII bytes of base64 + PRESERVES_COMPACT: + - repr as for BYTE_STRING, but bytes denote a nested binary-encoded value. PRESERVES_SYMBOL: - repr=PRESERVES_LITERAL -> len, data._unsigned as absolute offset within input to utf-8 bytes - repr=PRESERVES_ESCAPED -> len, data._unsigned as absolute offset within input to utf-8 bytes @@ -736,7 +740,8 @@ PRESERVES_IMPLEMENTATION_CHUNK static inline preserves_error_code_t _preserves_reader_read_stringlike(preserves_reader_t *r, size_t *count_ptr, - preserves_type_tag_t type) { + preserves_type_tag_t type, + bool should_check_utf8) { size_t len = 0; switch (_preserves_reader_varint(r, &len)) { case -1: return PRESERVES_END_INCOMPLETE_INPUT; @@ -745,7 +750,7 @@ PRESERVES_IMPLEMENTATION_CHUNK } size_t starting_pos = r->input_pos; uint8_t *maybe_utf = _preserves_reader_next_bytes(r, len); - if ((type != PRESERVES_BYTE_STRING) && (check_utf8(maybe_utf, len) == -1)) { + if (should_check_utf8 && (check_utf8(maybe_utf, len) == -1)) { return PRESERVES_END_INVALID_UTF8; } if (_preserves_reader_emit_entry(r, count_ptr, (preserves_index_entry_t) { @@ -891,21 +896,21 @@ PRESERVES_OUTOFLINE case PRESERVES_BINARY_FORMAT_TAG_STRING: { preserves_error_code_t code = - _preserves_reader_read_stringlike(r, &count, PRESERVES_STRING); + _preserves_reader_read_stringlike(r, &count, PRESERVES_STRING, true); if (code != PRESERVES_END_NO_ERROR) return _preserves_reader_finish(r, code); break; } case PRESERVES_BINARY_FORMAT_TAG_BYTE_STRING: { preserves_error_code_t code = - _preserves_reader_read_stringlike(r, &count, PRESERVES_BYTE_STRING); + _preserves_reader_read_stringlike(r, &count, PRESERVES_BYTE_STRING, false); if (code != PRESERVES_END_NO_ERROR) return _preserves_reader_finish(r, code); break; } case PRESERVES_BINARY_FORMAT_TAG_SYMBOL: { preserves_error_code_t code = - _preserves_reader_read_stringlike(r, &count, PRESERVES_SYMBOL); + _preserves_reader_read_stringlike(r, &count, PRESERVES_SYMBOL, true); if (code != PRESERVES_END_NO_ERROR) return _preserves_reader_finish(r, code); break; } @@ -1022,6 +1027,7 @@ PRESERVES_IMPLEMENTATION_CHUNK case PRESERVES_STRING: case PRESERVES_BYTE_STRING: + case PRESERVES_COMPACT: case PRESERVES_SYMBOL: { fprintf(f, " offset %lu ", i->data._unsigned); preserves_bytes_t data = preserves_bytes_subsequence(input, i->data._unsigned, i->len);