#include #include #include #include #include #include #include "treetrie.h" #include "fasthash.h" /* These two only included for tt_dump_arena_dot_styled. */ #include "route.h" #include /* Customized special-purpose fasthash variation */ #define mix(h) ({ \ (h) ^= (h) >> 23; \ (h) *= 0x2127599bf4325c37ULL; \ (h) ^= (h) >> 47; }) static inline uint64_t fasthash_4_ints(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t v4) { const uint64_t m = 0x880355f21e6d1965ULL; uint64_t h = (16 * m); uint64_t v; v = (((uint64_t) v2) << 32) | v1; h ^= mix(v); h *= m; v = (((uint64_t) v4) << 32) | v3; h ^= mix(v); h *= m; return mix(h); } static inline tt_hash_t hash(uint32_t tag, uint32_t index, tt_node_ptr_t a, tt_node_ptr_t b) { uint64_t x = fasthash_4_ints(tag, index, a, b); return x - (x >> 32); /* uint32_t keyblock[4] = { tag, */ /* index, */ /* a, */ /* b }; */ /* assert(sizeof(keyblock) == 4 * sizeof(uint32_t)); */ /* return (tt_hash_t) fasthash32(keyblock, sizeof(keyblock), 0); */ } static inline tt_hash_t tt_hash_node(tt_arena_t *a, tt_node_ptr_t p) { tt_node_idx_t i = tt_ptr_idx(p); return hash(tt_ptr_tag(p), a->headers[i].inuse.index, a->nodes[i].a, a->nodes[i].b); } static void chain_init(tt_arena_t *a, tt_free_chain_t *chain) { chain->head = chain->tail = TT_NO_IDX; } static void chain_append(tt_arena_t *a, tt_free_chain_t *chain, tt_node_idx_t i) { a->headers[i].next_free = TT_NO_IDX; if (chain->tail == TT_NO_IDX) { chain->head = i; } else { a->headers[chain->tail].next_free = i; } chain->tail = i; } static void chain_prepend(tt_arena_t *a, tt_free_chain_t *chain, tt_node_idx_t i) { a->headers[i].next_free = chain->head; if (chain->tail == TT_NO_IDX) { chain->tail = i; } chain->head = i; } /* Does not modify chain2. */ static void chain_splice(tt_arena_t *a, tt_free_chain_t *chain1, tt_free_chain_t *chain2) { if (chain2->head == TT_NO_IDX) { /* do nothing */ } else if (chain1->head == TT_NO_IDX) { *chain1 = *chain2; } else { a->headers[chain1->tail].next_free = chain2->head; chain1->tail = chain2->tail; } } static tt_node_idx_t chain_pop(tt_arena_t *a, tt_free_chain_t *chain) { tt_node_idx_t i = chain->head; if (i != TT_NO_IDX) { chain->head = a->headers[i].next_free; if (chain->tail == i) { chain->tail = chain->head; } } return i; } int tt_arena_init(tt_arena_t *a, void *atom_context, void (*atom_incref)(void *context, tt_arena_t *a, tt_atom_t atom), void (*atom_decref)(void *context, tt_arena_t *a, tt_atom_t atom)) { a->atom_context = atom_context; a->atom_incref = atom_incref; a->atom_decref = atom_decref; a->max_probe = 0; a->table_length = 16411; /* 16384; */ a->table = calloc(a->table_length, sizeof(a->table[0])); a->headers = calloc(a->table_length, sizeof(a->headers[0])); a->nodes = calloc(a->table_length, sizeof(a->nodes[0])); a->free_count = 0; chain_init(a, &a->free_chain); if (a->table == NULL || a->headers == NULL || a->nodes == NULL) { free(a->table); free(a->headers); free(a->nodes); errno = ENOMEM; return -1; } { int i; for (i = 0; i < a->table_length; i++) { chain_append(a, &a->free_chain, i); a->free_count++; } } return 0; } static void register_node(tt_arena_t *a, tt_node_ptr_t p, tt_hash_t initial_hash) { tt_hash_t h = initial_hash; int i = 0; while (1) { unsigned int index = (h + i) % a->table_length; tt_node_ptr_t candidate = a->table[index].ptr; /* printf("checking robinhood at h %d i %d index %d candidate %d\n", h, i, index, candidate); */ if (i > a->max_probe) { a->max_probe = i; } if (TT_NO_PTR_P(candidate)) { /* This slot in the table is free. */ /* printf("slot free!\n"); */ a->table[index].ptr = p; a->table[index].hash = h; break; } /* printf("slot not free.\n"); */ { tt_hash_t candidate_h = a->table[index].hash; int distance = index - (candidate_h % a->table_length); if (distance < 0) distance += a->table_length; if (distance < i) { a->table[index].ptr = p; a->table[index].hash = h; h = candidate_h; i = distance + 1; p = candidate; } else { /* keep scanning. */ i++; } } } } static int tt_grow(tt_arena_t *a) { tt_hashtable_entry_t *old_table = a->table; unsigned int old_table_length = a->table_length; unsigned int new_table_length = old_table_length << 1; /* printf("PREGROW\n"); */ /* tt_dump_arena(a, 1); */ { tt_hashtable_entry_t *new_table = calloc(new_table_length, sizeof(a->table[0])); tt_header_t *new_headers = realloc(a->headers, new_table_length * sizeof(a->headers[0])); tt_node_t *new_nodes = realloc(a->nodes, new_table_length * sizeof(a->nodes[0])); if (new_table == NULL || new_headers == NULL || new_nodes == NULL) { free(new_table); free(new_headers); free(new_nodes); errno = ENOMEM; return -1; } memset(new_headers + old_table_length, 0, (new_table_length - old_table_length) * sizeof(a->headers[0])); memset(new_nodes + old_table_length, 0, (new_table_length - old_table_length) * sizeof(a->nodes[0])); a->max_probe = 0; a->table_length = new_table_length; a->table = new_table; a->headers = new_headers; a->nodes = new_nodes; { int i; for (i = old_table_length; i < new_table_length; i++) { chain_append(a, &a->free_chain, i); a->free_count++; } } } /* printf("//////////////////////////////////////// GROW starting (length %d)\n", a->table_length); */ { int i; for (i = 0; i < old_table_length; i++) { tt_node_ptr_t p = old_table[i].ptr; tt_hash_t h = old_table[i].hash; if (!TT_NO_PTR_P(p)) { register_node(a, p, h); } } } /* printf("//////////////////////////////////////// GROW finished (length %d)\n", a->table_length); */ /* printf("POSTGROW\n"); */ /* tt_dump_arena_summary(a); */ free(old_table); return 0; } void tt_arena_done(tt_arena_t *a) { free(a->table); free(a->headers); free(a->nodes); memset(a, 0, sizeof(*a)); } static size_t arena_size(tt_arena_t *a) { return sizeof(*a) + (a->table_length * sizeof(a->table[0])) + (a->table_length * sizeof(a->headers[0])) + (a->table_length * sizeof(a->nodes[0])); } void tt_dump_arena_summary(tt_arena_t *a) { printf("size in bytes: %lu\n", arena_size(a)); printf("max_probe: %u\n", a->max_probe); printf("table_length: %u\n", a->table_length); printf("free_count: %u\n", a->free_count); } void tt_dump_arena(tt_arena_t *a, int show_free_chain) { tt_dump_arena_summary(a); if (show_free_chain) { int chain_counter = 0; tt_node_idx_t fp = a->free_chain.head; printf("free_chain:"); while (fp != TT_NO_IDX) { if ((chain_counter % 10) == 0) { printf("\n "); } printf(" %d", fp); fp = a->headers[fp].next_free; chain_counter++; } printf("\n"); } { int i; for (i = 0; i < a->table_length; i++) { tt_node_ptr_t p = a->table[i].ptr; tt_node_idx_t n = tt_ptr_idx(p); if (TT_NO_PTR_P(p)) { continue; } if (n >= a->table_length) { printf("%12u -> %12u ?!?!?!\n", i, n); } else { tt_hash_t h = a->table[i].hash; int distance = i - (h % a->table_length); if (distance < 0) distance += a->table_length; printf("%12u -> %12u: dist %d ref %d ", i, n, distance, a->headers[n].inuse.refcount); switch (tt_ptr_tag(p)) { case TT_TAG_TAIL: printf("tail %u/%u\n", tt_ptr_idx(a->nodes[n].a), tt_ptr_tag(a->nodes[n].a)); break; case TT_TAG_OK: printf("ok %u/%u\n", tt_ptr_idx(a->nodes[n].a), tt_ptr_tag(a->nodes[n].a)); break; case TT_TAG_BRANCH: printf("branch %u/%u %u/%u\n", tt_ptr_idx(a->nodes[n].a), tt_ptr_tag(a->nodes[n].a), tt_ptr_idx(a->nodes[n].b), tt_ptr_tag(a->nodes[n].b)); break; case TT_TAG_LEAF: printf("leaf %u/%u %d\n", tt_ptr_idx(a->nodes[n].a), tt_ptr_tag(a->nodes[n].a), (int) a->nodes[n].b); break; case TT_TAG_NODE: printf("node index %d, %u/%u %u/%u\n", a->headers[n].inuse.index, tt_ptr_idx(a->nodes[n].a), tt_ptr_tag(a->nodes[n].a), tt_ptr_idx(a->nodes[n].b), tt_ptr_tag(a->nodes[n].b)); break; case TT_TAG_DICT: printf("dict %u/%u %u\n", tt_ptr_idx(a->nodes[n].a), tt_ptr_tag(a->nodes[n].a), a->nodes[n].b); break; default: printf("???? %08x\n", p); assert(0); } } } } } static void dump_dot_edge(tt_arena_t *a, tt_node_ptr_t p, int lr, char const *edgename) { tt_node_idx_t n = tt_ptr_idx(p); tt_tag_t target_t = tt_ptr_tag(lr ? a->nodes[n].b : a->nodes[n].a); if (target_t != TT_TAG_INVALID && target_t != TT_TAG_SPECIAL) { printf(" i%u_%u -> i%u_%u [label=\"%s\"];\n", n, tt_ptr_tag(p), tt_ptr_idx(lr ? a->nodes[n].b : a->nodes[n].a), target_t, edgename); } } void tt_dump_arena_dot_styled(char const *rootlabel, tt_node_ptr_t rootptr, tt_arena_t *a, int flags) { int i; printf("digraph Arena {\n"); printf(" root [shape=box, label=\"%s\"];\n", rootlabel); if (tt_ptr_tag(rootptr) != TT_TAG_INVALID && tt_ptr_tag(rootptr) != TT_TAG_SPECIAL) { printf(" root -> i%u_%u;\n", tt_ptr_idx(rootptr), tt_ptr_tag(rootptr)); } for (i = 0; i < a->table_length; i++) { tt_node_ptr_t p = a->table[i].ptr; tt_node_idx_t n = tt_ptr_idx(p); if (TT_NO_PTR_P(p)) { continue; } if (n >= a->table_length) { printf(" // %12u -> %12u ?!?!?!\n", i, n); } else { tt_hash_t h = a->table[i].hash; int distance = i - (h % a->table_length); if (distance < 0) distance += a->table_length; printf(" i%u_%u [", n, tt_ptr_tag(p)); switch (tt_ptr_tag(p)) { case TT_TAG_TAIL: printf("label=\"tail"); break; case TT_TAG_OK: printf("shape=underline,label=\"ok"); break; case TT_TAG_BRANCH: printf("label=\"branch"); break; case TT_TAG_LEAF: if (flags & TT_STYLE_TEXT_LABELS) { char buf[5]; int v = (int) a->nodes[n].b; if (v < -9 || v >= 32) { int j; v = htonl(v); memcpy(buf, &v, 4); for (j = 0; j < 4; j++) { if (buf[j] < 32) { buf[j] = ' '; } } buf[4] = '\0'; } else { switch (v) { case TT_WILD: buf[0] = '*'; buf[1] = '\0'; break; case TT_BOS: buf[0] = buf[1] = '<'; buf[2] = '\0'; break; case TT_EOS: buf[0] = buf[1] = '>'; buf[2] = '\0'; break; case TT_BOC: buf[0] = buf[1] = '{'; buf[2] = '\0'; break; case TT_EOC: buf[0] = buf[1] = '}'; buf[2] = '\0'; break; default: snprintf(buf, sizeof(buf), "%d", v); buf[4] = '\0'; } } printf("shape=underline,label=\"leaf '%s'", buf); } else { printf("shape=underline,label=\"leaf a%d", (int) a->nodes[n].b); } break; case TT_TAG_NODE: printf("label=\"node @%d", a->headers[n].inuse.index); break; case TT_TAG_DICT: printf("label=\"dict s%u", a->nodes[n].b); break; default: printf("???? %08x\n", p); assert(0); } /* printf(" #%u d%d r%d\"];\n", i, distance, a->headers[n].inuse.refcount); */ if (flags & TT_STYLE_HIDE_DETAILS) { printf("\"];\n"); } else { printf(" i%u r%d\"];\n", n, a->headers[n].inuse.refcount); } switch (tt_ptr_tag(p)) { case TT_TAG_TAIL: dump_dot_edge(a, p, 0, "trie"); break; case TT_TAG_OK: dump_dot_edge(a, p, 0, "dict"); break; case TT_TAG_BRANCH: dump_dot_edge(a, p, 0, "wild"); dump_dot_edge(a, p, 1, "others"); break; case TT_TAG_LEAF: dump_dot_edge(a, p, 0, "trie"); break; case TT_TAG_NODE: dump_dot_edge(a, p, 0, "zero"); dump_dot_edge(a, p, 1, "one"); break; case TT_TAG_DICT: dump_dot_edge(a, p, 0, "root"); break; default: printf("???? %08x\n", p); assert(0); } } } printf("}\n"); } void tt_dump_arena_dot(char const *rootlabel, tt_node_ptr_t rootptr, tt_arena_t *a) { tt_dump_arena_dot_styled(rootlabel, rootptr, a, 0); } void tt_arena_flush1(tt_arena_t *a, tt_free_chain_t *c) { tt_node_idx_t i = a->free_chain.head; chain_splice(a, c, &a->free_chain); chain_init(a, &a->free_chain); while (i != TT_NO_IDX) { tt_drop(a, a->nodes[i].a); tt_drop(a, a->nodes[i].b); a->nodes[i].a = TT_NO_PTR; a->nodes[i].b = TT_NO_PTR; i = a->headers[i].next_free; } } void tt_arena_flush(tt_arena_t *a) { tt_free_chain_t c; c.head = c.tail = TT_NO_IDX; while (a->free_chain.head != TT_NO_IDX) { tt_arena_flush1(a, &c); } a->free_chain = c; } static inline int heap_tag_p(tt_node_ptr_t p) { tt_tag_t tag = tt_ptr_tag(p); return tag != TT_TAG_SPECIAL && tag != TT_TAG_INVALID; } static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) { tt_node_idx_t ni = tt_ptr_idx(p); tt_hash_t h; int i; /* printf("++++++++++++++++++++++++++++++++++++++++ recycling %d\n", ni); */ assert(heap_tag_p(p)); h = tt_hash_node(a, p); switch (tt_ptr_tag(p)) { case TT_TAG_LEAF: a->atom_decref(a->atom_context, a, a->nodes[ni].b); a->nodes[ni].b = TT_NO_PTR; break; case TT_TAG_DICT: a->nodes[ni].b = TT_NO_PTR; break; default: break; } chain_prepend(a, &a->free_chain, ni); a->free_count++; for (i = 0; i < a->max_probe+1; i++) { unsigned int index = (h + i) % a->table_length; tt_node_ptr_t candidate = a->table[index].ptr; /* printf("hunting i=%d index=%d p=%d candidate=%d\n", i, index, p, candidate); */ assert(!TT_NO_PTR_P(candidate)); /* Internal error if node not in table */ if (candidate == p) { /* We found it. Now swap in elements. */ while (1) { unsigned int nextindex = (index + 1) % a->table_length; tt_node_ptr_t next_p = a->table[nextindex].ptr; tt_hash_t next_h; int distance; a->table[index].ptr = TT_NO_PTR; if (TT_NO_PTR_P(next_p)) { break; } next_h = a->table[nextindex].hash; distance = nextindex - (next_h % a->table_length); if (distance < 0) distance += a->table_length; if (distance == 0) { break; } a->table[index].ptr = next_p; a->table[index].hash = next_h; index = nextindex; } break; } } } inline tt_node_ptr_t tt_grab(tt_arena_t *a, tt_node_ptr_t p) { tt_node_idx_t i = tt_ptr_idx(p); tt_tag_t t = tt_ptr_tag(p); if (t != TT_TAG_INVALID && t != TT_TAG_SPECIAL && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) { a->headers[i].inuse.refcount++; } return p; } inline void tt_drop(tt_arena_t *a, tt_node_ptr_t p) { tt_node_idx_t i = tt_ptr_idx(p); tt_tag_t t = tt_ptr_tag(p); if (t != TT_TAG_INVALID && t != TT_TAG_SPECIAL) { uint32_t oldcount = a->headers[i].inuse.refcount; if (oldcount < TT_REFCOUNT_LIMIT) { /* printf("++++++++++++++++++++++++++++++ dropping %d\n", i); */ assert(oldcount != 0); if (--(a->headers[i].inuse.refcount) == 0) { recycle_node(a, p); } } } } tt_node_ptr_t tt_arena_cons(tt_arena_t *a, tt_tag_t tag, uint32_t nindex, tt_node_ptr_t na, tt_node_ptr_t nb) { tt_hash_t h = hash(tag, nindex, na, nb); int i; for (i = 0; i < a->max_probe+1; i++) { unsigned int index = (h + i) % a->table_length; tt_node_ptr_t candidate = a->table[index].ptr; tt_node_idx_t candidate_i = tt_ptr_idx(candidate); /* printf("cons at %d candidate %d\n", i, candidate); */ if (TT_NO_PTR_P(candidate)) { /* printf("cons empty cell\n"); */ break; } { tt_hash_t candidate_h = a->table[index].hash; int distance = index - (candidate_h % a->table_length); if (distance < 0) distance += a->table_length; if (i > distance) { /* printf("early exit as we found a slot that would have been used instead\n"); */ break; } } /* printf("tag %d %d\n", tt_ptr_tag(candidate), tag); */ /* printf("index %d %d\n", a->headers[candidate].inuse.index, nindex); */ /* printf("a %d %d\n", a->nodes[candidate].a, na); */ /* printf("b %d %d\n", a->nodes[candidate].b, nb); */ if (tt_ptr_tag(candidate) == tag && a->headers[candidate_i].inuse.index == nindex && a->nodes[candidate_i].a == na && a->nodes[candidate_i].b == nb) { /* printf("cons located correct candidate\n"); */ return candidate; } } /* Not found */ /* printf("cons needs to alloc\n"); */ if (a->free_count < (a->table_length >> 2)) { if (tt_grow(a) != 0) { return TT_NO_PTR; } } { tt_node_idx_t node = chain_pop(a, &a->free_chain); tt_node_ptr_t p = tt_mkptr(node, tag); tt_grab(a, na); switch (tag) { case TT_TAG_LEAF: a->atom_incref(a->atom_context, a, nb); break; case TT_TAG_DICT: break; default: tt_grab(a, nb); } tt_drop(a, a->nodes[node].a); tt_drop(a, a->nodes[node].b); a->free_count--; a->headers[node].inuse.refcount = 0; a->headers[node].inuse.index = nindex; a->nodes[node].a = na; a->nodes[node].b = nb; register_node(a, p, h); return p; } }