treetrie-2015/treetrie.c

677 lines
17 KiB
C

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <errno.h>
#include <assert.h>
#include "treetrie.h"
#include "fasthash.h"
/* These two only included for tt_dump_arena_dot_styled. */
#include "route.h"
#include <arpa/inet.h>
/* Customized special-purpose fasthash variation */
#define mix(h) ({ \
(h) ^= (h) >> 23; \
(h) *= 0x2127599bf4325c37ULL; \
(h) ^= (h) >> 47; })
static inline uint64_t fasthash_4_ints(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t v4) {
const uint64_t m = 0x880355f21e6d1965ULL;
uint64_t h = (16 * m);
uint64_t v;
v = (((uint64_t) v2) << 32) | v1;
h ^= mix(v);
h *= m;
v = (((uint64_t) v4) << 32) | v3;
h ^= mix(v);
h *= m;
return mix(h);
}
static inline tt_hash_t hash(uint32_t tag,
uint32_t index,
tt_node_ptr_t a,
tt_node_ptr_t b)
{
uint64_t x = fasthash_4_ints(tag, index, a, b);
return x - (x >> 32);
/* uint32_t keyblock[4] = { tag, */
/* index, */
/* a, */
/* b }; */
/* assert(sizeof(keyblock) == 4 * sizeof(uint32_t)); */
/* return (tt_hash_t) fasthash32(keyblock, sizeof(keyblock), 0); */
}
static inline tt_hash_t tt_hash_node(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_idx_t i = tt_ptr_idx(p);
return hash(tt_ptr_tag(p),
a->headers[i].inuse.index,
a->nodes[i].a,
a->nodes[i].b);
}
static void chain_init(tt_arena_t *a, tt_free_chain_t *chain) {
chain->head = chain->tail = TT_NO_IDX;
}
static void chain_append(tt_arena_t *a, tt_free_chain_t *chain, tt_node_idx_t i) {
a->headers[i].next_free = TT_NO_IDX;
if (chain->tail == TT_NO_IDX) {
chain->head = i;
} else {
a->headers[chain->tail].next_free = i;
}
chain->tail = i;
}
static void chain_prepend(tt_arena_t *a, tt_free_chain_t *chain, tt_node_idx_t i) {
a->headers[i].next_free = chain->head;
if (chain->tail == TT_NO_IDX) {
chain->tail = i;
}
chain->head = i;
}
/* Does not modify chain2. */
static void chain_splice(tt_arena_t *a, tt_free_chain_t *chain1, tt_free_chain_t *chain2) {
if (chain2->head == TT_NO_IDX) {
/* do nothing */
} else if (chain1->head == TT_NO_IDX) {
*chain1 = *chain2;
} else {
a->headers[chain1->tail].next_free = chain2->head;
chain1->tail = chain2->tail;
}
}
static tt_node_idx_t chain_pop(tt_arena_t *a, tt_free_chain_t *chain) {
tt_node_idx_t i = chain->head;
if (i != TT_NO_IDX) {
chain->head = a->headers[i].next_free;
if (chain->tail == i) {
chain->tail = chain->head;
}
}
return i;
}
int tt_arena_init(tt_arena_t *a,
void *atom_context,
void (*atom_incref)(void *context, tt_arena_t *a, tt_atom_t atom),
void (*atom_decref)(void *context, tt_arena_t *a, tt_atom_t atom))
{
a->atom_context = atom_context;
a->atom_incref = atom_incref;
a->atom_decref = atom_decref;
a->max_probe = 0;
a->table_length = 16411; /* 16384; */
a->table = calloc(a->table_length, sizeof(a->table[0]));
a->headers = calloc(a->table_length, sizeof(a->headers[0]));
a->nodes = calloc(a->table_length, sizeof(a->nodes[0]));
a->free_count = 0;
chain_init(a, &a->free_chain);
if (a->table == NULL || a->headers == NULL || a->nodes == NULL) {
free(a->table);
free(a->headers);
free(a->nodes);
errno = ENOMEM;
return -1;
}
{
int i;
for (i = 0; i < a->table_length; i++) {
chain_append(a, &a->free_chain, i);
a->free_count++;
}
}
return 0;
}
static void register_node(tt_arena_t *a, tt_node_ptr_t p, tt_hash_t initial_hash) {
tt_hash_t h = initial_hash;
int i = 0;
while (1) {
unsigned int index = (h + i) % a->table_length;
tt_node_ptr_t candidate = a->table[index].ptr;
/* printf("checking robinhood at h %d i %d index %d candidate %d\n", h, i, index, candidate); */
if (i > a->max_probe) {
a->max_probe = i;
}
if (TT_NO_PTR_P(candidate)) {
/* This slot in the table is free. */
/* printf("slot free!\n"); */
a->table[index].ptr = p;
a->table[index].hash = h;
break;
}
/* printf("slot not free.\n"); */
{
tt_hash_t candidate_h = a->table[index].hash;
int distance = index - (candidate_h % a->table_length);
if (distance < 0) distance += a->table_length;
if (distance < i) {
a->table[index].ptr = p;
a->table[index].hash = h;
h = candidate_h;
i = distance + 1;
p = candidate;
} else {
/* keep scanning. */
i++;
}
}
}
}
static int tt_grow(tt_arena_t *a) {
tt_hashtable_entry_t *old_table = a->table;
unsigned int old_table_length = a->table_length;
unsigned int new_table_length = old_table_length << 1;
/* printf("PREGROW\n"); */
/* tt_dump_arena(a, 1); */
{
tt_hashtable_entry_t *new_table = calloc(new_table_length, sizeof(a->table[0]));
tt_header_t *new_headers = realloc(a->headers, new_table_length * sizeof(a->headers[0]));
tt_node_t *new_nodes = realloc(a->nodes, new_table_length * sizeof(a->nodes[0]));
if (new_table == NULL || new_headers == NULL || new_nodes == NULL) {
free(new_table);
free(new_headers);
free(new_nodes);
errno = ENOMEM;
return -1;
}
memset(new_headers + old_table_length, 0,
(new_table_length - old_table_length) * sizeof(a->headers[0]));
memset(new_nodes + old_table_length, 0,
(new_table_length - old_table_length) * sizeof(a->nodes[0]));
a->max_probe = 0;
a->table_length = new_table_length;
a->table = new_table;
a->headers = new_headers;
a->nodes = new_nodes;
{
int i;
for (i = old_table_length; i < new_table_length; i++) {
chain_append(a, &a->free_chain, i);
a->free_count++;
}
}
}
/* printf("//////////////////////////////////////// GROW starting (length %d)\n", a->table_length); */
{
int i;
for (i = 0; i < old_table_length; i++) {
tt_node_ptr_t p = old_table[i].ptr;
tt_hash_t h = old_table[i].hash;
if (!TT_NO_PTR_P(p)) {
register_node(a, p, h);
}
}
}
/* printf("//////////////////////////////////////// GROW finished (length %d)\n", a->table_length); */
/* printf("POSTGROW\n"); */
/* tt_dump_arena_summary(a); */
free(old_table);
return 0;
}
void tt_arena_done(tt_arena_t *a) {
free(a->table);
free(a->headers);
free(a->nodes);
memset(a, 0, sizeof(*a));
}
static size_t arena_size(tt_arena_t *a) {
return sizeof(*a) +
(a->table_length * sizeof(a->table[0])) +
(a->table_length * sizeof(a->headers[0])) +
(a->table_length * sizeof(a->nodes[0]));
}
void tt_dump_arena_summary(tt_arena_t *a) {
printf("size in bytes: %lu\n", arena_size(a));
printf("max_probe: %u\n", a->max_probe);
printf("table_length: %u\n", a->table_length);
printf("free_count: %u\n", a->free_count);
}
void tt_dump_arena(tt_arena_t *a, int show_free_chain) {
tt_dump_arena_summary(a);
if (show_free_chain) {
int chain_counter = 0;
tt_node_idx_t fp = a->free_chain.head;
printf("free_chain:");
while (fp != TT_NO_IDX) {
if ((chain_counter % 10) == 0) {
printf("\n ");
}
printf(" %d", fp);
fp = a->headers[fp].next_free;
chain_counter++;
}
printf("\n");
}
{
int i;
for (i = 0; i < a->table_length; i++) {
tt_node_ptr_t p = a->table[i].ptr;
tt_node_idx_t n = tt_ptr_idx(p);
if (TT_NO_PTR_P(p)) {
continue;
}
if (n >= a->table_length) {
printf("%12u -> %12u ?!?!?!\n", i, n);
} else {
tt_hash_t h = a->table[i].hash;
int distance = i - (h % a->table_length);
if (distance < 0) distance += a->table_length;
printf("%12u -> %12u: dist %d ref %d ",
i,
n,
distance,
a->headers[n].inuse.refcount);
switch (tt_ptr_tag(p)) {
case TT_TAG_TAIL:
printf("tail %u/%u\n",
tt_ptr_idx(a->nodes[n].a),
tt_ptr_tag(a->nodes[n].a));
break;
case TT_TAG_OK:
printf("ok %u/%u\n",
tt_ptr_idx(a->nodes[n].a),
tt_ptr_tag(a->nodes[n].a));
break;
case TT_TAG_BRANCH:
printf("branch %u/%u %u/%u\n",
tt_ptr_idx(a->nodes[n].a),
tt_ptr_tag(a->nodes[n].a),
tt_ptr_idx(a->nodes[n].b),
tt_ptr_tag(a->nodes[n].b));
break;
case TT_TAG_LEAF:
printf("leaf %u/%u %d\n",
tt_ptr_idx(a->nodes[n].a),
tt_ptr_tag(a->nodes[n].a),
(int) a->nodes[n].b);
break;
case TT_TAG_NODE:
printf("node index %d, %u/%u %u/%u\n",
a->headers[n].inuse.index,
tt_ptr_idx(a->nodes[n].a),
tt_ptr_tag(a->nodes[n].a),
tt_ptr_idx(a->nodes[n].b),
tt_ptr_tag(a->nodes[n].b));
break;
case TT_TAG_DICT:
printf("dict %u/%u %u\n",
tt_ptr_idx(a->nodes[n].a),
tt_ptr_tag(a->nodes[n].a),
a->nodes[n].b);
break;
default:
printf("???? %08x\n", p);
assert(0);
}
}
}
}
}
static void dump_dot_edge(tt_arena_t *a, tt_node_ptr_t p, int lr, char const *edgename) {
tt_node_idx_t n = tt_ptr_idx(p);
tt_tag_t target_t = tt_ptr_tag(lr ? a->nodes[n].b : a->nodes[n].a);
if (target_t != TT_TAG_INVALID && target_t != TT_TAG_SPECIAL) {
printf(" i%u_%u -> i%u_%u [label=\"%s\"];\n",
n,
tt_ptr_tag(p),
tt_ptr_idx(lr ? a->nodes[n].b : a->nodes[n].a),
target_t,
edgename);
}
}
void tt_dump_arena_dot_styled(char const *rootlabel,
tt_node_ptr_t rootptr,
tt_arena_t *a,
int flags)
{
int i;
printf("digraph Arena {\n");
printf(" root [shape=box, label=\"%s\"];\n", rootlabel);
if (tt_ptr_tag(rootptr) != TT_TAG_INVALID && tt_ptr_tag(rootptr) != TT_TAG_SPECIAL) {
printf(" root -> i%u_%u;\n",
tt_ptr_idx(rootptr),
tt_ptr_tag(rootptr));
}
for (i = 0; i < a->table_length; i++) {
tt_node_ptr_t p = a->table[i].ptr;
tt_node_idx_t n = tt_ptr_idx(p);
if (TT_NO_PTR_P(p)) {
continue;
}
if (n >= a->table_length) {
printf(" // %12u -> %12u ?!?!?!\n", i, n);
} else {
tt_hash_t h = a->table[i].hash;
int distance = i - (h % a->table_length);
if (distance < 0) distance += a->table_length;
printf(" i%u_%u [",
n,
tt_ptr_tag(p));
switch (tt_ptr_tag(p)) {
case TT_TAG_TAIL:
printf("label=\"tail");
break;
case TT_TAG_OK:
printf("shape=underline,label=\"ok");
break;
case TT_TAG_BRANCH:
printf("label=\"branch");
break;
case TT_TAG_LEAF:
if (flags & TT_STYLE_TEXT_LABELS) {
char buf[5];
int v = (int) a->nodes[n].b;
if (v < -9 || v >= 32) {
int j;
v = htonl(v);
memcpy(buf, &v, 4);
for (j = 0; j < 4; j++) {
if (buf[j] < 32) { buf[j] = ' '; }
}
buf[4] = '\0';
} else {
switch (v) {
case TT_WILD: buf[0] = '*'; buf[1] = '\0'; break;
case TT_BOS: buf[0] = buf[1] = '<'; buf[2] = '\0'; break;
case TT_EOS: buf[0] = buf[1] = '>'; buf[2] = '\0'; break;
case TT_BOC: buf[0] = buf[1] = '{'; buf[2] = '\0'; break;
case TT_EOC: buf[0] = buf[1] = '}'; buf[2] = '\0'; break;
default:
snprintf(buf, sizeof(buf), "%d", v);
buf[4] = '\0';
}
}
printf("shape=underline,label=\"leaf '%s'", buf);
} else {
printf("shape=underline,label=\"leaf a%d", (int) a->nodes[n].b);
}
break;
case TT_TAG_NODE:
printf("label=\"node @%d", a->headers[n].inuse.index);
break;
case TT_TAG_DICT:
printf("label=\"dict s%u", a->nodes[n].b);
break;
default:
printf("???? %08x\n", p);
assert(0);
}
/* printf(" #%u d%d r%d\"];\n", i, distance, a->headers[n].inuse.refcount); */
if (flags & TT_STYLE_HIDE_DETAILS) {
printf("\"];\n");
} else {
printf(" i%u r%d\"];\n", n, a->headers[n].inuse.refcount);
}
switch (tt_ptr_tag(p)) {
case TT_TAG_TAIL:
dump_dot_edge(a, p, 0, "trie");
break;
case TT_TAG_OK:
dump_dot_edge(a, p, 0, "dict");
break;
case TT_TAG_BRANCH:
dump_dot_edge(a, p, 0, "wild");
dump_dot_edge(a, p, 1, "others");
break;
case TT_TAG_LEAF:
dump_dot_edge(a, p, 0, "trie");
break;
case TT_TAG_NODE:
dump_dot_edge(a, p, 0, "zero");
dump_dot_edge(a, p, 1, "one");
break;
case TT_TAG_DICT:
dump_dot_edge(a, p, 0, "root");
break;
default:
printf("???? %08x\n", p);
assert(0);
}
}
}
printf("}\n");
}
void tt_dump_arena_dot(char const *rootlabel, tt_node_ptr_t rootptr, tt_arena_t *a) {
tt_dump_arena_dot_styled(rootlabel, rootptr, a, 0);
}
void tt_arena_flush1(tt_arena_t *a, tt_free_chain_t *c) {
tt_node_idx_t i = a->free_chain.head;
chain_splice(a, c, &a->free_chain);
chain_init(a, &a->free_chain);
while (i != TT_NO_IDX) {
tt_drop(a, a->nodes[i].a);
tt_drop(a, a->nodes[i].b);
a->nodes[i].a = TT_NO_PTR;
a->nodes[i].b = TT_NO_PTR;
i = a->headers[i].next_free;
}
}
void tt_arena_flush(tt_arena_t *a) {
tt_free_chain_t c;
c.head = c.tail = TT_NO_IDX;
while (a->free_chain.head != TT_NO_IDX) {
tt_arena_flush1(a, &c);
}
a->free_chain = c;
}
static inline int heap_tag_p(tt_node_ptr_t p) {
tt_tag_t tag = tt_ptr_tag(p);
return tag != TT_TAG_SPECIAL && tag != TT_TAG_INVALID;
}
static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_idx_t ni = tt_ptr_idx(p);
tt_hash_t h;
int i;
/* printf("++++++++++++++++++++++++++++++++++++++++ recycling %d\n", ni); */
assert(heap_tag_p(p));
h = tt_hash_node(a, p);
switch (tt_ptr_tag(p)) {
case TT_TAG_LEAF:
a->atom_decref(a->atom_context, a, a->nodes[ni].b);
a->nodes[ni].b = TT_NO_PTR;
break;
case TT_TAG_DICT:
a->nodes[ni].b = TT_NO_PTR;
break;
default:
break;
}
chain_prepend(a, &a->free_chain, ni);
a->free_count++;
for (i = 0; i < a->max_probe+1; i++) {
unsigned int index = (h + i) % a->table_length;
tt_node_ptr_t candidate = a->table[index].ptr;
/* printf("hunting i=%d index=%d p=%d candidate=%d\n", i, index, p, candidate); */
assert(!TT_NO_PTR_P(candidate)); /* Internal error if node not in table */
if (candidate == p) {
/* We found it. Now swap in elements. */
while (1) {
unsigned int nextindex = (index + 1) % a->table_length;
tt_node_ptr_t next_p = a->table[nextindex].ptr;
tt_hash_t next_h;
int distance;
a->table[index].ptr = TT_NO_PTR;
if (TT_NO_PTR_P(next_p)) {
break;
}
next_h = a->table[nextindex].hash;
distance = nextindex - (next_h % a->table_length);
if (distance < 0) distance += a->table_length;
if (distance == 0) {
break;
}
a->table[index].ptr = next_p;
a->table[index].hash = next_h;
index = nextindex;
}
break;
}
}
}
inline tt_node_ptr_t tt_grab(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_idx_t i = tt_ptr_idx(p);
tt_tag_t t = tt_ptr_tag(p);
if (t != TT_TAG_INVALID && t != TT_TAG_SPECIAL
&& a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) {
a->headers[i].inuse.refcount++;
}
return p;
}
inline void tt_drop(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_idx_t i = tt_ptr_idx(p);
tt_tag_t t = tt_ptr_tag(p);
if (t != TT_TAG_INVALID && t != TT_TAG_SPECIAL) {
uint32_t oldcount = a->headers[i].inuse.refcount;
if (oldcount < TT_REFCOUNT_LIMIT) {
/* printf("++++++++++++++++++++++++++++++ dropping %d\n", i); */
assert(oldcount != 0);
if (--(a->headers[i].inuse.refcount) == 0) {
recycle_node(a, p);
}
}
}
}
tt_node_ptr_t tt_arena_cons(tt_arena_t *a,
tt_tag_t tag,
uint32_t nindex,
tt_node_ptr_t na,
tt_node_ptr_t nb)
{
tt_hash_t h = hash(tag, nindex, na, nb);
int i;
for (i = 0; i < a->max_probe+1; i++) {
unsigned int index = (h + i) % a->table_length;
tt_node_ptr_t candidate = a->table[index].ptr;
tt_node_idx_t candidate_i = tt_ptr_idx(candidate);
/* printf("cons at %d candidate %d\n", i, candidate); */
if (TT_NO_PTR_P(candidate)) {
/* printf("cons empty cell\n"); */
break;
}
{
tt_hash_t candidate_h = a->table[index].hash;
int distance = index - (candidate_h % a->table_length);
if (distance < 0) distance += a->table_length;
if (i > distance) {
/* printf("early exit as we found a slot that would have been used instead\n"); */
break;
}
}
/* printf("tag %d %d\n", tt_ptr_tag(candidate), tag); */
/* printf("index %d %d\n", a->headers[candidate].inuse.index, nindex); */
/* printf("a %d %d\n", a->nodes[candidate].a, na); */
/* printf("b %d %d\n", a->nodes[candidate].b, nb); */
if (tt_ptr_tag(candidate) == tag &&
a->headers[candidate_i].inuse.index == nindex &&
a->nodes[candidate_i].a == na &&
a->nodes[candidate_i].b == nb) {
/* printf("cons located correct candidate\n"); */
return candidate;
}
}
/* Not found */
/* printf("cons needs to alloc\n"); */
if (a->free_count < (a->table_length >> 2)) {
if (tt_grow(a) != 0) {
return TT_NO_PTR;
}
}
{
tt_node_idx_t node = chain_pop(a, &a->free_chain);
tt_node_ptr_t p = tt_mkptr(node, tag);
tt_grab(a, na);
switch (tag) {
case TT_TAG_LEAF:
a->atom_incref(a->atom_context, a, nb);
break;
case TT_TAG_DICT:
break;
default:
tt_grab(a, nb);
}
tt_drop(a, a->nodes[node].a);
tt_drop(a, a->nodes[node].b);
a->free_count--;
a->headers[node].inuse.refcount = 0;
a->headers[node].inuse.index = nindex;
a->nodes[node].a = na;
a->nodes[node].b = nb;
register_node(a, p, h);
return p;
}
}