Move tags into the pointers

This commit is contained in:
Tony Garnock-Jones 2015-06-30 12:07:48 -04:00
parent 7cede97080
commit 5b3efe4475
3 changed files with 119 additions and 85 deletions

6
main.c
View File

@ -9,7 +9,7 @@
int main(int argc, char *argv[]) {
tt_arena_t a;
int i, outer;
tt_node_idx_t prev = TT_EMPTY;
tt_node_ptr_t prev = TT_EMPTY;
setbuf(stdout, NULL);
tt_arena_init(&a);
@ -25,12 +25,12 @@ int main(int argc, char *argv[]) {
/* printf("======================================== LOOP ITERATION %d\n", outer); */
/* tt_dump_arena_summary(&a); */
for (i = 0; i < 1000000; i++) {
tt_node_idx_t leaf = tt_arena_cons(&a,
tt_node_ptr_t leaf = tt_arena_cons(&a,
TT_TAG_LEAF,
0,
TT_OK,
1001);
tt_node_idx_t curr = tt_arena_cons(&a,
tt_node_ptr_t curr = tt_arena_cons(&a,
TT_TAG_NODE,
0,
leaf,

View File

@ -30,8 +30,8 @@ typedef uint32_t tt_hash_t;
static inline tt_hash_t hash(uint32_t tag,
uint32_t index,
tt_node_idx_t a,
tt_node_idx_t b)
tt_node_ptr_t a,
tt_node_ptr_t b)
{
/* uint64_t x = fasthash_4_ints(tag, index, a, b); */
/* return x - (x >> 32); */
@ -56,20 +56,21 @@ static inline tt_hash_t hash(uint32_t tag,
return x - (x >> 32);
}
static inline tt_hash_t tt_hash_node(tt_arena_t *a, tt_node_idx_t i) {
return hash(a->headers[i].inuse.tag,
static inline tt_hash_t tt_hash_node(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_idx_t i = tt_ptr_idx(p);
return hash(tt_ptr_tag(p),
a->headers[i].inuse.index,
a->nodes[i].a,
a->nodes[i].b);
}
static void chain_init(tt_arena_t *a, tt_free_chain_t *chain) {
chain->head = chain->tail = TT_ERROR;
chain->head = chain->tail = TT_NO_IDX;
}
static void chain_append(tt_arena_t *a, tt_free_chain_t *chain, tt_node_idx_t i) {
a->headers[i].next_free = TT_ERROR;
if (chain->tail == TT_ERROR) {
a->headers[i].next_free = TT_NO_IDX;
if (chain->tail == TT_NO_IDX) {
chain->head = i;
} else {
a->headers[chain->tail].next_free = i;
@ -79,9 +80,9 @@ static void chain_append(tt_arena_t *a, tt_free_chain_t *chain, tt_node_idx_t i)
/* Does not modify chain2. */
static void chain_splice(tt_arena_t *a, tt_free_chain_t *chain1, tt_free_chain_t *chain2) {
if (chain2->head == TT_ERROR) {
if (chain2->head == TT_NO_IDX) {
/* do nothing */
} else if (chain1->head == TT_ERROR) {
} else if (chain1->head == TT_NO_IDX) {
*chain1 = *chain2;
} else {
a->headers[chain1->tail].next_free = chain2->head;
@ -91,7 +92,7 @@ static void chain_splice(tt_arena_t *a, tt_free_chain_t *chain1, tt_free_chain_t
static tt_node_idx_t chain_pop(tt_arena_t *a, tt_free_chain_t *chain) {
tt_node_idx_t i = chain->head;
if (i != TT_ERROR) {
if (i != TT_NO_IDX) {
chain->head = a->headers[i].next_free;
if (chain->tail == i) {
chain->tail = chain->head;
@ -128,12 +129,12 @@ int tt_arena_init(tt_arena_t *a) {
return 0;
}
static void register_node(tt_arena_t *a, tt_node_idx_t node, tt_hash_t initial_hash) {
static void register_node(tt_arena_t *a, tt_node_ptr_t p, tt_hash_t initial_hash) {
tt_hash_t h = initial_hash;
int i = 0;
while (1) {
unsigned int index = (h + i) % a->table_length;
tt_node_idx_t candidate = a->table[index];
tt_node_ptr_t candidate = a->table[index];
/* printf("checking robinhood at h %d i %d index %d candidate %d\n", h, i, index, candidate); */
@ -141,10 +142,10 @@ static void register_node(tt_arena_t *a, tt_node_idx_t node, tt_hash_t initial_h
a->max_probe = i;
}
if (candidate < TT_FIRST_VALID_NODE_IDX) {
if (candidate < TT_FIRST_VALID_NODE_PTR) {
/* This slot in the table is free. */
/* printf("slot free!\n"); */
a->table[index] = node;
a->table[index] = p;
break;
}
@ -155,10 +156,10 @@ static void register_node(tt_arena_t *a, tt_node_idx_t node, tt_hash_t initial_h
if (distance < 0) distance += a->table_length;
if (distance < i) {
a->table[index] = node;
a->table[index] = p;
h = candidate_h;
i = distance + 1;
node = candidate;
p = candidate;
} else {
/* keep scanning. */
i++;
@ -168,7 +169,7 @@ static void register_node(tt_arena_t *a, tt_node_idx_t node, tt_hash_t initial_h
}
static int tt_grow(tt_arena_t *a) {
tt_node_idx_t *old_table = a->table;
tt_node_ptr_t *old_table = a->table;
unsigned int old_table_length = a->table_length;
unsigned int new_table_length = old_table_length << 1;
@ -176,7 +177,7 @@ static int tt_grow(tt_arena_t *a) {
/* tt_dump_arena(a); */
{
tt_node_idx_t *new_table = calloc(new_table_length, sizeof(a->table[0]));
tt_node_ptr_t *new_table = calloc(new_table_length, sizeof(a->table[0]));
tt_header_t *new_headers = realloc(a->headers, new_table_length * sizeof(a->headers[0]));
tt_node_t *new_nodes = realloc(a->nodes, new_table_length * sizeof(a->nodes[0]));
@ -213,9 +214,9 @@ static int tt_grow(tt_arena_t *a) {
{
int i;
for (i = 0; i < old_table_length; i++) {
tt_node_idx_t n = old_table[i];
if (n >= TT_FIRST_VALID_NODE_IDX) {
register_node(a, n, tt_hash_node(a, n));
tt_node_ptr_t p = old_table[i];
if (p >= TT_FIRST_VALID_NODE_PTR) {
register_node(a, p, tt_hash_node(a, p));
}
}
}
@ -256,7 +257,7 @@ void tt_dump_arena(tt_arena_t *a) {
printf("free_chain:");
{
tt_node_idx_t fp = a->free_chain.head;
while (fp != TT_ERROR) {
while (fp != TT_NO_IDX) {
printf(" %d", fp);
fp = a->headers[fp].next_free;
}
@ -266,13 +267,14 @@ void tt_dump_arena(tt_arena_t *a) {
{
int i;
for (i = 0; i < a->table_length; i++) {
tt_node_idx_t n = a->table[i];
tt_node_ptr_t p = a->table[i];
tt_node_idx_t n = tt_ptr_idx(p);
if (n < TT_FIRST_VALID_NODE_IDX) {
/* Skip. */
} else if (n >= a->table_length) {
printf("%12u -> %12u ?!?!?!\n", i, n);
} else {
tt_hash_t h = tt_hash_node(a, n);
tt_hash_t h = tt_hash_node(a, p);
int distance = i - (h % a->table_length);
if (distance < 0) distance += a->table_length;
printf("%12u -> %12u: dist %d ref %d ",
@ -280,21 +282,32 @@ void tt_dump_arena(tt_arena_t *a) {
n,
distance,
a->headers[n].inuse.refcount);
switch (a->headers[n].inuse.tag) {
switch (tt_ptr_tag(p)) {
case TT_TAG_TAIL:
printf("tail %u\n", a->nodes[n].a);
printf("tail %u/%u\n",
tt_ptr_idx(a->nodes[n].a),
tt_ptr_tag(a->nodes[n].a));
break;
case TT_TAG_BRANCH:
printf("branch %u %u\n", a->nodes[n].a, a->nodes[n].b);
printf("branch %u/%u %u/%u\n",
tt_ptr_idx(a->nodes[n].a),
tt_ptr_tag(a->nodes[n].a),
tt_ptr_idx(a->nodes[n].b),
tt_ptr_tag(a->nodes[n].b));
break;
case TT_TAG_LEAF:
printf("leaf %u %u\n", a->nodes[n].a, a->nodes[n].b);
printf("leaf %u/%u %u\n",
tt_ptr_idx(a->nodes[n].a),
tt_ptr_tag(a->nodes[n].a),
a->nodes[n].b);
break;
case TT_TAG_NODE:
printf("node index %d, %u %u\n",
printf("node index %d, %u/%u %u/%u\n",
a->headers[n].inuse.index,
a->nodes[n].a,
a->nodes[n].b);
tt_ptr_idx(a->nodes[n].a),
tt_ptr_tag(a->nodes[n].a),
tt_ptr_idx(a->nodes[n].b),
tt_ptr_tag(a->nodes[n].b));
break;
}
}
@ -309,58 +322,59 @@ void tt_arena_flush1(tt_arena_t *a, tt_free_chain_t *c) {
while (i >= TT_FIRST_VALID_NODE_IDX) {
tt_drop(a, a->nodes[i].a);
tt_drop(a, a->nodes[i].b);
a->nodes[i].a = TT_ERROR;
a->nodes[i].b = TT_ERROR;
a->nodes[i].a = TT_NO_PTR;
a->nodes[i].b = TT_NO_PTR;
i = a->headers[i].next_free;
}
}
void tt_arena_flush(tt_arena_t *a) {
tt_free_chain_t c;
c.head = c.tail = TT_ERROR;
while (a->free_chain.head != TT_ERROR) {
c.head = c.tail = TT_NO_IDX;
while (a->free_chain.head != TT_NO_IDX) {
tt_arena_flush1(a, &c);
}
a->free_chain = c;
}
static void recycle_node(tt_arena_t *a, tt_node_idx_t ni) {
static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_idx_t ni = tt_ptr_idx(p);
tt_hash_t h;
int i;
/* printf("++++++++++++++++++++++++++++++++++++++++ recycling %d\n", ni); */
assert(ni >= TT_FIRST_VALID_NODE_IDX);
h = tt_hash_node(a, ni);
assert(p >= TT_FIRST_VALID_NODE_PTR);
h = tt_hash_node(a, p);
if (a->headers[ni].inuse.tag == TT_TAG_LEAF) {
a->nodes[ni].b = TT_ERROR;
if (tt_ptr_tag(p) == TT_TAG_LEAF) {
a->nodes[ni].b = TT_NO_PTR;
}
chain_append(a, &a->free_chain, ni);
a->free_count++;
for (i = 0; i < a->max_probe+1; i++) {
unsigned int index = (h + i) % a->table_length;
tt_node_idx_t candidate = a->table[index];
tt_node_ptr_t candidate = a->table[index];
/* printf("hunting i=%d index=%d ni=%d candidate=%d\n", i, index, ni, candidate); */
assert(candidate >= TT_FIRST_VALID_NODE_IDX); /* Internal error if node not in table */
/* printf("hunting i=%d index=%d p=%d candidate=%d\n", i, index, p, candidate); */
assert(candidate >= TT_FIRST_VALID_NODE_PTR); /* Internal error if node not in table */
if (candidate == ni) {
if (candidate == p) {
/* We found it. Now swap in elements. */
while (1) {
unsigned int nextindex = (index + 1) % a->table_length;
tt_node_idx_t next_n = a->table[nextindex];
tt_node_ptr_t next_p = a->table[nextindex];
tt_hash_t next_h;
int distance;
a->table[index] = TT_ERROR;
a->table[index] = TT_NO_PTR;
if (next_n < TT_FIRST_VALID_NODE_IDX) {
if (next_p < TT_FIRST_VALID_NODE_PTR) {
break;
}
next_h = tt_hash_node(a, next_n);
next_h = tt_hash_node(a, next_p);
distance = nextindex - (next_h % a->table_length);
if (distance < 0) distance += a->table_length;
@ -368,7 +382,7 @@ static void recycle_node(tt_arena_t *a, tt_node_idx_t ni) {
break;
}
a->table[index] = next_n;
a->table[index] = next_p;
index = nextindex;
}
break;
@ -376,35 +390,36 @@ static void recycle_node(tt_arena_t *a, tt_node_idx_t ni) {
}
}
tt_node_idx_t tt_arena_cons(tt_arena_t *a,
uint32_t tag,
tt_node_ptr_t tt_arena_cons(tt_arena_t *a,
tt_tag_t tag,
uint32_t nindex,
tt_node_idx_t na,
tt_node_idx_t nb)
tt_node_ptr_t na,
tt_node_ptr_t nb)
{
tt_hash_t h = hash(tag, nindex, na, nb);
int i;
for (i = 0; i < a->max_probe+1; i++) {
unsigned int index = (h + i) % a->table_length;
tt_node_idx_t candidate = a->table[index];
tt_node_ptr_t candidate = a->table[index];
tt_node_idx_t candidate_i = tt_ptr_idx(candidate);
/* printf("cons at %d candidate %d\n", i, candidate); */
/* TODO: perhaps also bail early if we detect that the hash code changes */
if (candidate < TT_FIRST_VALID_NODE_IDX) {
if (candidate < TT_FIRST_VALID_NODE_PTR) {
/* printf("cons empty cell\n"); */
break;
}
/* printf("tag %d %d\n", a->headers[candidate].inuse.tag, tag); */
/* printf("tag %d %d\n", tt_ptr_tag(candidate), tag); */
/* printf("index %d %d\n", a->headers[candidate].inuse.index, nindex); */
/* printf("a %d %d\n", a->nodes[candidate].a, na); */
/* printf("b %d %d\n", a->nodes[candidate].b, nb); */
if (a->headers[candidate].inuse.tag == tag &&
a->headers[candidate].inuse.index == nindex &&
a->nodes[candidate].a == na &&
a->nodes[candidate].b == nb) {
if (tt_ptr_tag(candidate) == tag &&
a->headers[candidate_i].inuse.index == nindex &&
a->nodes[candidate_i].a == na &&
a->nodes[candidate_i].b == nb) {
/* printf("cons located correct candidate\n"); */
return candidate;
}
@ -415,12 +430,13 @@ tt_node_idx_t tt_arena_cons(tt_arena_t *a,
if (a->free_count < (a->table_length >> 2)) {
if (tt_grow(a) != 0) {
return TT_ERROR;
return TT_NO_PTR;
}
}
{
tt_node_idx_t node = chain_pop(a, &a->free_chain);
tt_node_ptr_t p = tt_mkptr(node, tag);
tt_grab(a, na);
if (tag != TT_TAG_LEAF) tt_grab(a, nb);
@ -429,28 +445,29 @@ tt_node_idx_t tt_arena_cons(tt_arena_t *a,
a->free_count--;
a->headers[node].inuse.refcount = 0;
a->headers[node].inuse.tag = tag;
a->headers[node].inuse.index = nindex;
a->nodes[node].a = na;
a->nodes[node].b = nb;
register_node(a, node, h);
return node;
register_node(a, p, h);
return p;
}
}
tt_node_idx_t tt_grab(tt_arena_t *a, tt_node_idx_t i) {
tt_node_ptr_t tt_grab(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_idx_t i = tt_ptr_idx(p);
if (i >= TT_FIRST_VALID_NODE_IDX && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) {
a->headers[i].inuse.refcount++;
}
return i;
return p;
}
void tt_drop(tt_arena_t *a, tt_node_idx_t i) {
void tt_drop(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_idx_t i = tt_ptr_idx(p);
if (i >= TT_FIRST_VALID_NODE_IDX && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) {
/* printf("++++++++++++++++++++++++++++++ dropping %d\n", i); */
if (--(a->headers[i].inuse.refcount) == 0) {
recycle_node(a, i);
recycle_node(a, p);
}
}
}

View File

@ -13,31 +13,36 @@ typedef enum tt_tag_t {
} tt_tag_t;
typedef enum tt_reserved_node_idx_t {
TT_ERROR = 0, /* invalid node index, means "no node at all", not even empty */
TT_EMPTY, /* empty treetrie */
TT_OK, /* terminal marker */
TT_NO_IDX = 0, /* invalid node index, means "no node at all", not even empty */
TT_EMPTY_IDX, /* empty treetrie */
TT_OK_IDX, /* terminal marker */
TT_FIRST_VALID_NODE_IDX
} tt_reserved_node_idx_t;
#define TT_NO_PTR (tt_mkptr(TT_NO_IDX, 0))
#define TT_EMPTY (tt_mkptr(TT_EMPTY_IDX, 0))
#define TT_OK (tt_mkptr(TT_OK_IDX, 0))
#define TT_FIRST_VALID_NODE_PTR (tt_mkptr(TT_FIRST_VALID_NODE_IDX, 0))
typedef uint32_t tt_node_idx_t; /* N.B. tt_reserved_node_idx_t */
typedef uint32_t tt_node_ptr_t; /* An index shifted left 2 with tag or'd in low bits */
typedef uint32_t tt_atom_t; /* Atom number 0 is the wildcard atom. */
typedef union tt_header_t {
uint32_t next_free;
tt_node_idx_t next_free;
struct {
uint32_t refcount : 24;
uint32_t index : 6;
tt_tag_t tag : 2;
uint32_t index : 8; /* this really doesn't need to be more than 5 or 6 bits wide */
} inuse;
} tt_header_t;
#define TT_REFCOUNT_LIMIT ((1 << 24) - 1)
typedef struct tt_node_t {
tt_node_idx_t a; /* always a real node idx */
tt_node_idx_t b; /* a real node idx unless corresponding tag is TT_TAG_LEAF */
tt_node_ptr_t a; /* always a real node ptr */
tt_node_ptr_t b; /* a real node ptr unless corresponding tag is TT_TAG_LEAF */
} tt_node_t;
typedef struct tt_free_chain_t {
@ -49,7 +54,7 @@ typedef struct tt_arena_t {
/* Fields for the Robin Hood hashset used for hashconsing of tt_nodes */
unsigned int max_probe;
unsigned int table_length;
tt_node_idx_t *table;
tt_node_ptr_t *table;
tt_header_t *headers;
tt_node_t *nodes;
@ -58,6 +63,18 @@ typedef struct tt_arena_t {
tt_free_chain_t free_chain;
} tt_arena_t;
/* Packs a node index and a tag into a single node pointer: the index is
   moved up by two bits and the tag is OR'd into the vacated low bits.
   The tag is not masked here, so it is presumably expected to fit in two
   bits (tt_ptr_tag reads it back with a two-bit mask) -- TODO confirm. */
static inline tt_node_ptr_t tt_mkptr(tt_node_idx_t i, tt_tag_t tag) {
  tt_node_ptr_t shifted_index = i << 2;
  return shifted_index | tag;
}
/* Recovers the node index from a node pointer by discarding the two
   low bits, which hold the tag. */
static inline tt_node_idx_t tt_ptr_idx(tt_node_ptr_t p) {
  tt_node_idx_t index_part = p >> 2;
  return index_part;
}
/* Recovers the tag from a node pointer: the tag occupies the two low
   bits, so everything above them is masked off. */
static inline tt_tag_t tt_ptr_tag(tt_node_ptr_t p) {
  return (tt_tag_t)(p & 3u);
}
extern int tt_arena_init(tt_arena_t *a);
extern void tt_arena_done(tt_arena_t *a);
@ -70,14 +87,14 @@ extern void tt_arena_flush(tt_arena_t *a);
Otherwise, returns a nonzero node pointer (node index with the tag in the low two bits).
Grabs na and nb (according to tag) IF it needs to allocate a new node, otherwise does not.
DOES NOT increase the reference count of the returned node. */
extern tt_node_idx_t tt_arena_cons(tt_arena_t *a,
extern tt_node_ptr_t tt_arena_cons(tt_arena_t *a,
uint32_t tag,
uint32_t index,
tt_node_idx_t na,
tt_node_idx_t nb);
tt_node_ptr_t na,
tt_node_ptr_t nb);
extern tt_node_idx_t tt_grab(tt_arena_t *a, tt_node_idx_t i);
extern void tt_drop(tt_arena_t *a, tt_node_idx_t i);
extern tt_node_ptr_t tt_grab(tt_arena_t *a, tt_node_ptr_t i);
extern void tt_drop(tt_arena_t *a, tt_node_ptr_t i);
#ifdef __cplusplus
}