Arena growth

This commit is contained in:
Tony Garnock-Jones 2015-06-29 23:19:44 -04:00
parent 985baab2cb
commit c70e926970
5 changed files with 278 additions and 116 deletions

View File

@ -1,5 +1,5 @@
t: *.c t: *.c
gcc -Wall -o $@ *.c gcc -Wall -o $@ -g *.c
clean: clean:
rm -f t rm -f t

View File

@ -1,2 +1 @@
Change freelist from a stack into a queue. Combined with the Consider whether a stack or a queue makes most sense for the freelist.
hashconsing technique, might this give us an LRU queue???

56
main.c
View File

@ -6,44 +6,6 @@
#include "fasthash.h" #include "fasthash.h"
#include "treetrie.h" #include "treetrie.h"
/*
 * Debugging aid: prints the arena's counters followed by one line for every
 * occupied slot of the hash table.
 *
 * Each line shows the table slot, the node index stored there, how far the
 * node sits from the slot its hash maps to, its refcount, and its payload
 * formatted according to its tag.
 */
static void dump_arena(tt_arena_t *a) {
  unsigned int i; /* unsigned: compared against table_length and printed with %u */

  printf("max_probe: %u\n", a->max_probe);
  printf("live_count: %u\n", a->live_count);
  printf("table_length: %u\n", a->table_length);

  for (i = 0; i < a->table_length; i++) {
    tt_node_idx_t n = a->table[i];
    tt_hash_t h;
    int distance;

    /* Reserved indices mark unoccupied slots; there is nothing to hash. */
    if (n < TT_FIRST_VALID_NODE_IDX) {
      continue;
    }

    /* Displacement from the home slot, wrapping around the table end. */
    h = tt_hash_node(a, n);
    distance = i - (h % a->table_length);
    if (distance < 0) distance += a->table_length;

    printf("%12u -> %12u: dist %d ref %d ",
           i,
           n,
           distance,
           a->headers[n].inuse.refcount);
    switch (a->headers[n].inuse.tag) {
      case TT_TAG_TAIL:
        printf("tail %u\n", a->nodes[n].a);
        break;
      case TT_TAG_BRANCH:
        printf("branch %u %u\n", a->nodes[n].a, a->nodes[n].b);
        break;
      case TT_TAG_LEAF:
        printf("leaf %u %u\n", a->nodes[n].a, a->nodes[n].b);
        break;
      case TT_TAG_NODE:
        printf("node index %d, %u %u\n",
               a->headers[n].inuse.index,
               a->nodes[n].a,
               a->nodes[n].b);
        break;
      default:
        /* Always terminate the line, even for a tag we do not recognise. */
        printf("unknown tag %d\n", (int) a->headers[n].inuse.tag);
        break;
    }
  }
}
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
tt_arena_t a; tt_arena_t a;
int i, outer; int i, outer;
@ -53,13 +15,15 @@ int main(int argc, char *argv[]) {
tt_arena_init(&a); tt_arena_init(&a);
for (outer = 0; outer < 10; outer++) { for (outer = 0; outer < 10; outer++) {
/* printf("---------------------------------------- grab/drop of %d\n", prev); */
tt_grab(&a, prev); tt_grab(&a, prev);
tt_drop(&a, prev); tt_drop(&a, prev);
printf("---------------------------------------- AFTER DROP of %d:\n", prev); /* tt_arena_flush(&a); */
dump_arena(&a); /* printf("---------------------------------------- AFTER DROP of %d:\n", prev); */
/* tt_dump_arena(&a); */
prev = TT_EMPTY; prev = TT_EMPTY;
printf("======================================== LOOP ITERATION %d\n", outer); /* printf("======================================== LOOP ITERATION %d\n", outer); */
for (i = 0; i < 10; i++) { for (i = 0; i < 100; i++) {
tt_node_idx_t leaf = tt_arena_cons(&a, tt_node_idx_t leaf = tt_arena_cons(&a,
TT_TAG_LEAF, TT_TAG_LEAF,
0, 0,
@ -70,11 +34,17 @@ int main(int argc, char *argv[]) {
0, 0,
leaf, leaf,
prev); prev);
dump_arena(&a); /* tt_dump_arena(&a); */
prev = curr; prev = curr;
} }
} }
/* tt_dump_arena(&a); */
tt_grab(&a, prev);
tt_drop(&a, prev);
/* tt_arena_flush(&a); */
tt_dump_arena_summary(&a);
tt_arena_done(&a); tt_arena_done(&a);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@ -28,36 +28,170 @@ inline tt_hash_t tt_hash_node(tt_arena_t *a, tt_node_idx_t i) {
a->nodes[i].b); a->nodes[i].b);
} }
/*
 * Resets `chain` to the empty state: both ends hold TT_ERROR.
 * The arena argument is accepted for symmetry with the other chain_*
 * helpers but is not consulted.
 */
static void chain_init(tt_arena_t *a, tt_free_chain_t *chain) {
  (void) a;
  chain->tail = TT_ERROR;
  chain->head = TT_ERROR;
}
/*
 * Links node `i` onto the tail end of `chain`.
 * The node's next_free link is terminated with TT_ERROR; if the chain was
 * empty the node also becomes its head.
 */
static void chain_append(tt_arena_t *a, tt_free_chain_t *chain, tt_node_idx_t i) {
  tt_node_idx_t previous_tail = chain->tail;

  a->headers[i].next_free = TT_ERROR;

  if (previous_tail != TT_ERROR) {
    a->headers[previous_tail].next_free = i;
  } else {
    chain->head = i;
  }

  chain->tail = i;
}
/*
 * Appends every link of chain2 to the end of chain1.
 * The chain2 structure itself is left untouched (its links are now shared
 * with chain1). An empty chain2 is a no-op; an empty chain1 simply becomes
 * a copy of chain2.
 */
static void chain_splice(tt_arena_t *a, tt_free_chain_t *chain1, tt_free_chain_t *chain2) {
  if (chain2->head == TT_ERROR) {
    return;
  }

  if (chain1->head == TT_ERROR) {
    *chain1 = *chain2;
    return;
  }

  a->headers[chain1->tail].next_free = chain2->head;
  chain1->tail = chain2->tail;
}
/*
 * Detaches and returns the node at the head of `chain`.
 * Returns TT_ERROR, leaving the chain unchanged, when the chain is empty.
 */
static tt_node_idx_t chain_pop(tt_arena_t *a, tt_free_chain_t *chain) {
  tt_node_idx_t popped = chain->head;

  if (popped == TT_ERROR) {
    return popped;
  }

  chain->head = a->headers[popped].next_free;

  /* Removing the only link: the tail must follow the head. */
  if (popped == chain->tail) {
    chain->tail = chain->head;
  }

  return popped;
}
/*
 * Initialises an arena with a fixed starting capacity and places every
 * usable node index (TT_FIRST_VALID_NODE_IDX and above) on the free chain,
 * in ascending order.
 *
 * Returns 0 on success. On allocation failure returns -1 with errno set to
 * ENOMEM; in that case nothing is left allocated and the arena's pointers
 * are NULL, so no stale pointer can later be freed or dereferenced.
 */
int tt_arena_init(tt_arena_t *a) {
  unsigned int i; /* unsigned: compared against table_length */

  a->max_probe = 0;
  a->table_length = 1048576;
  a->table = calloc(a->table_length, sizeof(a->table[0]));
  a->headers = calloc(a->table_length, sizeof(a->headers[0]));
  a->nodes = calloc(a->table_length, sizeof(a->nodes[0]));
  a->free_count = 0;
  chain_init(a, &a->free_chain);

  if (a->table == NULL || a->headers == NULL || a->nodes == NULL) {
    free(a->table);
    free(a->headers);
    free(a->nodes);
    /* Do not leave dangling pointers behind in the caller's structure. */
    a->table = NULL;
    a->headers = NULL;
    a->nodes = NULL;
    a->table_length = 0;
    errno = ENOMEM;
    return -1;
  }

  for (i = TT_FIRST_VALID_NODE_IDX; i < a->table_length; i++) {
    chain_append(a, &a->free_chain, i);
    a->free_count++;
  }

  return 0;
}
/*
 * Inserts `node` into the arena's Robin Hood hash table.
 *
 * Probing starts at the slot selected by `initial_hash` and moves forward
 * one slot at a time. Whenever the entry being carried has already probed
 * further than the current occupant sits from its own home slot, the two
 * are swapped and the displaced occupant is carried onward instead.
 * max_probe is raised to the longest probe distance seen along the way.
 * The loop ends when an unoccupied slot is found.
 */
static void register_node(tt_arena_t *a, tt_node_idx_t node, tt_hash_t initial_hash) {
  tt_node_idx_t carried = node;
  tt_hash_t hash = initial_hash;
  int probe = 0;

  for (;;) {
    unsigned int slot = (hash + probe) % a->table_length;
    tt_node_idx_t occupant = a->table[slot];
    tt_hash_t occupant_hash;
    int occupant_distance;

    if (probe > a->max_probe) {
      a->max_probe = probe;
    }

    if (occupant < TT_FIRST_VALID_NODE_IDX) {
      /* Unoccupied slot: the carried entry settles here. */
      a->table[slot] = carried;
      return;
    }

    /* How far is the occupant from its home slot (with wraparound)? */
    occupant_hash = tt_hash_node(a, occupant);
    occupant_distance = slot - (occupant_hash % a->table_length);
    if (occupant_distance < 0) occupant_distance += a->table_length;

    if (occupant_distance >= probe) {
      /* Occupant is at least as displaced as we are; keep scanning. */
      probe++;
      continue;
    }

    /* Take the slot and continue on behalf of the displaced occupant. */
    a->table[slot] = carried;
    carried = occupant;
    hash = occupant_hash;
    probe = occupant_distance + 1;
  }
}
/*
 * Doubles the capacity of the arena.
 *
 * A fresh, zeroed hash table of twice the length is allocated; the headers
 * and nodes arrays are enlarged in place with realloc and their new upper
 * halves zeroed. The newly available node indices are appended to the free
 * chain, and every entry of the old hash table is re-registered in the new
 * one (max_probe is recomputed from scratch while doing so).
 *
 * Returns 0 on success. On failure returns -1 with errno set to ENOMEM and
 * the arena remains valid at its previous table_length.
 *
 * Ownership note: a successful realloc invalidates the old pointer, so each
 * result is stored into the arena immediately. If a later allocation then
 * fails, the arena merely owns a larger-than-needed array rather than a
 * dangling or already-freed one.
 */
static int tt_grow(tt_arena_t *a) {
  tt_node_idx_t *old_table = a->table;
  unsigned int old_table_length = a->table_length;
  unsigned int new_table_length = old_table_length << 1;
  tt_node_idx_t *new_table = NULL;
  tt_header_t *new_headers;
  tt_node_t *new_nodes;
  unsigned int i;

  /* Doubling wrapped around (or there was nothing to double). */
  if (new_table_length <= old_table_length) {
    goto fail;
  }

  new_table = calloc(new_table_length, sizeof(a->table[0]));
  if (new_table == NULL) {
    goto fail;
  }

  new_headers = realloc(a->headers, new_table_length * sizeof(a->headers[0]));
  if (new_headers == NULL) {
    goto fail; /* a->headers is still the valid, original block */
  }
  a->headers = new_headers;

  new_nodes = realloc(a->nodes, new_table_length * sizeof(a->nodes[0]));
  if (new_nodes == NULL) {
    goto fail; /* a->nodes is still valid; a->headers is merely oversized */
  }
  a->nodes = new_nodes;

  /* realloc does not clear the added space. */
  memset(new_headers + old_table_length, 0,
         (new_table_length - old_table_length) * sizeof(a->headers[0]));
  memset(new_nodes + old_table_length, 0,
         (new_table_length - old_table_length) * sizeof(a->nodes[0]));

  a->max_probe = 0;
  a->table_length = new_table_length;
  a->table = new_table;

  /* Make the new upper half of the index space available for allocation. */
  for (i = old_table_length; i < new_table_length; i++) {
    chain_append(a, &a->free_chain, i);
    a->free_count++;
  }

  /* Rehash: slot positions depend on table_length, which just changed. */
  for (i = 0; i < old_table_length; i++) {
    tt_node_idx_t n = old_table[i];
    if (n >= TT_FIRST_VALID_NODE_IDX) {
      register_node(a, n, tt_hash_node(a, n));
    }
  }

  free(old_table);
  return 0;

 fail:
  free(new_table);
  errno = ENOMEM;
  return -1;
}
void tt_arena_done(tt_arena_t *a) { void tt_arena_done(tt_arena_t *a) {
@ -67,11 +201,99 @@ void tt_arena_done(tt_arena_t *a) {
memset(a, 0, sizeof(*a)); memset(a, 0, sizeof(*a));
} }
/*
 * Returns the number of bytes occupied by the arena: the tt_arena_t
 * structure itself plus its table, headers and nodes arrays, each of which
 * holds table_length elements.
 */
static size_t arena_size(tt_arena_t *a) {
  size_t bytes_per_slot =
    sizeof(a->table[0]) + sizeof(a->headers[0]) + sizeof(a->nodes[0]);

  return sizeof(*a) + (a->table_length * bytes_per_slot);
}
/*
 * Prints a short summary of the arena to stdout: its total size in bytes,
 * the current max_probe, the table length and the number of free nodes.
 */
void tt_dump_arena_summary(tt_arena_t *a) {
  /* arena_size() returns size_t, whose matching conversion is %zu. */
  printf("size in bytes: %zu\n", arena_size(a));
  printf("max_probe: %u\n", a->max_probe);
  printf("table_length: %u\n", a->table_length);
  printf("free_count: %u\n", a->free_count);
}
/*
 * Debugging aid: prints the arena summary, the node indices on the free
 * chain in head-to-tail order, and then one line per occupied hash-table
 * slot.
 *
 * Each slot line shows the slot number, the node index stored there, the
 * node's distance from the slot its hash maps to, its refcount, and its
 * payload formatted according to its tag. A stored index that lies outside
 * the arena is flagged with "?!?!?!" instead of being dereferenced.
 */
void tt_dump_arena(tt_arena_t *a) {
  tt_node_idx_t fp;
  unsigned int i; /* unsigned: compared against table_length, printed with %u */

  tt_dump_arena_summary(a);

  printf("free_chain:");
  for (fp = a->free_chain.head; fp != TT_ERROR; fp = a->headers[fp].next_free) {
    printf(" %u", fp);
  }
  printf("\n");

  for (i = 0; i < a->table_length; i++) {
    tt_node_idx_t n = a->table[i];

    if (n < TT_FIRST_VALID_NODE_IDX) {
      /* Unoccupied slot: skip. */
      continue;
    }

    if (n >= a->table_length) {
      /* Corrupt entry: report it rather than index out of bounds. */
      printf("%12u -> %12u ?!?!?!\n", i, n);
      continue;
    }

    {
      tt_hash_t h = tt_hash_node(a, n);
      /* Displacement from the home slot, wrapping around the table end. */
      int distance = i - (h % a->table_length);
      if (distance < 0) distance += a->table_length;

      printf("%12u -> %12u: dist %d ref %d ",
             i,
             n,
             distance,
             a->headers[n].inuse.refcount);
      switch (a->headers[n].inuse.tag) {
        case TT_TAG_TAIL:
          printf("tail %u\n", a->nodes[n].a);
          break;
        case TT_TAG_BRANCH:
          printf("branch %u %u\n", a->nodes[n].a, a->nodes[n].b);
          break;
        case TT_TAG_LEAF:
          printf("leaf %u %u\n", a->nodes[n].a, a->nodes[n].b);
          break;
        case TT_TAG_NODE:
          printf("node index %d, %u %u\n",
                 a->headers[n].inuse.index,
                 a->nodes[n].a,
                 a->nodes[n].b);
          break;
        default:
          /* Always terminate the line, even for a tag we do not recognise. */
          printf("unknown tag %d\n", (int) a->headers[n].inuse.tag);
          break;
      }
    }
  }
}
/*
 * Performs one pass of a flush.
 *
 * The arena's current free chain is spliced onto the end of `c` and the
 * arena's own free chain is reset to empty. Every node that was on the
 * arena's chain is then visited in order: tt_drop is called on its `a` and
 * `b` fields and both fields are overwritten with TT_ERROR. Any node
 * recycled as a result of those drops is appended to the arena's (now
 * fresh) free chain, not to `c`.
 */
void tt_arena_flush1(tt_arena_t *a, tt_free_chain_t *c) {
  tt_node_idx_t cursor;
  tt_node_idx_t first = a->free_chain.head;

  /* Hand the links over before walking them. */
  chain_splice(a, c, &a->free_chain);
  chain_init(a, &a->free_chain);

  for (cursor = first;
       cursor >= TT_FIRST_VALID_NODE_IDX;
       cursor = a->headers[cursor].next_free) {
    tt_drop(a, a->nodes[cursor].a);
    tt_drop(a, a->nodes[cursor].b);
    a->nodes[cursor].a = TT_ERROR;
    a->nodes[cursor].b = TT_ERROR;
  }
}
/*
 * Runs tt_arena_flush1 repeatedly until the arena's free chain stays
 * empty, collecting every processed node on a local chain, and finally
 * installs that accumulated chain as the arena's free chain.
 */
void tt_arena_flush(tt_arena_t *a) {
  tt_free_chain_t flushed;

  chain_init(a, &flushed);

  for (;;) {
    if (a->free_chain.head == TT_ERROR) {
      break;
    }
    tt_arena_flush1(a, &flushed);
  }

  a->free_chain = flushed;
}
static void recycle_node(tt_arena_t *a, tt_node_idx_t ni) { static void recycle_node(tt_arena_t *a, tt_node_idx_t ni) {
tt_hash_t h; tt_hash_t h;
int i; int i;
printf("++++++++++++++++++++++++++++++++++++++++ recycling %d\n", ni); /* printf("++++++++++++++++++++++++++++++++++++++++ recycling %d\n", ni); */
assert(ni >= TT_FIRST_VALID_NODE_IDX); assert(ni >= TT_FIRST_VALID_NODE_IDX);
h = tt_hash_node(a, ni); h = tt_hash_node(a, ni);
@ -79,15 +301,14 @@ static void recycle_node(tt_arena_t *a, tt_node_idx_t ni) {
if (a->headers[ni].inuse.tag == TT_TAG_LEAF) { if (a->headers[ni].inuse.tag == TT_TAG_LEAF) {
a->nodes[ni].b = TT_ERROR; a->nodes[ni].b = TT_ERROR;
} }
a->headers[ni].next_free = a->free_chain; chain_append(a, &a->free_chain, ni);
a->free_chain = ni; a->free_count++;
a->live_count--;
for (i = 0; i < a->max_probe+1; i++) { for (i = 0; i < a->max_probe+1; i++) {
unsigned int index = (h + i) % a->table_length; unsigned int index = (h + i) % a->table_length;
tt_node_idx_t candidate = a->table[index]; tt_node_idx_t candidate = a->table[index];
printf("hunting i=%d index=%d ni=%d candidate=%d\n", i, index, ni, candidate); /* printf("hunting i=%d index=%d ni=%d candidate=%d\n", i, index, ni, candidate); */
assert(candidate >= TT_FIRST_VALID_NODE_IDX); /* Internal error if node not in table */ assert(candidate >= TT_FIRST_VALID_NODE_IDX); /* Internal error if node not in table */
if (candidate == ni) { if (candidate == ni) {
@ -133,46 +354,44 @@ tt_node_idx_t tt_arena_cons(tt_arena_t *a,
unsigned int index = (h + i) % a->table_length; unsigned int index = (h + i) % a->table_length;
tt_node_idx_t candidate = a->table[index]; tt_node_idx_t candidate = a->table[index];
printf("cons at %d candidate %d\n", i, candidate); /* printf("cons at %d candidate %d\n", i, candidate); */
/* TODO: perhaps also bail early if we detect that the hash code changes */ /* TODO: perhaps also bail early if we detect that the hash code changes */
if (candidate < TT_FIRST_VALID_NODE_IDX) { if (candidate < TT_FIRST_VALID_NODE_IDX) {
printf("cons empty cell\n"); /* printf("cons empty cell\n"); */
break; break;
} }
printf("tag %d %d\n", a->headers[candidate].inuse.tag, tag); /* printf("tag %d %d\n", a->headers[candidate].inuse.tag, tag); */
printf("index %d %d\n", a->headers[candidate].inuse.index, nindex); /* printf("index %d %d\n", a->headers[candidate].inuse.index, nindex); */
printf("a %d %d\n", a->nodes[candidate].a, na); /* printf("a %d %d\n", a->nodes[candidate].a, na); */
printf("b %d %d\n", a->nodes[candidate].b, nb); /* printf("b %d %d\n", a->nodes[candidate].b, nb); */
if (a->headers[candidate].inuse.tag == tag && if (a->headers[candidate].inuse.tag == tag &&
a->headers[candidate].inuse.index == nindex && a->headers[candidate].inuse.index == nindex &&
a->nodes[candidate].a == na && a->nodes[candidate].a == na &&
a->nodes[candidate].b == nb) { a->nodes[candidate].b == nb) {
printf("cons located correct candidate\n"); /* printf("cons located correct candidate\n"); */
return candidate; return candidate;
} }
} }
printf("cons needs to alloc\n"); /* Not found */
/* printf("cons needs to alloc\n"); */
if (a->free_chain == TT_ERROR) { if (a->free_count < (a->table_length >> 2)) {
if (tt_grow(a) != 0) { if (tt_grow(a) != 0) {
return TT_ERROR; return TT_ERROR;
} }
} }
{ {
tt_node_idx_t node = a->free_chain; tt_node_idx_t node = chain_pop(a, &a->free_chain);
tt_node_idx_t tostore = node;
tt_grab(a, na); tt_grab(a, na);
if (tag != TT_TAG_LEAF) tt_grab(a, nb); if (tag != TT_TAG_LEAF) tt_grab(a, nb);
a->free_chain = a->headers[node].next_free;
tt_drop(a, a->nodes[node].a); tt_drop(a, a->nodes[node].a);
tt_drop(a, a->nodes[node].b); tt_drop(a, a->nodes[node].b);
a->live_count++; a->free_count--;
a->headers[node].inuse.refcount = 0; a->headers[node].inuse.refcount = 0;
a->headers[node].inuse.tag = tag; a->headers[node].inuse.tag = tag;
@ -180,43 +399,7 @@ tt_node_idx_t tt_arena_cons(tt_arena_t *a,
a->nodes[node].a = na; a->nodes[node].a = na;
a->nodes[node].b = nb; a->nodes[node].b = nb;
/* Not found */ register_node(a, node, h);
i = 0;
while (1) {
unsigned int index = (h + i) % a->table_length;
tt_node_idx_t candidate = a->table[index];
printf("checking robinhood at h %d i %d index %d candidate %d\n", h, i, index, candidate);
if (i > a->max_probe) {
a->max_probe = i;
}
if (candidate < TT_FIRST_VALID_NODE_IDX) {
/* This slot in the table is free. */
printf("slot free!\n");
a->table[index] = tostore;
break;
}
printf("slot not free.\n");
{
tt_hash_t candidate_h = tt_hash_node(a, candidate);
int distance = index - (candidate_h % a->table_length);
if (distance < 0) distance += a->table_length;
if (distance < i) {
a->table[index] = tostore;
h = candidate_h;
i = distance + 1;
tostore = candidate;
} else {
/* keep scanning. */
i++;
}
}
}
return node; return node;
} }
} }
@ -230,7 +413,7 @@ tt_node_idx_t tt_grab(tt_arena_t *a, tt_node_idx_t i) {
void tt_drop(tt_arena_t *a, tt_node_idx_t i) { void tt_drop(tt_arena_t *a, tt_node_idx_t i) {
if (i >= TT_FIRST_VALID_NODE_IDX && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) { if (i >= TT_FIRST_VALID_NODE_IDX && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) {
printf("++++++++++++++++++++++++++++++ dropping %d\n", i); /* printf("++++++++++++++++++++++++++++++ dropping %d\n", i); */
if (--(a->headers[i].inuse.refcount) == 0) { if (--(a->headers[i].inuse.refcount) == 0) {
recycle_node(a, i); recycle_node(a, i);
} }

View File

@ -22,7 +22,7 @@ typedef enum tt_reserved_node_idx_t {
typedef uint32_t tt_node_idx_t; /* N.B. tt_reserved_node_idx_t */ typedef uint32_t tt_node_idx_t; /* N.B. tt_reserved_node_idx_t */
typedef uint32_t tt_atom_t; typedef uint32_t tt_atom_t; /* Atom number 0 is the wildcard atom. */
typedef union tt_header_t { typedef union tt_header_t {
uint32_t next_free; uint32_t next_free;
@ -40,22 +40,32 @@ typedef struct tt_node_t {
tt_node_idx_t b; /* a real node idx unless corresponding tag is TT_TAG_LEAF */ tt_node_idx_t b; /* a real node idx unless corresponding tag is TT_TAG_LEAF */
} tt_node_t; } tt_node_t;
typedef struct tt_free_chain_t {
tt_node_idx_t head; /* remove links from here */
tt_node_idx_t tail; /* append links here */
} tt_free_chain_t;
typedef struct tt_arena_t { typedef struct tt_arena_t {
/* Fields for the Robin Hood hashset used for hashconsing of tt_nodes */ /* Fields for the Robin Hood hashset used for hashconsing of tt_nodes */
unsigned int max_probe; unsigned int max_probe;
unsigned int live_count;
unsigned int table_length; unsigned int table_length;
tt_node_idx_t *table; tt_node_idx_t *table;
tt_header_t *headers; tt_header_t *headers;
tt_node_t *nodes; tt_node_t *nodes;
tt_node_idx_t free_chain; unsigned int free_count;
tt_free_chain_t free_chain;
} tt_arena_t; } tt_arena_t;
extern int tt_arena_init(tt_arena_t *a); extern int tt_arena_init(tt_arena_t *a);
extern void tt_arena_done(tt_arena_t *a); extern void tt_arena_done(tt_arena_t *a);
extern void tt_dump_arena_summary(tt_arena_t *a);
extern void tt_dump_arena(tt_arena_t *a);
extern void tt_arena_flush(tt_arena_t *a);
/* Returns 0 if consing failed (because of out-of-memory). /* Returns 0 if consing failed (because of out-of-memory).
Otherwise, returns a nonzero index. Otherwise, returns a nonzero index.
Grabs na and nb (according to tag) IF it needs to allocate a new node, otherwise does not. Grabs na and nb (according to tag) IF it needs to allocate a new node, otherwise does not.