From c70e926970a82ce83d974d38f95e6383a121936c Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones
Date: Mon, 29 Jun 2015 23:19:44 -0400
Subject: [PATCH] Arena growth

---
 Makefile   |   2 +-
 TODO.md    |   3 +-
 main.c     |  56 +++-------
 treetrie.c | 317 ++++++++++++++++++++++++++++++++++++++++++-----------
 treetrie.h |  16 ++-
 5 files changed, 278 insertions(+), 116 deletions(-)

diff --git a/Makefile b/Makefile
index 60a451b..8c016da 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 t: *.c
-	gcc -Wall -o $@ *.c
+	gcc -Wall -o $@ -g *.c
 
 clean:
 	rm -f t
diff --git a/TODO.md b/TODO.md
index 0572579..9a6fafa 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,2 +1 @@
-Change freelist from a stack into a queue. Combined with the
-hashconsing technique, might this give us an LRU queue???
+Consider whether a stack or a queue makes most sense for the freelist.
diff --git a/main.c b/main.c
index 6855b81..b65011a 100644
--- a/main.c
+++ b/main.c
@@ -6,44 +6,6 @@
 #include "fasthash.h"
 #include "treetrie.h"
 
-static void dump_arena(tt_arena_t *a) {
-  int i;
-  printf("max_probe: %u\n", a->max_probe);
-  printf("live_count: %u\n", a->live_count);
-  printf("table_length: %u\n", a->table_length);
-
-  for (i = 0; i < a->table_length; i++) {
-    tt_node_idx_t n = a->table[i];
-    tt_hash_t h = tt_hash_node(a, n);
-    int distance = i - (h % a->table_length);
-    if (distance < 0) distance += a->table_length;
-    if (n >= TT_FIRST_VALID_NODE_IDX) {
-      printf("%12u -> %12u: dist %d ref %d ",
-             i,
-             n,
-             distance,
-             a->headers[n].inuse.refcount);
-      switch (a->headers[n].inuse.tag) {
-        case TT_TAG_TAIL:
-          printf("tail %u\n", a->nodes[n].a);
-          break;
-        case TT_TAG_BRANCH:
-          printf("branch %u %u\n", a->nodes[n].a, a->nodes[n].b);
-          break;
-        case TT_TAG_LEAF:
-          printf("leaf %u %u\n", a->nodes[n].a, a->nodes[n].b);
-          break;
-        case TT_TAG_NODE:
-          printf("node index %d, %u %u\n",
-                 a->headers[n].inuse.index,
-                 a->nodes[n].a,
-                 a->nodes[n].b);
-          break;
-      }
-    }
-  }
-}
-
 int main(int argc, char *argv[]) {
   tt_arena_t a;
   int i, outer;
@@ -53,13 +15,15 @@ int main(int argc, char *argv[]) {
   tt_arena_init(&a);
 
   for (outer = 0; outer < 10; outer++) {
+    /* printf("---------------------------------------- grab/drop of %d\n", prev); */
     tt_grab(&a, prev);
     tt_drop(&a, prev);
-    printf("---------------------------------------- AFTER DROP of %d:\n", prev);
-    dump_arena(&a);
+    /* tt_arena_flush(&a); */
+    /* printf("---------------------------------------- AFTER DROP of %d:\n", prev); */
+    /* tt_dump_arena(&a); */
     prev = TT_EMPTY;
-    printf("======================================== LOOP ITERATION %d\n", outer);
-    for (i = 0; i < 10; i++) {
+    /* printf("======================================== LOOP ITERATION %d\n", outer); */
+    for (i = 0; i < 100; i++) {
       tt_node_idx_t leaf = tt_arena_cons(&a,
                                          TT_TAG_LEAF,
                                          0,
@@ -70,11 +34,17 @@ int main(int argc, char *argv[]) {
                                          0,
                                          leaf,
                                          prev);
-      dump_arena(&a);
+      /* tt_dump_arena(&a); */
       prev = curr;
     }
   }
 
+  /* tt_dump_arena(&a); */
+  tt_grab(&a, prev);
+  tt_drop(&a, prev);
+  /* tt_arena_flush(&a); */
+  tt_dump_arena_summary(&a);
+
   tt_arena_done(&a);
   return EXIT_SUCCESS;
 }
diff --git a/treetrie.c b/treetrie.c
index ac5f835..a5d21ca 100644
--- a/treetrie.c
+++ b/treetrie.c
@@ -28,36 +28,170 @@ inline tt_hash_t tt_hash_node(tt_arena_t *a, tt_node_idx_t i) {
                  a->nodes[i].b);
 }
 
+static void chain_init(tt_arena_t *a, tt_free_chain_t *chain) {
+  chain->head = chain->tail = TT_ERROR;
+}
+
+static void chain_append(tt_arena_t *a, tt_free_chain_t *chain, tt_node_idx_t i) {
+  a->headers[i].next_free = TT_ERROR;
+  if (chain->tail == TT_ERROR) {
+    chain->head = i;
+  } else {
+    a->headers[chain->tail].next_free = i;
+  }
+  chain->tail = i;
+}
+
+/* Does not modify chain2. */
+static void chain_splice(tt_arena_t *a, tt_free_chain_t *chain1, tt_free_chain_t *chain2) {
+  if (chain2->head == TT_ERROR) {
+    /* do nothing */
+  } else if (chain1->head == TT_ERROR) {
+    *chain1 = *chain2;
+  } else {
+    a->headers[chain1->tail].next_free = chain2->head;
+    chain1->tail = chain2->tail;
+  }
+}
+
+static tt_node_idx_t chain_pop(tt_arena_t *a, tt_free_chain_t *chain) {
+  tt_node_idx_t i = chain->head;
+  if (i != TT_ERROR) {
+    chain->head = a->headers[i].next_free;
+    if (chain->tail == i) {
+      chain->tail = chain->head;
+    }
+  }
+  return i;
+}
+
 int tt_arena_init(tt_arena_t *a) {
   a->max_probe = 0;
-  a->live_count = 0;
-  a->table_length = 16;
+  a->table_length = 1048576;
   a->table = calloc(a->table_length, sizeof(a->table[0]));
   a->headers = calloc(a->table_length, sizeof(a->headers[0]));
   a->nodes = calloc(a->table_length, sizeof(a->nodes[0]));
-  a->free_chain = TT_ERROR;
+  a->free_count = 0;
+  chain_init(a, &a->free_chain);
 
   if (a->table == NULL || a->headers == NULL || a->nodes == NULL) {
-    if (a->table != NULL) free(a->table);
-    if (a->headers != NULL) free(a->headers);
-    if (a->nodes != NULL) free(a->nodes);
+    free(a->table);
+    free(a->headers);
+    free(a->nodes);
     errno = ENOMEM;
     return -1;
   }
 
   {
     int i;
-    for (i = a->table_length - 1; i >= TT_FIRST_VALID_NODE_IDX; i--) {
-      a->headers[i].next_free = a->free_chain;
-      a->free_chain = i;
+    for (i = TT_FIRST_VALID_NODE_IDX; i < a->table_length; i++) {
+      chain_append(a, &a->free_chain, i);
+      a->free_count++;
     }
   }
 
   return 0;
 }
 
+static void register_node(tt_arena_t *a, tt_node_idx_t node, tt_hash_t initial_hash) {
+  tt_hash_t h = initial_hash;
+  int i = 0;
+  while (1) {
+    unsigned int index = (h + i) % a->table_length;
+    tt_node_idx_t candidate = a->table[index];
+
+    /* printf("checking robinhood at h %d i %d index %d candidate %d\n", h, i, index, candidate); */
+
+    if (i > a->max_probe) {
+      a->max_probe = i;
+    }
+
+    if (candidate < TT_FIRST_VALID_NODE_IDX) {
+      /* This slot in the table is free. */
+      /* printf("slot free!\n"); */
+      a->table[index] = node;
+      break;
+    }
+
+    /* printf("slot not free.\n"); */
+    {
+      tt_hash_t candidate_h = tt_hash_node(a, candidate);
+      int distance = index - (candidate_h % a->table_length);
+      if (distance < 0) distance += a->table_length;
+
+      if (distance < i) {
+        a->table[index] = node;
+        h = candidate_h;
+        i = distance + 1;
+        node = candidate;
+      } else {
+        /* keep scanning. */
+        i++;
+      }
+    }
+  }
+}
+
 static int tt_grow(tt_arena_t *a) {
-  assert(0);
+  tt_node_idx_t *old_table = a->table;
+  unsigned int old_table_length = a->table_length;
+  unsigned int new_table_length = old_table_length << 1;
+
+  /* printf("PREGROW\n"); */
+  /* tt_dump_arena(a); */
+
+  {
+    tt_node_idx_t *new_table = calloc(new_table_length, sizeof(a->table[0]));
+    tt_header_t *new_headers = realloc(a->headers, new_table_length * sizeof(a->headers[0]));
+    tt_node_t *new_nodes = realloc(a->nodes, new_table_length * sizeof(a->nodes[0]));
+
+    if (new_table == NULL || new_headers == NULL || new_nodes == NULL) {
+      free(new_table);
+      free(new_headers);
+      free(new_nodes);
+      errno = ENOMEM;
+      return -1;
+    }
+
+    memset(new_headers + old_table_length, 0,
+           (new_table_length - old_table_length) * sizeof(a->headers[0]));
+    memset(new_nodes + old_table_length, 0,
+           (new_table_length - old_table_length) * sizeof(a->nodes[0]));
+
+    a->max_probe = 0;
+    a->table_length = new_table_length;
+    a->table = new_table;
+    a->headers = new_headers;
+    a->nodes = new_nodes;
+
+    {
+      int i;
+      for (i = old_table_length; i < new_table_length; i++) {
+        chain_append(a, &a->free_chain, i);
+        a->free_count++;
+      }
+    }
+  }
+
+  /* printf("//////////////////////////////////////// GROW starting (length %d)\n", a->table_length); */
+
+  {
+    int i;
+    for (i = 0; i < old_table_length; i++) {
+      tt_node_idx_t n = old_table[i];
+      if (n >= TT_FIRST_VALID_NODE_IDX) {
+        register_node(a, n, tt_hash_node(a, n));
+      }
+    }
+  }
+
+  /* printf("//////////////////////////////////////// GROW finished (length %d)\n", a->table_length); */
+
+  /* printf("POSTGROW\n"); */
+  /* tt_dump_arena(a); */
+
+  free(old_table);
+  return 0;
 }
 
 void tt_arena_done(tt_arena_t *a) {
@@ -67,11 +201,99 @@ void tt_arena_done(tt_arena_t *a) {
   memset(a, 0, sizeof(*a));
 }
 
+static size_t arena_size(tt_arena_t *a) {
+  return sizeof(*a)
+    + (a->table_length * sizeof(a->table[0]))
+    + (a->table_length * sizeof(a->headers[0]))
+    + (a->table_length * sizeof(a->nodes[0]));
+}
+
+void tt_dump_arena_summary(tt_arena_t *a) {
+  printf("size in bytes: %lu\n", arena_size(a));
+  printf("max_probe: %u\n", a->max_probe);
+  printf("table_length: %u\n", a->table_length);
+  printf("free_count: %u\n", a->free_count);
+}
+
+void tt_dump_arena(tt_arena_t *a) {
+  tt_dump_arena_summary(a);
+
+  printf("free_chain:");
+  {
+    tt_node_idx_t fp = a->free_chain.head;
+    while (fp != TT_ERROR) {
+      printf(" %d", fp);
+      fp = a->headers[fp].next_free;
+    }
+  }
+  printf("\n");
+
+  {
+    int i;
+    for (i = 0; i < a->table_length; i++) {
+      tt_node_idx_t n = a->table[i];
+      if (n < TT_FIRST_VALID_NODE_IDX) {
+        /* Skip. */
+      } else if (n >= a->table_length) {
+        printf("%12u -> %12u ?!?!?!\n", i, n);
+      } else {
+        tt_hash_t h = tt_hash_node(a, n);
+        int distance = i - (h % a->table_length);
+        if (distance < 0) distance += a->table_length;
+        printf("%12u -> %12u: dist %d ref %d ",
+               i,
+               n,
+               distance,
+               a->headers[n].inuse.refcount);
+        switch (a->headers[n].inuse.tag) {
+          case TT_TAG_TAIL:
+            printf("tail %u\n", a->nodes[n].a);
+            break;
+          case TT_TAG_BRANCH:
+            printf("branch %u %u\n", a->nodes[n].a, a->nodes[n].b);
+            break;
+          case TT_TAG_LEAF:
+            printf("leaf %u %u\n", a->nodes[n].a, a->nodes[n].b);
+            break;
+          case TT_TAG_NODE:
+            printf("node index %d, %u %u\n",
+                   a->headers[n].inuse.index,
+                   a->nodes[n].a,
+                   a->nodes[n].b);
+            break;
+        }
+      }
+    }
+  }
+}
+
+void tt_arena_flush1(tt_arena_t *a, tt_free_chain_t *c) {
+  tt_node_idx_t i = a->free_chain.head;
+  chain_splice(a, c, &a->free_chain);
+  chain_init(a, &a->free_chain);
+  while (i >= TT_FIRST_VALID_NODE_IDX) {
+    tt_drop(a, a->nodes[i].a);
+    tt_drop(a, a->nodes[i].b);
+    a->nodes[i].a = TT_ERROR;
+    a->nodes[i].b = TT_ERROR;
+    i = a->headers[i].next_free;
+  }
+}
+
+void tt_arena_flush(tt_arena_t *a) {
+  tt_free_chain_t c;
+  c.head = c.tail = TT_ERROR;
+  while (a->free_chain.head != TT_ERROR) {
+    tt_arena_flush1(a, &c);
+  }
+  a->free_chain = c;
+}
+
 static void recycle_node(tt_arena_t *a, tt_node_idx_t ni) {
   tt_hash_t h;
   int i;
 
-  printf("++++++++++++++++++++++++++++++++++++++++ recycling %d\n", ni);
+  /* printf("++++++++++++++++++++++++++++++++++++++++ recycling %d\n", ni); */
   assert(ni >= TT_FIRST_VALID_NODE_IDX);
 
   h = tt_hash_node(a, ni);
@@ -79,15 +301,14 @@ static void recycle_node(tt_arena_t *a, tt_node_idx_t ni) {
   if (a->headers[ni].inuse.tag == TT_TAG_LEAF) {
     a->nodes[ni].b = TT_ERROR;
   }
-  a->headers[ni].next_free = a->free_chain;
-  a->free_chain = ni;
-  a->live_count--;
+  chain_append(a, &a->free_chain, ni);
+  a->free_count++;
 
   for (i = 0; i < a->max_probe+1; i++) {
     unsigned int index = (h + i) % a->table_length;
     tt_node_idx_t candidate = a->table[index];
 
-    printf("hunting i=%d index=%d ni=%d candidate=%d\n", i, index, ni, candidate);
+    /* printf("hunting i=%d index=%d ni=%d candidate=%d\n", i, index, ni, candidate); */
     assert(candidate >= TT_FIRST_VALID_NODE_IDX); /* Internal error if node not in table */
 
     if (candidate == ni) {
@@ -133,46 +354,44 @@ tt_node_idx_t tt_arena_cons(tt_arena_t *a,
     unsigned int index = (h + i) % a->table_length;
     tt_node_idx_t candidate = a->table[index];
 
-    printf("cons at %d candidate %d\n", i, candidate);
+    /* printf("cons at %d candidate %d\n", i, candidate); */
 
     /* TODO: perhaps also bail early if we detect that the hash code changes */
     if (candidate < TT_FIRST_VALID_NODE_IDX) {
-      printf("cons empty cell\n");
+      /* printf("cons empty cell\n"); */
       break;
     }
 
-    printf("tag %d %d\n", a->headers[candidate].inuse.tag, tag);
-    printf("index %d %d\n", a->headers[candidate].inuse.index, nindex);
-    printf("a %d %d\n", a->nodes[candidate].a, na);
-    printf("b %d %d\n", a->nodes[candidate].b, nb);
+    /* printf("tag %d %d\n", a->headers[candidate].inuse.tag, tag); */
+    /* printf("index %d %d\n", a->headers[candidate].inuse.index, nindex); */
+    /* printf("a %d %d\n", a->nodes[candidate].a, na); */
+    /* printf("b %d %d\n", a->nodes[candidate].b, nb); */
     if (a->headers[candidate].inuse.tag == tag &&
         a->headers[candidate].inuse.index == nindex &&
         a->nodes[candidate].a == na &&
         a->nodes[candidate].b == nb) {
-      printf("cons located correct candidate\n");
+      /* printf("cons located correct candidate\n"); */
       return candidate;
     }
   }
 
-  printf("cons needs to alloc\n");
+  /* Not found */
+  /* printf("cons needs to alloc\n"); */
 
-  if (a->free_chain == TT_ERROR) {
+  if (a->free_count < (a->table_length >> 2)) {
     if (tt_grow(a) != 0) {
       return TT_ERROR;
     }
   }
 
   {
-    tt_node_idx_t node = a->free_chain;
-    tt_node_idx_t tostore = node;
+    tt_node_idx_t node = chain_pop(a, &a->free_chain);
 
     tt_grab(a, na);
     if (tag != TT_TAG_LEAF) tt_grab(a, nb);
-
-    a->free_chain = a->headers[node].next_free;
     tt_drop(a, a->nodes[node].a);
     tt_drop(a, a->nodes[node].b);
-    a->live_count++;
+    a->free_count--;
 
     a->headers[node].inuse.refcount = 0;
     a->headers[node].inuse.tag = tag;
@@ -180,43 +399,7 @@ tt_node_idx_t tt_arena_cons(tt_arena_t *a,
     a->nodes[node].a = na;
     a->nodes[node].b = nb;
 
-    /* Not found */
-    i = 0;
-    while (1) {
-      unsigned int index = (h + i) % a->table_length;
-      tt_node_idx_t candidate = a->table[index];
-
-      printf("checking robinhood at h %d i %d index %d candidate %d\n", h, i, index, candidate);
-
-      if (i > a->max_probe) {
-        a->max_probe = i;
-      }
-
-      if (candidate < TT_FIRST_VALID_NODE_IDX) {
-        /* This slot in the table is free. */
-        printf("slot free!\n");
-        a->table[index] = tostore;
-        break;
-      }
-
-      printf("slot not free.\n");
-      {
-        tt_hash_t candidate_h = tt_hash_node(a, candidate);
-        int distance = index - (candidate_h % a->table_length);
-        if (distance < 0) distance += a->table_length;
-
-        if (distance < i) {
-          a->table[index] = tostore;
-          h = candidate_h;
-          i = distance + 1;
-          tostore = candidate;
-        } else {
-          /* keep scanning. */
-          i++;
-        }
-      }
-    }
-
+    register_node(a, node, h);
     return node;
   }
 }
@@ -230,7 +413,7 @@ tt_node_idx_t tt_grab(tt_arena_t *a, tt_node_idx_t i) {
 
 void tt_drop(tt_arena_t *a, tt_node_idx_t i) {
   if (i >= TT_FIRST_VALID_NODE_IDX && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) {
-    printf("++++++++++++++++++++++++++++++ dropping %d\n", i);
+    /* printf("++++++++++++++++++++++++++++++ dropping %d\n", i); */
     if (--(a->headers[i].inuse.refcount) == 0) {
       recycle_node(a, i);
     }
diff --git a/treetrie.h b/treetrie.h
index 2174038..c9a90d7 100644
--- a/treetrie.h
+++ b/treetrie.h
@@ -22,7 +22,7 @@ typedef enum tt_reserved_node_idx_t {
 
 typedef uint32_t tt_node_idx_t; /* N.B. tt_reserved_node_idx_t */
 
-typedef uint32_t tt_atom_t;
+typedef uint32_t tt_atom_t; /* Atom number 0 is the wildcard atom. */
 
 typedef union tt_header_t {
   uint32_t next_free;
@@ -40,22 +40,32 @@ typedef struct tt_node_t {
   tt_node_idx_t b; /* a real node idx unless corresponding tag is TT_TAG_LEAF */
 } tt_node_t;
 
+typedef struct tt_free_chain_t {
+  tt_node_idx_t head; /* remove links from here */
+  tt_node_idx_t tail; /* append links here */
+} tt_free_chain_t;
+
 typedef struct tt_arena_t {
   /* Fields for the Robin Hood hashset used for hashconsing of tt_nodes */
   unsigned int max_probe;
-  unsigned int live_count;
   unsigned int table_length;
   tt_node_idx_t *table;
 
   tt_header_t *headers;
   tt_node_t *nodes;
 
-  tt_node_idx_t free_chain;
+  unsigned int free_count;
+  tt_free_chain_t free_chain;
 } tt_arena_t;
 
 extern int tt_arena_init(tt_arena_t *a);
 extern void tt_arena_done(tt_arena_t *a);
 
+extern void tt_dump_arena_summary(tt_arena_t *a);
+extern void tt_dump_arena(tt_arena_t *a);
+
+extern void tt_arena_flush(tt_arena_t *a);
+
 /* Returns 0 if consing failed (because of out-of-memory). Otherwise,
    returns a nonzero index. Grabs na and nb (according to tag) IF it
    needs to allocate a new node, otherwise does not.