From 2ed7bb0d88060bf0567041f254f8c7a2c5b5b9b5 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 30 Jun 2015 23:13:13 -0400 Subject: [PATCH] Avoid recomputing hashes all the time; store them instead --- treetrie.c | 74 ++++++++++++++++++++++++++++-------------------------- treetrie.h | 10 +++++++- 2 files changed, 47 insertions(+), 37 deletions(-) diff --git a/treetrie.c b/treetrie.c index fcbe4a5..0861b2d 100644 --- a/treetrie.c +++ b/treetrie.c @@ -8,8 +8,6 @@ #include "treetrie.h" #include "fasthash.h" -typedef uint32_t tt_hash_t; - /* Customized special-purpose fasthash variation */ #define mix(h) ({ \ (h) ^= (h) >> 23; \ @@ -137,7 +135,7 @@ static void register_node(tt_arena_t *a, tt_node_ptr_t p, tt_hash_t initial_hash int i = 0; while (1) { unsigned int index = (h + i) % a->table_length; - tt_node_ptr_t candidate = a->table[index]; + tt_node_ptr_t candidate = a->table[index].ptr; /* printf("checking robinhood at h %d i %d index %d candidate %d\n", h, i, index, candidate); */ @@ -148,18 +146,20 @@ static void register_node(tt_arena_t *a, tt_node_ptr_t p, tt_hash_t initial_hash if (candidate == TT_NO_PTR) { /* This slot in the table is free. 
*/ /* printf("slot free!\n"); */ - a->table[index] = p; + a->table[index].ptr = p; + a->table[index].hash = h; break; } /* printf("slot not free.\n"); */ { - tt_hash_t candidate_h = tt_hash_node(a, candidate); + tt_hash_t candidate_h = a->table[index].hash; int distance = index - (candidate_h % a->table_length); if (distance < 0) distance += a->table_length; if (distance < i) { - a->table[index] = p; + a->table[index].ptr = p; + a->table[index].hash = h; h = candidate_h; i = distance + 1; p = candidate; @@ -172,7 +172,7 @@ static void register_node(tt_arena_t *a, tt_node_ptr_t p, tt_hash_t initial_hash } static int tt_grow(tt_arena_t *a) { - tt_node_ptr_t *old_table = a->table; + tt_hashtable_entry_t *old_table = a->table; unsigned int old_table_length = a->table_length; unsigned int new_table_length = old_table_length << 1; @@ -180,7 +180,7 @@ static int tt_grow(tt_arena_t *a) { /* tt_dump_arena(a); */ { - tt_node_ptr_t *new_table = calloc(new_table_length, sizeof(a->table[0])); + tt_hashtable_entry_t *new_table = calloc(new_table_length, sizeof(a->table[0])); tt_header_t *new_headers = realloc(a->headers, new_table_length * sizeof(a->headers[0])); tt_node_t *new_nodes = realloc(a->nodes, new_table_length * sizeof(a->nodes[0])); @@ -217,9 +217,10 @@ static int tt_grow(tt_arena_t *a) { { int i; for (i = 0; i < old_table_length; i++) { - tt_node_ptr_t p = old_table[i]; + tt_node_ptr_t p = old_table[i].ptr; + tt_hash_t h = old_table[i].hash; if (p != TT_NO_PTR) { - register_node(a, p, tt_hash_node(a, p)); + register_node(a, p, h); } } } @@ -270,7 +271,7 @@ void tt_dump_arena(tt_arena_t *a) { { int i; for (i = 0; i < a->table_length; i++) { - tt_node_ptr_t p = a->table[i]; + tt_node_ptr_t p = a->table[i].ptr; tt_node_idx_t n = tt_ptr_idx(p); if (p == TT_NO_PTR) { @@ -280,7 +281,7 @@ void tt_dump_arena(tt_arena_t *a) { if (n >= a->table_length) { printf("%12u -> %12u ?!?!?!\n", i, n); } else { - tt_hash_t h = tt_hash_node(a, p); + tt_hash_t h = a->table[i].hash; int 
distance = i - (h % a->table_length); if (distance < 0) distance += a->table_length; printf("%12u -> %12u: dist %d ref %d ", @@ -383,7 +384,7 @@ static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) { for (i = 0; i < a->max_probe+1; i++) { unsigned int index = (h + i) % a->table_length; - tt_node_ptr_t candidate = a->table[index]; + tt_node_ptr_t candidate = a->table[index].ptr; /* printf("hunting i=%d index=%d p=%d candidate=%d\n", i, index, p, candidate); */ assert(candidate != TT_NO_PTR); /* Internal error if node not in table */ @@ -392,17 +393,17 @@ static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) { /* We found it. Now swap in elements. */ while (1) { unsigned int nextindex = (index + 1) % a->table_length; - tt_node_ptr_t next_p = a->table[nextindex]; + tt_node_ptr_t next_p = a->table[nextindex].ptr; tt_hash_t next_h; int distance; - a->table[index] = TT_NO_PTR; + a->table[index].ptr = TT_NO_PTR; /* only .ptr is reset; the slot's stored .hash is left untouched */ if (next_p == TT_NO_PTR) { break; } - next_h = tt_hash_node(a, next_p); + next_h = a->table[nextindex].hash; /* read the stored hash instead of calling tt_hash_node() */ distance = nextindex - (next_h % a->table_length); if (distance < 0) distance += a->table_length; @@ -410,7 +411,8 @@ static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) { break; } - a->table[index] = next_p; + a->table[index].ptr = next_p; + a->table[index].hash = next_h; index = nextindex; } break; @@ -418,6 +420,24 @@ static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) { } } +inline tt_node_ptr_t tt_grab(tt_arena_t *a, tt_node_ptr_t p) { /* Increments the refcount of p's node and returns p. Does nothing when p is tagged TT_TAG_SPECIAL or the refcount is not below TT_REFCOUNT_LIMIT. NOTE(review): plain `inline` without static/extern only produces an external definition under C99/C11 if a non-inline prototype is visible in this file (presumably the one in treetrie.h) - confirm. */ + tt_node_idx_t i = tt_ptr_idx(p); + if (tt_ptr_tag(p) != TT_TAG_SPECIAL && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) { + a->headers[i].inuse.refcount++; + } + return p; +} + +inline void tt_drop(tt_arena_t *a, tt_node_ptr_t p) { /* Decrements the refcount of p's node and calls recycle_node() when it reaches zero. Does nothing when p is tagged TT_TAG_SPECIAL or the refcount is not below TT_REFCOUNT_LIMIT. NOTE(review): same `inline` linkage caveat as tt_grab - confirm. */ + tt_node_idx_t i = tt_ptr_idx(p); + if (tt_ptr_tag(p) != TT_TAG_SPECIAL && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) { + /* printf("++++++++++++++++++++++++++++++ dropping %d\n", i); */ + if (--(a->headers[i].inuse.refcount) == 0) { + recycle_node(a, p); + } + } 
+} + tt_node_ptr_t tt_arena_cons(tt_arena_t *a, tt_tag_t tag, uint32_t nindex, @@ -429,7 +449,7 @@ tt_node_ptr_t tt_arena_cons(tt_arena_t *a, for (i = 0; i < a->max_probe+1; i++) { unsigned int index = (h + i) % a->table_length; - tt_node_ptr_t candidate = a->table[index]; + tt_node_ptr_t candidate = a->table[index].ptr; tt_node_idx_t candidate_i = tt_ptr_idx(candidate); /* printf("cons at %d candidate %d\n", i, candidate); */ @@ -490,21 +510,3 @@ tt_node_ptr_t tt_arena_cons(tt_arena_t *a, return p; } } - -tt_node_ptr_t tt_grab(tt_arena_t *a, tt_node_ptr_t p) { - tt_node_idx_t i = tt_ptr_idx(p); - if (tt_ptr_tag(p) != TT_TAG_SPECIAL && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) { - a->headers[i].inuse.refcount++; - } - return p; -} - -void tt_drop(tt_arena_t *a, tt_node_ptr_t p) { - tt_node_idx_t i = tt_ptr_idx(p); - if (tt_ptr_tag(p) != TT_TAG_SPECIAL && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) { - /* printf("++++++++++++++++++++++++++++++ dropping %d\n", i); */ - if (--(a->headers[i].inuse.refcount) == 0) { - recycle_node(a, p); - } - } -} diff --git a/treetrie.h b/treetrie.h index 46130e9..e50a746 100644 --- a/treetrie.h +++ b/treetrie.h @@ -34,6 +34,8 @@ typedef uint32_t tt_node_ptr_t; /* An index shifted left 2 with tag or'd in low typedef uint32_t tt_atom_t; /* Atom number 0 is the wildcard atom. 
*/ +typedef uint32_t tt_hash_t; /* Hash value type; declared in the header so tt_hashtable_entry_t can hold one */ + typedef union tt_header_t { tt_node_idx_t next_free; struct { @@ -54,6 +56,11 @@ typedef struct tt_free_chain_t { tt_node_idx_t tail; /* append links here */ } tt_free_chain_t; +typedef struct tt_hashtable_entry_t { /* One slot of the arena's hash table: a node pointer kept together with its hash */ + tt_hash_t hash; /* hash recorded for ptr when the slot was written */ + tt_node_ptr_t ptr; /* occupant of the slot; TT_NO_PTR marks a free slot */ +} tt_hashtable_entry_t; + typedef struct tt_arena_t { void *atom_context; void (*atom_incref)(void *atom_context, struct tt_arena_t *a, tt_atom_t atom); @@ -62,7 +69,8 @@ typedef struct tt_arena_t { /* Fields for the Robin Hood hashset used for hashconsing of tt_nodes */ unsigned int max_probe; unsigned int table_length; - tt_node_ptr_t *table; + tt_hash_t *hashes; /* NOTE(review): no hunk in this patch reads or writes this field; hashes are stored in table[].hash - confirm it is still needed */ + tt_hashtable_entry_t *table; /* array of table_length entries */ tt_header_t *headers; tt_node_t *nodes;