From 80576ad8737ac8dc59ff37de43d1639a11fd0bb8 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Tue, 30 Jun 2015 18:10:57 -0400 Subject: [PATCH] Widen tags to three bits, so we have a proper dict representation so we can refcount atoms --- critbit.c | 37 +++++++++++------------- critbit.h | 2 -- main.c | 31 +++++++++++++++----- treetrie.c | 83 ++++++++++++++++++++++++++++++++++++++++-------------- treetrie.h | 58 +++++++++++++++++++++++++------------- 5 files changed, 140 insertions(+), 71 deletions(-) diff --git a/critbit.c b/critbit.c index c0e6d66..1412643 100644 --- a/critbit.c +++ b/critbit.c @@ -8,19 +8,11 @@ #include "treetrie.h" #include "critbit.h" -/* We abuse the representation of leaves (being a pair of a GC'd - pointer and a non-GC'd integer) as the representation of our dict - trees, too, since we need a place to store the size of the dict. */ -#define TT_TAG_DICT TT_TAG_LEAF -#define TT_DICT_ROOT(a,p) TT_LEAF_TRIE(a,p) -#define TT_DICT_SIZE(a,p) TT_LEAF_ATOM(a,p) -#define tt_cons_dict(a,root,size) tt_cons_leaf(a,root,size) - #define RET_IF_NO_PTR(v) \ ({ tt_node_ptr_t ___w = (v); if (___w == TT_NO_PTR) return TT_NO_PTR; ___w; }) int tt_dict_size(tt_arena_t *a, tt_node_ptr_t t) { - if (t == TT_EMPTY) { + if (t == TT_EMPTY_DICT) { return 0; } else { assert(tt_ptr_tag(t) == TT_TAG_DICT); @@ -33,7 +25,7 @@ static inline int bit_ref(tt_atom_t key, unsigned int bit) { } tt_node_ptr_t tt_dict_get(tt_arena_t *a, tt_node_ptr_t t, tt_atom_t key) { - if (t == TT_EMPTY) { + if (t == TT_EMPTY_DICT) { return TT_NO_PTR; } @@ -141,20 +133,23 @@ tt_node_ptr_t tt_dict_set(tt_arena_t *a, tt_atom_t key, tt_node_ptr_t trie) { - if (t == TT_EMPTY) { + if (t == TT_EMPTY_DICT) { return tt_cons_dict(a, RET_IF_NO_PTR(tt_cons_leaf(a, trie, key)), 1); } assert(tt_ptr_tag(t) == TT_TAG_DICT); { + tt_node_ptr_t old_root = TT_DICT_ROOT(a,t); tt_node_ptr_t result = TT_NO_PTR; - int first_differing_bit = set_walk(a, key, trie, TT_DICT_ROOT(a,t), &result); + int first_differing_bit = set_walk(a, key, trie, old_root, &result); if (first_differing_bit != -1) { - result = splice_key(a, key, trie, first_differing_bit, TT_DICT_ROOT(a,t)); + result = splice_key(a, key, trie, first_differing_bit, old_root); } if (result == TT_NO_PTR) { return TT_NO_PTR; + } else if (result == old_root) { + return t; } else { return tt_cons_dict(a, result, TT_DICT_SIZE(a,t) + 1); } @@ -170,7 +165,7 @@ tt_node_ptr_t tt_dict_remove1(tt_arena_t *a, case TT_TAG_LEAF: { if (TT_LEAF_ATOM(a,n) == key) { *removed_count = 1; - return TT_EMPTY; + return TT_EMPTY_DICT; } else { return n; } @@ -180,7 +175,7 @@ tt_node_ptr_t tt_dict_remove1(tt_arena_t *a, if (bit_ref(key, index)) { tt_node_ptr_t n1 = RET_IF_NO_PTR(tt_dict_remove1(a, TT_NODE_ONE(a,n), key, removed_count)); - if (n1 == TT_EMPTY) { + if (n1 == TT_EMPTY_DICT) { return TT_NODE_ZERO(a,n); } else { return tt_cons_node(a, index, TT_NODE_ZERO(a,n), n1); @@ -188,7 +183,7 @@ tt_node_ptr_t tt_dict_remove1(tt_arena_t *a, } else { tt_node_ptr_t n1 = RET_IF_NO_PTR(tt_dict_remove1(a, TT_NODE_ZERO(a,n), key, removed_count)); - if (n1 == TT_EMPTY) { + if (n1 == TT_EMPTY_DICT) { return TT_NODE_ONE(a,n); } else { return tt_cons_node(a, index, n1, TT_NODE_ONE(a,n)); @@ -204,8 +199,8 @@ tt_node_ptr_t tt_dict_remove(tt_arena_t *a, tt_node_ptr_t t, tt_atom_t key) { - if (t == TT_EMPTY) { - return TT_EMPTY; + if (t == TT_EMPTY_DICT) { + return TT_EMPTY_DICT; } assert(tt_ptr_tag(t) == TT_TAG_DICT); @@ -214,8 +209,8 @@ tt_node_ptr_t tt_dict_remove(tt_arena_t *a, int removed_count = 0; tt_node_ptr_t n = RET_IF_NO_PTR(tt_dict_remove1(a, TT_DICT_ROOT(a,t), key, &removed_count)); - if (n == TT_EMPTY) { - return TT_EMPTY; + if (n == TT_EMPTY_DICT) { + return TT_EMPTY_DICT; } else { return tt_cons_dict(a, n, TT_DICT_SIZE(a,t) - removed_count); } @@ -247,7 +242,7 @@ void tt_dict_foreach(tt_arena_t *a, void *context, void (*f)(void *, tt_atom_t key, tt_node_ptr_t trie)) { - if (t != TT_EMPTY) { + if (t != TT_EMPTY_DICT) { assert(tt_ptr_tag(t) == TT_TAG_DICT); tt_dict_foreach1(a, TT_DICT_ROOT(a,t), context, f); } diff --git a/critbit.h b/critbit.h index 9387546..2800941 100644 --- a/critbit.h +++ b/critbit.h @@ -5,8 +5,6 @@ extern "C" { #endif -#define TT_EMPTY_DICT TT_EMPTY - extern int tt_dict_size(tt_arena_t *a, tt_node_ptr_t t); /* Returns TT_NO_PTR when key not present. Does not manipulate references. */ diff --git a/main.c b/main.c index edfd74c..c374f6a 100644 --- a/main.c +++ b/main.c @@ -7,13 +7,21 @@ #include "treetrie.h" #include "critbit.h" +static void atom_incref(void *atom_context, tt_arena_t *a, tt_atom_t atom) { + /* printf("incref %u\n", atom); */ +} + +static void atom_decref(void *atom_context, tt_arena_t *a, tt_atom_t atom) { + /* printf("decref %u\n", atom); */ +} + int main0(int argc, char *argv[]) { tt_arena_t a; int i, outer; tt_node_ptr_t prev = TT_EMPTY; setbuf(stdout, NULL); - tt_arena_init(&a); + tt_arena_init(&a, NULL, atom_incref, atom_decref); for (outer = 0; outer < 10; outer++) { /* printf("---------------------------------------- grab/drop of %d\n", prev); */ @@ -49,23 +57,32 @@ static void dump_mapping(void *context, tt_atom_t key, tt_node_ptr_t trie) { int main(int argc, char *argv[]) { tt_arena_t a; - tt_node_ptr_t curr = TT_EMPTY; + tt_node_ptr_t curr = TT_EMPTY_DICT; int i; setbuf(stdout, NULL); - tt_arena_init(&a); + tt_arena_init(&a, NULL, atom_incref, atom_decref); /* tt_dump_arena(&a); */ for (i = 0; i < 1000000; i++) { - tt_node_ptr_t next = tt_dict_set(&a, curr, i, TT_OK); + tt_node_ptr_t next = tt_grab(&a, tt_dict_set(&a, curr, i, TT_OK)); tt_drop(&a, curr); - curr = tt_grab(&a, next); + curr = next; /* printf("\nAfter i=%d...\n", i); */ /* tt_dump_arena(&a); */ } + /* for (i = 0; i < 1000000; i++) { */ + /* tt_node_ptr_t next = tt_grab(&a, tt_dict_remove(&a, curr, i << 1)); */ + /* tt_drop(&a, curr); */ + /* curr = next; */ + /* } */ + /* tt_arena_flush(&a); */ - printf("\nFinal tree node index is %u/%u\n", tt_ptr_idx(curr), tt_ptr_tag(curr)); + printf("\nFinal tree node index is %u/%u; dict size is %u\n", + tt_ptr_idx(curr), + tt_ptr_tag(curr), + TT_DICT_SIZE(&a, curr)); /* tt_dump_arena(&a); */ /* tt_dict_foreach(&a, curr, NULL, dump_mapping); */ @@ -74,7 +91,7 @@ int main(int argc, char *argv[]) { curr = TT_NO_PTR; /* tt_arena_flush(&a); */ /* tt_dump_arena(&a); */ - /* tt_dump_arena_summary(&a); */ + tt_dump_arena_summary(&a); tt_arena_done(&a); return EXIT_SUCCESS; diff --git a/treetrie.c b/treetrie.c index 900b0f2..fcbe4a5 100644 --- a/treetrie.c +++ b/treetrie.c @@ -12,9 +12,9 @@ typedef uint32_t tt_hash_t; /* Customized special-purpose fasthash variation */ #define mix(h) ({ \ - (h) ^= (h) >> 23; \ - (h) *= 0x2127599bf4325c37ULL; \ - (h) ^= (h) >> 47; }) + (h) ^= (h) >> 23; \ + (h) *= 0x2127599bf4325c37ULL; \ + (h) ^= (h) >> 47; }) static inline uint64_t fasthash_4_ints(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t v4) { const uint64_t m = 0x880355f21e6d1965ULL; uint64_t h = (16 * m); @@ -97,9 +97,16 @@ static tt_node_idx_t chain_pop(tt_arena_t *a, tt_free_chain_t *chain) { return i; } -int tt_arena_init(tt_arena_t *a) { +int tt_arena_init(tt_arena_t *a, + void *atom_context, + void (*atom_incref)(void *context, tt_arena_t *a, tt_atom_t atom), + void (*atom_decref)(void *context, tt_arena_t *a, tt_atom_t atom)) +{ + a->atom_context = atom_context; + a->atom_incref = atom_incref; + a->atom_decref = atom_decref; a->max_probe = 0; - a->table_length = 16411; /* 16384; */ + a->table_length = 16; //16411; /* 16384; */ a->table = calloc(a->table_length, sizeof(a->table[0])); a->headers = calloc(a->table_length, sizeof(a->headers[0])); a->nodes = calloc(a->table_length, sizeof(a->nodes[0])); @@ -116,7 +123,7 @@ int tt_arena_init(tt_arena_t *a) { { int i; - for (i = TT_FIRST_VALID_NODE_IDX; i < a->table_length; i++) { + for (i = 0; i < a->table_length; i++) { chain_append(a, &a->free_chain, i); a->free_count++; } @@ -138,7 +145,7 @@ static void register_node(tt_arena_t *a, tt_node_ptr_t p, tt_hash_t initial_hash a->max_probe = i; } - if (candidate < TT_FIRST_VALID_NODE_PTR) { + if (candidate == TT_NO_PTR) { /* This slot in the table is free. */ /* printf("slot free!\n"); */ a->table[index] = p; @@ -211,7 +218,7 @@ static int tt_grow(tt_arena_t *a) { int i; for (i = 0; i < old_table_length; i++) { tt_node_ptr_t p = old_table[i]; - if (p >= TT_FIRST_VALID_NODE_PTR) { + if (p != TT_NO_PTR) { register_node(a, p, tt_hash_node(a, p)); } } @@ -265,9 +272,12 @@ void tt_dump_arena(tt_arena_t *a) { for (i = 0; i < a->table_length; i++) { tt_node_ptr_t p = a->table[i]; tt_node_idx_t n = tt_ptr_idx(p); - if (n < TT_FIRST_VALID_NODE_IDX) { - /* Skip. */ - } else if (n >= a->table_length) { + + if (p == TT_NO_PTR) { + continue; + } + + if (n >= a->table_length) { printf("%12u -> %12u ?!?!?!\n", i, n); } else { tt_hash_t h = tt_hash_node(a, p); @@ -305,6 +315,15 @@ void tt_dump_arena(tt_arena_t *a) { tt_ptr_idx(a->nodes[n].b), tt_ptr_tag(a->nodes[n].b)); break; + case TT_TAG_DICT: + printf("dict %u/%u %u\n", + tt_ptr_idx(a->nodes[n].a), + tt_ptr_tag(a->nodes[n].a), + a->nodes[n].b); + break; + default: + printf("???? %08x\n", p); + assert(0); } } } @@ -315,7 +334,7 @@ void tt_arena_flush1(tt_arena_t *a, tt_free_chain_t *c) { tt_node_idx_t i = a->free_chain.head; chain_splice(a, c, &a->free_chain); chain_init(a, &a->free_chain); - while (i >= TT_FIRST_VALID_NODE_IDX) { + while (i != TT_NO_IDX) { tt_drop(a, a->nodes[i].a); tt_drop(a, a->nodes[i].b); a->nodes[i].a = TT_NO_PTR; @@ -333,6 +352,11 @@ void tt_arena_flush(tt_arena_t *a) { a->free_chain = c; } +static inline int heap_tag_p(tt_node_ptr_t p) { + tt_tag_t tag = tt_ptr_tag(p); + return tag != TT_TAG_SPECIAL && tag != TT_TAG_RESERVED0 && tag != TT_TAG_INVALID; +} + static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) { tt_node_idx_t ni = tt_ptr_idx(p); tt_hash_t h; @@ -340,11 +364,19 @@ static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) { /* printf("++++++++++++++++++++++++++++++++++++++++ recycling %d\n", ni); */ - assert(p >= TT_FIRST_VALID_NODE_PTR); + assert(heap_tag_p(p)); h = tt_hash_node(a, p); - if (tt_ptr_tag(p) == TT_TAG_LEAF) { - a->nodes[ni].b = TT_NO_PTR; + switch (tt_ptr_tag(p)) { + case TT_TAG_LEAF: + a->atom_decref(a->atom_context, a, a->nodes[ni].b); + a->nodes[ni].b = TT_NO_PTR; + break; + case TT_TAG_DICT: + a->nodes[ni].b = TT_NO_PTR; + break; + default: + break; } chain_prepend(a, &a->free_chain, ni); a->free_count++; @@ -354,7 +386,7 @@ static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) { tt_node_ptr_t candidate = a->table[index]; /* printf("hunting i=%d index=%d p=%d candidate=%d\n", i, index, p, candidate); */ - assert(candidate >= TT_FIRST_VALID_NODE_PTR); /* Internal error if node not in table */ + assert(candidate != TT_NO_PTR); /* Internal error if node not in table */ if (candidate == p) { /* We found it. Now swap in elements. */ @@ -366,7 +398,7 @@ static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) { a->table[index] = TT_NO_PTR; - if (next_p < TT_FIRST_VALID_NODE_PTR) { + if (next_p == TT_NO_PTR) { break; } @@ -402,7 +434,7 @@ tt_node_ptr_t tt_arena_cons(tt_arena_t *a, /* printf("cons at %d candidate %d\n", i, candidate); */ /* TODO: perhaps also bail early if we detect that the hash code changes */ - if (candidate < TT_FIRST_VALID_NODE_PTR) { + if (candidate == TT_NO_PTR) { /* printf("cons empty cell\n"); */ break; } @@ -435,7 +467,16 @@ tt_node_ptr_t tt_arena_cons(tt_arena_t *a, tt_node_ptr_t p = tt_mkptr(node, tag); tt_grab(a, na); - if (tag != TT_TAG_LEAF) tt_grab(a, nb); + switch (tag) { + case TT_TAG_LEAF: + a->atom_incref(a->atom_context, a, nb); + break; + case TT_TAG_DICT: + break; + default: + tt_grab(a, nb); + } + tt_drop(a, a->nodes[node].a); tt_drop(a, a->nodes[node].b); a->free_count--; @@ -452,7 +493,7 @@ tt_node_ptr_t tt_arena_cons(tt_arena_t *a, tt_node_ptr_t tt_grab(tt_arena_t *a, tt_node_ptr_t p) { tt_node_idx_t i = tt_ptr_idx(p); - if (i >= TT_FIRST_VALID_NODE_IDX && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) { + if (tt_ptr_tag(p) != TT_TAG_SPECIAL && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) { a->headers[i].inuse.refcount++; } return p; @@ -460,7 +501,7 @@ tt_node_ptr_t tt_grab(tt_arena_t *a, tt_node_ptr_t p) { void tt_drop(tt_arena_t *a, tt_node_ptr_t p) { tt_node_idx_t i = tt_ptr_idx(p); - if (i >= TT_FIRST_VALID_NODE_IDX && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) { + if (tt_ptr_tag(p) != TT_TAG_SPECIAL && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) { /* printf("++++++++++++++++++++++++++++++ dropping %d\n", i); */ if (--(a->headers[i].inuse.refcount) == 0) { recycle_node(a, p); diff --git a/treetrie.h b/treetrie.h index 3b6c767..46130e9 100644 --- a/treetrie.h +++ b/treetrie.h @@ -6,28 +6,32 @@ extern "C" { #endif typedef enum tt_tag_t { - TT_TAG_TAIL = 0, + TT_TAG_INVALID = 0, /* an invalid pointer - should only be used with 0 as index. */ + TT_TAG_TAIL, TT_TAG_BRANCH, - TT_TAG_LEAF, /* only case where one of node a or b points to non-node */ - TT_TAG_NODE + TT_TAG_LEAF, /* node b points to atom, not node */ + TT_TAG_NODE, + TT_TAG_DICT, /* node b is just an integer */ + TT_TAG_RESERVED0, /* never used */ + TT_TAG_SPECIAL, /* immediate special - all others are pointerlike */ } tt_tag_t; -typedef enum tt_reserved_node_idx_t { - TT_NO_IDX = 0, /* invalid node index, means "no node at all", not even empty */ - TT_EMPTY_IDX, /* empty treetrie AND empty dict */ +typedef enum tt_special_idx_t { + TT_EMPTY_IDX, /* empty treetrie */ TT_OK_IDX, /* terminal marker */ + TT_EMPTY_DICT_IDX, /* empty dict */ +} tt_special_idx_t; - TT_FIRST_VALID_NODE_IDX -} tt_reserved_node_idx_t; - -#define TT_NO_PTR (tt_mkptr(TT_NO_IDX, 0)) -#define TT_EMPTY (tt_mkptr(TT_EMPTY_IDX, 0)) -#define TT_OK (tt_mkptr(TT_OK_IDX, 0)) -#define TT_FIRST_VALID_NODE_PTR (tt_mkptr(TT_FIRST_VALID_NODE_IDX, 0)) - -typedef uint32_t tt_node_idx_t; /* N.B. tt_reserved_node_idx_t */ +typedef uint32_t tt_node_idx_t; /* N.B. tt_special_idx_t; and 0 is reserved. */ typedef uint32_t tt_node_ptr_t; /* An index shifted left 2 with tag or'd in low bits */ +#define TT_NO_IDX ((tt_node_idx_t) (0)) +#define TT_NO_PTR ((tt_node_ptr_t) (0)) + +#define TT_EMPTY (tt_mkptr(TT_EMPTY_IDX, TT_TAG_SPECIAL)) +#define TT_OK (tt_mkptr(TT_OK_IDX, TT_TAG_SPECIAL)) +#define TT_EMPTY_DICT (tt_mkptr(TT_EMPTY_DICT_IDX, TT_TAG_SPECIAL)) + typedef uint32_t tt_atom_t; /* Atom number 0 is the wildcard atom. */ typedef union tt_header_t { @@ -42,7 +46,7 @@ typedef union tt_header_t { typedef struct tt_node_t { tt_node_ptr_t a; /* always a real node ptr */ - tt_node_ptr_t b; /* a real node ptr unless corresponding tag is TT_TAG_LEAF */ + tt_node_ptr_t b; /* usually a real node ptr; see definition of tt_tag_t */ } tt_node_t; typedef struct tt_free_chain_t { @@ -51,6 +55,10 @@ typedef struct tt_free_chain_t { } tt_free_chain_t; typedef struct tt_arena_t { + void *atom_context; + void (*atom_incref)(void *atom_context, struct tt_arena_t *a, tt_atom_t atom); + void (*atom_decref)(void *atom_context, struct tt_arena_t *a, tt_atom_t atom); + /* Fields for the Robin Hood hashset used for hashconsing of tt_nodes */ unsigned int max_probe; unsigned int table_length; @@ -64,18 +72,22 @@ typedef struct tt_arena_t { } tt_arena_t; static inline tt_node_ptr_t tt_mkptr(tt_node_idx_t i, tt_tag_t tag) { - return (i << 2) | tag; + return (i << 3) | tag; } static inline tt_node_idx_t tt_ptr_idx(tt_node_ptr_t p) { - return p >> 2; + return p >> 3; } static inline tt_tag_t tt_ptr_tag(tt_node_ptr_t p) { - return p & 3; + return p & 7; } -extern int tt_arena_init(tt_arena_t *a); +extern int tt_arena_init(tt_arena_t *a, + void *atom_context, + void (*atom_incref)(void *atom_context, tt_arena_t *a, tt_atom_t atom), + void (*atom_decref)(void *atom_context, tt_arena_t *a, tt_atom_t atom)); + extern void tt_arena_done(tt_arena_t *a); extern void tt_dump_arena_summary(tt_arena_t *a); @@ -118,6 +130,10 @@ static inline tt_node_ptr_t tt_cons_node(tt_arena_t *a, return tt_arena_cons(a, TT_TAG_NODE, index, zero, one); } +static inline tt_node_ptr_t tt_cons_dict(tt_arena_t *a, tt_node_ptr_t p, uint32_t size) { + return tt_arena_cons(a, TT_TAG_DICT, 0, p, size); +} + static inline tt_node_ptr_t tt_left(tt_arena_t *a, tt_node_ptr_t p) { return a->nodes[tt_ptr_idx(p)].a; } @@ -134,6 +150,8 @@ static inline tt_node_ptr_t tt_right(tt_arena_t *a, tt_node_ptr_t p) { #define TT_NODE_INDEX(a,p) (a->headers[tt_ptr_idx(p)].inuse.index) #define TT_NODE_ZERO(a,p) tt_left(a,p) #define TT_NODE_ONE(a,p) tt_right(a,p) +#define TT_DICT_ROOT(a,p) tt_left(a,p) +#define TT_DICT_SIZE(a,p) ((uint32_t) tt_right(a,p)) #ifdef __cplusplus }