Widen tags to three bits so that dicts get a proper representation of their own, which in turn lets us refcount atoms

This commit is contained in:
Tony Garnock-Jones 2015-06-30 18:10:57 -04:00
parent 44b6db9ae0
commit 80576ad873
5 changed files with 140 additions and 71 deletions

View File

@ -8,19 +8,11 @@
#include "treetrie.h"
#include "critbit.h"
/* We abuse the representation of leaves (being a pair of a GC'd
pointer and a non-GC'd integer) as the representation of our dict
trees, too, since we need a place to store the size of the dict. */
#define TT_TAG_DICT TT_TAG_LEAF
#define TT_DICT_ROOT(a,p) TT_LEAF_TRIE(a,p)
#define TT_DICT_SIZE(a,p) TT_LEAF_ATOM(a,p)
#define tt_cons_dict(a,root,size) tt_cons_leaf(a,root,size)
/* Evaluates v exactly once. If the result is TT_NO_PTR, the *enclosing
   function* returns TT_NO_PTR immediately; otherwise the macro yields
   the value. Only usable inside functions returning tt_node_ptr_t.
   Relies on the GNU statement-expression extension ({ ... }). */
#define RET_IF_NO_PTR(v) \
({ tt_node_ptr_t ___w = (v); if (___w == TT_NO_PTR) return TT_NO_PTR; ___w; })
int tt_dict_size(tt_arena_t *a, tt_node_ptr_t t) {
if (t == TT_EMPTY) {
if (t == TT_EMPTY_DICT) {
return 0;
} else {
assert(tt_ptr_tag(t) == TT_TAG_DICT);
@ -33,7 +25,7 @@ static inline int bit_ref(tt_atom_t key, unsigned int bit) {
}
tt_node_ptr_t tt_dict_get(tt_arena_t *a, tt_node_ptr_t t, tt_atom_t key) {
if (t == TT_EMPTY) {
if (t == TT_EMPTY_DICT) {
return TT_NO_PTR;
}
@ -141,20 +133,23 @@ tt_node_ptr_t tt_dict_set(tt_arena_t *a,
tt_atom_t key,
tt_node_ptr_t trie)
{
if (t == TT_EMPTY) {
if (t == TT_EMPTY_DICT) {
return tt_cons_dict(a, RET_IF_NO_PTR(tt_cons_leaf(a, trie, key)), 1);
}
assert(tt_ptr_tag(t) == TT_TAG_DICT);
{
tt_node_ptr_t old_root = TT_DICT_ROOT(a,t);
tt_node_ptr_t result = TT_NO_PTR;
int first_differing_bit = set_walk(a, key, trie, TT_DICT_ROOT(a,t), &result);
int first_differing_bit = set_walk(a, key, trie, old_root, &result);
if (first_differing_bit != -1) {
result = splice_key(a, key, trie, first_differing_bit, TT_DICT_ROOT(a,t));
result = splice_key(a, key, trie, first_differing_bit, old_root);
}
if (result == TT_NO_PTR) {
return TT_NO_PTR;
} else if (result == old_root) {
return t;
} else {
return tt_cons_dict(a, result, TT_DICT_SIZE(a,t) + 1);
}
@ -170,7 +165,7 @@ tt_node_ptr_t tt_dict_remove1(tt_arena_t *a,
case TT_TAG_LEAF: {
if (TT_LEAF_ATOM(a,n) == key) {
*removed_count = 1;
return TT_EMPTY;
return TT_EMPTY_DICT;
} else {
return n;
}
@ -180,7 +175,7 @@ tt_node_ptr_t tt_dict_remove1(tt_arena_t *a,
if (bit_ref(key, index)) {
tt_node_ptr_t n1 =
RET_IF_NO_PTR(tt_dict_remove1(a, TT_NODE_ONE(a,n), key, removed_count));
if (n1 == TT_EMPTY) {
if (n1 == TT_EMPTY_DICT) {
return TT_NODE_ZERO(a,n);
} else {
return tt_cons_node(a, index, TT_NODE_ZERO(a,n), n1);
@ -188,7 +183,7 @@ tt_node_ptr_t tt_dict_remove1(tt_arena_t *a,
} else {
tt_node_ptr_t n1 =
RET_IF_NO_PTR(tt_dict_remove1(a, TT_NODE_ZERO(a,n), key, removed_count));
if (n1 == TT_EMPTY) {
if (n1 == TT_EMPTY_DICT) {
return TT_NODE_ONE(a,n);
} else {
return tt_cons_node(a, index, n1, TT_NODE_ONE(a,n));
@ -204,8 +199,8 @@ tt_node_ptr_t tt_dict_remove(tt_arena_t *a,
tt_node_ptr_t t,
tt_atom_t key)
{
if (t == TT_EMPTY) {
return TT_EMPTY;
if (t == TT_EMPTY_DICT) {
return TT_EMPTY_DICT;
}
assert(tt_ptr_tag(t) == TT_TAG_DICT);
@ -214,8 +209,8 @@ tt_node_ptr_t tt_dict_remove(tt_arena_t *a,
int removed_count = 0;
tt_node_ptr_t n =
RET_IF_NO_PTR(tt_dict_remove1(a, TT_DICT_ROOT(a,t), key, &removed_count));
if (n == TT_EMPTY) {
return TT_EMPTY;
if (n == TT_EMPTY_DICT) {
return TT_EMPTY_DICT;
} else {
return tt_cons_dict(a, n, TT_DICT_SIZE(a,t) - removed_count);
}
@ -247,7 +242,7 @@ void tt_dict_foreach(tt_arena_t *a,
void *context,
void (*f)(void *, tt_atom_t key, tt_node_ptr_t trie))
{
if (t != TT_EMPTY) {
if (t != TT_EMPTY_DICT) {
assert(tt_ptr_tag(t) == TT_TAG_DICT);
tt_dict_foreach1(a, TT_DICT_ROOT(a,t), context, f);
}

View File

@ -5,8 +5,6 @@
extern "C" {
#endif
#define TT_EMPTY_DICT TT_EMPTY
extern int tt_dict_size(tt_arena_t *a, tt_node_ptr_t t);
/* Returns TT_NO_PTR when key not present. Does not manipulate references. */

31
main.c
View File

@ -7,13 +7,21 @@
#include "treetrie.h"
#include "critbit.h"
/* Atom incref callback passed to tt_arena_init. This test driver does
   not track atoms, so the callback deliberately does nothing. */
static void atom_incref(void *atom_context, tt_arena_t *a, tt_atom_t atom) {
  (void) atom_context;
  (void) a;
  (void) atom;
  /* Tracing hook, disabled: printf("incref %u\n", atom); */
}
/* Atom decref callback passed to tt_arena_init. This test driver does
   not track atoms, so the callback deliberately does nothing. */
static void atom_decref(void *atom_context, tt_arena_t *a, tt_atom_t atom) {
  (void) atom_context;
  (void) a;
  (void) atom;
  /* Tracing hook, disabled: printf("decref %u\n", atom); */
}
int main0(int argc, char *argv[]) {
tt_arena_t a;
int i, outer;
tt_node_ptr_t prev = TT_EMPTY;
setbuf(stdout, NULL);
tt_arena_init(&a);
tt_arena_init(&a, NULL, atom_incref, atom_decref);
for (outer = 0; outer < 10; outer++) {
/* printf("---------------------------------------- grab/drop of %d\n", prev); */
@ -49,23 +57,32 @@ static void dump_mapping(void *context, tt_atom_t key, tt_node_ptr_t trie) {
int main(int argc, char *argv[]) {
tt_arena_t a;
tt_node_ptr_t curr = TT_EMPTY;
tt_node_ptr_t curr = TT_EMPTY_DICT;
int i;
setbuf(stdout, NULL);
tt_arena_init(&a);
tt_arena_init(&a, NULL, atom_incref, atom_decref);
/* tt_dump_arena(&a); */
for (i = 0; i < 1000000; i++) {
tt_node_ptr_t next = tt_dict_set(&a, curr, i, TT_OK);
tt_node_ptr_t next = tt_grab(&a, tt_dict_set(&a, curr, i, TT_OK));
tt_drop(&a, curr);
curr = tt_grab(&a, next);
curr = next;
/* printf("\nAfter i=%d...\n", i); */
/* tt_dump_arena(&a); */
}
/* for (i = 0; i < 1000000; i++) { */
/* tt_node_ptr_t next = tt_grab(&a, tt_dict_remove(&a, curr, i << 1)); */
/* tt_drop(&a, curr); */
/* curr = next; */
/* } */
/* tt_arena_flush(&a); */
printf("\nFinal tree node index is %u/%u\n", tt_ptr_idx(curr), tt_ptr_tag(curr));
printf("\nFinal tree node index is %u/%u; dict size is %u\n",
tt_ptr_idx(curr),
tt_ptr_tag(curr),
TT_DICT_SIZE(&a, curr));
/* tt_dump_arena(&a); */
/* tt_dict_foreach(&a, curr, NULL, dump_mapping); */
@ -74,7 +91,7 @@ int main(int argc, char *argv[]) {
curr = TT_NO_PTR;
/* tt_arena_flush(&a); */
/* tt_dump_arena(&a); */
/* tt_dump_arena_summary(&a); */
tt_dump_arena_summary(&a);
tt_arena_done(&a);
return EXIT_SUCCESS;

View File

@ -12,9 +12,9 @@ typedef uint32_t tt_hash_t;
/* Customized special-purpose fasthash variation */
#define mix(h) ({ \
(h) ^= (h) >> 23; \
(h) *= 0x2127599bf4325c37ULL; \
(h) ^= (h) >> 47; })
(h) ^= (h) >> 23; \
(h) *= 0x2127599bf4325c37ULL; \
(h) ^= (h) >> 47; })
static inline uint64_t fasthash_4_ints(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t v4) {
const uint64_t m = 0x880355f21e6d1965ULL;
uint64_t h = (16 * m);
@ -97,9 +97,16 @@ static tt_node_idx_t chain_pop(tt_arena_t *a, tt_free_chain_t *chain) {
return i;
}
int tt_arena_init(tt_arena_t *a) {
int tt_arena_init(tt_arena_t *a,
void *atom_context,
void (*atom_incref)(void *context, tt_arena_t *a, tt_atom_t atom),
void (*atom_decref)(void *context, tt_arena_t *a, tt_atom_t atom))
{
a->atom_context = atom_context;
a->atom_incref = atom_incref;
a->atom_decref = atom_decref;
a->max_probe = 0;
a->table_length = 16411; /* 16384; */
a->table_length = 16; //16411; /* 16384; */
a->table = calloc(a->table_length, sizeof(a->table[0]));
a->headers = calloc(a->table_length, sizeof(a->headers[0]));
a->nodes = calloc(a->table_length, sizeof(a->nodes[0]));
@ -116,7 +123,7 @@ int tt_arena_init(tt_arena_t *a) {
{
int i;
for (i = TT_FIRST_VALID_NODE_IDX; i < a->table_length; i++) {
for (i = 0; i < a->table_length; i++) {
chain_append(a, &a->free_chain, i);
a->free_count++;
}
@ -138,7 +145,7 @@ static void register_node(tt_arena_t *a, tt_node_ptr_t p, tt_hash_t initial_hash
a->max_probe = i;
}
if (candidate < TT_FIRST_VALID_NODE_PTR) {
if (candidate == TT_NO_PTR) {
/* This slot in the table is free. */
/* printf("slot free!\n"); */
a->table[index] = p;
@ -211,7 +218,7 @@ static int tt_grow(tt_arena_t *a) {
int i;
for (i = 0; i < old_table_length; i++) {
tt_node_ptr_t p = old_table[i];
if (p >= TT_FIRST_VALID_NODE_PTR) {
if (p != TT_NO_PTR) {
register_node(a, p, tt_hash_node(a, p));
}
}
@ -265,9 +272,12 @@ void tt_dump_arena(tt_arena_t *a) {
for (i = 0; i < a->table_length; i++) {
tt_node_ptr_t p = a->table[i];
tt_node_idx_t n = tt_ptr_idx(p);
if (n < TT_FIRST_VALID_NODE_IDX) {
/* Skip. */
} else if (n >= a->table_length) {
if (p == TT_NO_PTR) {
continue;
}
if (n >= a->table_length) {
printf("%12u -> %12u ?!?!?!\n", i, n);
} else {
tt_hash_t h = tt_hash_node(a, p);
@ -305,6 +315,15 @@ void tt_dump_arena(tt_arena_t *a) {
tt_ptr_idx(a->nodes[n].b),
tt_ptr_tag(a->nodes[n].b));
break;
case TT_TAG_DICT:
printf("dict %u/%u %u\n",
tt_ptr_idx(a->nodes[n].a),
tt_ptr_tag(a->nodes[n].a),
a->nodes[n].b);
break;
default:
printf("???? %08x\n", p);
assert(0);
}
}
}
@ -315,7 +334,7 @@ void tt_arena_flush1(tt_arena_t *a, tt_free_chain_t *c) {
tt_node_idx_t i = a->free_chain.head;
chain_splice(a, c, &a->free_chain);
chain_init(a, &a->free_chain);
while (i >= TT_FIRST_VALID_NODE_IDX) {
while (i != TT_NO_IDX) {
tt_drop(a, a->nodes[i].a);
tt_drop(a, a->nodes[i].b);
a->nodes[i].a = TT_NO_PTR;
@ -333,6 +352,11 @@ void tt_arena_flush(tt_arena_t *a) {
a->free_chain = c;
}
/* Predicate on the tag of p: returns 0 for TT_TAG_SPECIAL,
   TT_TAG_RESERVED0 and TT_TAG_INVALID, and 1 for every other tag. */
static inline int heap_tag_p(tt_node_ptr_t p) {
  switch (tt_ptr_tag(p)) {
    case TT_TAG_SPECIAL:
    case TT_TAG_RESERVED0:
    case TT_TAG_INVALID:
      return 0;
    default:
      return 1;
  }
}
static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_idx_t ni = tt_ptr_idx(p);
tt_hash_t h;
@ -340,11 +364,19 @@ static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) {
/* printf("++++++++++++++++++++++++++++++++++++++++ recycling %d\n", ni); */
assert(p >= TT_FIRST_VALID_NODE_PTR);
assert(heap_tag_p(p));
h = tt_hash_node(a, p);
if (tt_ptr_tag(p) == TT_TAG_LEAF) {
a->nodes[ni].b = TT_NO_PTR;
switch (tt_ptr_tag(p)) {
case TT_TAG_LEAF:
a->atom_decref(a->atom_context, a, a->nodes[ni].b);
a->nodes[ni].b = TT_NO_PTR;
break;
case TT_TAG_DICT:
a->nodes[ni].b = TT_NO_PTR;
break;
default:
break;
}
chain_prepend(a, &a->free_chain, ni);
a->free_count++;
@ -354,7 +386,7 @@ static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_ptr_t candidate = a->table[index];
/* printf("hunting i=%d index=%d p=%d candidate=%d\n", i, index, p, candidate); */
assert(candidate >= TT_FIRST_VALID_NODE_PTR); /* Internal error if node not in table */
assert(candidate != TT_NO_PTR); /* Internal error if node not in table */
if (candidate == p) {
/* We found it. Now swap in elements. */
@ -366,7 +398,7 @@ static void recycle_node(tt_arena_t *a, tt_node_ptr_t p) {
a->table[index] = TT_NO_PTR;
if (next_p < TT_FIRST_VALID_NODE_PTR) {
if (next_p == TT_NO_PTR) {
break;
}
@ -402,7 +434,7 @@ tt_node_ptr_t tt_arena_cons(tt_arena_t *a,
/* printf("cons at %d candidate %d\n", i, candidate); */
/* TODO: perhaps also bail early if we detect that the hash code changes */
if (candidate < TT_FIRST_VALID_NODE_PTR) {
if (candidate == TT_NO_PTR) {
/* printf("cons empty cell\n"); */
break;
}
@ -435,7 +467,16 @@ tt_node_ptr_t tt_arena_cons(tt_arena_t *a,
tt_node_ptr_t p = tt_mkptr(node, tag);
tt_grab(a, na);
if (tag != TT_TAG_LEAF) tt_grab(a, nb);
switch (tag) {
case TT_TAG_LEAF:
a->atom_incref(a->atom_context, a, nb);
break;
case TT_TAG_DICT:
break;
default:
tt_grab(a, nb);
}
tt_drop(a, a->nodes[node].a);
tt_drop(a, a->nodes[node].b);
a->free_count--;
@ -452,7 +493,7 @@ tt_node_ptr_t tt_arena_cons(tt_arena_t *a,
tt_node_ptr_t tt_grab(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_idx_t i = tt_ptr_idx(p);
if (i >= TT_FIRST_VALID_NODE_IDX && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) {
if (tt_ptr_tag(p) != TT_TAG_SPECIAL && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) {
a->headers[i].inuse.refcount++;
}
return p;
@ -460,7 +501,7 @@ tt_node_ptr_t tt_grab(tt_arena_t *a, tt_node_ptr_t p) {
void tt_drop(tt_arena_t *a, tt_node_ptr_t p) {
tt_node_idx_t i = tt_ptr_idx(p);
if (i >= TT_FIRST_VALID_NODE_IDX && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) {
if (tt_ptr_tag(p) != TT_TAG_SPECIAL && a->headers[i].inuse.refcount < TT_REFCOUNT_LIMIT) {
/* printf("++++++++++++++++++++++++++++++ dropping %d\n", i); */
if (--(a->headers[i].inuse.refcount) == 0) {
recycle_node(a, p);

View File

@ -6,28 +6,32 @@ extern "C" {
#endif
typedef enum tt_tag_t {
TT_TAG_TAIL = 0,
TT_TAG_INVALID = 0, /* an invalid pointer - should only be used with 0 as index. */
TT_TAG_TAIL,
TT_TAG_BRANCH,
TT_TAG_LEAF, /* only case where one of node a or b points to non-node */
TT_TAG_NODE
TT_TAG_LEAF, /* node b points to atom, not node */
TT_TAG_NODE,
TT_TAG_DICT, /* node b is just an integer */
TT_TAG_RESERVED0, /* never used */
TT_TAG_SPECIAL, /* immediate special - all others are pointerlike */
} tt_tag_t;
typedef enum tt_reserved_node_idx_t {
TT_NO_IDX = 0, /* invalid node index, means "no node at all", not even empty */
TT_EMPTY_IDX, /* empty treetrie AND empty dict */
typedef enum tt_special_idx_t {
TT_EMPTY_IDX, /* empty treetrie */
TT_OK_IDX, /* terminal marker */
TT_EMPTY_DICT_IDX, /* empty dict */
} tt_special_idx_t;
TT_FIRST_VALID_NODE_IDX
} tt_reserved_node_idx_t;
#define TT_NO_PTR (tt_mkptr(TT_NO_IDX, 0))
#define TT_EMPTY (tt_mkptr(TT_EMPTY_IDX, 0))
#define TT_OK (tt_mkptr(TT_OK_IDX, 0))
#define TT_FIRST_VALID_NODE_PTR (tt_mkptr(TT_FIRST_VALID_NODE_IDX, 0))
typedef uint32_t tt_node_idx_t; /* N.B. tt_reserved_node_idx_t */
typedef uint32_t tt_node_idx_t; /* N.B. tt_special_idx_t; and 0 is reserved. */
typedef uint32_t tt_node_ptr_t; /* An index shifted left 3 with a 3-bit tag or'd in low bits; see tt_mkptr */
#define TT_NO_IDX ((tt_node_idx_t) (0))
#define TT_NO_PTR ((tt_node_ptr_t) (0))
#define TT_EMPTY (tt_mkptr(TT_EMPTY_IDX, TT_TAG_SPECIAL))
#define TT_OK (tt_mkptr(TT_OK_IDX, TT_TAG_SPECIAL))
#define TT_EMPTY_DICT (tt_mkptr(TT_EMPTY_DICT_IDX, TT_TAG_SPECIAL))
typedef uint32_t tt_atom_t; /* Atom number 0 is the wildcard atom. */
typedef union tt_header_t {
@ -42,7 +46,7 @@ typedef union tt_header_t {
typedef struct tt_node_t {
tt_node_ptr_t a; /* always a real node ptr */
tt_node_ptr_t b; /* a real node ptr unless corresponding tag is TT_TAG_LEAF */
tt_node_ptr_t b; /* usually a real node ptr; see definition of tt_tag_t */
} tt_node_t;
typedef struct tt_free_chain_t {
@ -51,6 +55,10 @@ typedef struct tt_free_chain_t {
} tt_free_chain_t;
typedef struct tt_arena_t {
void *atom_context;
void (*atom_incref)(void *atom_context, struct tt_arena_t *a, tt_atom_t atom);
void (*atom_decref)(void *atom_context, struct tt_arena_t *a, tt_atom_t atom);
/* Fields for the Robin Hood hashset used for hashconsing of tt_nodes */
unsigned int max_probe;
unsigned int table_length;
@ -64,18 +72,22 @@ typedef struct tt_arena_t {
} tt_arena_t;
static inline tt_node_ptr_t tt_mkptr(tt_node_idx_t i, tt_tag_t tag) {
return (i << 2) | tag;
return (i << 3) | tag;
}
static inline tt_node_idx_t tt_ptr_idx(tt_node_ptr_t p) {
return p >> 2;
return p >> 3;
}
static inline tt_tag_t tt_ptr_tag(tt_node_ptr_t p) {
return p & 3;
return p & 7;
}
extern int tt_arena_init(tt_arena_t *a);
extern int tt_arena_init(tt_arena_t *a,
void *atom_context,
void (*atom_incref)(void *atom_context, tt_arena_t *a, tt_atom_t atom),
void (*atom_decref)(void *atom_context, tt_arena_t *a, tt_atom_t atom));
extern void tt_arena_done(tt_arena_t *a);
extern void tt_dump_arena_summary(tt_arena_t *a);
@ -118,6 +130,10 @@ static inline tt_node_ptr_t tt_cons_node(tt_arena_t *a,
return tt_arena_cons(a, TT_TAG_NODE, index, zero, one);
}
/* Conses a TT_TAG_DICT node via tt_arena_cons, passing p (the dict's
   root) as the first child and size as the second. The index argument
   is passed as 0. Returns whatever tt_arena_cons returns. */
static inline tt_node_ptr_t tt_cons_dict(tt_arena_t *a, tt_node_ptr_t p, uint32_t size) {
return tt_arena_cons(a, TT_TAG_DICT, 0, p, size);
}
/* Returns the `a` field of the node selected by p's index in a->nodes.
   The tag bits of p are ignored and no bounds check is performed. */
static inline tt_node_ptr_t tt_left(tt_arena_t *a, tt_node_ptr_t p) {
return a->nodes[tt_ptr_idx(p)].a;
}
@ -134,6 +150,8 @@ static inline tt_node_ptr_t tt_right(tt_arena_t *a, tt_node_ptr_t p) {
#define TT_NODE_INDEX(a,p) (a->headers[tt_ptr_idx(p)].inuse.index)
#define TT_NODE_ZERO(a,p) tt_left(a,p)
#define TT_NODE_ONE(a,p) tt_right(a,p)
/* Accessors for TT_TAG_DICT nodes. TT_DICT_ROOT reads the first child
   (tt_left); TT_DICT_SIZE reads the second slot via tt_right and casts
   it to uint32_t, since for dicts that slot holds the entry count
   passed to tt_cons_dict rather than a node pointer. Neither macro
   checks that p actually carries the TT_TAG_DICT tag. */
#define TT_DICT_ROOT(a,p) tt_left(a,p)
#define TT_DICT_SIZE(a,p) ((uint32_t) tt_right(a,p))
#ifdef __cplusplus
}