From ce496ceadede6b467f6ca85a94ba202fec414283 Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones
Date: Tue, 30 Jun 2015 14:48:09 -0400
Subject: [PATCH] Start on critbit

---
 critbit.c  | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 critbit.h  |  33 +++++++
 treetrie.h |   2 +-
 3 files changed, 270 insertions(+), 1 deletion(-)
 create mode 100644 critbit.c
 create mode 100644 critbit.h

diff --git a/critbit.c b/critbit.c
new file mode 100644
index 0000000..e5ce991
--- /dev/null
+++ b/critbit.c
@@ -0,0 +1,236 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "treetrie.h"
+#include "critbit.h"
+
+/* We abuse the representation of leaves (being a pair of a GC'd
+   pointer and a non-GC'd integer) as the representation of our dict
+   trees, too, since we need a place to store the size of the dict. */
+#define TT_TAG_DICT TT_TAG_LEAF
+#define TT_DICT_ROOT(a,p) TT_LEAF_TRIE(a,p)
+#define TT_DICT_SIZE(a,p) TT_LEAF_ATOM(a,p)
+#define tt_cons_dict(a,root,size) tt_cons_leaf(a,root,size)
+
+/* Returns the number of keys recorded in dict `t`. The empty dict
+   (TT_EMPTY) has size 0. */
+int tt_dict_size(tt_arena_t *a, tt_node_ptr_t t) {
+  if (t == TT_EMPTY) {
+    return 0;
+  } else {
+    assert(tt_ptr_tag(t) == TT_TAG_DICT);
+    return TT_DICT_SIZE(a,t);
+  }
+}
+
+/* Yields 1 if bit number `bit` of `key` is set (bit 0 being the least
+   significant), and 0 otherwise. The shift is performed on an unsigned
+   value because `1 << 31` overflows a 32-bit signed int. `bit` must be
+   smaller than the width of unsigned int. */
+static inline int bit_ref(tt_atom_t key, unsigned int bit) {
+  return (int) (((unsigned int) key >> bit) & 1u);
+}
+
+/* Returns the value stored under `key` in dict `t`, or TT_NO_PTR if
+   there is none (in particular when `t` is the empty dict). */
+tt_node_ptr_t tt_dict_lookup(tt_arena_t *a, tt_node_ptr_t t, tt_atom_t key) {
+  if (t == TT_EMPTY) {
+    return TT_NO_PTR;
+  }
+
+  assert(tt_ptr_tag(t) == TT_TAG_DICT);
+  t = TT_DICT_ROOT(a,t);
+
+  /* Descend by the bit each branch tests. Untested bits are skipped,
+     so the leaf reached still needs a full key comparison. */
+  while (tt_ptr_tag(t) == TT_TAG_NODE) {
+    t = bit_ref(key, TT_NODE_INDEX(a, t)) ? TT_NODE_ONE(a, t) : TT_NODE_ZERO(a, t);
+  }
+
+  assert(tt_ptr_tag(t) == TT_TAG_LEAF);
+  return (TT_LEAF_ATOM(a, t) == key) ? TT_LEAF_TRIE(a, t) : TT_NO_PTR;
+}
+
+/* Builds a branch testing bit `index` that separates a fresh leaf for
+   (`key`, `trie`) from the existing subtree `sib`. The fresh leaf is
+   placed on the side `key` selects at that bit. */
+static tt_node_ptr_t splice_key(tt_arena_t *a,
+                                tt_atom_t key,
+                                tt_node_ptr_t trie,
+                                unsigned int index,
+                                tt_node_ptr_t sib)
+{
+  if (bit_ref(key, index)) {
+    return tt_cons_node(a, index, sib, tt_cons_leaf(a, trie, key));
+  } else {
+    return tt_cons_node(a, index, tt_cons_leaf(a, trie, key), sib);
+  }
+}
+
+/* Inserts (`key`, `trie`) into the subtree rooted at `n` without
+   modifying existing nodes.
+
+   Returns -1 when the insertion was completed at or below `n`. Then
+   `*result` is the replacement for `n` (`n` itself if nothing
+   changed) and `*added` is 1 if a new key was introduced, or 0 if
+   `key` was already present.
+
+   Otherwise returns the lowest-numbered bit at which `key` differs
+   from the leaf the walk reached. That bit is smaller than the index
+   of `n` (if `n` is a branch), so the new branch belongs above `n`
+   and placing it is left to the caller. Branch indices grow from the
+   root towards the leaves. */
+static int set_walk(tt_arena_t *a,
+                    tt_atom_t key,
+                    tt_node_ptr_t trie,
+                    tt_node_ptr_t n,
+                    tt_node_ptr_t *result,
+                    int *added)
+{
+  switch (tt_ptr_tag(n)) {
+    case TT_TAG_LEAF: {
+      tt_atom_t differences = key ^ TT_LEAF_ATOM(a, n);
+      if (differences == 0) {
+        /* Keys identical. If values identical, just return this node. */
+        *added = 0;
+        if (TT_LEAF_TRIE(a, n) == trie) {
+          *result = n;
+        } else {
+          *result = tt_cons_leaf(a, trie, key);
+        }
+        return -1;
+      } else {
+        /* Lowest set bit of the XOR, in bit_ref's numbering; the XOR
+           is nonzero here, as __builtin_ctz requires. */
+        return __builtin_ctz((unsigned int) differences);
+      }
+    }
+    case TT_TAG_NODE: {
+      int index = TT_NODE_INDEX(a, n);
+      if (bit_ref(key, index)) {
+        tt_node_ptr_t one = TT_NODE_ONE(a, n);
+        int first_differing_bit = set_walk(a, key, trie, one, result, added);
+        if (first_differing_bit == -1) {
+          if (*result == one) {
+            *result = n;
+          } else {
+            *result = tt_cons_node(a, index, TT_NODE_ZERO(a, n), *result);
+          }
+          return -1;
+        } else if (first_differing_bit < index) {
+          return first_differing_bit;
+        } else {
+          *added = 1;
+          *result = tt_cons_node(a,
+                                 index,
+                                 TT_NODE_ZERO(a, n),
+                                 splice_key(a, key, trie, first_differing_bit, one));
+          return -1;
+        }
+      } else {
+        tt_node_ptr_t zero = TT_NODE_ZERO(a, n);
+        int first_differing_bit = set_walk(a, key, trie, zero, result, added);
+        if (first_differing_bit == -1) {
+          if (*result == zero) {
+            *result = n;
+          } else {
+            *result = tt_cons_node(a, index, *result, TT_NODE_ONE(a, n));
+          }
+          return -1;
+        } else if (first_differing_bit < index) {
+          return first_differing_bit;
+        } else {
+          *added = 1;
+          *result = tt_cons_node(a,
+                                 index,
+                                 splice_key(a, key, trie, first_differing_bit, zero),
+                                 TT_NODE_ONE(a, n));
+          return -1;
+        }
+      }
+    }
+    default:
+      assert(0);
+      /* Keep the outcome defined when assertions are compiled out. */
+      *result = TT_NO_PTR;
+      *added = 0;
+      return -1;
+  }
+}
+
+/* Returns a dict like `t` in which `key` maps to `trie`, or `t` itself
+   if it already does. The recorded size grows only when `key` was not
+   present before.
+
+   NOTE(review): results of tt_cons_leaf/tt_cons_node are not checked
+   individually for TT_NO_PTR; only a failure that surfaces as the
+   new root is reported. Confirm how those constructors fail. */
+tt_node_ptr_t tt_dict_set(tt_arena_t *a,
+                          tt_node_ptr_t t,
+                          tt_atom_t key,
+                          tt_node_ptr_t trie)
+{
+  if (t == TT_EMPTY) {
+    return tt_cons_dict(a, tt_cons_leaf(a, trie, key), 1);
+  }
+
+  assert(tt_ptr_tag(t) == TT_TAG_DICT);
+
+  {
+    tt_node_ptr_t root = TT_DICT_ROOT(a,t);
+    tt_node_ptr_t result = TT_NO_PTR;
+    int added = 0;
+    int first_differing_bit = set_walk(a, key, trie, root, &result, &added);
+    if (first_differing_bit != -1) {
+      /* The differing bit precedes every index in the tree: new root. */
+      result = splice_key(a, key, trie, first_differing_bit, root);
+      added = 1;
+    }
+    if (result == TT_NO_PTR) {
+      return TT_NO_PTR;
+    }
+    if (result == root) {
+      return t;
+    }
+    return tt_cons_dict(a, result, TT_DICT_SIZE(a,t) + added);
+  }
+}
+
+/* Calls `f` once for every leaf beneath `n`, visiting the zero branch
+   of each node before its one branch. */
+void tt_foreach1(tt_arena_t *a,
+                 tt_node_ptr_t n,
+                 void *context,
+                 void (*f)(void *context, tt_atom_t key, tt_node_ptr_t trie))
+{
+  switch (tt_ptr_tag(n)) {
+    case TT_TAG_LEAF: {
+      f(context, TT_LEAF_ATOM(a,n), TT_LEAF_TRIE(a,n));
+      break;
+    }
+    case TT_TAG_NODE: {
+      tt_foreach1(a, TT_NODE_ZERO(a,n), context, f);
+      tt_foreach1(a, TT_NODE_ONE(a,n), context, f);
+      break;
+    }
+    default:
+      assert(0);
+  }
+}
+
+/* Calls `f(context, key, value)` for every entry of dict `t`. Does
+   nothing for the empty dict. */
+void tt_foreach(tt_arena_t *a,
+                tt_node_ptr_t t,
+                void *context,
+                void (*f)(void *, tt_atom_t key, tt_node_ptr_t trie))
+{
+  if (t != TT_EMPTY) {
+    assert(tt_ptr_tag(t) == TT_TAG_DICT);
+    tt_foreach1(a, TT_DICT_ROOT(a,t), context, f);
+  }
+}
diff --git a/critbit.h b/critbit.h
new file mode 100644
index 0000000..2dd6d0e
--- /dev/null
+++ b/critbit.h
@@ -0,0 +1,33 @@
+#ifndef CRITBIT_H_2809050e_f3c9_4363_8208_673da32e70f9
+#define CRITBIT_H_2809050e_f3c9_4363_8208_673da32e70f9
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Returns the number of keys in dict `t`; 0 for the empty dict. */
+extern int tt_dict_size(tt_arena_t *a, tt_node_ptr_t t);
+
+/* Returns TT_NO_PTR when key not present. Does not manipulate references. */
+extern tt_node_ptr_t tt_dict_lookup(tt_arena_t *a, tt_node_ptr_t t, tt_atom_t key);
+
+/* Returns TT_NO_PTR when allocation failed. Otherwise, yields a dict
+   (`t` itself when `key` already maps to `trie`). Grabs `trie` if
+   required. */
+extern tt_node_ptr_t tt_dict_set(tt_arena_t *a,
+                                 tt_node_ptr_t t,
+                                 tt_atom_t key,
+                                 tt_node_ptr_t trie);
+
+/* Calls `f(context, key, value)` once per entry of dict `t`; does
+   nothing for the empty dict. */
+extern void tt_foreach(tt_arena_t *a,
+                       tt_node_ptr_t t,
+                       void *context,
+                       void (*f)(void *, tt_atom_t key, tt_node_ptr_t trie));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/treetrie.h b/treetrie.h
index 76af74c..3b6c767 100644
--- a/treetrie.h
+++ b/treetrie.h
@@ -14,7 +14,7 @@ typedef enum tt_tag_t {
 
 typedef enum tt_reserved_node_idx_t {
   TT_NO_IDX = 0, /* invalid node index, means "no node at all", not even empty */
-  TT_EMPTY_IDX, /* empty treetrie */
+  TT_EMPTY_IDX, /* empty treetrie AND empty dict */
   TT_OK_IDX, /* terminal marker */
   TT_FIRST_VALID_NODE_IDX