treetrie-2015/critbit.c

255 lines
6.3 KiB
C

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <errno.h>
#include <assert.h>
#include "treetrie.h"
#include "critbit.h"
/* We abuse the representation of leaves (being a pair of a GC'd
pointer and a non-GC'd integer) as the representation of our dict
trees, too, since we need a place to store the size of the dict. */
/* Tag carried by a dict pointer; it is the very same tag as a leaf's. */
#define TT_TAG_DICT TT_TAG_LEAF
/* Root of the dict's crit-bit tree, kept in the leaf's pointer (trie) slot. */
#define TT_DICT_ROOT(a,p) TT_LEAF_TRIE(a,p)
/* Size of the dict, kept in the leaf's integer (atom) slot. */
#define TT_DICT_SIZE(a,p) TT_LEAF_ATOM(a,p)
/* Build a dict from a tree root and a size; this is just a leaf cons. */
#define tt_cons_dict(a,root,size) tt_cons_leaf(a,root,size)
/* Evaluate `v` exactly once. If the value equals TT_NO_PTR, execute
   `return TT_NO_PTR;` in the function that contains the macro use;
   otherwise the macro's value is the value of `v`.
   Written as a `({ ... })` statement expression (a GNU C extension), which
   is what allows it to appear inside an argument list. Because of the hidden
   `return`, it is only usable in functions whose return type can carry
   TT_NO_PTR. */
#define RET_IF_NO_PTR(v) \
({ tt_node_ptr_t ___w = (v); if (___w == TT_NO_PTR) return TT_NO_PTR; ___w; })
/* Size of dict `t`.
 *
 * Returns 0 for TT_EMPTY; otherwise `t` must carry the dict tag and the value
 * held in its size slot is returned. */
int tt_dict_size(tt_arena_t *a, tt_node_ptr_t t) {
  if (t != TT_EMPTY) {
    assert(tt_ptr_tag(t) == TT_TAG_DICT);
    return TT_DICT_SIZE(a,t);
  }
  return 0;
}
/* Test a single bit of an atom.
 *
 * key: the atom to inspect.
 * bit: zero-based bit position counted from the least significant bit; it
 *      must be smaller than the width of key's (promoted) type.
 *
 * Returns 1 when the bit is set and 0 when it is clear. Every caller in this
 * file uses the result purely as a truth value.
 *
 * The key is shifted down instead of shifting a literal 1 up: `1 << bit` is
 * a shift of a signed int, which is undefined when it reaches the sign bit
 * and can never address a bit beyond the width of int. Masking with 1 after
 * the shift also keeps the result representable in the int return type. */
static inline int bit_ref(tt_atom_t key, unsigned int bit) {
  return (int) ((key >> bit) & 1);
}
/* Look up `key` in dict `t`.
 *
 * Returns the pointer stored alongside `key`, or TT_NO_PTR when `t` is
 * TT_EMPTY or when the leaf reached by following the key's bits holds a
 * different key. */
tt_node_ptr_t tt_dict_get(tt_arena_t *a, tt_node_ptr_t t, tt_atom_t key) {
  tt_node_ptr_t cursor;

  if (t == TT_EMPTY) {
    return TT_NO_PTR;
  }

  assert(tt_ptr_tag(t) == TT_TAG_DICT);

  /* Descend from the root, letting the key's bit at each node's index pick
     the branch, until something other than an interior node is reached. */
  for (cursor = TT_DICT_ROOT(a,t); tt_ptr_tag(cursor) == TT_TAG_NODE; ) {
    if (bit_ref(key, TT_NODE_INDEX(a, cursor))) {
      cursor = TT_NODE_ONE(a, cursor);
    } else {
      cursor = TT_NODE_ZERO(a, cursor);
    }
  }

  assert(tt_ptr_tag(cursor) == TT_TAG_LEAF);

  /* Only the tested bits were compared on the way down, so the full key
     still has to be checked against the leaf. */
  if (TT_LEAF_ATOM(a, cursor) != key) {
    return TT_NO_PTR;
  }
  return TT_LEAF_TRIE(a, cursor);
}
/* Build a new interior node that tests bit `index` and joins a fresh leaf
 * for (`key`, `trie`) with the existing subtree `sib`.
 *
 * The fresh leaf goes on the "one" side when `key` has bit `index` set and
 * on the "zero" side otherwise; `sib` takes the remaining side.
 *
 * Returns TT_NO_PTR if consing the leaf yields TT_NO_PTR; otherwise returns
 * whatever tt_cons_node returns. */
static tt_node_ptr_t splice_key(tt_arena_t *a,
                                tt_atom_t key,
                                tt_node_ptr_t trie,
                                unsigned int index,
                                tt_node_ptr_t sib)
{
  tt_node_ptr_t fresh_leaf = tt_cons_leaf(a, trie, key);

  if (fresh_leaf == TT_NO_PTR) {
    return TT_NO_PTR;
  }

  return bit_ref(key, index)
    ? tt_cons_node(a, index, sib, fresh_leaf)
    : tt_cons_node(a, index, fresh_leaf, sib);
}
/* Recursive worker for tt_dict_set: store (`key`, `trie`) somewhere in the
 * subtree rooted at `n`.
 *
 * Return value and `*result` together form the contract:
 *
 *  - Returns -1 when this call has dealt with the update. `*result` is then
 *    the subtree that should stand in for `n`: `n` itself when nothing
 *    changed, a newly consed subtree, or TT_NO_PTR when one of the cons
 *    calls returned TT_NO_PTR.
 *
 *  - Returns a non-negative bit index when `key` differs from the leaf that
 *    was reached and the new leaf has to be spliced in at or above `n`. The
 *    index is that of the most significant bit in which the two keys
 *    differ. `*result` is not written in this case.
 *
 * An interior node absorbs a pending splice from its child when the
 * differing bit is not above its own index; otherwise it hands the bit
 * index further up. */
static int set_walk(tt_arena_t *a,
                    tt_atom_t key,
                    tt_node_ptr_t trie,
                    tt_node_ptr_t n,
                    tt_node_ptr_t *result)
{
  switch (tt_ptr_tag(n)) {
    case TT_TAG_LEAF: {
      tt_atom_t differences = key ^ TT_LEAF_ATOM(a, n);
      if (differences == 0) {
        /* Keys identical. If values identical, just return this node;
           otherwise cons a replacement leaf carrying the new value. */
        if (TT_LEAF_TRIE(a, n) == trie) {
          *result = n;
        } else {
          *result = tt_cons_leaf(a, trie, key);
        }
        return -1;
      } else {
        /* NOTE(review): __builtin_clz operates on unsigned int, so this
           arithmetic presumes tt_atom_t has the same width as unsigned int
           -- confirm against treetrie.h. */
        int leading_zeros = __builtin_clz(differences);
        int first_differing_bit = (8 * sizeof(differences)) - leading_zeros - 1;
        return first_differing_bit;
      }
    }

    case TT_TAG_NODE: {
      int index = TT_NODE_INDEX(a, n);
      if (bit_ref(key, index)) {
        tt_node_ptr_t one = TT_NODE_ONE(a, n);
        int first_differing_bit = set_walk(a, key, trie, one, result);
        if (first_differing_bit == -1) {
          if (*result == one) {
            /* Child unchanged, so this node is unchanged too. */
            *result = n;
          } else if (*result == TT_NO_PTR) {
            /* Do nothing: pass the failure up as it is. */
          } else {
            *result = tt_cons_node(a, index, TT_NODE_ZERO(a, n), *result);
          }
          return -1;
        } else if (first_differing_bit > index) {
          /* The splice point lies above this node. */
          return first_differing_bit;
        } else {
          /* Splice between this node and its "one" child, then rebuild
             this node around the spliced subtree. */
          *result = splice_key(a, key, trie, first_differing_bit, one);
          if (*result != TT_NO_PTR) {
            *result = tt_cons_node(a, index, TT_NODE_ZERO(a, n), *result);
          }
          return -1;
        }
      } else {
        tt_node_ptr_t zero = TT_NODE_ZERO(a, n);
        int first_differing_bit = set_walk(a, key, trie, zero, result);
        if (first_differing_bit == -1) {
          if (*result == zero) {
            /* Child unchanged, so this node is unchanged too. */
            *result = n;
          } else if (*result == TT_NO_PTR) {
            /* Do nothing: pass the failure up as it is. */
          } else {
            *result = tt_cons_node(a, index, *result, TT_NODE_ONE(a, n));
          }
          return -1;
        } else if (first_differing_bit > index) {
          /* The splice point lies above this node. */
          return first_differing_bit;
        } else {
          /* Splice between this node and its "zero" child, then rebuild
             this node around the spliced subtree. */
          *result = splice_key(a, key, trie, first_differing_bit, zero);
          if (*result != TT_NO_PTR) {
            *result = tt_cons_node(a, index, *result, TT_NODE_ONE(a, n));
          }
          return -1;
        }
      }
    }

    default:
      assert(0);
      /* With NDEBUG the assert vanishes; flowing off the end of a non-void
         function whose value is used is undefined behaviour. Report the
         unexpected tag as a failed update instead. */
      *result = TT_NO_PTR;
      return -1;
  }
}
/* Report whether the crit-bit tree rooted at `n` already holds `key`.
 *
 * Follows the key's bits down to a leaf and compares the full key there.
 * Returns non-zero when the leaf's key equals `key`, zero otherwise. */
static int dict_tree_has_key(tt_arena_t *a, tt_node_ptr_t n, tt_atom_t key) {
  while (tt_ptr_tag(n) == TT_TAG_NODE) {
    n = bit_ref(key, TT_NODE_INDEX(a, n)) ? TT_NODE_ONE(a, n) : TT_NODE_ZERO(a, n);
  }
  assert(tt_ptr_tag(n) == TT_TAG_LEAF);
  return TT_LEAF_ATOM(a, n) == key;
}

/* Produce a dict that maps `key` to `trie` and otherwise agrees with `t`.
 *
 * a:    arena in which nodes are consed.
 * t:    the starting dict, or TT_EMPTY.
 * key:  key to set.
 * trie: value to store under `key`.
 *
 * Returns the resulting dict, or TT_NO_PTR when any of the cons calls
 * involved returns TT_NO_PTR.
 *
 * The recorded size grows by one only when `key` was not already present;
 * overwriting an existing key (with the same or a different value) leaves
 * the size as it was. */
tt_node_ptr_t tt_dict_set(tt_arena_t *a,
                          tt_node_ptr_t t,
                          tt_atom_t key,
                          tt_node_ptr_t trie)
{
  if (t == TT_EMPTY) {
    return tt_cons_dict(a, RET_IF_NO_PTR(tt_cons_leaf(a, trie, key)), 1);
  }

  assert(tt_ptr_tag(t) == TT_TAG_DICT);

  {
    /* set_walk reports -1 both for "replaced in place" and for "spliced in
       further down", so whether a key is being added has to be determined
       separately, before the tree is rebuilt. */
    int added_count = dict_tree_has_key(a, TT_DICT_ROOT(a,t), key) ? 0 : 1;
    tt_node_ptr_t result = TT_NO_PTR;
    int first_differing_bit = set_walk(a, key, trie, TT_DICT_ROOT(a,t), &result);

    if (first_differing_bit != -1) {
      /* No node inside the tree absorbed the splice, so the new leaf is
         joined with the whole old tree at the root. */
      result = splice_key(a, key, trie, first_differing_bit, TT_DICT_ROOT(a,t));
    }

    if (result == TT_NO_PTR) {
      return TT_NO_PTR;
    } else {
      return tt_cons_dict(a, result, TT_DICT_SIZE(a,t) + added_count);
    }
  }
}
/* Recursive worker for tt_dict_remove: remove `key` from the subtree rooted
 * at `n`.
 *
 * a:             arena in which nodes are consed.
 * n:             subtree root; a leaf or an interior node.
 * key:           key to remove.
 * removed_count: set to 1 when a leaf holding `key` is found; never written
 *                otherwise, so the caller must initialise it.
 *
 * Returns:
 *  - TT_EMPTY when `n` is a leaf holding `key`;
 *  - `n` itself when `n` is a leaf holding some other key;
 *  - for an interior node, the sibling subtree when the branch selected by
 *    `key` came back as TT_EMPTY, and otherwise a node consed from the
 *    processed branch and the untouched sibling (this cons happens even if
 *    the branch came back unchanged);
 *  - TT_NO_PTR when a recursive call returned TT_NO_PTR, or when `n`
 *    carries an unexpected tag and assertions are compiled out. */
tt_node_ptr_t tt_dict_remove1(tt_arena_t *a,
                              tt_node_ptr_t n,
                              tt_atom_t key,
                              int *removed_count)
{
  switch (tt_ptr_tag(n)) {
    case TT_TAG_LEAF: {
      if (TT_LEAF_ATOM(a,n) == key) {
        *removed_count = 1;
        return TT_EMPTY;
      } else {
        return n;
      }
    }

    case TT_TAG_NODE: {
      int index = TT_NODE_INDEX(a,n);
      if (bit_ref(key, index)) {
        tt_node_ptr_t n1 =
          RET_IF_NO_PTR(tt_dict_remove1(a, TT_NODE_ONE(a,n), key, removed_count));
        if (n1 == TT_EMPTY) {
          /* The "one" branch vanished; the node collapses to its sibling. */
          return TT_NODE_ZERO(a,n);
        } else {
          return tt_cons_node(a, index, TT_NODE_ZERO(a,n), n1);
        }
      } else {
        tt_node_ptr_t n1 =
          RET_IF_NO_PTR(tt_dict_remove1(a, TT_NODE_ZERO(a,n), key, removed_count));
        if (n1 == TT_EMPTY) {
          /* The "zero" branch vanished; the node collapses to its sibling. */
          return TT_NODE_ONE(a,n);
        } else {
          return tt_cons_node(a, index, n1, TT_NODE_ONE(a,n));
        }
      }
    }

    default:
      assert(0);
      /* With NDEBUG the assert vanishes; flowing off the end of a non-void
         function whose value is used is undefined behaviour. Report the
         unexpected tag through the failure value instead. */
      return TT_NO_PTR;
  }
}
/* Produce a dict equal to `t` with `key` removed.
 *
 * Returns TT_EMPTY when `t` is TT_EMPTY or when the removal leaves no
 * entries, TT_NO_PTR when the tree walk reports TT_NO_PTR, and otherwise a
 * dict consed from the new root with the size reduced by the number of
 * entries removed. */
tt_node_ptr_t tt_dict_remove(tt_arena_t *a,
                             tt_node_ptr_t t,
                             tt_atom_t key)
{
  int removed_count = 0;
  tt_node_ptr_t new_root;

  if (t == TT_EMPTY) {
    return TT_EMPTY;
  }

  assert(tt_ptr_tag(t) == TT_TAG_DICT);

  new_root = tt_dict_remove1(a, TT_DICT_ROOT(a,t), key, &removed_count);
  if (new_root == TT_NO_PTR) {
    return TT_NO_PTR;
  }
  if (new_root == TT_EMPTY) {
    return TT_EMPTY;
  }
  return tt_cons_dict(a, new_root, TT_DICT_SIZE(a,t) - removed_count);
}
/* Call `f(context, key, trie)` for every leaf in the subtree rooted at `n`.
 *
 * At each interior node the "zero" branch is visited in full before the
 * "one" branch. The "one" branch is handled by looping rather than by a
 * second recursive call. */
void tt_dict_foreach1(tt_arena_t *a,
                      tt_node_ptr_t n,
                      void *context,
                      void (*f)(void *context, tt_atom_t key, tt_node_ptr_t trie))
{
  for (;;) {
    switch (tt_ptr_tag(n)) {
      case TT_TAG_LEAF:
        f(context, TT_LEAF_ATOM(a,n), TT_LEAF_TRIE(a,n));
        return;

      case TT_TAG_NODE:
        tt_dict_foreach1(a, TT_NODE_ZERO(a,n), context, f);
        n = TT_NODE_ONE(a,n);
        continue;

      default:
        assert(0);
        return;
    }
  }
}
/* Call `f(context, key, trie)` once for each entry of dict `t`.
 *
 * Does nothing when `t` is TT_EMPTY; otherwise `t` must carry the dict tag
 * and its tree is walked from the root. */
void tt_dict_foreach(tt_arena_t *a,
                     tt_node_ptr_t t,
                     void *context,
                     void (*f)(void *, tt_atom_t key, tt_node_ptr_t trie))
{
  if (t == TT_EMPTY) {
    return;
  }

  assert(tt_ptr_tag(t) == TT_TAG_DICT);
  tt_dict_foreach1(a, TT_DICT_ROOT(a,t), context, f);
}