Initial commit
This commit is contained in:
commit
790f63b309
|
@ -0,0 +1,102 @@
|
||||||
|
# TreeTrie
|
||||||
|
|
||||||
|
An implementation of a trie map that can hold (patterns over) ordered
|
||||||
|
trees in its keys.
|
||||||
|
|
||||||
|
Uses djb's "critbit" structure for each branch node.
|
||||||
|
|
||||||
|
There are two types to represent:
|
||||||
|
|
||||||
|
Trie = Ok
|
||||||
|
| Tl Trie
|
||||||
|
| Br Trie Branch
|
||||||
|
|
||||||
|
The first `Trie` argument to `Br` is the wildcard case.
|
||||||
|
|
||||||
|
Branch = Mt
|
||||||
|
| Lf Trie Atom
|
||||||
|
| Nd Int Branch Branch
|
||||||
|
|
||||||
|
The `Branch` type is the "critbit" type, extended with a value slot to
|
||||||
|
make it into a map. We put the `Atom` last in a `Lf`, because we might
|
||||||
|
want to use 32-bit pointers internal to our structures, but the hosted
|
||||||
|
atoms might need 64-bit pointers.
|
||||||
|
|
||||||
|
The empty `Trie`, which we'll write `empty`, is a cyclic structure (!):
|
||||||
|
|
||||||
|
empty = Br empty Mt
|
||||||
|
|
||||||
|
It's the only such structure, so we might want to actually represent
|
||||||
|
it as a distinct constant instead.
|
||||||
|
|
||||||
|
Because `Ok` and `Mt` are nullary constants, we can represent them
|
||||||
|
with special bit-patterns. Let's choose `NULL` for this. They don't
|
||||||
|
need to be distinct, since they inhabit different types.
|
||||||
|
|
||||||
|
This leaves us with four cases to represent, two in each type. While
|
||||||
|
we could represent these using a single bit, we will instead use two
|
||||||
|
bits, for debuggability.
|
||||||
|
|
||||||
|
33222222222211111111110000000000
|
||||||
|
10987654321098765432109876543210
|
||||||
|
|--------------------------------|
|
||||||
|
|
||||||
|
|--------------------------------|
|
||||||
|
| Trie pointer 00| Br case
|
||||||
|
|--------------------------------|
|
||||||
|
| Branch pointer 00|
|
||||||
|
|--------------------------------|
|
||||||
|
|
||||||
|
|--------------------------------|
|
||||||
|
| Trie pointer 01| Tl case
|
||||||
|
|--------------------------------|
|
||||||
|
|
||||||
|
|--------------------------------|
|
||||||
|
| Trie pointer 10| Lf case
|
||||||
|
|--------------------------------|
|
||||||
|
| Atom pointer |
|
||||||
|
| (may be 64 bits long) |
|
||||||
|
|--------------------------------|
|
||||||
|
|
||||||
|
|--------------------------------|
|
||||||
|
| Int 11| Nd case
|
||||||
|
|--------------------------------|
|
||||||
|
| Branch pointer 00|
|
||||||
|
|--------------------------------|
|
||||||
|
| Branch pointer 00|
|
||||||
|
|--------------------------------|
|
||||||
|
|
||||||
|
We use a weak hash table to index all our objects, because we need to
|
||||||
|
hash-cons them.
|
||||||
|
|
||||||
|
Perhaps it could be a Robin-Hood hashtable with backward shift
|
||||||
|
deletion,
|
||||||
|
<http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/>.
|
||||||
|
|
||||||
|
Hmm. For critbit to work, we need to be able to examine the
|
||||||
|
bitpatterns of atoms. However, if those bitpatterns represent pointers
|
||||||
|
to Racket-level objects, and Racket uses a moving collector, then not
|
||||||
|
only will our atom pointers become out of date after a GC, but even if we
|
||||||
|
could update them we'd have to reindex each critbit tree.
|
||||||
|
|
||||||
|
So we probably want `Atom` to instead be some index into a *different*
|
||||||
|
table. (Another level of indirection!) While any (referenced) `Trie`
|
||||||
|
holds an indirect reference to a given `Atom`, the underlying Racket
|
||||||
|
object should be preserved across collections. We will need to find
|
||||||
|
the `Atom` for a given Racket object (based on `equal?` rather than
|
||||||
|
`eq?`), and the Racket object for a given `Atom` (an easy table
|
||||||
|
lookup). We'll want to not hold an `Atom`'s Racket object longer than
|
||||||
|
necessary.
|
||||||
|
|
||||||
|
It might be better to have the tag bits in each pointer to an object,
|
||||||
|
rather than in the object header: the tag bits would then identify
|
||||||
|
which of four separate heaps (each with its own object size) is being
|
||||||
|
referred to.
|
||||||
|
|
||||||
|
Apart from `empty` (see above), our data structures are never cyclic.
|
||||||
|
|
||||||
|
Supporting direct atoms is probably a sensible thing to do, so that
|
||||||
|
e.g. fixnums map to `Atom` without having to take up space in the
|
||||||
|
table. In fact, the host language should probably allocate and manage
|
||||||
|
the `Atom`-to-host-object table itself! That way, our code can be
|
||||||
|
completely ignorant of that kind of detail.
|
|
@ -0,0 +1,75 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person
|
||||||
|
obtaining a copy of this software and associated documentation
|
||||||
|
files (the "Software"), to deal in the Software without
|
||||||
|
restriction, including without limitation the rights to use, copy,
|
||||||
|
modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||||
|
of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "fasthash.h"

#include <string.h>
|
||||||
|
|
||||||
|
// Compression function for Merkle-Damgard construction.
// This function is generated using the framework provided.
//
// Implemented as a static inline function rather than a GNU
// statement-expression macro so that it is valid ISO C, type-checked,
// and does not modify its argument. Returns the mixed value of h.
static inline uint64_t mix(uint64_t h)
{
	h ^= h >> 23;
	h *= 0x2127599bf4325c37ULL;
	h ^= h >> 47;
	return h;
}
|
||||||
|
|
||||||
|
/* Mixing step applied to every 64-bit lane and to the final state. */
static inline uint64_t fasthash_mix64(uint64_t h)
{
	h ^= h >> 23;
	h *= 0x2127599bf4325c37ULL;
	h ^= h >> 47;
	return h;
}

/*
 * fasthash64 - hash `len` bytes starting at `buf`, seeded with `seed`.
 *
 * The input is consumed as native-endian 64-bit words followed by a
 * tail of up to seven bytes. Words are fetched with memcpy() so that
 * `buf` may have any alignment and no object is accessed through an
 * incompatible pointer type.
 */
uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
{
	const uint64_t m = 0x880355f21e6d1965ULL;
	const unsigned char *pos = (const unsigned char *)buf;
	/* End of the whole-word region: len rounded down to a multiple of 8. */
	const unsigned char *end = pos + (len & ~(size_t)7);
	uint64_t h = seed ^ (len * m);
	uint64_t v;

	for (; pos != end; pos += sizeof v) {
		memcpy(&v, pos, sizeof v);
		h ^= fasthash_mix64(v);
		h *= m;
	}

	/* Fold the remaining 1..7 bytes into one word, lowest byte first. */
	v = 0;
	switch (len & 7) {
	case 7: v ^= (uint64_t)pos[6] << 48; /* fallthrough */
	case 6: v ^= (uint64_t)pos[5] << 40; /* fallthrough */
	case 5: v ^= (uint64_t)pos[4] << 32; /* fallthrough */
	case 4: v ^= (uint64_t)pos[3] << 24; /* fallthrough */
	case 3: v ^= (uint64_t)pos[2] << 16; /* fallthrough */
	case 2: v ^= (uint64_t)pos[1] << 8;  /* fallthrough */
	case 1: v ^= (uint64_t)pos[0];
		h ^= fasthash_mix64(v);
		h *= m;
		break;
	default:
		/* len is a multiple of 8: no tail to process. */
		break;
	}

	return fasthash_mix64(h);
}
|
||||||
|
|
||||||
|
/*
 * 32-bit variant: computes the 64-bit hash and folds it down to 32 bits.
 */
uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
{
	const uint64_t wide = fasthash64(buf, len, seed);

	// Subtracting the upper half from the full value reduces the
	// 64-bit hashcode to a Fermat residue, so the result keeps
	// information from both the higher and lower parts of the hashcode.
	const uint64_t folded = wide - (wide >> 32);

	return (uint32_t)folded;
}
|
|
@ -0,0 +1,56 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person
|
||||||
|
obtaining a copy of this software and associated documentation
|
||||||
|
files (the "Software"), to deal in the Software without
|
||||||
|
restriction, including without limitation the rights to use, copy,
|
||||||
|
modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||||
|
of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _FASTHASH_H
|
||||||
|
#define _FASTHASH_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* fasthash32 - 32-bit implementation of fasthash
|
||||||
|
* @buf: data buffer
|
||||||
|
* @len: data size
|
||||||
|
* @seed: the seed
|
||||||
|
*/
|
||||||
|
uint32_t fasthash32(const void *buf, size_t len, uint32_t seed);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* fasthash64 - 64-bit implementation of fasthash
|
||||||
|
* @buf: data buffer
|
||||||
|
* @len: data size
|
||||||
|
* @seed: the seed
|
||||||
|
*/
|
||||||
|
uint64_t fasthash64(const void *buf, size_t len, uint64_t seed);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,80 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "fasthash.h"
|
||||||
|
#include "treetrie.h"
|
||||||
|
|
||||||
|
/* Debug helper: prints the arena's hash-table counters followed by one
   line for every occupied table slot (slot, node index, probe distance,
   refcount, and the tag-specific payload). */
static void dump_arena(tt_arena_t *a) {
  int slot;

  printf("max_probe: %u\n", a->max_probe);
  printf("live_count: %u\n", a->live_count);
  printf("table_length: %u\n", a->table_length);

  for (slot = 0; slot < a->table_length; slot++) {
    tt_node_idx_t n = a->table[slot];
    const tt_header_t *hdr;
    const tt_node_t *node;
    tt_hash_t h;
    int distance;

    /* Reserved indices mark an unoccupied slot: nothing to print. */
    if (n < TT_FIRST_VALID_NODE_IDX) {
      continue;
    }

    hdr = &a->headers[n];
    node = &a->nodes[n];

    /* How far the entry sits from its home slot, modulo the table size. */
    h = tt_hash_node(a, n);
    distance = slot - (h % a->table_length);
    if (distance < 0) distance += a->table_length;

    printf("%12u -> %12u: dist %d ref %d ",
           slot,
           n,
           distance,
           hdr->inuse.refcount);

    switch (hdr->inuse.tag) {
      case TT_TAG_TAIL:
        printf("tail %u\n", node->a);
        break;
      case TT_TAG_BRANCH:
        printf("branch %u %u\n", node->a, node->b);
        break;
      case TT_TAG_LEAF:
        printf("leaf %u %u\n", node->a, node->b);
        break;
      case TT_TAG_NODE:
        printf("node index %d, %u %u\n",
               hdr->inuse.index,
               node->a,
               node->b);
        break;
    }
  }
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
tt_arena_t a;
|
||||||
|
int i, outer;
|
||||||
|
tt_node_idx_t prev = TT_EMPTY;
|
||||||
|
|
||||||
|
setbuf(stdout, NULL);
|
||||||
|
tt_arena_init(&a);
|
||||||
|
|
||||||
|
for (outer = 0; outer < 10; outer++) {
|
||||||
|
tt_grab(&a, prev);
|
||||||
|
tt_drop(&a, prev);
|
||||||
|
printf("---------------------------------------- AFTER DROP of %d:\n", prev);
|
||||||
|
dump_arena(&a);
|
||||||
|
prev = TT_EMPTY;
|
||||||
|
printf("======================================== LOOP ITERATION %d\n", outer);
|
||||||
|
for (i = 0; i < 10; i++) {
|
||||||
|
tt_node_idx_t leaf = tt_arena_cons(&a,
|
||||||
|
TT_TAG_LEAF,
|
||||||
|
0,
|
||||||
|
TT_OK,
|
||||||
|
1001);
|
||||||
|
tt_node_idx_t curr = tt_arena_cons(&a,
|
||||||
|
TT_TAG_NODE,
|
||||||
|
0,
|
||||||
|
leaf,
|
||||||
|
prev);
|
||||||
|
dump_arena(&a);
|
||||||
|
prev = curr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tt_arena_done(&a);
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
|
@ -0,0 +1,238 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include "treetrie.h"
|
||||||
|
#include "fasthash.h"
|
||||||
|
|
||||||
|
static inline tt_hash_t hash(uint32_t tag,
|
||||||
|
uint32_t index,
|
||||||
|
tt_node_idx_t a,
|
||||||
|
tt_node_idx_t b)
|
||||||
|
{
|
||||||
|
uint32_t keyblock[4] = { tag,
|
||||||
|
index,
|
||||||
|
a,
|
||||||
|
b };
|
||||||
|
assert(sizeof(keyblock) == 4 * sizeof(uint32_t));
|
||||||
|
return (tt_hash_t) fasthash32(keyblock, sizeof(keyblock), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Hash of the node stored at index i, computed from its header's tag
   and index fields and its two payload words. The header is read
   through its `inuse` view. */
inline tt_hash_t tt_hash_node(tt_arena_t *a, tt_node_idx_t i) {
  const tt_header_t *hdr = &a->headers[i];
  const tt_node_t *node = &a->nodes[i];

  return hash(hdr->inuse.tag, hdr->inuse.index, node->a, node->b);
}
|
||||||
|
|
||||||
|
/* Initialises arena `a` with a 16-entry hash table and matching header
   and node arrays, and threads every non-reserved node index onto the
   free chain in ascending order.

   Returns 0 on success. On allocation failure, releases whatever was
   allocated, zeroes *a (so that no dangling pointers remain and a later
   tt_arena_done() is harmless), sets errno to ENOMEM and returns -1. */
int tt_arena_init(tt_arena_t *a) {
  int i;

  a->max_probe = 0;
  a->live_count = 0;
  a->table_length = 16;
  a->table = calloc(a->table_length, sizeof(a->table[0]));
  a->headers = calloc(a->table_length, sizeof(a->headers[0]));
  a->nodes = calloc(a->table_length, sizeof(a->nodes[0]));
  a->free_chain = TT_ERROR;

  if (a->table == NULL || a->headers == NULL || a->nodes == NULL) {
    /* free(NULL) is a no-op, so no per-pointer guard is needed. */
    free(a->table);
    free(a->headers);
    free(a->nodes);
    memset(a, 0, sizeof(*a));
    errno = ENOMEM;
    return -1;
  }

  /* Push indices from highest to lowest so the chain pops lowest-first.
     Indices below TT_FIRST_VALID_NODE_IDX are reserved markers. */
  for (i = a->table_length - 1; i >= TT_FIRST_VALID_NODE_IDX; i--) {
    a->headers[i].next_free = a->free_chain;
    a->free_chain = i;
  }

  return 0;
}
|
||||||
|
|
||||||
|
/* Placeholder for enlarging the arena; growth is not implemented yet.
   Aborts via assert in debug builds. With NDEBUG defined it reports
   failure (errno = ENOMEM, returns -1) instead of falling off the end of
   a non-void function, so the caller's error path is taken. */
static int tt_grow(tt_arena_t *a) {
  (void) a;
  assert(0);
  errno = ENOMEM;
  return -1;
}
|
||||||
|
|
||||||
|
/* Releases the three arrays owned by the arena and then zeroes the
   whole structure so no stale pointers or counters remain. */
void tt_arena_done(tt_arena_t *a) {
  free(a->nodes);
  free(a->headers);
  free(a->table);
  memset(a, 0, sizeof *a);
}
|
||||||
|
|
||||||
|
/* Returns node `ni` to the free chain and removes its entry from the
   hash table, closing the gap by shifting following entries back one
   slot until an empty slot or an entry already in its home slot is
   reached. The node's child references are left in place, except that a
   leaf's `b` slot is reset to TT_ERROR. */
static void recycle_node(tt_arena_t *a, tt_node_idx_t ni) {
  tt_hash_t h;
  int probe;

  printf("++++++++++++++++++++++++++++++++++++++++ recycling %d\n", ni);

  assert(ni >= TT_FIRST_VALID_NODE_IDX);

  /* Must be computed before the header is overwritten by next_free. */
  h = tt_hash_node(a, ni);

  if (a->headers[ni].inuse.tag == TT_TAG_LEAF) {
    a->nodes[ni].b = TT_ERROR;
  }
  a->headers[ni].next_free = a->free_chain;
  a->free_chain = ni;
  a->live_count--;

  for (probe = 0; probe < a->max_probe+1; probe++) {
    unsigned int index = (h + probe) % a->table_length;
    tt_node_idx_t candidate = a->table[index];

    printf("hunting i=%d index=%d ni=%d candidate=%d\n", probe, index, ni, candidate);
    assert(candidate >= TT_FIRST_VALID_NODE_IDX); /* Internal error if node not in table */

    if (candidate != ni) {
      continue;
    }

    /* Found the entry: shift successors back into the vacated slot. */
    for (;;) {
      unsigned int nextindex = (index + 1) % a->table_length;
      tt_node_idx_t next_n = a->table[nextindex];
      tt_hash_t next_h;
      int distance;

      a->table[index] = TT_ERROR;

      /* Next slot is empty: the gap is closed. */
      if (next_n < TT_FIRST_VALID_NODE_IDX) {
        break;
      }

      next_h = tt_hash_node(a, next_n);
      distance = nextindex - (next_h % a->table_length);
      if (distance < 0) distance += a->table_length;

      /* Next entry is already in its home slot: it must not move. */
      if (distance == 0) {
        break;
      }

      a->table[index] = next_n;
      index = nextindex;
    }
    return;
  }
}
|
||||||
|
|
||||||
|
/* Hash-consing constructor. Looks for an existing node with the same
   (tag, nindex, na, nb) and returns it if present. Otherwise takes a
   node off the free chain, grabs na (and nb unless tag is TT_TAG_LEAF),
   drops whatever children the recycled slot still referenced, fills in
   the node with refcount 0 and inserts it into the hash table using
   Robin Hood displacement. Returns TT_ERROR if the free chain is empty
   and tt_grow() fails. */
tt_node_idx_t tt_arena_cons(tt_arena_t *a,
                            uint32_t tag,
                            uint32_t nindex,
                            tt_node_idx_t na,
                            tt_node_idx_t nb)
{
  tt_hash_t h = hash(tag, nindex, na, nb);
  tt_node_idx_t node;
  tt_node_idx_t pending;
  int probe;

  /* Phase 1: search the probe sequence for an identical node. */
  for (probe = 0; probe < a->max_probe+1; probe++) {
    unsigned int slot = (h + probe) % a->table_length;
    tt_node_idx_t candidate = a->table[slot];

    printf("cons at %d candidate %d\n", probe, candidate);
    /* TODO: perhaps also bail early if we detect that the hash code changes */
    if (candidate < TT_FIRST_VALID_NODE_IDX) {
      printf("cons empty cell\n");
      break;
    }

    printf("tag %d %d\n", a->headers[candidate].inuse.tag, tag);
    printf("index %d %d\n", a->headers[candidate].inuse.index, nindex);
    printf("a %d %d\n", a->nodes[candidate].a, na);
    printf("b %d %d\n", a->nodes[candidate].b, nb);

    if (a->headers[candidate].inuse.tag != tag) continue;
    if (a->headers[candidate].inuse.index != nindex) continue;
    if (a->nodes[candidate].a != na) continue;
    if (a->nodes[candidate].b != nb) continue;

    printf("cons located correct candidate\n");
    return candidate;
  }

  printf("cons needs to alloc\n");

  /* Phase 2: obtain a free node, growing the arena if necessary. */
  if (a->free_chain == TT_ERROR && tt_grow(a) != 0) {
    return TT_ERROR;
  }

  node = a->free_chain;
  pending = node;

  /* Take the new references before releasing the old ones. */
  tt_grab(a, na);
  if (tag != TT_TAG_LEAF) tt_grab(a, nb);

  /* Unlink from the free chain, then release the children that the
     slot's previous occupant still referenced. */
  a->free_chain = a->headers[node].next_free;
  tt_drop(a, a->nodes[node].a);
  tt_drop(a, a->nodes[node].b);
  a->live_count++;

  a->headers[node].inuse.refcount = 0;
  a->headers[node].inuse.tag = tag;
  a->headers[node].inuse.index = nindex;
  a->nodes[node].a = na;
  a->nodes[node].b = nb;

  /* Phase 3: Robin Hood insertion of `pending`, starting at its home slot. */
  probe = 0;
  for (;;) {
    unsigned int slot = (h + probe) % a->table_length;
    tt_node_idx_t occupant = a->table[slot];
    tt_hash_t occupant_h;
    int distance;

    printf("checking robinhood at h %d i %d index %d candidate %d\n", h, probe, slot, occupant);

    if (probe > a->max_probe) {
      a->max_probe = probe;
    }

    if (occupant < TT_FIRST_VALID_NODE_IDX) {
      /* This slot in the table is free. */
      printf("slot free!\n");
      a->table[slot] = pending;
      break;
    }

    printf("slot not free.\n");

    occupant_h = tt_hash_node(a, occupant);
    distance = slot - (occupant_h % a->table_length);
    if (distance < 0) distance += a->table_length;

    if (distance >= probe) {
      /* Occupant is at least as far from home as we are: keep scanning. */
      probe++;
      continue;
    }

    /* Occupant is closer to its home slot: take its place and carry on
       inserting the displaced entry from the next slot. */
    a->table[slot] = pending;
    h = occupant_h;
    probe = distance + 1;
    pending = occupant;
  }

  return node;
}
|
||||||
|
|
||||||
|
/* Adds one reference to node i and returns i. Reserved indices (below
   TT_FIRST_VALID_NODE_IDX) and nodes whose count has reached
   TT_REFCOUNT_LIMIT are left untouched. */
tt_node_idx_t tt_grab(tt_arena_t *a, tt_node_idx_t i) {
  if (i < TT_FIRST_VALID_NODE_IDX) {
    return i;
  }
  if (a->headers[i].inuse.refcount >= TT_REFCOUNT_LIMIT) {
    /* Saturated counts stay saturated. */
    return i;
  }
  a->headers[i].inuse.refcount++;
  return i;
}
|
||||||
|
|
||||||
|
/* Removes one reference from node i and recycles the node when the
   count reaches zero. Reserved indices and nodes whose count is at
   TT_REFCOUNT_LIMIT are ignored.
   NOTE(review): dropping a node whose count is already 0 wraps the
   24-bit field to TT_REFCOUNT_LIMIT rather than recycling it; confirm
   callers always grab before dropping. */
void tt_drop(tt_arena_t *a, tt_node_idx_t i) {
  if (i < TT_FIRST_VALID_NODE_IDX) {
    return;
  }
  if (a->headers[i].inuse.refcount >= TT_REFCOUNT_LIMIT) {
    return;
  }

  printf("++++++++++++++++++++++++++++++ dropping %d\n", i);

  a->headers[i].inuse.refcount--;
  if (a->headers[i].inuse.refcount == 0) {
    recycle_node(a, i);
  }
}
|
|
@ -0,0 +1,80 @@
|
||||||
|
#ifndef TREETRIE_H_f55a3f6d_ef43_45d3_bec3_496a196b5db1
|
||||||
|
#define TREETRIE_H_f55a3f6d_ef43_45d3_bec3_496a196b5db1
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Kind of a node. The values are spelled out because they are stored in
   the two-bit `tag` field of tt_header_t. */
typedef enum tt_tag_t {
  TT_TAG_TAIL   = 0,
  TT_TAG_BRANCH = 1,
  TT_TAG_LEAF   = 2, /* only case where one of node a or b points to non-node */
  TT_TAG_NODE   = 3
} tt_tag_t;
|
||||||
|
|
||||||
|
/* Node index values with a fixed meaning; real nodes start at
   TT_FIRST_VALID_NODE_IDX. */
typedef enum tt_reserved_node_idx_t {
  TT_ERROR = 0, /* invalid node index, means "no node at all", not even empty */
  TT_EMPTY = 1, /* empty treetrie */
  TT_OK    = 2, /* terminal marker */

  TT_FIRST_VALID_NODE_IDX = 3
} tt_reserved_node_idx_t;
|
||||||
|
|
||||||
|
/* Index of a node in an arena's `headers` and `nodes` arrays. Values
   below TT_FIRST_VALID_NODE_IDX are the reserved markers listed in
   tt_reserved_node_idx_t. */
typedef uint32_t tt_node_idx_t; /* N.B. tt_reserved_node_idx_t */
|
||||||
|
|
||||||
|
/* NOTE(review): presumably the type of the non-node value held in the
   `b` slot of a TT_TAG_LEAF node; it is not referenced by the code
   visible here, so confirm against callers. */
typedef uint32_t tt_atom_t;
|
||||||
|
|
||||||
|
/* Per-node header. A node on the free chain uses `next_free` (index of
   the next free node); a live node uses the `inuse` bit-fields. The two
   views share the same storage, so writing one invalidates the other. */
typedef union tt_header_t {
  uint32_t next_free;
  struct {
    uint32_t refcount : 24; /* saturates at TT_REFCOUNT_LIMIT */
    uint32_t index : 6;
    /* Holds a tt_tag_t value. Declared with an unsigned integer type
       because the signedness of an enum-typed bit-field is
       implementation-defined; a signed two-bit field could not
       represent TT_TAG_LEAF (2) or TT_TAG_NODE (3). */
    uint32_t tag : 2;
  } inuse;
} tt_header_t;
|
||||||
|
|
||||||
|
/* Largest value of the 24-bit refcount field; tt_grab and tt_drop leave
   a count that has reached it unchanged. */
#define TT_REFCOUNT_LIMIT 0x00FFFFFF
|
||||||
|
|
||||||
|
typedef struct tt_node_t {
|
||||||
|
tt_node_idx_t a; /* always a real node idx */
|
||||||
|
tt_node_idx_t b; /* a real node idx unless corresponding tag is TT_TAG_LEAF */
|
||||||
|
} tt_node_t;
|
||||||
|
|
||||||
|
typedef struct tt_arena_t {
|
||||||
|
/* Fields for the Robin Hood hashset used for hashconsing of tt_nodes */
|
||||||
|
unsigned int max_probe;
|
||||||
|
unsigned int live_count;
|
||||||
|
unsigned int table_length;
|
||||||
|
tt_node_idx_t *table;
|
||||||
|
|
||||||
|
tt_header_t *headers;
|
||||||
|
tt_node_t *nodes;
|
||||||
|
|
||||||
|
tt_node_idx_t free_chain;
|
||||||
|
} tt_arena_t;
|
||||||
|
|
||||||
|
extern int tt_arena_init(tt_arena_t *a);
|
||||||
|
extern void tt_arena_done(tt_arena_t *a);
|
||||||
|
|
||||||
|
/* Returns 0 if consing failed (because of out-of-memory).
|
||||||
|
Otherwise, returns a nonzero index.
|
||||||
|
Grabs na and nb (according to tag) IF it needs to allocate a new node, otherwise does not.
|
||||||
|
DOES NOT increase the reference count of the returned node. */
|
||||||
|
extern tt_node_idx_t tt_arena_cons(tt_arena_t *a,
|
||||||
|
uint32_t tag,
|
||||||
|
uint32_t index,
|
||||||
|
tt_node_idx_t na,
|
||||||
|
tt_node_idx_t nb);
|
||||||
|
|
||||||
|
extern tt_node_idx_t tt_grab(tt_arena_t *a, tt_node_idx_t i);
|
||||||
|
extern void tt_drop(tt_arena_t *a, tt_node_idx_t i);
|
||||||
|
|
||||||
|
/* WARNING: private, unsafe */
|
||||||
|
/* Hash code of a node, as returned by tt_hash_node. */
typedef uint32_t tt_hash_t;
|
||||||
|
extern tt_hash_t tt_hash_node(tt_arena_t *a, tt_node_idx_t i);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in New Issue