/*
Gobligine/etlib/hash.c
2002-08-19 17:12:40 +00:00

265 lines
4.9 KiB
C
*/

#include <stdlib.h>
#include <string.h>
#include "etlib/generic.h"
#include "etlib/xmalloc.h"
#include "etlib/hash.h"
/*
Mixture of hash table and binary tree.
First level is a standard hash with the hashpjw function
from the dragon book. But instead of a linked list in each
slot I use a binary tree.
To balance the tree, I take the low-byte of the full hash value
(before the modulo) as the first char of each key.
Storing increasing keys does not generate a perfectly balanced
tree but one that is as good as one generated by random keys.
usage:
to define a hash table:
hashtable(data-type, table-size) identifier;
hashtable(data-type, table-size) id1, id2, id3;
data-type should be a simple type, a struct, or
a union. the special type hash_no_data is provided
when no data is needed.
table-size should be a prime.
to look for an entry:
hash_find(table-identifier, string)
to look for an entry and create a new one if not present:
hash_get(table-identifier, string)
to add an entry:
hash_add(table-identifier, string)
(this is an alias for hash_get(...))
to delete an entry:
hash_del(table-identifier, string)
to get the string associated with an entry:
hash_name(table-identifier, hash_get/find(...))
to get statistics about a hashtable:
struct hash_st st;
hash_stat(table-identifier, &st);
*/
/*
One node of the binary tree that hangs off each hash slot.

The node is allocated larger than sizeof(struct symbol); misc[] is the
start of a variable-length area laid out as follows (usize is the size
of the user data, passed to every _hash_* function):

misc[0 .. usize-1]   user data, zero-filled when the node is created
misc[usize]          low byte of the key's full hash value
misc[usize+1 ..]     the key itself, NUL-terminated

The two bytes declared here pay for the hash byte and the key's NUL, so
_hash_get() only has to add usize + strlen(key) to sizeof(struct symbol).
*/
struct symbol
{
struct symbol *left; /* subtree of nodes that compare lower than this one */
struct symbol *right; /* subtree of nodes that compare higher than this one */
u8 misc[2]; /* contains user struct and name */
};
/*
	Compute the full (pre-modulo) hash of a NUL-terminated byte string.

	For every byte the accumulator is replaced by
	(acc << 4) ^ (acc >> 28) ^ byte, which is a 4-bit left rotation
	followed by an XOR when u32 is 32 bits wide.

	The result is never 0: an accumulator of 0 (e.g. for the empty
	string) is reported as 1.
*/
static inline u32
hash(const u8 *str)
{
	const u8 *p;
	u32 acc = 0;

	for (p = str; *p != 0; p++) {
		u32 mixed = (acc << 4) ^ (acc >> 28);

		acc = mixed ^ *p;
	}

	if (acc == 0)
		return 1;
	return acc;
}
/*
Find a symbol. Return 0 if not found.
*/
const void *
_hash_find(const u8 *id, const void *tab, int size, int usize)
{
const struct symbol *s;
u32 h;
int i;
h = hash(id);
s = ((const struct symbol **)tab)[h % size];
while (s)
{
i = (u8)h - s->misc[usize];
if (i == 0)
{
i = strcmp(id, s->misc + usize + 1);
if (i == 0)
return s->misc;
}
s = i < 0 ? s->left : s->right;
}
return 0;
}
/*
	Get a symbol. Create if not found.

	id     NUL-terminated key
	tab    the table: an array of `size` tree roots
	size   number of slots in `tab`
	usize  size in bytes of the user data stored in front of each key

	Returns a pointer to the entry's user data (the start of misc[]).
	For a newly created entry the user data is zero-filled.

	Returns 0 if a new entry is needed but memory cannot be allocated;
	the table is left unchanged in that case.
*/
void *
_hash_get(const u8 *id, void *tab, int size, int usize)
{
	struct symbol *s, **ss;
	u32 h;
	int i;

	h = hash(id);
	ss = &((struct symbol **)tab)[h % size];

	/* Descend the tree; on exit ss is the empty link where the key belongs. */
	while ( (s = *ss) )
	{
		i = (u8)h - s->misc[usize];
		if (i == 0)
		{
			i = strcmp((const char *)id, (const char *)s->misc + usize + 1);
			if (i == 0)
				return s->misc;
		}
		ss = i < 0 ? &s->left : &s->right;
	}

	/*
		sizeof(*s) already includes the two bytes declared for misc[],
		which cover the hash byte and the key's terminating NUL.
	*/
	s = malloc(sizeof(*s) + usize + strlen((const char *)id));
	if (s == 0)
		return 0;	/* out of memory: do not touch the tree */

	s->left = 0;
	s->right = 0;
	memset(s->misc, 0, usize);
	s->misc[usize] = (u8)h;
	strcpy((char *)s->misc + usize + 1, (const char *)id);

	/* Link the node in only once it is fully initialised. */
	*ss = s;
	return s->misc;
}
/*
Delete a symbol.
*/
void
_hash_del(const u8 *id, void *tab, int size, int usize)
{
struct symbol *s, **ss;
u32 h;
int i;
h = hash(id);
ss = &((struct symbol **)tab)[h % size];
while ( (s = *ss) )
{
i = (u8)h - s->misc[usize];
if (i == 0)
{
i = strcmp(id, s->misc + usize + 1);
if (i == 0)
{
/* found, now remove it */
if (s->left == 0)
*ss = s->right;
else if (s->right == 0)
*ss = s->left;
else
{
struct symbol *t, **tt;
for (tt = &s->right; (t = *tt)->left; tt = &t->left)
;
*tt = t->right;
t->left = s->left;
t->right = s->right;
*ss = t;
}
free(s);
return;
}
}
ss = i < 0 ? &s->left : &s->right;
}
}
/*
	Accumulate statistics for one tree into *st.

	depth  depth assigned to node `s` (children get depth + 1)
	s      root of the (sub)tree, may be 0
	st     counters to update: nelem is incremented per node, middepth
	       receives the sum of all node depths, maxdepth the largest
	       depth seen

	Left subtrees are handled by recursion, the right spine by the loop.
*/
static void
_stat(int depth, struct symbol *s, struct hash_st *st)
{
	for (; s != 0; s = s->right, depth++) {
		st->nelem += 1;
		st->middepth += depth;
		if (depth > st->maxdepth)
			st->maxdepth = depth;

		_stat(depth + 1, s->left, st);
#if 0
printf("<%s>\t", s->misc+5);
if (s->left) printf("<%s>\t", s->left->misc+5); else printf(".\t");
if (s->right) printf("<%s>\n", s->right->misc+5); else printf(".\n");
#endif
	}
}
/*
	Fill *st with statistics about a whole table.

	tab   the table: an array of `size` tree roots
	size  number of slots in `tab`
	st    receives:
	        hashsize  `size`
	        nelem     total number of entries
	        maxdepth  depth of the deepest node (a slot's root is depth 1)
	        middepth  sum of all node depths times 1000, divided by
	                  nelem; left at 0 when the table is empty
*/
void
_hash_stat(void *tab, int size, struct hash_st *st)
{
	struct symbol **slot = tab;
	int left;

	st->hashsize = size;
	st->nelem = 0;
	st->maxdepth = 0;
	st->middepth = 0;

	/* visit every slot; each slot's root counts as depth 1 */
	for (left = size; left != 0; left--, slot++)
		_stat(1, *slot, st);

	if (st->nelem != 0)
		st->middepth = st->middepth * 1000 / st->nelem;
}
#if 0
/*
	Disabled self-test / demo driver; this whole section is compiled out.

	When enabled it fills a table, prints its statistics and exercises
	deletion. The inner `#if 1` selects a fixed set of keys; switching it
	to `#if 0` reads one key per line from standard input instead.
*/
#include <stdio.h>

/* some primes:
11 23 31 41 53 61 71 83 97
101 211 307 401 503 601 701 809 907
1009 2003 3001 4001 5003 6007 7001 8009 9001
10007 20011 30011 40009 50021 60013 70001 80021 90001
100003
*/
//hashtable(int, 97) pseudoop;
/* A table size of 1 puts every key into the same slot, so the statistics describe a single tree. */
hashtable(int, 1) pseudoop;

int
main(void)
{
	struct hash_st st;
	u8 buf[256];

#if 1
	hash_add(pseudoop, "0");
	hash_add(pseudoop, "5");
	hash_add(pseudoop, "1");
	hash_add(pseudoop, "6");
	hash_stat(pseudoop, &st); printf("-----------\n");
	hash_del(pseudoop, "5");
#else
	/* fgets() bounds the read to the buffer; gets() cannot and was removed in C11. */
	while (fgets((char *)buf, sizeof(buf), stdin))
	{
		/* fgets() keeps the line terminator that gets() used to drop */
		buf[strcspn((char *)buf, "\n")] = 0;
		if (buf[0])
			hash_add(pseudoop, buf);
	}
#endif
	hash_stat(pseudoop, &st);
	printf("nelem : %d\n", st.nelem);
	printf("hashsize: %d\n", st.hashsize);
	printf("maxdepth: %d\n", st.maxdepth);
	/* middepth is scaled by 1000: print it as a three-decimal fixed-point value */
	printf("middepth: %d.%03d\n", st.middepth / 1000, st.middepth % 1000);
	return 0;
}
#endif