265 lines
4.9 KiB
C
265 lines
4.9 KiB
C
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "etlib/generic.h"
|
|
#include "etlib/xmalloc.h"
|
|
#include "etlib/hash.h"
|
|
|
|
/*
|
|
Mixture of hash table and binary tree.
|
|
|
|
First level is a standard hash with the hashpjw function
|
|
from the dragon book. But instead of a linked list in each
|
|
slot I use a binary tree.
|
|
To balance the tree, I take the low-byte of the full hash value
|
|
(before the modulo) as the first char of each key.
|
|
Storing increasing keys does not generate a perfectly balanced
|
|
tree but one that is as good as one generated by random keys.
|
|
|
|
usage:
|
|
to define a hash table:
|
|
hashtable(data-type, table-size) identifier;
|
|
hashtable(data-type, table-size) id1, id2, id3;
|
|
|
|
data-type should be a simple type, a struct, or
|
|
a union. the special type hash_no_data is provided
|
|
when no data is needed.
|
|
table-size should be a prime.
|
|
|
|
to look for an entry:
|
|
hash_find(table-identifier, string)
|
|
|
|
to look for an entry and create a new one if not present:
|
|
hash_get(table-identifier, string)
|
|
|
|
to add an entry
|
|
hash_add(table-identifier, string)
|
|
|
|
(this is an alias for hash_get(...))
|
|
|
|
to get the string associated with an entry:
|
|
hash_name(table-identifier, hash_get/find(...))
|
|
|
|
to get statistics about a hashtable;
|
|
struct hash_st st;
|
|
hash_stat(table-identifier, &st);
|
|
*/
|
|
|
|
struct symbol
|
|
{
|
|
struct symbol *left;
|
|
struct symbol *right;
|
|
u8 misc[2]; /* contains user struct and name */
|
|
};
|
|
|
|
|
|
|
|
static inline u32
|
|
hash(const u8 *str)
|
|
{
|
|
u32 h = 0;
|
|
|
|
while (*str)
|
|
h = (h << 4) ^ (h >> 28) ^ *str++;
|
|
|
|
return h ? h : 1;
|
|
}
|
|
|
|
|
|
/*
|
|
Find a symbol. Return 0 if not found.
|
|
*/
|
|
|
|
const void *
|
|
_hash_find(const u8 *id, const void *tab, int size, int usize)
|
|
{
|
|
const struct symbol *s;
|
|
u32 h;
|
|
int i;
|
|
|
|
h = hash(id);
|
|
s = ((const struct symbol **)tab)[h % size];
|
|
|
|
while (s)
|
|
{
|
|
i = (u8)h - s->misc[usize];
|
|
if (i == 0)
|
|
{
|
|
i = strcmp(id, s->misc + usize + 1);
|
|
if (i == 0)
|
|
return s->misc;
|
|
}
|
|
s = i < 0 ? s->left : s->right;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
Get a symbol. Create if not found.
|
|
*/
|
|
|
|
void *
|
|
_hash_get(const u8 *id, void *tab, int size, int usize)
|
|
{
|
|
struct symbol *s, **ss;
|
|
u32 h;
|
|
int i;
|
|
|
|
h = hash(id);
|
|
ss = &((struct symbol **)tab)[h % size];
|
|
|
|
while ( (s = *ss) )
|
|
{
|
|
i = (u8)h - s->misc[usize];
|
|
if (i == 0)
|
|
{
|
|
i = strcmp(id, s->misc + usize + 1);
|
|
if (i == 0)
|
|
return s->misc;
|
|
}
|
|
ss = i < 0 ? &s->left : &s->right;
|
|
}
|
|
|
|
*ss = s = malloc(sizeof(*s) + usize + strlen(id));
|
|
|
|
s->left = 0;
|
|
s->right = 0;
|
|
memset(s->misc, 0, usize);
|
|
s->misc[usize] = (u8)h;
|
|
strcpy(s->misc + usize + 1, id);
|
|
|
|
return s->misc;
|
|
}
|
|
|
|
/*
|
|
Delete a symbol.
|
|
*/
|
|
|
|
void
|
|
_hash_del(const u8 *id, void *tab, int size, int usize)
|
|
{
|
|
struct symbol *s, **ss;
|
|
u32 h;
|
|
int i;
|
|
|
|
h = hash(id);
|
|
ss = &((struct symbol **)tab)[h % size];
|
|
|
|
while ( (s = *ss) )
|
|
{
|
|
i = (u8)h - s->misc[usize];
|
|
if (i == 0)
|
|
{
|
|
i = strcmp(id, s->misc + usize + 1);
|
|
if (i == 0)
|
|
{
|
|
/* found, now remove it */
|
|
if (s->left == 0)
|
|
*ss = s->right;
|
|
else if (s->right == 0)
|
|
*ss = s->left;
|
|
else
|
|
{
|
|
struct symbol *t, **tt;
|
|
|
|
for (tt = &s->right; (t = *tt)->left; tt = &t->left)
|
|
;
|
|
*tt = t->right;
|
|
t->left = s->left;
|
|
t->right = s->right;
|
|
*ss = t;
|
|
}
|
|
free(s);
|
|
return;
|
|
}
|
|
}
|
|
ss = i < 0 ? &s->left : &s->right;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
_stat(int depth, struct symbol *s, struct hash_st *st)
|
|
{
|
|
while (s)
|
|
{
|
|
if (st->maxdepth < depth)
|
|
st->maxdepth = depth;
|
|
st->nelem++;
|
|
st->middepth += depth;
|
|
depth++;
|
|
_stat(depth, s->left, st);
|
|
#if 0
|
|
printf("<%s>\t", s->misc+5);
|
|
if (s->left) printf("<%s>\t", s->left->misc+5); else printf(".\t");
|
|
if (s->right) printf("<%s>\n", s->right->misc+5); else printf(".\n");
|
|
#endif
|
|
s = s->right;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void
|
|
_hash_stat(void *tab, int size, struct hash_st *st)
|
|
{
|
|
struct symbol **s;
|
|
|
|
s = (struct symbol **)tab;
|
|
|
|
st->nelem = 0;
|
|
st->maxdepth = 0;
|
|
st->middepth = 0;
|
|
st->hashsize = size;
|
|
|
|
while (size--)
|
|
_stat(1, *s++, st);
|
|
|
|
if (st->nelem)
|
|
st->middepth = (st->middepth * 1000) / st->nelem;
|
|
}
|
|
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
/* some primes:
|
|
11 23 31 41 53 61 71 83 97
|
|
101 211 307 401 503 601 701 809 907
|
|
1009 2003 3001 4001 5003 6007 7001 8009 9001
|
|
10007 20011 30011 40009 50021 60013 70001 80021 90001
|
|
100003
|
|
*/
|
|
|
|
//hashtable(int, 97) pseudoop;
|
|
hashtable(int, 1) pseudoop;
|
|
|
|
main()
|
|
{
|
|
struct hash_st st;
|
|
u8 buf[256];
|
|
|
|
#if 1
|
|
hash_add(pseudoop, "0");
|
|
hash_add(pseudoop, "5");
|
|
hash_add(pseudoop, "1");
|
|
hash_add(pseudoop, "6");
|
|
|
|
hash_stat(pseudoop, &st); printf("-----------\n");
|
|
hash_del(pseudoop, "5");
|
|
|
|
#else
|
|
while (gets(buf))
|
|
if (buf[0])
|
|
hash_add(pseudoop, buf);
|
|
#endif
|
|
hash_stat(pseudoop, &st);
|
|
printf("nelem : %d\n", st.nelem);
|
|
printf("hashsize: %d\n", st.hashsize);
|
|
printf("maxdepth: %d\n", st.maxdepth);
|
|
printf("middepth: %d.%03d\n", st.middepth / 1000, st.middepth % 1000);
|
|
}
|
|
#endif
|