#include #include #include "etlib/generic.h" #include "etlib/xmalloc.h" #include "etlib/hash.h" /* Mixture of hash table and binary tree. First level is a standard hash with the hashpjw function from the dragon book. But instead of a linked list in each slot I use a binary tree. To balance the tree, I take the low-byte of the full hash value (before the modulo) as the first char of each key. Storing increasing keys does not generate a perfectly balanced tree but one that is as good as one generated by random keys. usage: to define a hash table: hashtable(data-type, table-size) identifier; hashtable(data-type, table-size) id1, id2, id3; data-type should be a simple type, a struct, or a union. the special type hash_no_data is provided when no data is needed. table-size should be a prime. to look for an entry: hash_find(table-identifier, string) to look for an entry and create a new one if not present: hash_get(table-identifier, string) to add an entry hash_add(table-identifier, string) (this is an alias for hash_get(...)) to get the string associated with an entry: hash_name(table-identifier, hash_get/find(...)) to get statistics about a hashtable; struct hash_st st; hash_stat(table-identifier, &st); */ struct symbol { struct symbol *left; struct symbol *right; u8 misc[2]; /* contains user struct and name */ }; static inline u32 hash(const u8 *str) { u32 h = 0; while (*str) h = (h << 4) ^ (h >> 28) ^ *str++; return h ? h : 1; } /* Find a symbol. Return 0 if not found. */ const void * _hash_find(const u8 *id, const void *tab, int size, int usize) { const struct symbol *s; u32 h; int i; h = hash(id); s = ((const struct symbol **)tab)[h % size]; while (s) { i = (u8)h - s->misc[usize]; if (i == 0) { i = strcmp(id, s->misc + usize + 1); if (i == 0) return s->misc; } s = i < 0 ? s->left : s->right; } return 0; } /* Get a symbol. Create if not found. */ void * _hash_get(const u8 *id, void *tab, int size, int usize) { struct symbol *s, **ss; u32 h; int i; h = hash(id); ss = &((struct symbol **)tab)[h % size]; while ( (s = *ss) ) { i = (u8)h - s->misc[usize]; if (i == 0) { i = strcmp(id, s->misc + usize + 1); if (i == 0) return s->misc; } ss = i < 0 ? &s->left : &s->right; } *ss = s = malloc(sizeof(*s) + usize + strlen(id)); s->left = 0; s->right = 0; memset(s->misc, 0, usize); s->misc[usize] = (u8)h; strcpy(s->misc + usize + 1, id); return s->misc; } /* Delete a symbol. */ void _hash_del(const u8 *id, void *tab, int size, int usize) { struct symbol *s, **ss; u32 h; int i; h = hash(id); ss = &((struct symbol **)tab)[h % size]; while ( (s = *ss) ) { i = (u8)h - s->misc[usize]; if (i == 0) { i = strcmp(id, s->misc + usize + 1); if (i == 0) { /* found, now remove it */ if (s->left == 0) *ss = s->right; else if (s->right == 0) *ss = s->left; else { struct symbol *t, **tt; for (tt = &s->right; (t = *tt)->left; tt = &t->left) ; *tt = t->right; t->left = s->left; t->right = s->right; *ss = t; } free(s); return; } } ss = i < 0 ? &s->left : &s->right; } } static void _stat(int depth, struct symbol *s, struct hash_st *st) { while (s) { if (st->maxdepth < depth) st->maxdepth = depth; st->nelem++; st->middepth += depth; depth++; _stat(depth, s->left, st); #if 0 printf("<%s>\t", s->misc+5); if (s->left) printf("<%s>\t", s->left->misc+5); else printf(".\t"); if (s->right) printf("<%s>\n", s->right->misc+5); else printf(".\n"); #endif s = s->right; } } void _hash_stat(void *tab, int size, struct hash_st *st) { struct symbol **s; s = (struct symbol **)tab; st->nelem = 0; st->maxdepth = 0; st->middepth = 0; st->hashsize = size; while (size--) _stat(1, *s++, st); if (st->nelem) st->middepth = (st->middepth * 1000) / st->nelem; } #if 0 /* some primes: 11 23 31 41 53 61 71 83 97 101 211 307 401 503 601 701 809 907 1009 2003 3001 4001 5003 6007 7001 8009 9001 10007 20011 30011 40009 50021 60013 70001 80021 90001 100003 */ //hashtable(int, 97) pseudoop; hashtable(int, 1) pseudoop; main() { struct hash_st st; u8 buf[256]; #if 1 hash_add(pseudoop, "0"); hash_add(pseudoop, "5"); hash_add(pseudoop, "1"); hash_add(pseudoop, "6"); hash_stat(pseudoop, &st); printf("-----------\n"); hash_del(pseudoop, "5"); #else while (gets(buf)) if (buf[0]) hash_add(pseudoop, buf); #endif hash_stat(pseudoop, &st); printf("nelem : %d\n", st.nelem); printf("hashsize: %d\n", st.hashsize); printf("maxdepth: %d\n", st.maxdepth); printf("middepth: %d.%03d\n", st.middepth / 1000, st.middepth % 1000); } #endif