aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/upas/bayes/hash.c
diff options
context:
space:
mode:
authorrsc <devnull@localhost>2005-10-29 16:26:32 +0000
committerrsc <devnull@localhost>2005-10-29 16:26:32 +0000
commitd1f529f46f957c78a3db73b42c2fcd2d3c9f8a34 (patch)
treea4d6f28106cca984926b9dd5ecddd6053b654617 /src/cmd/upas/bayes/hash.c
parent9f1fdc128738b2ed76258ac22a8574c681f3df3a (diff)
downloadplan9port-d1f529f46f957c78a3db73b42c2fcd2d3c9f8a34.tar.gz
plan9port-d1f529f46f957c78a3db73b42c2fcd2d3c9f8a34.tar.bz2
plan9port-d1f529f46f957c78a3db73b42c2fcd2d3c9f8a34.zip
Thanks to John Cummings.
Diffstat (limited to 'src/cmd/upas/bayes/hash.c')
-rw-r--r--src/cmd/upas/bayes/hash.c312
1 files changed, 312 insertions, 0 deletions
diff --git a/src/cmd/upas/bayes/hash.c b/src/cmd/upas/bayes/hash.c
new file mode 100644
index 00000000..1d0d0ac0
--- /dev/null
+++ b/src/cmd/upas/bayes/hash.c
@@ -0,0 +1,312 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "hash.h"
+
+/***
+ * String hash tables.
+ */
+
+Stringtab *tfree;
+
+Stringtab*
+taballoc(void)
+{
+ static Stringtab *t;
+ static uint nt;
+
+ if(tfree){
+ Stringtab *tt = tfree;
+ tfree = tt->link;
+ return tt;
+ }
+
+ if(nt == 0){
+ t = malloc(64000*sizeof(Stringtab));
+ if(t == 0)
+ sysfatal("out of memory");
+ nt = 64000;
+ }
+ nt--;
+ return t++;
+}
+
+void
+tabfree(Stringtab *tt)
+{
+ tt->link = tfree;
+ tfree = tt;
+}
+
+char*
+xstrdup(char *s, int len)
+{
+ char *r;
+ static char *t;
+ static int nt;
+
+ if(nt < len){
+ t = malloc(512*1024+len);
+ if(t == 0)
+ sysfatal("out of memory");
+ nt = 512*1024;
+ }
+ r = t;
+ t += len;
+ nt -= len;
+ memmove(r, s, len);
+ return r;
+}
+
+static uint
+hash(char *s, int n)
+{
+ uint h;
+ uchar *p, *ep;
+ h = 0;
+ for(p=(uchar*)s, ep=p+n; p<ep; p++)
+ h = h*37 + *p;
+ return h;
+}
+
+static void
+rehash(Hash *hh)
+{
+ int h;
+ Stringtab *s;
+
+ if(hh->nstab == 0)
+ hh->nstab = 1024;
+ else
+ hh->nstab = hh->ntab*2;
+
+ free(hh->stab);
+ hh->stab = mallocz(hh->nstab*sizeof(Stringtab*), 1);
+ if(hh->stab == nil)
+ sysfatal("out of memory");
+
+ for(s=hh->all; s; s=s->link){
+ h = hash(s->str, s->n) % hh->nstab;
+ s->hash = hh->stab[h];
+ hh->stab[h] = s;
+ }
+}
+
+Stringtab*
+findstab(Hash *hh, char *str, int n, int create)
+{
+ uint h;
+ Stringtab *tab, **l;
+
+ if(hh->nstab == 0)
+ rehash(hh);
+
+ h = hash(str, n) % hh->nstab;
+ for(tab=hh->stab[h], l=&hh->stab[h]; tab; l=&tab->hash, tab=tab->hash)
+ if(n==tab->n && memcmp(str, tab->str, n) == 0){
+ *l = tab->hash;
+ tab->hash = hh->stab[h];
+ hh->stab[h] = tab;
+ return tab;
+ }
+
+ if(!create)
+ return nil;
+
+ hh->sorted = 0;
+ tab = taballoc();
+ tab->str = xstrdup(str, n);
+ tab->hash = hh->stab[h];
+ tab->link = hh->all;
+ hh->all = tab;
+ tab->n = n;
+ tab->count = 0;
+ tab->date = 0;
+ hh->stab[h] = tab;
+
+ hh->ntab++;
+ if(hh->ntab > 2*hh->nstab && !(hh->ntab&(hh->ntab-1)))
+ rehash(hh);
+ return tab;
+}
+
+int
+scmp(Stringtab *a, Stringtab *b)
+{
+ int n, x;
+
+ if(a == 0)
+ return 1;
+ if(b == 0)
+ return -1;
+ n = a->n;
+ if(n > b->n)
+ n = b->n;
+ x = memcmp(a->str, b->str, n);
+ if(x != 0)
+ return x;
+ if(a->n < b->n)
+ return -1;
+ if(a->n > b->n)
+ return 1;
+ return 0; /* shouldn't happen */
+}
+
+Stringtab*
+merge(Stringtab *a, Stringtab *b)
+{
+ Stringtab *s, **l;
+
+ l = &s;
+ while(a || b){
+ if(scmp(a, b) < 0){
+ *l = a;
+ l = &a->link;
+ a = a->link;
+ }else{
+ *l = b;
+ l = &b->link;
+ b = b->link;
+ }
+ }
+ *l = 0;
+ return s;
+}
+
+Stringtab*
+mergesort(Stringtab *s)
+{
+ Stringtab *a, *b;
+ int delay;
+
+ if(s == nil)
+ return nil;
+ if(s->link == nil)
+ return s;
+
+ a = b = s;
+ delay = 1;
+ while(a && b){
+ if(delay) /* easy way to handle 2-element list */
+ delay = 0;
+ else
+ a = a->link;
+ if(b = b->link)
+ b = b->link;
+ }
+
+ b = a->link;
+ a->link = nil;
+
+ a = mergesort(s);
+ b = mergesort(b);
+
+ return merge(a, b);
+}
+
+Stringtab*
+sortstab(Hash *hh)
+{
+ if(!hh->sorted){
+ hh->all = mergesort(hh->all);
+ hh->sorted = 1;
+ }
+ return hh->all;
+}
+
+int
+Bwritehash(Biobuf *b, Hash *hh)
+{
+ Stringtab *s;
+ int now;
+
+ now = time(0);
+ s = sortstab(hh);
+ Bprint(b, "# hash table\n");
+ for(; s; s=s->link){
+ if(s->count <= 0)
+ continue;
+ /*
+ * Entries that haven't been updated in thirty days get tossed.
+ */
+ if(s->date+30*86400 < now)
+ continue;
+ Bwrite(b, s->str, s->n);
+ Bprint(b, "\t%d %d\n", s->count, s->date);
+ }
+ if(Bflush(b) == Beof)
+ return -1;
+ return 0;
+}
+
+void
+Breadhash(Biobuf *b, Hash *hh, int scale)
+{
+ char *s;
+ char *t;
+ int n;
+ int date;
+ Stringtab *st;
+
+ s = Brdstr(b, '\n', 1);
+ if(s == nil)
+ return;
+ if(strcmp(s, "# hash table") != 0)
+ sysfatal("bad hash table format");
+
+ while(s = Brdline(b, '\n')){
+ s[Blinelen(b)-1] = 0;
+ t = strrchr(s, '\t');
+ if(t == nil)
+ sysfatal("bad hash table format");
+ *t++ = '\0';
+ if(*t < '0' || *t > '9')
+ sysfatal("bad hash table format");
+ n = strtol(t, &t, 10);
+ date = time(0);
+ if(*t != 0){
+ if(*t == ' '){
+ t++;
+ date = strtol(t, &t, 10);
+ }
+ if(*t != 0)
+ sysfatal("bad hash table format");
+ }
+ st = findstab(hh, s, strlen(s), 1);
+ if(date > st->date)
+ st->date = date;
+ st->count += n*scale;
+ }
+}
+
+void
+freehash(Hash *h)
+{
+ Stringtab *s, *next;
+
+ for(s=h->all; s; s=next){
+ next = s->link;
+ tabfree(s);
+ }
+ free(h->stab);
+ free(h);
+}
+
+Biobuf*
+Bopenlock(char *file, int mode)
+{
+ int i;
+ Biobuf *b;
+ char err[ERRMAX];
+
+ b = nil;
+ for(i=0; i<120; i++){
+ if((b = Bopen(file, mode)) != nil)
+ break;
+ rerrstr(err, sizeof err);
+ if(strstr(err, "file is locked")==nil && strstr(err, "exclusive lock")==nil)
+ break;
+ sleep(1000);
+ }
+ return b;
+}