aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/venti/srv/bloom.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/venti/srv/bloom.c')
-rw-r--r--src/cmd/venti/srv/bloom.c210
1 files changed, 210 insertions, 0 deletions
diff --git a/src/cmd/venti/srv/bloom.c b/src/cmd/venti/srv/bloom.c
new file mode 100644
index 00000000..5c50a0df
--- /dev/null
+++ b/src/cmd/venti/srv/bloom.c
@@ -0,0 +1,210 @@
+/*
+ * Bloom filter tracking which scores are present in our arenas
+ * and (more importantly) which are not.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+int
+bloominit(Bloom *b, vlong vsize, u8int *data)
+{
+ ulong size;
+
+ size = vsize;
+ if(size != vsize){ /* truncation */
+ werrstr("bloom data too big");
+ return -1;
+ }
+
+ b->size = size;
+ b->nhash = 32; /* will be fixed by caller on initialization */
+ if(data != nil)
+ if(unpackbloomhead(b, data) < 0)
+ return -1;
+
+fprint(2, "bloom size %lud nhash %d\n", b->size, b->nhash);
+ b->mask = b->size-1;
+ b->data = data;
+ return 0;
+}
+
+void
+wbbloomhead(Bloom *b)
+{
+ packbloomhead(b, b->data);
+}
+
+Bloom*
+readbloom(Part *p)
+{
+ int i, n;
+ uint ones;
+ uchar buf[512];
+ uchar *data;
+ u32int *a;
+ Bloom *b;
+
+ b = vtmallocz(sizeof *b);
+ if(readpart(p, 0, buf, sizeof buf) < 0)
+ return nil;
+fprint(2, "header %.16H\n", buf);
+ if(bloominit(b, 0, buf) < 0){
+ vtfree(b);
+ return nil;
+ }
+ data = vtmallocz(b->size);
+ if(readpart(p, 0, data, b->size) < 0){
+ vtfree(b);
+ vtfree(data);
+ return nil;
+ }
+ b->data = data;
+ b->part = p;
+
+ a = (u32int*)b->data;
+ n = b->size/4;
+ ones = 0;
+ for(i=0; i<n; i++)
+ ones += countbits(a[i]);
+ addstat(StatBloomOnes, ones);
+
+ if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
+ addstat(StatBloomBits, b->size*8-1);
+ else
+ addstat(StatBloomBits, b->size*8);
+
+ return b;
+}
+
+int
+writebloom(Bloom *b)
+{
+ wbbloomhead(b);
+ return writepart(b->part, 0, b->data, b->size);
+}
+
+/*
+ * Derive two random 32-bit quantities a, b from the score
+ * and then use a+b*i as a sequence of bloom filter indices.
+ * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
+ * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
+ */
+static void
+gethashes(u8int *score, ulong *h)
+{
+ int i;
+ u32int a, b;
+
+ a = 0;
+ b = 0;
+ for(i=4; i+8<=VtScoreSize; i+=8){
+ a ^= *(u32int*)(score+i);
+ b ^= *(u32int*)(score+i+4);
+ }
+ if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
+ a ^= *(u32int*)(score+i);
+ for(i=0; i<BloomMaxHash; i++, a+=b)
+ h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
+}
+
+static void
+_markbloomfilter(Bloom *b, u8int *score)
+{
+ int i, nnew;
+ ulong h[BloomMaxHash];
+ u32int x, *y, z, *tab;
+
+ trace("markbloomfilter", "markbloomfilter %V", score);
+ gethashes(score, h);
+ nnew = 0;
+ tab = (u32int*)b->data;
+ for(i=0; i<b->nhash; i++){
+ x = h[i];
+ y = &tab[(x&b->mask)>>5];
+ z = 1<<(x&31);
+ if(!(*y&z)){
+ nnew++;
+ *y |= z;
+ }
+ }
+ if(nnew)
+ addstat(StatBloomOnes, nnew);
+
+ trace("markbloomfilter", "markbloomfilter exit");
+}
+
+static int
+_inbloomfilter(Bloom *b, u8int *score)
+{
+ int i;
+ ulong h[BloomMaxHash], x;
+ u32int *tab;
+
+ gethashes(score, h);
+ tab = (u32int*)b->data;
+ for(i=0; i<b->nhash; i++){
+ x = h[i];
+ if(!(tab[(x&b->mask)>>5] & (1<<(x&31))))
+ return 0;
+ }
+ return 1;
+}
+
+int
+inbloomfilter(Bloom *b, u8int *score)
+{
+ int r;
+ uint ms;
+
+ if(b == nil)
+ return 1;
+
+ ms = msec();
+ rlock(&b->lk);
+ r = _inbloomfilter(b, score);
+ runlock(&b->lk);
+ ms = ms - msec();
+ addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms);
+ if(r)
+ addstat(StatBloomMiss, 1);
+ else
+ addstat(StatBloomHit, 1);
+ return r;
+}
+
+void
+markbloomfilter(Bloom *b, u8int *score)
+{
+ if(b == nil)
+ return;
+
+ rlock(&b->lk);
+ qlock(&b->mod);
+ _markbloomfilter(b, score);
+ qunlock(&b->mod);
+ runlock(&b->lk);
+}
+
+static void
+bloomwriteproc(void *v)
+{
+ Bloom *b;
+
+ b = v;
+ for(;;){
+ recv(b->writechan, 0);
+ if(writebloom(b) < 0)
+ fprint(2, "oops! writing bloom: %r\n");
+ send(b->writedonechan, 0);
+ }
+}
+
+void
+startbloomproc(Bloom *b)
+{
+ b->writechan = chancreate(sizeof(void*), 0);
+ b->writedonechan = chancreate(sizeof(void*), 0);
+ vtproc(bloomwriteproc, b);
+}