aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/venti/srv/fmtbloom.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/venti/srv/fmtbloom.c')
-rw-r--r-- src/cmd/venti/srv/fmtbloom.c 115
1 files changed, 115 insertions, 0 deletions
diff --git a/src/cmd/venti/srv/fmtbloom.c b/src/cmd/venti/srv/fmtbloom.c
new file mode 100644
index 00000000..3c50d82f
--- /dev/null
+++ b/src/cmd/venti/srv/fmtbloom.c
@@ -0,0 +1,115 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/* the bloom filter being formatted: set up by threadmain and handed to writebloom */
+Bloom b;
+
+/*
+ * Print the command-line synopsis on file descriptor 2 and exit
+ * via threadexitsall.  The exit string is non-empty so that a bad
+ * invocation is reported to the caller as a failure rather than
+ * as a successful run.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
+	threadexitsall("usage");
+}
+
+/*
+ * Format a bloom filter on a partition.
+ *
+ *	fmtbloom [-s size] [-n nblocks | -N nhash] file
+ *
+ *	-s size		size of the filter; defaults to the partition size
+ *	-n nblocks	expected number of blocks; the filter size and the
+ *			number of hashes are derived from it
+ *	-N nhash	explicit number of hashes, at most BloomMaxHash
+ *
+ * -n and -N are mutually exclusive.  The size is capped at MaxBloomSize
+ * and rounded down to a power of two before the header is initialized
+ * with bloominit and written out with writebloom.  Any failure is fatal
+ * (sysfatal); bad arguments go through usage().
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	Part *part;
+	char *file;
+	vlong bits, size, size2;
+	int nhash;
+	vlong nblocks, v;
+
+	ventifmtinstall();
+	statsinit();
+
+	size = 0;
+	nhash = nblocks = 0;
+	ARGBEGIN{
+	case 'n':
+		if(nhash || nblocks)
+			usage();
+		nblocks = unittoull(EARGF(usage()));
+		/*
+		 * unittoull's failure value (~0, see -s below) is negative
+		 * as a vlong; left unchecked it makes the sizing loop
+		 * further down spin forever.
+		 */
+		if(nblocks < 0)
+			usage();
+		break;
+	case 'N':
+		if(nhash || nblocks)
+			usage();
+		/*
+		 * range-check in a vlong before narrowing to int, so that a
+		 * parse failure or an oversized value cannot slip past the
+		 * BloomMaxHash test as a negative nhash.
+		 */
+		v = unittoull(EARGF(usage()));
+		if(v < 0)
+			usage();
+		if(v > BloomMaxHash){
+			fprint(2, "maximum possible is -N %d\n", BloomMaxHash);
+			usage();
+		}
+		nhash = v;
+		break;
+	case 's':
+		/* EARGF, like the other options: a missing argument is a usage error */
+		size = unittoull(EARGF(usage()));
+		if(size == ~0)
+			usage();
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 1)
+		usage();
+
+	file = argv[0];
+
+	part = initpart(file, ORDWR|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	/* no -s (or -s 0): use the whole partition */
+	if(size == 0)
+		size = part->size;
+
+	if(size < 1024*1024)
+		sysfatal("bloom filter too small");
+
+	if(size > MaxBloomSize){
+		/* the cast makes the argument match the %,lld verb */
+		fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
+			size, (vlong)MaxBloomSize);
+		size = MaxBloomSize;
+	}
+	/* round down to a power of two */
+	if(size&(size-1)){
+		for(size2=1; size2<size; size2*=2)
+			;
+		size = size2/2;
+		fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
+	}
+
+	if(nblocks){
+		/*
+		 * no use for more than 32 bits per block
+		 * shoot for less than 64 bits per block
+		 *
+		 * size2 is a power of two here, so size2/8 >= nblocks is
+		 * the same test as size2*8 >= nblocks*64 but cannot
+		 * overflow for a very large nblocks.
+		 */
+		size2 = size;
+		while(size2/8 >= nblocks)
+			size2 >>= 1;
+		if(size2 != size){
+			size = size2;
+			fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
+				size/1024/1024);
+		}
+
+		/*
+		 * optimal is to use ln 2 times as many hash functions as we have bits per blocks.
+		 */
+		bits = (8*size)/nblocks;
+		nhash = bits*7/10;
+		if(nhash > BloomMaxHash)
+			nhash = BloomMaxHash;
+		/*
+		 * with fewer than two bits per block the estimate rounds
+		 * to zero, which the default below would turn into
+		 * BloomMaxHash; use the minimum of one hash instead.
+		 */
+		if(nhash < 1)
+			nhash = 1;
+	}
+	/* neither -n nor -N given: default to the maximum */
+	if(!nhash)
+		nhash = BloomMaxHash;
+	if(bloominit(&b, size, nil) < 0)
+		sysfatal("bloominit: %r");
+	b.nhash = nhash;
+	/* invert the ln 2 estimate to report the block count this filter suits */
+	bits = nhash*10/7;
+	nblocks = (8*size)/bits;
+	fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
+	b.data = vtmallocz(size);
+	b.part = part;
+	if(writebloom(&b) < 0)
+		sysfatal("writing %s: %r", file);
+	threadexitsall(0);
+}