diff options
author | rsc <devnull@localhost> | 2005-07-12 15:23:36 +0000 |
---|---|---|
committer | rsc <devnull@localhost> | 2005-07-12 15:23:36 +0000 |
commit | a0d146edd7a7de6236a0d60baafeeb59f8452aae (patch) | |
tree | b55baa526d9f5adfc73246e6ee2fadf455e0b7a2 /src/cmd/venti/srv/fmtbloom.c | |
parent | 88bb285e3d87ec2508840af33f7e0af53ec3c13c (diff) | |
download | plan9port-a0d146edd7a7de6236a0d60baafeeb59f8452aae.tar.gz plan9port-a0d146edd7a7de6236a0d60baafeeb59f8452aae.tar.bz2 plan9port-a0d146edd7a7de6236a0d60baafeeb59f8452aae.zip |
return of venti
Diffstat (limited to 'src/cmd/venti/srv/fmtbloom.c')
-rw-r--r-- | src/cmd/venti/srv/fmtbloom.c | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/src/cmd/venti/srv/fmtbloom.c b/src/cmd/venti/srv/fmtbloom.c new file mode 100644 index 00000000..3c50d82f --- /dev/null +++ b/src/cmd/venti/srv/fmtbloom.c @@ -0,0 +1,115 @@ +#include "stdinc.h" +#include "dat.h" +#include "fns.h" + +Bloom b; + +void +usage(void) +{ + fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n"); + threadexitsall(0); +} + +void +threadmain(int argc, char *argv[]) +{ + Part *part; + char *file; + vlong bits, size, size2; + int nhash; + vlong nblocks; + + ventifmtinstall(); + statsinit(); + + size = 0; + nhash = nblocks = 0; + ARGBEGIN{ + case 'n': + if(nhash || nblocks) + usage(); + nblocks = unittoull(EARGF(usage())); + break; + case 'N': + if(nhash || nblocks) + usage(); + nhash = unittoull(EARGF(usage())); + if(nhash > BloomMaxHash){ + fprint(2, "maximum possible is -N %d", BloomMaxHash); + usage(); + } + break; + case 's': + size = unittoull(ARGF()); + if(size == ~0) + usage(); + break; + default: + usage(); + break; + }ARGEND + + if(argc != 1) + usage(); + + file = argv[0]; + + part = initpart(file, ORDWR|ODIRECT); + if(part == nil) + sysfatal("can't open partition %s: %r", file); + + if(size == 0) + size = part->size; + + if(size < 1024*1024) + sysfatal("bloom filter too small"); + + if(size > MaxBloomSize){ + fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n", + size, MaxBloomSize); + size = MaxBloomSize; + } + if(size&(size-1)){ + for(size2=1; size2<size; size2*=2) + ; + size = size2/2; + fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024); + } + + if(nblocks){ + /* + * no use for more than 32 bits per block + * shoot for less than 64 bits per block + */ + size2 = size; + while(size2*8 >= nblocks*64) + size2 >>= 1; + if(size2 != size){ + size = size2; + fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n", + size/1024/1024); + } + + /* + * optimal is to use ln 2 times as many hash functions as we have bits per blocks. + */ + bits = (8*size)/nblocks; + nhash = bits*7/10; + if(nhash > BloomMaxHash) + nhash = BloomMaxHash; + } + if(!nhash) + nhash = BloomMaxHash; + if(bloominit(&b, size, nil) < 0) + sysfatal("bloominit: %r"); + b.nhash = nhash; + bits = nhash*10/7; + nblocks = (8*size)/bits; + fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size, nhash, nblocks); + b.data = vtmallocz(size); + b.part = part; + if(writebloom(&b) < 0) + sysfatal("writing %s: %r", file); + threadexitsall(0); +} |