aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/venti/srv/fmtbloom.c
blob: f700d7814b9a69d3fdbe2706b2a925dc5379abe7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include "stdinc.h"
#include "dat.h"
#include "fns.h"

Bloom b;

void
usage(void)
{
	fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
	threadexitsall(0);
}

void
threadmain(int argc, char *argv[])
{
	Part *part;
	char *file;
	vlong bits, size, size2;
	int nhash;
	vlong nblocks;
	
	ventifmtinstall();
	statsinit();

	size = 0;
	nhash = 0;
	nblocks = 0;
	ARGBEGIN{
	case 'n':
		if(nhash || nblocks)
			usage();
		nblocks = unittoull(EARGF(usage()));
		break;
	case 'N':
		if(nhash || nblocks)
			usage();
		nhash = unittoull(EARGF(usage()));
		if(nhash > BloomMaxHash){
			fprint(2, "maximum possible is -N %d", BloomMaxHash);
			usage();
		}
		break;
	case 's':
		size = unittoull(ARGF());
		if(size == ~0)
			usage();
		break;
	default:
		usage();
		break;
	}ARGEND

	if(argc != 1)
		usage();

	file = argv[0];

	part = initpart(file, ORDWR|ODIRECT);
	if(part == nil)
		sysfatal("can't open partition %s: %r", file);

	if(size == 0)
		size = part->size;
	
	if(size < 1024*1024)
		sysfatal("bloom filter too small");

	if(size > MaxBloomSize){
		fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
			size, (vlong)MaxBloomSize);
		size = MaxBloomSize;
	}
	if(size&(size-1)){
		for(size2=1; size2<size; size2*=2)
			;
		size = size2/2;
		fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
	}

	if(nblocks){
		/*
		 * no use for more than 32 bits per block
		 * shoot for less than 64 bits per block
		 */
		size2 = size;
		while(size2*8 >= nblocks*64)
			size2 >>= 1;
		if(size2 != size){
			size = size2;
			fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
				size/1024/1024);
		}

		/*
		 * optimal is to use ln 2 times as many hash functions as we have bits per blocks.  
		 */
		bits = (8*size)/nblocks;
		nhash = bits*7/10;
		if(nhash > BloomMaxHash)
			nhash = BloomMaxHash;
	}
	if(!nhash)
		nhash = BloomMaxHash;
	if(bloominit(&b, size, nil) < 0)
		sysfatal("bloominit: %r");
	b.nhash = nhash;
	bits = nhash*10/7;
	nblocks = (8*size)/bits;
	fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
	b.data = vtmallocz(size);
	b.part = part;
	if(writebloom(&b) < 0)
		sysfatal("writing %s: %r", file);
	threadexitsall(0);
}