aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/venti/buildindex.c
blob: 8058ba098653cef50a84f99c22b5148590cd2116 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#include "stdinc.h"
#include "dat.h"
#include "fns.h"

/*
 * Pack bucket ib into the scratch block b and write it to the index
 * section that holds bucket number buck.
 *
 * findibucket both selects the section and rewrites buck through its
 * third argument (presumably to a section-relative bucket number --
 * confirm against findibucket).  The write address is the section's
 * blockbase plus buck blocks of (1 << blocklog) bytes each.
 *
 * Returns -1 if no section is found for buck; otherwise returns the
 * result of writepart.
 */
static int
writebucket(Index *ix, u32int buck, IBucket *ib, ZBlock *b)
{
	ISect *sect;
	u64int addr;

	sect = findibucket(ix, buck, &buck);
	if(sect == nil)
		return -1;

	/* count this index write under the stats lock */
	qlock(&stats.lock);
	stats.indexwrites++;
	qunlock(&stats.lock);

	packibucket(ib, b->data);

	/* widen buck before shifting so the byte offset is computed in 64 bits */
	addr = sect->blockbase + ((u64int)buck << sect->blocklog);
	return writepart(sect->part, addr, b->data, sect->blocksize);
}

/*
 * Write the buckets of index ix from the sorted index entries stored
 * on part, read as a stream starting at offset off and covering clumps
 * entries.
 *
 * buildbucket is called repeatedly; each call fills ib and returns the
 * bucket number it belongs to, or TWID32 when the stream is exhausted.
 * If zero is set, every bucket number skipped between two populated
 * buckets, and every bucket after the last one up to ix->buckets, is
 * written out as an empty bucket.
 *
 * Write failures are reported and remembered but do not stop the scan;
 * a bucket number outside the index stops it immediately.
 *
 * Returns 0 on success, -1 if setup failed, a bucket was out of range,
 * or any bucket could not be written.
 */
static int
buildindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
{
	IEStream *ies;
	IBucket ib, zib;
	ZBlock *z, *b;
	u32int next, buck;
	int ok;
	u64int found = 0;

//ZZZ make buffer size configurable
	b = alloczblock(ix->blocksize, 0);
	z = alloczblock(ix->blocksize, 1);
	ies = initiestream(part, off, clumps, 64*1024);
	if(b == nil || z == nil || ies == nil){
		/* setup failed: report failure and skip the summary message */
		ok = -1;
		goto cleanup;
	}
	ok = 0;
	next = 0;

	/* bucket entry data sits IBucketSize bytes into each block */
	ib.data = b->data + IBucketSize;
	zib.data = z->data + IBucketSize;

	/* zib is the empty bucket used to fill gaps */
	zib.n = 0;
	zib.depth = 0;
	for(;;){
		buck = buildbucket(ix, ies, &ib);
		found += ib.n;
		if(zero){
			/* fill every bucket from next up to, but not including, buck */
			for(; next != buck; next++){
				if(next == ix->buckets){
					/* ran off the end: fine at end of stream, an error otherwise */
					if(buck != TWID32){
						fprint(2, "bucket out of range\n");
						ok = -1;
					}
					goto breakout;
				}
				if(writebucket(ix, next, &zib, z) < 0){
					fprint(2, "can't write zero bucket to buck=%d: %r\n", next);
					ok = -1;
				}
			}
		}
		if(buck >= ix->buckets){
			/* TWID32 marks the end of the entry stream */
			if(buck == TWID32)
				break;
			fprint(2, "bucket out of range\n");
			ok = -1;
			goto breakout;
		}
		if(writebucket(ix, buck, &ib, b) < 0){
			fprint(2, "bad bucket found=%lld: %r\n", found);
			ok = -1;
		}
		next = buck + 1;
	}
breakout:;
	fprint(2, "constructed index with %lld entries\n", found);
cleanup:
	/*
	 * Any of these may be nil when setup failed; guard the calls
	 * rather than rely on the release routines accepting nil.
	 */
	if(ies != nil)
		freeiestream(ies);
	if(z != nil)
		freezblock(z);
	if(b != nil)
		freezblock(b);
	return ok;
}

/*
 * Print the command synopsis on standard error and terminate every
 * thread.  The exit status is the non-nil string "usage" so that the
 * invoking shell sees a failure rather than a successful exit.
 */
void
usage(void)
{
	fprint(2, "usage: buildindex [-Z] [-B blockcachesize] config tmppart\n");
	threadexitsall("usage");
}

/*
 * Entry point: buildindex [-Z] [-B blockcachesize] config tmppart
 *
 *	-B size	disk block cache size; raised to a computed minimum if smaller
 *	-Z	do not write empty buckets for unused bucket numbers
 *
 * Initializes venti from the config file, sets up the disk block cache,
 * sorts the raw index entries onto the temporary partition and then
 * builds the index from the sorted entries.  Any failure is fatal.
 */
void
threadmain(int argc, char *argv[])
{
	Part *part;
	u64int clumps, base, size;
	u32int bcmem, minbcmem;
	int zero;
	char *sizearg;

	zero = 1;
	bcmem = 0;
	ARGBEGIN{
	case 'B':
		/* ARGF yields nil when the option has no argument */
		sizearg = ARGF();
		if(sizearg == nil)
			usage();
		size = unittoull(sizearg);
		/* bcmem is 32 bits wide; refuse values that would be truncated */
		if(size > TWID32)
			sysfatal("bad block cache size: %s", sizearg);
		bcmem = size;
		break;
	case 'Z':
		zero = 0;
		break;
	default:
		usage();
		break;
	}ARGEND

	if(argc != 2)
		usage();

	if(initventi(argv[0]) < 0)
		sysfatal("can't init venti: %r");

	/* never run with less cache than the arenas and index sections need */
	minbcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
	if(bcmem < minbcmem)
		bcmem = minbcmem;
	fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
	initdcache(bcmem);

	fprint(2, "building a new index %s using %s for temporary storage\n", mainindex->name, argv[1]);

	part = initpart(argv[1], 1);
	if(part == nil)
		sysfatal("can't initialize temporary partition: %r");

	/* TWID64 is the failure return of sortrawientries */
	clumps = sortrawientries(mainindex, part, &base);
	if(clumps == TWID64)
		sysfatal("can't build sorted index: %r");
	fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", clumps, base);

	if(buildindex(mainindex, part, base, clumps, zero) < 0)
		sysfatal("can't build new index: %r");

	threadexitsall(0);
}