aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/venti/buildbuck.c
blob: 4232bb47a26472ccad56afac0ff31a9bc2f35e6f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#include "stdinc.h"
#include "dat.h"
#include "fns.h"

struct IEStream
{
	Part	*part;
	u64int	off;		/* read position within part */
	u64int	n;		/* number of valid ientries left to read */
	u32int	size;		/* allocated space in buffer */
	u8int	*buf;
	u8int	*pos;		/* current place in buffer */
	u8int	*epos;		/* end of valid buffer contents */
};

IEStream*
initiestream(Part *part, u64int off, u64int clumps, u32int size)
{
	IEStream *ies;

//ZZZ out of memory?
	ies = MKZ(IEStream);
	ies->buf = MKN(u8int, size);
	ies->epos = ies->buf;
	ies->pos = ies->epos;
	ies->off = off;
	ies->n = clumps;
	ies->size = size;
	ies->part = part;
	return ies;
}

void
freeiestream(IEStream *ies)
{
	if(ies == nil)
		return;
	free(ies->buf);
	free(ies);
}

static u8int*
peekientry(IEStream *ies)
{
	u32int n, nn;

	n = ies->epos - ies->pos;
	if(n < IEntrySize){
		memmove(ies->buf, ies->pos, n);
		ies->epos = &ies->buf[n];
		ies->pos = ies->buf;
		nn = ies->size;
		if(nn > ies->n * IEntrySize)
			nn = ies->n * IEntrySize;
		nn -= n;
		if(nn == 0)
			return nil;
		if(readpart(ies->part, ies->off, ies->epos, nn) < 0){
			seterr(EOk, "can't read sorted index entries: %r");
			return nil;
		}
		ies->epos += nn;
		ies->off += nn;
	}
	return ies->pos;
}

static u32int
iebuck(Index *ix, u8int *b)
{
	return hashbits(b, 32) / ix->div;
}

u32int
buildbucket(Index *ix, IEStream *ies, IBucket *ib)
{
	IEntry ie1, ie2;
	u8int *b;
	u32int buck;

	buck = TWID32;
	ib->n = 0;
	ib->next = 0;
	while(ies->n){
		b = peekientry(ies);
		if(b == nil)
			return TWID32;
//fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b), b);
		if(ib->n == 0)
			buck = iebuck(ix, b);
		else{
			if(buck != iebuck(ix, b))
				break;
			if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){
				/*
				 * guess that the larger address is the correct one to use
				 */
				unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]);
				unpackientry(&ie2, b);
				seterr(EOk, "duplicate index entry for score=%V type=%d\n", ie1.score, ie1.ia.type);
				ib->n--;
				if(ie1.ia.addr > ie2.ia.addr)
					memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize);
			}
		}
		memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize);
		ib->n++;
		ies->n--;
		ies->pos += IEntrySize;
	}
	return buck;
}