aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/venti/srv/buildbuck.c
blob: 240e77d726d97d3079515b44e62c54b3ee446ef3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#include "stdinc.h"
#include "dat.h"
#include "fns.h"

/*
 * An IEStream is a sorted list of index entries.
 */
struct IEStream
{
	Part	*part;
	u64int	off;		/* read position within part */
	u64int	n;		/* number of valid ientries left to read */
	u32int	size;		/* allocated space in buffer */
	u8int	*buf;
	u8int	*pos;		/* current place in buffer */
	u8int	*epos;		/* end of valid buffer contents */
};

IEStream*
initiestream(Part *part, u64int off, u64int clumps, u32int size)
{
	IEStream *ies;

//ZZZ out of memory?
	ies = MKZ(IEStream);
	ies->buf = MKN(u8int, size);
	ies->epos = ies->buf;
	ies->pos = ies->epos;
	ies->off = off;
	ies->n = clumps;
	ies->size = size;
	ies->part = part;
	return ies;
}

void
freeiestream(IEStream *ies)
{
	if(ies == nil)
		return;
	free(ies->buf);
	free(ies);
}

/*
 * Return the next IEntry (still packed) in the stream.
 */
static u8int*
peekientry(IEStream *ies)
{
	u32int n, nn;

	n = ies->epos - ies->pos;
	if(n < IEntrySize){
		memmove(ies->buf, ies->pos, n);
		ies->epos = &ies->buf[n];
		ies->pos = ies->buf;
		nn = ies->size;
		if(nn > ies->n * IEntrySize)
			nn = ies->n * IEntrySize;
		nn -= n;
		if(nn == 0)
			return nil;
//fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos);
		if(readpart(ies->part, ies->off, ies->epos, nn) < 0){
			seterr(EOk, "can't read sorted index entries: %r");
			return nil;
		}
		ies->epos += nn;
		ies->off += nn;
	}
	return ies->pos;
}

/*
 * Compute the bucket number for the given IEntry.
 * Knows that the score is the first thing in the packed
 * representation.
 */
static u32int
iebuck(Index *ix, u8int *b, IBucket *ib, IEStream *ies)
{
	USED(ies);
	USED(ib);
	return hashbits(b, 32) / ix->div;
}

/*
 * Fill ib with the next bucket in the stream.
 */
u32int
buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint maxdata)
{
	IEntry ie1, ie2;
	u8int *b;
	u32int buck;

	buck = TWID32;
	ib->n = 0;
	while(ies->n){
		b = peekientry(ies);
		if(b == nil)
			return TWID32;
//fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b);
		if(ib->n == 0)
			buck = iebuck(ix, b, ib, ies);
		else{
			if(buck != iebuck(ix, b, ib, ies))
				break;
			if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){
				/*
				 * guess that the larger address is the correct one to use
				 */
				unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]);
				unpackientry(&ie2, b);
				seterr(EOk, "duplicate index entry for score=%V type=%d", ie1.score, ie1.ia.type);
				ib->n--;
				if(ie1.ia.addr > ie2.ia.addr)
					memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize);
			}
		}
		if((ib->n+1)*IEntrySize > maxdata){
			seterr(EOk, "bucket overflow");
			return TWID32;
		}
		memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize);
		ib->n++;
		ies->n--;
		ies->pos += IEntrySize;
	}
	return buck;
}