1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
struct IEStream
{
Part *part;
u64int off; /* read position within part */
u64int n; /* number of valid ientries left to read */
u32int size; /* allocated space in buffer */
u8int *buf;
u8int *pos; /* current place in buffer */
u8int *epos; /* end of valid buffer contents */
};
IEStream*
initiestream(Part *part, u64int off, u64int clumps, u32int size)
{
IEStream *ies;
//ZZZ out of memory?
ies = MKZ(IEStream);
ies->buf = MKN(u8int, size);
ies->epos = ies->buf;
ies->pos = ies->epos;
ies->off = off;
ies->n = clumps;
ies->size = size;
ies->part = part;
return ies;
}
void
freeiestream(IEStream *ies)
{
if(ies == nil)
return;
free(ies->buf);
free(ies);
}
static u8int*
peekientry(IEStream *ies)
{
u32int n, nn;
n = ies->epos - ies->pos;
if(n < IEntrySize){
memmove(ies->buf, ies->pos, n);
ies->epos = &ies->buf[n];
ies->pos = ies->buf;
nn = ies->size;
if(nn > ies->n * IEntrySize)
nn = ies->n * IEntrySize;
nn -= n;
if(nn == 0)
return nil;
if(readpart(ies->part, ies->off, ies->epos, nn) < 0){
seterr(EOk, "can't read sorted index entries: %r");
return nil;
}
ies->epos += nn;
ies->off += nn;
}
return ies->pos;
}
static u32int
iebuck(Index *ix, u8int *b)
{
return hashbits(b, 32) / ix->div;
}
u32int
buildbucket(Index *ix, IEStream *ies, IBucket *ib)
{
IEntry ie1, ie2;
u8int *b;
u32int buck;
buck = TWID32;
ib->n = 0;
ib->next = 0;
while(ies->n){
b = peekientry(ies);
if(b == nil)
return TWID32;
//fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b), b);
if(ib->n == 0)
buck = iebuck(ix, b);
else{
if(buck != iebuck(ix, b))
break;
if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){
/*
* guess that the larger address is the correct one to use
*/
unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]);
unpackientry(&ie2, b);
seterr(EOk, "duplicate index entry for score=%V type=%d\n", ie1.score, ie1.ia.type);
ib->n--;
if(ie1.ia.addr > ie2.ia.addr)
memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize);
}
}
memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize);
ib->n++;
ies->n--;
ies->pos += IEntrySize;
}
return buck;
}
|