1 files changed, 0 insertions, 1253 deletions
diff --git a/src/cmd/venti/index.c b/src/cmd/venti/index.c
deleted file mode 100644
index c1fc1418..00000000
--- a/src/cmd/venti/index.c
+++ /dev/null
@@ -1,1253 +0,0 @@
-/*
- * Index, mapping scores to log positions. 
- *
- * The index is made up of some number of index sections, each of
- * which is typically stored on a different disk.  The blocks in all the 
- * index sections are logically numbered, with each index section 
- * responsible for a range of blocks.  Blocks are typically 8kB.
- *
- * Index Version 1:
- * 
- * The N index blocks are treated as a giant hash table.  The top 32 bits
- * of score are used as the key for a lookup.  Each index block holds
- * one hash bucket, which is responsible for ceil(2^32 / N) of the key space.
- * 
- * The index is sized so that a particular bucket is extraordinarily 
- * unlikely to overflow: assuming compressed data blocks are 4kB 
- * on disk, and assuming each block has a 40 byte index entry,
- * the index data will be 1% of the total data.  Since scores are essentially
- * random, all buckets should be about the same fullness.
- * A factor of 5 gives us a wide comfort boundary to account for 
- * random variation.  So the index disk space should be 5% of the arena disk space.
- *
- * Problems with Index Version 1:
- * 
- * Because the index size is chosen to handle the worst case load,
- * the index is very sparse, especially when the Venti server is mostly empty.
- * This has a few bad properties.
- * 
- * Loading an index block (which typically requires a random disk seek)
- * is a very expensive operation, yet it yields only a couple index entries.
- * We're not making efficient use of the disk arm.
- *
- * Writing a block requires first checking to see if the block already
- * exists on the server, which in turn requires an index lookup.  When
- * writing fresh data, these lookups will fail.  The index entry cache 
- * cannot serve these, so they must go to disk, which is expensive.
- * 
- * Thus both the reading and the writing of blocks are held back by
- * the expense of accessing the index.
- * 
- * Index Version 2:
- * 
- * The index is sized to be exactly 2^M blocks.  The index blocks are 
- * logically arranged in a (not exactly balanced) binary tree of depth at
- * most M.  The nodes are named by bit strings describing the path from
- * the root to the node.  The root is . (dot).  The left child of the root is .0,
- * the right child .1.  The node you get to by starting at the root and going
- * left right right left is .0110.  At the beginning, there is only the root block.
- * When a block with name .xxx fills, it splits into .xxx0 and .xxx1.
- * All the index data is kept in the leaves of the tree.
- *
- * Index leaf blocks are laid out on disk by interpreting the bitstring as a 
- * binary fraction and multiplying by 2^M -- .0 is the first block, .1 is
- * the block halfway into the index, .0110 is at position 6/16, and
- * .xxx and .xxx0 map to the same block (but only one can be a leaf
- * node at any given time, so this is okay!).  A cheap implementation of
- * this is to append zeros to the bit string to make it M bits long.  That's
- * the integer index block number.
- *
- * To find the leaf block that should hold a score, use the bits of the 
- * score one at a time to walk down the tree to a leaf node.  If the tree
- * has leaf nodes .0, .10, and .11, then score 0110101... ends up in bucket
- * .0 while score 101110101... ends up in bucket .10.  There is no leaf node
- * .1 because it has split into .10 and .11.
- *
- * If we know which disk blocks are in use, we can reconstruct the interior
- * of the tree: if .xxx1 is in use, then .xxx has been split.  We keep an in-use
- * bitmap of all index disk blocks to aid in reconstructing the interior of the
- * tree.  At one bit per index block, the bitmap is small enough to be kept
- * in memory even on the largest of Venti servers.
- *
- * After the root block splits, the index blocks being used will always be
- * at least half full (averaged across the entire index).  So unlike Version 1,
- * Index Version 2 is quite dense, alleviating the two problems above.
- * Index block reads now return many index entries.  More importantly,
- * small servers can keep most of the index in the disk cache, making them
- * more likely to handle negative lookups without going to disk.
- *
- * As the index becomes more full, Index Version 2's performance
- * degrades gracefully into Index Version 1.  V2 is really an optimization
- * for little servers.
- *
- * Write Ordering for Index Version 2:
- * 
- * Unlike Index Version 1, Version 2 must worry about write ordering
- * within the index.  What happens if the in-use bitmap is out of sync
- * with the actual leaf nodes?  What happens if .xxx splits into .xxx0 and
- * .xxx1 but only one of the new blocks gets written to disk?
- *
- * We arrange that when .xxx splits, the .xxx1 block is written first,
- * then the .xxx0 block, and finally the in-use bitmap entry for .xxx1.
- * The split is committed by the writing of .xxx0.  This ordering leaves
- * two possible partial disk writes:
- *
- * (a) If .xxx1 is written but .xxx0 and the bitmap are not, then it's as if
- * the split never happened -- we won't think .xxx1 is in use, and we
- * won't go looking for it.
- *
- * (b) If .xxx1 and .xxx0 are written but the bitmap is not, then the first
- * time we try to load .xxx, we'll get .xxx0 instead, realize the bitmap is
- * out of date, and update the bitmap.
- *
- * Backwards Compatibility
- *
- * Because there are large Venti servers in use with Index V1, this code
- * will read either index version, but when asked to create a new index,
- * will only create V2.
- */
-
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-static int	bucklook(u8int *score, int type, u8int *data, int n);
-static int	writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b);
-static int	okibucket(IBucket *ib, ISect *is);
-static int	initindex1(Index*);
-static ISect	*initisect1(ISect *is);
-static int	splitiblock(Index *ix, DBlock *b, ISect *is, u32int buck, IBucket *ib);
-
-#define KEY(k,d)	((d) ? (k)>>(32-(d)) : 0)
-
-//static QLock	indexlock;	//ZZZ
-
-static char IndexMagic[] = "venti index configuration";
-
-Index*
-initindex(char *name, ISect **sects, int n)
-{
-	IFile f;
-	Index *ix;
-	ISect *is;
-	u32int last, blocksize, tabsize;
-	int i;
-
-	if(n <= 0){
-		seterr(EOk, "no index sections to initialize index");
-		return nil;
-	}
-	ix = MKZ(Index);
-	if(ix == nil){
-		seterr(EOk, "can't initialize index: out of memory");
-		freeindex(ix);
-		return nil;
-	}
-
-	tabsize = sects[0]->tabsize;
-	if(partifile(&f, sects[0]->part, sects[0]->tabbase, tabsize) < 0)
-		return nil;
-	if(parseindex(&f, ix) < 0){
-		freeifile(&f);
-		freeindex(ix);
-		return nil;
-	}
-	freeifile(&f);
-	if(namecmp(ix->name, name) != 0){
-		seterr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
-		return nil;
-	}
-	if(ix->nsects != n){
-		seterr(ECorrupt, "mismatched number index sections: found %d expected %d", n, ix->nsects);
-		freeindex(ix);
-		return nil;
-	}
-	ix->sects = sects;
-	last = 0;
-	blocksize = ix->blocksize;
-	for(i = 0; i < ix->nsects; i++){
-		is = sects[i];
-		if(namecmp(ix->name, is->index) != 0
-		|| is->blocksize != blocksize
-		|| is->tabsize != tabsize
-		|| namecmp(is->name, ix->smap[i].name) != 0
-		|| is->start != ix->smap[i].start
-		|| is->stop != ix->smap[i].stop
-		|| last != is->start
-		|| is->start > is->stop){
-			seterr(ECorrupt, "inconsistent index sections in %s", ix->name);
-			freeindex(ix);
-			return nil;
-		}
-		last = is->stop;
-	}
-	ix->tabsize = tabsize;
-	ix->buckets = last;
-
-	if(initindex1(ix) < 0){
-		freeindex(ix);
-		return nil;
-	}
-
-	ix->arenas = MKNZ(Arena*, ix->narenas);
-	if(maparenas(ix->amap, ix->arenas, ix->narenas, ix->name) < 0){
-		freeindex(ix);
-		return nil;
-	}
-
-	return ix;
-}
-
-static int
-initindex1(Index *ix)
-{
-	u32int buckets;
-
-	switch(ix->version){
-	case IndexVersion1:
-		ix->div = (((u64int)1 << 32) + ix->buckets - 1) / ix->buckets;
-		buckets = (((u64int)1 << 32) - 1) / ix->div + 1;
-		if(buckets != ix->buckets){
-			seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
-			return -1;
-		}
-		break;
-
-	case IndexVersion2:
-		buckets = ix->buckets - ix->bitblocks;
-		if(ix->buckets < ix->bitblocks || (buckets&(buckets-1)))
-			seterr(ECorrupt, "bucket count not a power of two in %s", ix->name);
-		ix->maxdepth = u64log2(buckets);
-		ix->bitkeylog = u64log2(ix->blocksize*8);
-		ix->bitkeymask = (1<<ix->bitkeylog)-1;
-		break;
-	}
-	return 0;
-}
-
-int
-wbindex(Index *ix)
-{
-	Fmt f;
-	ZBlock *b;
-	int i;
-
-	if(ix->nsects == 0){
-		seterr(EOk, "no sections in index %s", ix->name);
-		return -1;
-	}
-	b = alloczblock(ix->tabsize, 1);
-	if(b == nil){
-		seterr(EOk, "can't write index configuration: out of memory");
-		return -1;
-	}
-	fmtzbinit(&f, b);
-	if(outputindex(&f, ix) < 0){
-		seterr(EOk, "can't make index configuration: table storage too small %d", ix->tabsize);
-		freezblock(b);
-		return -1;
-	}
-	for(i = 0; i < ix->nsects; i++){
-		if(writepart(ix->sects[i]->part, ix->sects[i]->tabbase, b->data, ix->tabsize) < 0){
-			seterr(EOk, "can't write index: %r");
-			freezblock(b);
-			return -1;
-		}
-	}
-	freezblock(b);
-
-	for(i = 0; i < ix->nsects; i++)
-		if(wbisect(ix->sects[i]) < 0)
-			return -1;
-
-	return 0;
-}
-
-/*
- * index: IndexMagic '\n' version '\n' name '\n' blocksize '\n' [V2: bitblocks '\n'] sections arenas
- * version, blocksize: u32int
- * name: max. ANameSize string
- * sections, arenas: AMap
- */
-int
-outputindex(Fmt *f, Index *ix)
-{
-	if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blocksize) < 0
-	|| (ix->version==IndexVersion2 && fmtprint(f, "%ud\n", ix->bitblocks) < 0)
-	|| outputamap(f, ix->smap, ix->nsects) < 0
-	|| outputamap(f, ix->amap, ix->narenas) < 0)
-		return -1;
-	return 0;
-}
-
-int
-parseindex(IFile *f, Index *ix)
-{
-	AMapN amn;
-	u32int v;
-	char *s;
-
-	/*
-	 * magic
-	 */
-	s = ifileline(f);
-	if(s == nil || strcmp(s, IndexMagic) != 0){
-		seterr(ECorrupt, "bad index magic for %s", f->name);
-		return -1;
-	}
-
-	/*
-	 * version
-	 */
-	if(ifileu32int(f, &v) < 0){
-		seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
-		return -1;
-	}
-	ix->version = v;
-	if(ix->version != IndexVersion1 && ix->version != IndexVersion2){
-		seterr(ECorrupt, "bad version number in %s", f->name);
-		return -1;
-	}
-
-	/*
-	 * name
-	 */
-	if(ifilename(f, ix->name) < 0){
-		seterr(ECorrupt, "syntax error: bad index name in %s", f->name);
-		return -1;
-	}
-
-	/*
-	 * block size
-	 */
-	if(ifileu32int(f, &v) < 0){
-		seterr(ECorrupt, "syntax error: bad block size number in %s", f->name);
-		return -1;
-	}
-	ix->blocksize = v;
-
-	if(ix->version == IndexVersion2){
-		/*
-		 * free bitmap size
-		 */
-		if(ifileu32int(f, &v) < 0){
-			seterr(ECorrupt, "syntax error: bad bitmap size in %s", f->name);
-			return -1;
-		}
-		ix->bitblocks = v;
-	}
-
-	if(parseamap(f, &amn) < 0)
-		return -1;
-	ix->nsects = amn.n;
-	ix->smap = amn.map;
-
-	if(parseamap(f, &amn) < 0)
-		return -1;
-	ix->narenas = amn.n;
-	ix->amap = amn.map;
-
-	return 0;
-}
-
-/*
- * initialize an entirely new index
- */
-Index *
-newindex(char *name, ISect **sects, int n)
-{
-	Index *ix;
-	AMap *smap;
-	u64int nb;
-	u32int div, ub, xb, fb, start, stop, blocksize, tabsize;
-	int i, j, version;
-
-	version = IndexVersion2;
-
-	if(n < 1){
-		seterr(EOk, "creating index with no index sections");
-		return nil;
-	}
-
-	/*
-	 * compute the total buckets available in the index,
-	 * and the total buckets which are used.
-	 */
-	nb = 0;
-	blocksize = sects[0]->blocksize;
-	tabsize = sects[0]->tabsize;
-	for(i = 0; i < n; i++){
-		if(sects[i]->start != 0 || sects[i]->stop != 0
-		|| sects[i]->index[0] != '\0'){
-			seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
-			return nil;
-		}
-		if(blocksize != sects[i]->blocksize){
-			seterr(EOk, "mismatched block sizes in index sections");
-			return nil;
-		}
-		if(tabsize != sects[i]->tabsize){
-			seterr(EOk, "mismatched config table sizes in index sections");
-			return nil;
-		}
-		nb += sects[i]->blocks;
-	}
-
-	/*
-	 * check for duplicate names
-	 */
-	for(i = 0; i < n; i++){
-		for(j = i + 1; j < n; j++){
-			if(namecmp(sects[i]->name, sects[j]->name) == 0){
-				seterr(EOk, "duplicate section name %s for index %s", sects[i]->name, name);
-				return nil;
-			}
-		}
-	}
-
-	if(nb >= ((u64int)1 << 32)){
-		seterr(EBug, "index too large");
-		return nil;
-	}
-
-	div = 0;
-	fb = 0;
-	if(version == IndexVersion1){
-		div = (((u64int)1 << 32) + nb - 1) / nb;
-		ub = (((u64int)1 << 32) - 1) / div + 1;
-		if(div < 100){
-			seterr(EBug, "index divisor too coarse");
-			return nil;
-		}
-	}else{
-		fb = (nb+blocksize*8-1)/(blocksize*8);
-		for(ub=1; ub<=((nb-fb)>>1); ub<<=1)
-			;
-		ub += fb;
-	}
-	if(ub > nb){
-		seterr(EBug, "index initialization math wrong");
-		return nil;
-	}
-	xb = nb - ub;
-
-	/*
-	 * initialize each of the index sections
-	 * and the section map table
-	 */
-	smap = MKNZ(AMap, n);
-	if(smap == nil){
-		seterr(EOk, "can't create new index: out of memory");
-		return nil;
-	}
-	start = 0;
-	for(i = 0; i < n; i++){
-		stop = start + sects[i]->blocks - xb / n;
-		if(i == n - 1)
-			stop = ub;
-		sects[i]->start = start;
-		sects[i]->stop = stop;
-		namecp(sects[i]->index, name);
-
-		smap[i].start = start;
-		smap[i].stop = stop;
-		namecp(smap[i].name, sects[i]->name);
-		start = stop;
-	}
-
-	/*
-	 * initialize the index itself
-	 */
-	ix = MKZ(Index);
-	if(ix == nil){
-		seterr(EOk, "can't create new index: out of memory");
-		free(smap);
-		return nil;
-	}
-	ix->version = version;
-	namecp(ix->name, name);
-	ix->sects = sects;
-	ix->smap = smap;
-	ix->nsects = n;
-	ix->blocksize = blocksize;
-	ix->buckets = ub;
-	ix->tabsize = tabsize;
-	ix->div = div;
-	ix->bitblocks = fb;
-
-	if(initindex1(ix) < 0){
-		free(smap);
-		return nil;
-	}
-
-	return ix;
-}
-
-ISect*
-initisect(Part *part)
-{
-	ISect *is;
-	ZBlock *b;
-	int ok;
-
-	b = alloczblock(HeadSize, 0);
-	if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
-		seterr(EAdmin, "can't read index section header: %r");
-		return nil;
-	}
-
-	is = MKZ(ISect);
-	if(is == nil){
-		freezblock(b);
-		return nil;
-	}
-	is->part = part;
-	ok = unpackisect(is, b->data);
-	freezblock(b);
-	if(ok < 0){
-		seterr(ECorrupt, "corrupted index section header: %r");
-		freeisect(is);
-		return nil;
-	}
-
-	if(is->version != ISectVersion){
-		seterr(EAdmin, "unknown index section version %d", is->version);
-		freeisect(is);
-		return nil;
-	}
-
-	return initisect1(is);
-}
-
-ISect*
-newisect(Part *part, char *name, u32int blocksize, u32int tabsize)
-{
-	ISect *is;
-	u32int tabbase;
-
-	is = MKZ(ISect);
-	if(is == nil)
-		return nil;
-
-	namecp(is->name, name);
-	is->version = ISectVersion;
-	is->part = part;
-	is->blocksize = blocksize;
-	is->start = 0;
-	is->stop = 0;
-	tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
-	is->blockbase = (tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
-	is->blocks = is->part->size / blocksize - is->blockbase / blocksize;
-
-	is = initisect1(is);
-	if(is == nil)
-		return nil;
-
-	return is;
-}
-
-/*
- * initialize the computed parameters for an index
- */
-static ISect*
-initisect1(ISect *is)
-{
-	u64int v;
-
-	is->buckmax = (is->blocksize - IBucketSize) / IEntrySize;
-	is->blocklog = u64log2(is->blocksize);
-	if(is->blocksize != (1 << is->blocklog)){
-		seterr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blocksize);
-		freeisect(is);
-		return nil;
-	}
-	partblocksize(is->part, is->blocksize);
-	is->tabbase = (PartBlank + HeadSize + is->blocksize - 1) & ~(is->blocksize - 1);
-	if(is->tabbase >= is->blockbase){
-		seterr(ECorrupt, "index section config table overlaps bucket storage");
-		freeisect(is);
-		return nil;
-	}
-	is->tabsize = is->blockbase - is->tabbase;
-	v = is->part->size & ~(u64int)(is->blocksize - 1);
-	if(is->blockbase + (u64int)is->blocks * is->blocksize != v){
-		seterr(ECorrupt, "invalid blocks in index section %s", is->name);
-//ZZZZZZZZZ
-//		freeisect(is);
-//		return nil;
-	}
-
-	if(is->stop - is->start > is->blocks){
-		seterr(ECorrupt, "index section overflows available space");
-		freeisect(is);
-		return nil;
-	}
-	if(is->start > is->stop){
-		seterr(ECorrupt, "invalid index section range");
-		freeisect(is);
-		return nil;
-	}
-
-	return is;
-}
-
-int
-wbisect(ISect *is)
-{
-	ZBlock *b;
-
-	b = alloczblock(HeadSize, 1);
-	if(b == nil)
-//ZZZ set error?
-		return -1;
-
-	if(packisect(is, b->data) < 0){
-		seterr(ECorrupt, "can't make index section header: %r");
-		freezblock(b);
-		return -1;
-	}
-	if(writepart(is->part, PartBlank, b->data, HeadSize) < 0){
-		seterr(EAdmin, "can't write index section header: %r");
-		freezblock(b);
-		return -1;
-	}
-	freezblock(b);
-
-	return 0;
-}
-
-void
-freeisect(ISect *is)
-{
-	if(is == nil)
-		return;
-	free(is);
-}
-
-void
-freeindex(Index *ix)
-{
-	int i;
-
-	if(ix == nil)
-		return;
-	free(ix->amap);
-	free(ix->arenas);
-	if(ix->sects)
-		for(i = 0; i < ix->nsects; i++)
-			freeisect(ix->sects[i]);
-	free(ix->sects);
-	free(ix->smap);
-	free(ix);
-}
-
-/*
- * write a clump to an available arena in the index
- * and return the address of the clump within the index.
-ZZZ question: should this distinguish between an arena
-filling up and real errors writing the clump?
- */
-u64int
-writeiclump(Index *ix, Clump *c, u8int *clbuf)
-{
-	u64int a;
-	int i;
-
-	for(i = ix->mapalloc; i < ix->narenas; i++){
-		a = writeaclump(ix->arenas[i], c, clbuf);
-		if(a != TWID64)
-			return a + ix->amap[i].start;
-	}
-
-	seterr(EAdmin, "no space left in arenas");
-	return TWID64;
-}
-
-/*
- * convert an arena index to an relative arena address
- */
-Arena*
-amapitoa(Index *ix, u64int a, u64int *aa)
-{
-	int i, r, l, m;
-
-	l = 1;
-	r = ix->narenas - 1;
-	while(l <= r){
-		m = (r + l) / 2;
-		if(ix->amap[m].start <= a)
-			l = m + 1;
-		else
-			r = m - 1;
-	}
-	l--;
-
-	if(a > ix->amap[l].stop){
-for(i=0; i<ix->narenas; i++)
-	print("arena %d: %llux - %llux\n", i, ix->amap[i].start, ix->amap[i].stop);
-print("want arena %d for %llux\n", l, a);
-		seterr(ECrash, "unmapped address passed to amapitoa");
-		return nil;
-	}
-
-	if(ix->arenas[l] == nil){
-		seterr(ECrash, "unmapped arena selected in amapitoa");
-		return nil;
-	}
-	*aa = a - ix->amap[l].start;
-	return ix->arenas[l];
-}
-
-int
-iaddrcmp(IAddr *ia1, IAddr *ia2)
-{
-	return ia1->type != ia2->type
-		|| ia1->size != ia2->size
-		|| ia1->blocks != ia2->blocks
-		|| ia1->addr != ia2->addr;
-}
-
-/*
- * lookup the score in the partition
- *
- * nothing needs to be explicitly locked:
- * only static parts of ix are used, and
- * the bucket is locked by the DBlock lock.
- */
-int
-loadientry(Index *ix, u8int *score, int type, IEntry *ie)
-{
-	ISect *is;
-	DBlock *b;
-	IBucket ib;
-	u32int buck;
-	int h, ok;
-
-	ok = -1;
-
-	qlock(&stats.lock);
-	stats.indexreads++;
-	qunlock(&stats.lock);
-	b = loadibucket(ix, score, &is, &buck, &ib);
-	if(b == nil)
-		return -1;
-
-	if(okibucket(&ib, is) < 0)
-		goto out;
-
-	h = bucklook(score, type, ib.data, ib.n);
-	if(h & 1){
-		h ^= 1;
-		unpackientry(ie, &ib.data[h]);
-		ok = 0;
-		goto out;
-	}
-
-out:
-	putdblock(b);
-	return ok;
-}
-
-/*
- * insert or update an index entry into the appropriate bucket
- */
-int
-storeientry(Index *ix, IEntry *ie)
-{
-	ISect *is;
-	DBlock *b;
-	IBucket ib;
-	u32int buck;
-	int h, ok;
-
-	ok = 0;
-
-	qlock(&stats.lock);
-	stats.indexwreads++;
-	qunlock(&stats.lock);
-
-top:
-	b = loadibucket(ix, ie->score, &is, &buck, &ib);
-	if(b == nil)
-		return -1;
-
-	if(okibucket(&ib, is) < 0)
-		goto out;
-
-	h = bucklook(ie->score, ie->ia.type, ib.data, ib.n);
-	if(h & 1){
-		h ^= 1;
-		dirtydblock(b, DirtyIndex);
-		packientry(ie, &ib.data[h]);
-		ok = writebucket(is, buck, &ib, b);
-		goto out;
-	}
-
-	if(ib.n < is->buckmax){
-		dirtydblock(b, DirtyIndex);
-		memmove(&ib.data[h + IEntrySize], &ib.data[h], ib.n * IEntrySize - h);
-		ib.n++;
-
-		packientry(ie, &ib.data[h]);
-		ok = writebucket(is, buck, &ib, b);
-		goto out;
-	}
-
-	/* block is full -- not supposed to happen in V1 */
-	if(ix->version == IndexVersion1){
-		seterr(EAdmin, "index bucket %ud on %s is full\n", buck, is->part->name);
-		ok = -1;
-		goto out;
-	}
-
-	/* in V2, split the block and try again; splitiblock puts b */
-	if(splitiblock(ix, b, is, buck, &ib) < 0)
-		return -1;
-	goto top;
-
-out:
-	putdblock(b);
-	return ok;
-}
-
-static int
-writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b)
-{
-	assert(b->dirty == DirtyIndex || b->dirty == DirtyIndexSplit);
-
-	if(buck >= is->blocks){
-		seterr(EAdmin, "index write out of bounds: %d >= %d\n",
-				buck, is->blocks);
-		return -1;
-	}
-	qlock(&stats.lock);
-	stats.indexwrites++;
-	qunlock(&stats.lock);
-	packibucket(ib, b->data);
-	// return writepart(is->part, is->blockbase + ((u64int)buck << is->blocklog), b->data, is->blocksize);
-	return 0;
-}
-
-static int
-okibucket(IBucket *ib, ISect *is)
-{
-	if(ib->n <= is->buckmax)
-		return 0;
-
-	seterr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, depth=%lud range=[%lud,%lud)",
-		ib->n, is->buckmax, ib->depth, is->start, is->stop);
-	return -1;
-}
-
-/*
- * look for score within data;
- * return 1 | byte index of matching index,
- * or 0 | index of least element > score
- */
-static int
-bucklook(u8int *score, int otype, u8int *data, int n)
-{
-	int i, r, l, m, h, c, cc, type;
-
-	type = vttodisktype(otype);
-	l = 0;
-	r = n - 1;
-	while(l <= r){
-		m = (r + l) >> 1;
-		h = m * IEntrySize;
-		for(i = 0; i < VtScoreSize; i++){
-			c = score[i];
-			cc = data[h + i];
-			if(c != cc){
-				if(c > cc)
-					l = m + 1;
-				else
-					r = m - 1;
-				goto cont;
-			}
-		}
-		cc = data[h + IEntryTypeOff];
-		if(type != cc){
-			if(type > cc)
-				l = m + 1;
-			else
-				r = m - 1;
-			goto cont;
-		}
-		return h | 1;
-	cont:;
-	}
-
-	return l * IEntrySize;
-}
-
-/*
- * compare two IEntries; consistent with bucklook
- */
-int
-ientrycmp(const void *vie1, const void *vie2)
-{
-	u8int *ie1, *ie2;
-	int i, v1, v2;
-
-	ie1 = (u8int*)vie1;
-	ie2 = (u8int*)vie2;
-	for(i = 0; i < VtScoreSize; i++){
-		v1 = ie1[i];
-		v2 = ie2[i];
-		if(v1 != v2){
-			if(v1 < v2)
-				return -1;
-			return 0;
-		}
-	}
-	v1 = ie1[IEntryTypeOff];
-	v2 = ie2[IEntryTypeOff];
-	if(v1 != v2){
-		if(v1 < v2)
-			return -1;
-		return 0;
-	}
-	return -1;
-}
-
-/*
- * find the number of the index section holding bucket #buck
- */
-static int
-indexsect0(Index *ix, u32int buck)
-{
-	int r, l, m;
-
-	l = 1;
-	r = ix->nsects - 1;
-	while(l <= r){
-		m = (r + l) >> 1;
-		if(ix->sects[m]->start <= buck)
-			l = m + 1;
-		else
-			r = m - 1;
-	}
-	return l - 1;
-}
-
-/*
- * load the index block at bucket #buck
- */
-static DBlock*
-loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib, int read)
-{
-	ISect *is;
-	DBlock *b;
-
-	is = ix->sects[indexsect0(ix, buck)];
-	if(buck < is->start || is->stop <= buck){
-		seterr(EAdmin, "index lookup out of range: %ud not found in index\n", buck);
-		return nil;
-	}
-
-	buck -= is->start;
-	if((b = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), read)) == nil)
-		return nil;
-
-	if(pis)
-		*pis = is;
-	if(pbuck)
-		*pbuck = buck;
-	if(ib)
-		unpackibucket(ib, b->data);
-	return b;
-}
-
-/*
- * find the number of the index section holding score
- */
-static int
-indexsect1(Index *ix, u8int *score)
-{
-	return indexsect0(ix, hashbits(score, 32) / ix->div);
-}
-
-/*
- * load the index block responsible for score.
- */
-static DBlock*
-loadibucket1(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
-{
-	return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib, 1);
-}
-
-static u32int
-keytobuck(Index *ix, u32int key, int d)
-{
-	/* clear all but top d bits; can't depend on boundary case shifts */
-	if(d == 0)
-		key = 0;
-	else if(d != 32)
-		key &= ~((1<<(32-d))-1);
-
-	/* truncate to maxdepth bits */
-	if(ix->maxdepth != 32)
-		key >>= 32 - ix->maxdepth;
-
-	return ix->bitblocks + key;
-}
-
-/*
- * to check whether .xxx has split, check whether .xxx1 is in use.
- * it might be worth caching the block for future lookups, but for now
- * let's assume the dcache is good enough.
- */
-static int
-bitmapop(Index *ix, u32int key, int d, int set)
-{
-	DBlock *b;
-	int inuse;
-	u32int key1, buck1;
-
-	if(d >= ix->maxdepth)
-		return 0;
-
-
-	/* construct .xxx1 in bucket number format */
-	key1 = key | (1<<(32-d-1));
-	buck1 = keytobuck(ix, key1, d+1);
-
-if(0) fprint(2, "key %d/%0*ub key1 %d/%0*ub buck1 %08ux\n",
-	d, d, KEY(key, d), d+1, d+1, KEY(key1, d+1), buck1);
-
-	/* check whether buck1 is in use */
-
-	if((b = loadibucket0(ix, buck1 >> ix->bitkeylog, nil, nil, nil, 1)) == nil){
-		seterr(ECorrupt, "cannot load in-use bitmap block");
-fprint(2, "loadibucket: %r\n");
-		return -1;
-	}
-if(0) fprint(2, "buck1 %08ux bitkeymask %08ux bitkeylog %d\n", buck1, ix->bitkeymask, ix->bitkeylog);
-	buck1 &= ix->bitkeymask;
-	inuse = ((u32int*)b->data)[buck1>>5] & (1<<(buck1&31));
-	if(set && !inuse){
-		dirtydblock(b, DirtyIndexBitmap);
-		((u32int*)b->data)[buck1>>5] |= (1<<(buck1&31));
-	}
-	putdblock(b);
-	return inuse;
-}
-
-static int
-issplit(Index *ix, u32int key, int d)
-{
-	return bitmapop(ix, key, d, 0);
-}
-
-static int
-marksplit(Index *ix, u32int key, int d)
-{
-	return bitmapop(ix, key, d, 1);
-}	
-
-/* 
- * find the number of the index section holding score.
- * it's not terrible to be wrong once in a while, so we just
- * do what the bitmap tells us and don't worry about the 
- * bitmap being out of date.
- */
-static int
-indexsect2(Index *ix, u8int *score)
-{
-	u32int key;
-	int d, is;
-
-	key = hashbits(score, 32);
-	for(d=0; d<=ix->maxdepth; d++){
-		is = issplit(ix, key, d);
-		if(is == -1)
-			return 0;	/* must return something valid! */
-		if(!is)
-			break;
-	}
-
-	if(d > ix->maxdepth){
-		seterr(EBug, "index bitmap inconsistent with maxdepth");
-		return 0;	/* must return something valid! */
-	}
-
-	return indexsect0(ix, keytobuck(ix, key, d));
-}
-
-/*
- * load the index block responsible for score. 
- * (unlike indexsect2, must be correct always.)
- */
-static DBlock*
-loadibucket2(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
-{
-	u32int key;
-	int d, try, is;
-	DBlock *b;
-
-	for(try=0; try<32; try++){
-		key = hashbits(score, 32);
-		for(d=0; d<=ix->maxdepth; d++){
-			is = issplit(ix, key, d);
-			if(is == -1)
-				return nil;
-			if(!is)
-				break;
-		}
-		if(d > ix->maxdepth){
-			seterr(EBug, "index bitmap inconsistent with maxdepth");
-			return nil;
-		}
-
-		if((b = loadibucket0(ix, keytobuck(ix, key, d), pis, pbuck, ib, 1)) == nil)
-			return nil;
-
-		if(ib->depth == d)
-			return b;
-
-		if(ib->depth < d){
-			fprint(2, "loaded block %ud for %d/%0*ub got %d/%0*ub\n",
-				*pbuck, d, d, KEY(key,d), ib->depth, ib->depth, KEY(key, ib->depth));
-			seterr(EBug, "index block has smaller depth than expected -- cannot happen");
-			putdblock(b);
-			return nil;
-		}
-
-		/*
-		 * ib->depth > d, meaning the bitmap was out of date.
-		 * fix the bitmap and try again.  if we actually updated
-		 * the bitmap in splitiblock, this would only happen if
-		 * venti crashed at an inopportune moment.  but this way
-		 * the code gets tested more.
-		 */
-if(0) fprint(2, "update bitmap: %d/%0*ub is split\n", d, d, KEY(key,d));
-		putdblock(b);
-		if(marksplit(ix, key, d) < 0)
-			return nil;
-	}
-	seterr(EBug, "loadibucket2 failed to sync bitmap with disk!");
-	return nil;
-}
-
-static int
-splitiblock(Index *ix, DBlock *b, ISect *is, u32int buck, IBucket *ib)
-{
-	int i, d;
-	u8int *score;
-	u32int buck0, buck1, key0, key1, key, dmask;
-	DBlock *b0, *b1;
-	IBucket ib0, ib1;
-
-	if(ib->depth == ix->maxdepth){
-if(0) fprint(2, "depth=%d == maxdepth\n", ib->depth);
-		seterr(EAdmin, "index bucket %ud on %s is full\n", buck, is->part->name);
-		putdblock(b);
-		return -1;
-	}
-
-	buck = is->start+buck - ix->bitblocks;
-	d = ib->depth+1;
-	buck0 = buck;
-	buck1 = buck0 | (1<<(ix->maxdepth-d));
-	if(ix->maxdepth == 32){
-		key0 = buck0;
-		key1 = buck1;
-	}else{
-		key0 = buck0 << (32-ix->maxdepth);
-		key1 = buck1 << (32-ix->maxdepth);
-	}
-	buck0 += ix->bitblocks;
-	buck1 += ix->bitblocks;
-	USED(buck0);
-	USED(key1);
-
-	if(d == 32)
-		dmask = TWID32;
-	else
-		dmask = TWID32 ^ ((1<<(32-d))-1);
-
-	/*
-	 * Since we hold the lock for b, the bitmap
-	 * thinks buck1 doesn't exist, and the bit
-	 * for buck1 can't be updated without first
-	 * locking and splitting b, it's safe to try to
-	 * acquire the lock on buck1 without dropping b.
-	 * No one else will go after it too.
-	 *
-	 * Also, none of the rest of the code ever locks
-	 * more than one block at a time, so it's okay if
-	 * we do.
-	 */
-	if((b1 = loadibucket0(ix, buck1, nil, nil, &ib1, 0)) == nil){
-		putdblock(b);
-		return -1;
-	}
-	b0 = b;
-	ib0 = *ib;
-
-	/*
-	 * Funny locking going on here -- see dirtydblock.
-	 * We must putdblock(b1) before putdblock(b0).
-	 */
-	dirtydblock(b0, DirtyIndex);
-	dirtydblock(b1, DirtyIndexSplit);
-
-	/*
-	 * Split the block contents.
-	 * The block is already sorted, so it's pretty easy:
-	 * the first part stays, the second part goes to b1.
-	 */
-	ib0.n = 0;
-	ib0.depth = d;
-	ib1.n = 0;
-	ib1.depth = d;
-	for(i=0; i<ib->n; i++){
-		score = ib->data+i*IEntrySize;
-		key = hashbits(score, 32);
-		if((key&dmask) != key0)
-			break;
-	}
-	ib0.n = i;
-	ib1.n = ib->n - ib0.n;
-	memmove(ib1.data, ib0.data+ib0.n*IEntrySize, ib1.n*IEntrySize);
-if(0) fprint(2, "splitiblock %d in %d/%0*ub => %d in %d/%0*ub + %d in %d/%0*ub\n",
-	ib->n, d-1, d-1, key0>>(32-d+1), 
-	ib0.n, d, d, key0>>(32-d), 
-	ib1.n, d, d, key1>>(32-d));
-
-	packibucket(&ib0, b0->data);
-	packibucket(&ib1, b1->data);
-
-	/* order matters!  see comment above. */
-	putdblock(b1);
-	putdblock(b0);
-
-	/*
-	 * let the recovery code take care of updating the bitmap.
-	 */
-
-	qlock(&stats.lock);
-	stats.indexsplits++;
-	qunlock(&stats.lock);
-
-	return 0;
-}
-
-int
-indexsect(Index *ix, u8int *score)
-{
-	if(ix->version == IndexVersion1)
-		return indexsect1(ix, score);
-	return indexsect2(ix, score);
-}
-
-DBlock*
-loadibucket(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
-{
-	if(ix->version == IndexVersion1)
-		return loadibucket1(ix, score, pis, pbuck, ib);
-	return loadibucket2(ix, score, pis, pbuck, ib);
-}
-
-