aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/venti
diff options
context:
space:
mode:
authorRuss Cox <rsc@swtch.com>2007-09-25 09:47:31 -0400
committerRuss Cox <rsc@swtch.com>2007-09-25 09:47:31 -0400
commit7a400ee957a0815287af806e18ef90dd18b47f82 (patch)
tree023076fb829f630384f2f394eb9577a81fdca59e /src/cmd/venti
parent25a4e89fa907ed5a5f5d84eccfb66180007d9c68 (diff)
downloadplan9port-7a400ee957a0815287af806e18ef90dd18b47f82.tar.gz
plan9port-7a400ee957a0815287af806e18ef90dd18b47f82.tar.bz2
plan9port-7a400ee957a0815287af806e18ef90dd18b47f82.zip
venti: new icache
Diffstat (limited to 'src/cmd/venti')
-rw-r--r--src/cmd/venti/srv/arena.c195
-rw-r--r--src/cmd/venti/srv/checkindex.c11
-rw-r--r--src/cmd/venti/srv/conv.c8
-rw-r--r--src/cmd/venti/srv/dat.h46
-rw-r--r--src/cmd/venti/srv/fns.h11
-rw-r--r--src/cmd/venti/srv/hdisk.c10
-rw-r--r--src/cmd/venti/srv/httpd.c8
-rw-r--r--src/cmd/venti/srv/icache.c708
-rw-r--r--src/cmd/venti/srv/icachewrite.c92
-rw-r--r--src/cmd/venti/srv/ifile.c35
-rw-r--r--src/cmd/venti/srv/index.c19
-rw-r--r--src/cmd/venti/srv/lump.c50
-rw-r--r--src/cmd/venti/srv/stats.c6
-rw-r--r--src/cmd/venti/srv/syncindex.c8
-rw-r--r--src/cmd/venti/srv/syncindex0.c2
-rw-r--r--src/cmd/venti/srv/venti.c21
-rw-r--r--src/cmd/venti/srv/www/stats.js10
17 files changed, 813 insertions, 427 deletions
diff --git a/src/cmd/venti/srv/arena.c b/src/cmd/venti/srv/arena.c
index 7cd2eb73..c576e9aa 100644
--- a/src/cmd/venti/srv/arena.c
+++ b/src/cmd/venti/srv/arena.c
@@ -16,6 +16,7 @@ static int loadarena(Arena *arena);
static CIBlock *getcib(Arena *arena, int clump, int writing, CIBlock *rock);
static void putcib(Arena *arena, CIBlock *cib);
static void sumproc(void *);
+static void loadcig(Arena *arena);
static QLock sumlock;
static Rendez sumwait;
@@ -137,14 +138,23 @@ readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
CIBlock *cib, r;
int i;
- for(i = 0; i < n; i++){
+ /*
+ * because the clump blocks are laid out
+ * in reverse order at the end of the arena,
+ * it can be a few percent faster to read
+ * the clumps backwards, which reads the
+ * disk blocks forwards.
+ */
+ for(i = n-1; i >= 0; i--){
cib = getcib(arena, clump + i, 0, &r);
- if(cib == nil)
- break;
+ if(cib == nil){
+ n = i;
+ continue;
+ }
unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
putcib(arena, cib);
}
- return i;
+ return n;
}
/*
@@ -349,7 +359,28 @@ writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int start, u64int *pa)
if(c->info.size < c->info.uncsize)
arena->memstats.cclumps++;
- clump = arena->memstats.clumps++;
+ clump = arena->memstats.clumps;
+ if(clump % ArenaCIGSize == 0){
+ if(arena->cig == nil){
+ loadcig(arena);
+ if(arena->cig == nil)
+ goto NoCIG;
+ }
+ /* add aa as start of next cig */
+ if(clump/ArenaCIGSize != arena->ncig){
+ fprint(2, "bad arena cig computation %s: writing clump %d but %d cigs\n",
+ arena->name, clump, arena->ncig);
+ arena->ncig = -1;
+ vtfree(arena->cig);
+ arena->cig = nil;
+ goto NoCIG;
+ }
+ arena->cig = vtrealloc(arena->cig, (arena->ncig+1)*sizeof arena->cig[0]);
+ arena->cig[arena->ncig++].offset = aa;
+ }
+NoCIG:
+ arena->memstats.clumps++;
+
if(arena->memstats.clumps == 0)
sysfatal("clumps wrapped");
arena->wtime = now();
@@ -752,3 +783,157 @@ putcib(Arena *arena, CIBlock *cib)
putdblock(cib->data);
cib->data = nil;
}
+
+
+/*
+ * For index entry readahead purposes, the arenas are
+ * broken into smaller subpieces, called clump info groups
+ * or cigs. Each cig has ArenaCIGSize clumps (ArenaCIGSize
+ * is chosen to make the index entries take up about half
+ * a megabyte). The index entries do not contain enough
+ * information to determine what the clump index is for
+ * a given address in an arena. That info is needed both for
+ * figuring out which clump group an address belongs to
+ * and for prefetching a clump group's index entries from
+ * the arena table of contents. The first time clump groups
+ * are accessed, we scan the entire arena table of contents
+ * (which might be 10s of megabytes), recording the data
+ * offset of each clump group.
+ */
+
+/*
+ * load clump info group information by scanning entire toc.
+ */
+static void
+loadcig(Arena *arena)
+{
+ u32int i, j, ncig, nci;
+ ArenaCIG *cig;
+ ClumpInfo *ci;
+ u64int offset;
+ int ms;
+
+ if(arena->cig || arena->ncig < 0)
+ return;
+
+// fprint(2, "loadcig %s\n", arena->name);
+
+ ncig = (arena->memstats.clumps+ArenaCIGSize-1) / ArenaCIGSize;
+ if(ncig == 0){
+ arena->cig = vtmalloc(1);
+ arena->ncig = 0;
+ return;
+ }
+
+ ms = msec();
+ cig = vtmalloc(ncig*sizeof cig[0]);
+ ci = vtmalloc(ArenaCIGSize*sizeof ci[0]);
+ offset = 0;
+ for(i=0; i<ncig; i++){
+ nci = readclumpinfos(arena, i*ArenaCIGSize, ci, ArenaCIGSize);
+ cig[i].offset = offset;
+ for(j=0; j<nci; j++)
+ offset += ClumpSize + ci[j].size;
+ if(nci < ArenaCIGSize){
+ if(i != ncig-1){
+ vtfree(ci);
+ vtfree(cig);
+ arena->ncig = -1;
+ fprint(2, "loadcig %s: got %ud cigs, expected %ud\n", arena->name, i+1, ncig);
+ goto out;
+ }
+ }
+ }
+ vtfree(ci);
+
+ arena->ncig = ncig;
+ arena->cig = cig;
+
+out:
+ ms = msec() - ms;
+ addstat2(StatCigLoad, 1, StatCigLoadTime, ms);
+}
+
+/*
+ * convert arena address into arena group + data boundaries.
+ */
+int
+arenatog(Arena *arena, u64int addr, u64int *gstart, u64int *glimit, int *g)
+{
+ int r, l, m;
+
+ qlock(&arena->lock);
+ if(arena->cig == nil)
+ loadcig(arena);
+ if(arena->cig == nil || arena->ncig == 0){
+ qunlock(&arena->lock);
+ return -1;
+ }
+
+ l = 1;
+ r = arena->ncig - 1;
+ while(l <= r){
+ m = (r + l) / 2;
+ if(arena->cig[m].offset <= addr)
+ l = m + 1;
+ else
+ r = m - 1;
+ }
+ l--;
+
+ *g = l;
+ *gstart = arena->cig[l].offset;
+ if(l+1 < arena->ncig)
+ *glimit = arena->cig[l+1].offset;
+ else
+ *glimit = arena->memstats.used;
+ qunlock(&arena->lock);
+ return 0;
+}
+
+/*
+ * load the clump info for group g into the index entries.
+ */
+int
+asumload(Arena *arena, int g, IEntry *entries, int nentries)
+{
+ int i, base, limit;
+ u64int addr;
+ ClumpInfo ci;
+ IEntry *ie;
+
+ if(nentries < ArenaCIGSize){
+ fprint(2, "asking for too few entries\n");
+ return -1;
+ }
+
+ qlock(&arena->lock);
+ if(arena->cig == nil)
+ loadcig(arena);
+ if(arena->cig == nil || arena->ncig == 0 || g >= arena->ncig){
+ qunlock(&arena->lock);
+ return -1;
+ }
+
+ addr = 0;
+ base = g*ArenaCIGSize;
+ limit = base + ArenaCIGSize;
+ if(base > arena->memstats.clumps)
+ base = arena->memstats.clumps;
+ ie = entries;
+ for(i=base; i<limit; i++){
+ if(readclumpinfo(arena, i, &ci) < 0)
+ break;
+ if(ci.type != VtCorruptType){
+ scorecp(ie->score, ci.score);
+ ie->ia.type = ci.type;
+ ie->ia.size = ci.uncsize;
+ ie->ia.blocks = (ci.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
+ ie->ia.addr = addr;
+ ie++;
+ }
+ addr += ClumpSize + ci.size;
+ }
+ qunlock(&arena->lock);
+ return ie - entries;
+}
diff --git a/src/cmd/venti/srv/checkindex.c b/src/cmd/venti/srv/checkindex.c
index ef0e4ec9..ca955730 100644
--- a/src/cmd/venti/srv/checkindex.c
+++ b/src/cmd/venti/srv/checkindex.c
@@ -8,7 +8,7 @@ static void
phdr(DBlock *eb)
{
static int did;
-
+
if(!did){
did = 1;
print("# diff actual correct\n");
@@ -168,7 +168,7 @@ checkbloom(Bloom *b1, Bloom *b2, int fix)
{
u32int *a1, *a2;
int i, n, extra, missing;
-
+
if(b1==nil && b2==nil)
return 0;
if(b1==nil || b2==nil){
@@ -188,13 +188,14 @@ checkbloom(Bloom *b1, Bloom *b2, int fix)
missing = 0;
for(i=BloomHeadSize/4; i<n; i++){
if(a1[i] != a2[i]){
-print("%.8ux/%.8ux.", a1[i], a2[i]);
- extra += countbits(a1[i] & ~a2[i]);
+// print("%.8ux/%.8ux.", a1[i], a2[i]);
+ extra += countbits(a1[i] & ~a2[i]);
missing += countbits(a2[i] & ~a1[i]);
}
}
if(extra || missing)
- fprint(2, "bloom filter: %d spurious bits, %d missing bits\n", extra, missing);
+ fprint(2, "bloom filter: %d spurious bits, %d missing bits\n",
+ extra, missing);
else
fprint(2, "bloom filter: correct\n");
if(!fix && missing){
diff --git a/src/cmd/venti/srv/conv.c b/src/cmd/venti/srv/conv.c
index 2a0a0bc1..f72511b1 100644
--- a/src/cmd/venti/srv/conv.c
+++ b/src/cmd/venti/srv/conv.c
@@ -581,9 +581,9 @@ unpackientry(IEntry *ie, u8int *buf)
scorecp(ie->score, p);
p += VtScoreSize;
- ie->wtime = U32GET(p);
+ /* ie->wtime = U32GET(p); */
p += U32Size;
- ie->train = U16GET(p);
+ /* ie->train = U16GET(p); */
p += U16Size;
if(p - buf != IEntryAddrOff)
sysfatal("unpackentry bad IEntryAddrOff amount");
@@ -613,9 +613,9 @@ packientry(IEntry *ie, u8int *buf)
scorecp(p, ie->score);
p += VtScoreSize;
- U32PUT(p, ie->wtime);
+ U32PUT(p, 0); /* wtime */
p += U32Size;
- U16PUT(p, ie->train);
+ U16PUT(p, 0); /* train */
p += U16Size;
U64PUT(p, ie->ia.addr, t32);
p += U64Size;
diff --git a/src/cmd/venti/srv/dat.h b/src/cmd/venti/srv/dat.h
index 07be62f4..e0a9e18a 100644
--- a/src/cmd/venti/srv/dat.h
+++ b/src/cmd/venti/srv/dat.h
@@ -3,6 +3,7 @@ typedef struct AMap AMap;
typedef struct AMapN AMapN;
typedef struct Arena Arena;
typedef struct AState AState;
+typedef struct ArenaCIG ArenaCIG;
typedef struct ArenaHead ArenaHead;
typedef struct ArenaPart ArenaPart;
typedef struct ArenaTail ArenaTail;
@@ -28,8 +29,10 @@ typedef struct ZBlock ZBlock;
typedef struct Round Round;
typedef struct Bloom Bloom;
-#define TWID32 ((u32int)~(u32int)0)
-#define TWID64 ((u64int)~(u64int)0)
+#pragma incomplete IEStream
+
+#define TWID32 ((u32int)~(u32int)0)
+#define TWID64 ((u64int)~(u64int)0)
#define TWID8 ((u8int)~(u8int)0)
enum
@@ -44,7 +47,6 @@ enum
IndexBase = 1024*1024, /* initial address to use in an index */
MaxIo = 64*1024, /* max size of a single read or write operation */
ICacheBits = 16, /* default bits for indexing icache */
- ICacheDepth = 4, /* default depth of an icache hash chain */
MaxAMap = 2*1024, /* max. allowed arenas in an address mapping; must be < 32*1024 */
/*
@@ -147,6 +149,8 @@ enum
DirtyArenaCib,
DirtyArenaTrailer,
DirtyMax,
+
+ ArenaCIGSize = 10*1024, // about 0.5 MB worth of IEntry.
VentiZZZZZZZZ
};
@@ -371,6 +375,14 @@ struct Arena
u32int ctime; /* first time a block was written */
u32int wtime; /* last time a block was written */
u32int clumpmagic;
+
+ ArenaCIG *cig;
+ int ncig;
+};
+
+struct ArenaCIG
+{
+ u64int offset; // from arena base
};
/*
@@ -505,14 +517,20 @@ struct IAddr
*/
struct IEntry
{
- u8int score[VtScoreSize];
- IEntry *next; /* next in hash chain */
- IEntry *nextdirty; /* next in dirty chain */
- u32int wtime; /* last write time */
- u16int train; /* relative train containing the most recent ref; 0 if no ref, 1 if in same car */
- u8int rac; /* read ahead count */
- u8int dirty; /* is dirty */
- IAddr ia;
+ /* on disk data - 32 bytes*/
+ u8int score[VtScoreSize];
+ IAddr ia;
+
+ IEntry *nexthash;
+ IEntry *nextdirty;
+ IEntry *next;
+ IEntry *prev;
+ u8int state;
+};
+enum {
+ IEClean = 0,
+ IEDirty = 1,
+ IESummary = 2,
};
/*
@@ -607,6 +625,9 @@ enum
StatIcacheFlush,
StatIcacheStall,
StatIcacheReadTime,
+ StatIcacheLookup,
+ StatScacheHit,
+ StatScachePrefetch,
StatBloomHit,
StatBloomMiss,
@@ -628,6 +649,9 @@ enum
StatSumRead,
StatSumReadBytes,
+
+ StatCigLoad,
+ StatCigLoadTime,
NStat
};
diff --git a/src/cmd/venti/srv/fns.h b/src/cmd/venti/srv/fns.h
index cf529953..05a3e937 100644
--- a/src/cmd/venti/srv/fns.h
+++ b/src/cmd/venti/srv/fns.h
@@ -6,8 +6,11 @@ void addstat(int, int);
void addstat2(int, int, int, int);
ZBlock *alloczblock(u32int size, int zeroed, uint alignment);
Arena *amapitoa(Index *index, u64int a, u64int *aa);
+Arena *amapitoag(Index *index, u64int a, u64int *gstart, u64int *glimit, int *g);
u64int arenadirsize(Arena *arena, u32int clumps);
+int arenatog(Arena *arena, u64int aa, u64int *gstart, u64int *glimit, int *g);
void arenaupdate(Arena *arena, u32int size, u8int *score);
+int asumload(Arena *arena, int g, IEntry *entries, int maxentries);
void backsumarena(Arena *arena);
void binstats(long (*fn)(Stats *s0, Stats *s1, void*), void *arg, long t0, long t1, Statbin *bin, int nbin);
int bloominit(Bloom*, vlong, uchar*);
@@ -64,6 +67,7 @@ int iaddrcmp(IAddr *ia1, IAddr *ia2);
IEntry* icachedirty(u32int, u32int, u64int);
ulong icachedirtyfrac(void);
void icacheclean(IEntry*);
+int icachelookup(u8int *score, int type, IAddr *ia);
int ientrycmp(const void *vie1, const void *vie2);
char *ifileline(IFile *f);
int ifilename(IFile *f, char *dst);
@@ -76,7 +80,7 @@ ArenaPart *initarenapart(Part *part);
int initarenasum(void);
void initbloomfilter(Index*);
void initdcache(u32int mem);
-void initicache(int bits, int depth);
+void initicache(u32int mem);
void initicachewrite(void);
IEStream *initiestream(Part *part, u64int off, u64int clumps, u32int size);
ISect *initisect(Part *part);
@@ -87,7 +91,7 @@ Part* initpart(char *name, int mode);
void initround(Round*, char*, int);
int initventi(char *config, Config *conf);
void insertlump(Lump *lump, Packet *p);
-int insertscore(u8int *score, IAddr *ia, int write);
+int insertscore(u8int *score, IAddr *ia, int state);
void kickdcache(void);
void kickicache(void);
void kickround(Round*, int wait);
@@ -97,8 +101,7 @@ DBlock *loadibucket(Index *index, u8int *score, ISect **is, u32int *buck, IBucke
int loadientry(Index *index, u8int *score, int type, IEntry *ie);
void logerr(int severity, char *fmt, ...);
Lump *lookuplump(u8int *score, int type);
-int _lookupscore(u8int *score, int type, IAddr *ia, int *rac);
-int lookupscore(u8int *score, int type, IAddr *ia, int *rac);
+int lookupscore(u8int *score, int type, IAddr *ia);
int maparenas(AMap *am, Arena **arenas, int n, char *what);
void markbloomfilter(Bloom*, u8int*);
uint msec(void);
diff --git a/src/cmd/venti/srv/hdisk.c b/src/cmd/venti/srv/hdisk.c
index 266218b1..8cf937d1 100644
--- a/src/cmd/venti/srv/hdisk.c
+++ b/src/cmd/venti/srv/hdisk.c
@@ -547,7 +547,7 @@ debugread(HConnect *c, u8int *score)
Lump *u;
IAddr ia;
IEntry ie;
- int i, rac;
+ int i;
Arena *arena;
u64int aa;
ZBlock *zb;
@@ -561,7 +561,7 @@ debugread(HConnect *c, u8int *score)
}
hprint(&c->hout, "<h2>index search %V</h2><pre>\n", score);
- if(_lookupscore(score, -1, &ia, nil) < 0)
+ if(icachelookup(score, -1, &ia) < 0)
hprint(&c->hout, " icache: not found\n");
else
hprint(&c->hout, " icache: addr=%#llx size=%d type=%d blocks=%d\n",
@@ -585,12 +585,12 @@ debugread(HConnect *c, u8int *score)
hprint(&c->hout, " -cache");
putlump(u);
- if(lookupscore(score, type, &ia, &rac) < 0){
+ if(lookupscore(score, type, &ia) < 0){
hprint(&c->hout, " -lookup\n");
continue;
}
- hprint(&c->hout, "\n lookupscore: addr=%#llx size=%d blocks=%d rac=%d\n",
- ia.addr, ia.size, ia.blocks, rac);
+ hprint(&c->hout, "\n lookupscore: addr=%#llx size=%d blocks=%d\n",
+ ia.addr, ia.size, ia.blocks);
arena = amapitoa(mainindex, ia.addr, &aa);
if(arena == nil){
diff --git a/src/cmd/venti/srv/httpd.c b/src/cmd/venti/srv/httpd.c
index 6b7f20fa..51d8b9a1 100644
--- a/src/cmd/venti/srv/httpd.c
+++ b/src/cmd/venti/srv/httpd.c
@@ -895,7 +895,7 @@ static char* graphname[] =
"icachehit",
"icachemiss",
- "icachelookup",
+ "icacheread",
"icachewrite",
"icachefill",
"icacheprefetch",
@@ -904,6 +904,9 @@ static char* graphname[] =
"icacheflush",
"icachestall",
"icachelookuptime",
+ "icachelookup",
+ "scachehit",
+ "scacheprefetch",
"bloomhit",
"bloommiss",
@@ -925,6 +928,9 @@ static char* graphname[] =
"sumread",
"sumreadbyte",
+
+ "cigload",
+ "cigloadtime",
};
static int
diff --git a/src/cmd/venti/srv/icache.c b/src/cmd/venti/srv/icache.c
index 32cbb5d3..384fd2c1 100644
--- a/src/cmd/venti/srv/icache.c
+++ b/src/cmd/venti/srv/icache.c
@@ -2,236 +2,421 @@
#include "dat.h"
#include "fns.h"
+int icacheprefetch = 1;
+
typedef struct ICache ICache;
+typedef struct IHash IHash;
+typedef struct ISum ISum;
+
struct ICache
{
- QLock lock; /* locks hash table & all associated data */
+ QLock lock;
Rendez full;
- IEntry **heads; /* heads of all the hash chains */
- int bits; /* bits to use for indexing heads */
- u32int size; /* number of heads; == 1 << bits, should be < entries */
- IEntry *base; /* all allocated hash table entries */
- IEntry *free;
- u32int entries; /* elements in base */
- IEntry *dirty; /* chain of dirty elements */
- u32int ndirty;
+ IHash *hash;
+ IEntry *entries;
+ int nentries;
+ IEntry free;
+ IEntry clean;
+ IEntry dirty;
u32int maxdirty;
- u32int unused; /* index of first unused element in base */
- u32int stolen; /* last head from which an element was stolen */
+ u32int ndirty;
- Arena *last[4];
- Arena *lastload;
- int nlast;
+ ISum **sum;
+ int nsum;
+ IHash *shash;
+ IEntry *sentries;
+ int nsentries;
};
-int icacheprefetch = 1;
-
static ICache icache;
-static IEntry *icachealloc(IAddr *ia, u8int *score);
-
/*
- * bits is the number of bits in the icache hash table
- * depth is the average depth
- * memory usage is about (1<<bits) * depth * sizeof(IEntry) + (1<<bits) * sizeof(IEntry*)
+ * Hash table of IEntries
*/
-void
-initicache(int bits, int depth)
+
+struct IHash
{
- icache.bits = bits;
- icache.size = 1 << bits;
- icache.entries = depth * icache.size;
- icache.maxdirty = icache.entries/2;
- icache.base = MKNZ(IEntry, icache.entries);
- icache.heads = MKNZ(IEntry*, icache.size);
- icache.full.l = &icache.lock;
- setstat(StatIcacheSize, icache.entries);
-}
+ int bits;
+ u32int size;
+ IEntry **table;
+};
-ulong
-icachedirtyfrac(void)
+static IHash*
+mkihash(int size1)
{
- return (vlong)icache.ndirty*IcacheFrac / icache.entries;
+ u32int size;
+ int bits;
+ IHash *ih;
+
+ bits = 0;
+ size = 1;
+ while(size < size1){
+ bits++;
+ size <<= 1;
+ }
+
+ ih = vtmallocz(sizeof(IHash)+size*sizeof(ih->table[0]));
+ ih->table = (IEntry**)(ih+1);
+ ih->bits = bits;
+ ih->size = size;
+ return ih;
}
-u32int
-hashbits(u8int *sc, int bits)
+static IEntry*
+ihashlookup(IHash *ih, u8int score[VtScoreSize], int type)
{
- u32int v;
-
- v = (sc[0] << 24) | (sc[1] << 16) | (sc[2] << 8) | sc[3];
- if(bits < 32)
- v >>= (32 - bits);
- return v;
+ u32int h;
+ IEntry *ie;
+
+ h = hashbits(score, ih->bits);
+ for(ie=ih->table[h]; ie; ie=ie->nexthash)
+ if((type == -1 || type == ie->ia.type) && scorecmp(score, ie->score) == 0)
+ return ie;
+ return nil;
}
static void
-loadarenaclumps(Arena *arena, u64int aa)
+ihashdelete(IHash *ih, IEntry *ie, char *what)
{
- ulong i;
- ClumpInfo ci;
- IAddr ia;
-
- for(i=0; i<arena->memstats.clumps; i++){
- if(readclumpinfo(arena, i, &ci) < 0)
- break;
- ia.type = ci.type;
- ia.size = ci.uncsize;
- ia.blocks = (ci.size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
- ia.addr = aa;
- aa += ClumpSize + ci.size;
- if(ia.type != VtCorruptType)
- insertscore(ci.score, &ia, 0);
- }
+ u32int h;
+ IEntry **l;
+
+ h = hashbits(ie->score, ih->bits);
+ for(l=&ih->table[h]; *l; l=&(*l)->nexthash)
+ if(*l == ie){
+ *l = ie->nexthash;
+ return;
+ }
+ fprint(2, "warning: %s %V not found in ihashdelete\n", what, ie->score);
}
-int
-_lookupscore(u8int *score, int type, IAddr *ia, int *rac)
+static void
+ihashinsert(IHash *ih, IEntry *ie)
{
u32int h;
- IEntry *ie, *last;
-
- qlock(&icache.lock);
- h = hashbits(score, icache.bits);
- last = nil;
- for(ie = icache.heads[h]; ie != nil; ie = ie->next){
- if((ie->ia.type == type || type == -1) && scorecmp(ie->score, score)==0){
- if(last != nil)
- last->next = ie->next;
- else
- icache.heads[h] = ie->next;
- addstat(StatIcacheHit, 1);
- if(rac)
- ie->rac = 1;
- trace(TraceLump, "lookupscore incache");
- ie->next = icache.heads[h];
- icache.heads[h] = ie;
-
- *ia = ie->ia;
- if(rac)
- *rac = ie->rac;
- qunlock(&icache.lock);
- return 0;
- }
- last = ie;
- }
- addstat(StatIcacheMiss, 1);
- qunlock(&icache.lock);
- return -1;
+
+ h = hashbits(ie->score, ih->bits);
+ ie->nexthash = ih->table[h];
+ ih->table[h] = ie;
}
/*
-ZZZ need to think about evicting the correct IEntry,
-and writing back the wtime.
- * look up data score in the index cache
- * if this fails, pull it in from the disk index table, if it exists.
- *
- * must be called with the lump for this score locked
+ * IEntry lists.
*/
-int
-lookupscore(u8int *score, int type, IAddr *ia, int *rac)
+
+static IEntry*
+popout(IEntry *ie)
{
- IEntry d, *ie;
- u32int h;
- u64int aa;
- Arena *load;
- int i, ret;
- uint ms;
+ if(ie->prev == nil && ie->next == nil)
+ return ie;
+ ie->prev->next = ie->next;
+ ie->next->prev = ie->prev;
+ ie->next = nil;
+ ie->prev = nil;
+ return ie;
+}
- aa = 0;
- ms = msec();
-
- trace(TraceLump, "lookupscore %V.%d", score, type);
+static IEntry*
+poplast(IEntry *list)
+{
+ if(list->prev == list)
+ return nil;
+ return popout(list->prev);
+}
- ret = 0;
- if(_lookupscore(score, type, ia, rac) < 0){
- if(loadientry(mainindex, score, type, &d) < 0){
- ret = -1;
- goto out;
- }
+static IEntry*
+pushfirst(IEntry *list, IEntry *ie)
+{
+ popout(ie);
+ ie->prev = list;
+ ie->next = list->next;
+ ie->prev->next = ie;
+ ie->next->prev = ie;
+ return ie;
+}
- /* failed in cache but found on disk - fill cache. */
- trace(TraceLump, "lookupscore loaded");
- addstat(StatIcacheFill, 1);
+/*
+ * Arena summary cache.
+ */
+struct ISum
+{
+ QLock lock;
+ IEntry *entries;
+ int nentries;
+ int loaded;
+ u64int addr;
+ u64int limit;
+ Arena *arena;
+ int g;
+};
- /*
- * no one else can load an entry for this score,
- * since we have this score's lump's lock.
- */
- qlock(&icache.lock);
-
- /*
- * If we notice that all the hits are coming from one arena,
- * load the table of contents for that arena into the cache.
- */
- load = nil;
- h = hashbits(score, icache.bits);
- ie = icachealloc(&d.ia, score);
- if(icacheprefetch){
- icache.last[icache.nlast++%nelem(icache.last)] = amapitoa(mainindex, ie->ia.addr, &aa);
- aa = ie->ia.addr - aa; /* compute base addr of arena */
- for(i=0; i<nelem(icache.last); i++)
- if(icache.last[i] != icache.last[0])
- break;
- if(i==nelem(icache.last) && icache.lastload != icache.last[0]){
- load = icache.last[0];
- icache.lastload = load;
+static ISum*
+scachelookup(u64int addr)
+{
+ int i;
+ ISum *s;
+
+ for(i=0; i<icache.nsum; i++){
+ s = icache.sum[i];
+ if(s->addr <= addr && addr < s->limit){
+ if(i > 0){
+ memmove(icache.sum+1, icache.sum, i*sizeof icache.sum[0]);
+ icache.sum[0] = s;
}
+ return s;
}
+ }
+ return nil;
+}
+
+static void
+sumclear(ISum *s)
+{
+ int i;
+
+ for(i=0; i<s->nentries; i++)
+ ihashdelete(icache.shash, &s->entries[i], "scache");
+ s->nentries = 0;
+ s->loaded = 0;
+ s->addr = 0;
+ s->limit = 0;
+ s->arena = nil;
+ s->g = 0;
+}
+
+static ISum*
+scacheevict(void)
+{
+ ISum *s;
+ int i;
- ie->next = icache.heads[h];
- icache.heads[h] = ie;
-
- *ia = ie->ia;
- *rac = ie->rac;
-
- qunlock(&icache.lock);
- if(load){
- trace(TraceProc, "preload 0x%llux", aa);
- loadarenaclumps(load, aa);
+ for(i=icache.nsum-1; i>=0; i--){
+ s = icache.sum[i];
+ if(canqlock(&s->lock)){
+ if(i > 0){
+ memmove(icache.sum+1, icache.sum, i*sizeof icache.sum[0]);
+ icache.sum[0] = s;
+ }
+ sumclear(s);
+ return s;
}
}
+ return nil;
+}
-out:
- ms = msec() - ms;
- addstat2(StatIcacheRead, 1, StatIcacheReadTime, ms);
+static void
+scachehit(u64int addr)
+{
+ scachelookup(addr); /* for move-to-front */
+}
- return ret;
+static void
+scachesetup(ISum *s, u64int addr)
+{
+ u64int addr0, limit;
+ int g;
+
+ s->arena = amapitoag(mainindex, addr, &addr0, &limit, &g);
+ s->addr = addr0;
+ s->limit = limit;
+ s->g = g;
+}
+
+static void
+scacheload(ISum *s)
+{
+ int i, n;
+
+ s->loaded = 1;
+ n = asumload(s->arena, s->g, s->entries, ArenaCIGSize);
+ /*
+ * n can be less then ArenaCIGSize, either if the clump group
+ * is the last in the arena and is only partially filled, or if there
+ * are corrupt clumps in the group -- those are not returned.
+ */
+ for(i=0; i<n; i++){
+ s->entries[i].ia.addr += s->addr;
+ ihashinsert(icache.shash, &s->entries[i]);
+ }
+//fprint(2, "%T scacheload %s %d - %d entries\n", s->arena->name, s->g, n);
+ addstat(StatScachePrefetch, n);
+ s->nentries = n;
+}
+
+static ISum*
+scachemiss(u64int addr)
+{
+ ISum *s;
+
+ s = scachelookup(addr);
+ if(s == nil){
+ /* first time: make an entry in the cache but don't populate it yet */
+ s = scacheevict();
+ if(s == nil)
+ return nil;
+ scachesetup(s, addr);
+ qunlock(&s->lock);
+ return nil;
+ }
+
+ /* second time: load from disk */
+ qlock(&s->lock);
+ if(s->loaded || !icacheprefetch){
+ qunlock(&s->lock);
+ return nil;
+ }
+
+ return s; /* locked */
}
/*
- * insert a new element in the hash table.
+ * Index cache.
*/
-int
-insertscore(u8int *score, IAddr *ia, int write)
+
+void
+initicache(u32int mem0)
{
- IEntry *ie, se;
- u32int h;
+ u32int mem;
+ int i, entries, scache;
+
+ icache.full.l = &icache.lock;
- trace(TraceLump, "insertscore enter");
- if(write)
- addstat(StatIcacheWrite, 1);
- else
- addstat(StatIcachePrefetch, 1);
+ mem = mem0;
+ entries = mem / (sizeof(IEntry)+sizeof(IEntry*));
+ scache = (entries/8) / ArenaCIGSize;
+ entries -= entries/8;
+ if(scache < 4)
+ scache = 4;
+ if(scache > 16)
+ scache = 16;
+ if(entries < 1000)
+ entries = 1000;
+fprint(2, "icache %,d bytes = %,d entries; %d scache\n", mem0, entries, scache);
+
+ icache.clean.prev = icache.clean.next = &icache.clean;
+ icache.dirty.prev = icache.dirty.next = &icache.dirty;
+ icache.free.prev = icache.free.next = &icache.free;
+
+ icache.hash = mkihash(entries);
+ icache.nentries = entries;
+ setstat(StatIcacheSize, entries);
+ icache.entries = vtmallocz(entries*sizeof icache.entries[0]);
+ icache.maxdirty = entries / 2;
+ for(i=0; i<entries; i++)
+ pushfirst(&icache.free, &icache.entries[i]);
+
+ icache.nsum = scache;
+ icache.sum = vtmallocz(scache*sizeof icache.sum[0]);
+ icache.sum[0] = vtmallocz(scache*sizeof icache.sum[0][0]);
+ icache.nsentries = scache * ArenaCIGSize;
+ icache.sentries = vtmallocz(scache*ArenaCIGSize*sizeof icache.sentries[0]);
+ icache.shash = mkihash(scache*ArenaCIGSize);
+ for(i=0; i<scache; i++){
+ icache.sum[i] = icache.sum[0] + i;
+ icache.sum[i]->entries = icache.sentries + i*ArenaCIGSize;
+ }
+}
- qlock(&icache.lock);
- h = hashbits(score, icache.bits);
- ie = icachealloc(ia, score);
- if(write){
+static IEntry*
+evictlru(void)
+{
+ IEntry *ie;
+
+ ie = poplast(&icache.clean);
+ if(ie == nil)
+ return nil;
+ ihashdelete(icache.hash, ie, "evictlru");
+ return ie;
+}
+
+static void
+icacheinsert(u8int score[VtScoreSize], IAddr *ia, int state)
+{
+ IEntry *ie;
+
+ if((ie = poplast(&icache.free)) == nil && (ie = evictlru()) == nil){
+ addstat(StatIcacheStall, 1);
+ while((ie = poplast(&icache.free)) == nil && (ie = evictlru()) == nil){
+ // Could safely return here if state == IEClean.
+ // But if state == IEDirty, have to wait to make
+ // sure we don't lose an index write.
+ // Let's wait all the time.
+ flushdcache();
+ kickicache();
+ rsleep(&icache.full);
+ }
+ addstat(StatIcacheStall, -1);
+ }
+
+ memmove(ie->score, score, VtScoreSize);
+ ie->state = state;
+ ie->ia = *ia;
+ if(state == IEClean){
+ addstat(StatIcachePrefetch, 1);
+ pushfirst(&icache.clean, ie);
+ }else{
+ addstat(StatIcacheWrite, 1);
+ assert(state == IEDirty);
icache.ndirty++;
setstat(StatIcacheDirty, icache.ndirty);
delaykickicache();
- ie->dirty = 1;
+ pushfirst(&icache.dirty, ie);
}
- ie->next = icache.heads[h];
- icache.heads[h] = ie;
+ ihashinsert(icache.hash, ie);
+}
+
+int
+icachelookup(u8int score[VtScoreSize], int type, IAddr *ia)
+{
+ IEntry *ie;
- se = *ie;
+ qlock(&icache.lock);
+ addstat(StatIcacheLookup, 1);
+ if((ie = ihashlookup(icache.hash, score, type)) != nil){
+ *ia = ie->ia;
+ if(ie->state == IEClean)
+ pushfirst(&icache.clean, ie);
+ addstat(StatIcacheHit, 1);
+ qunlock(&icache.lock);
+ return 0;
+ }
+
+ if((ie = ihashlookup(icache.shash, score, type)) != nil){
+ *ia = ie->ia;
+ icacheinsert(score, &ie->ia, IEClean);
+ scachehit(ie->ia.addr);
+ addstat(StatScacheHit, 1);
+ qunlock(&icache.lock);
+ return 0;
+ }
+ addstat(StatIcacheMiss, 1);
qunlock(&icache.lock);
- if(write && icache.ndirty >= icache.maxdirty)
+ return -1;
+}
+
+int
+insertscore(u8int score[VtScoreSize], IAddr *ia, int state)
+{
+ ISum *toload;
+
+ qlock(&icache.lock);
+ icacheinsert(score, ia, state);
+ if(state == IEClean)
+ toload = scachemiss(ia->addr);
+ else{
+ assert(state == IEDirty);
+ toload = nil;
+ }
+ qunlock(&icache.lock);
+ if(toload){
+ scacheload(toload);
+ qunlock(&toload->lock);
+ }
+
+ if(icache.ndirty >= icache.maxdirty)
kickicache();
/*
@@ -240,125 +425,81 @@ insertscore(u8int *score, IAddr *ia, int write)
* the lump, meaning any searches for this block
* will hit in the lump cache until after we return.
*/
- markbloomfilter(mainindex->bloom, score);
+ if(state == IEDirty)
+ markbloomfilter(mainindex->bloom, score);
return 0;
}
-/*
- * allocate a index cache entry which hasn't been used in a while.
- * must be called with icache.lock locked
- * if the score is already in the table, update the entry.
- */
-static IEntry *
-icachealloc(IAddr *ia, u8int *score)
+static int
+lookupscore_untimed(u8int score[VtScoreSize], int type, IAddr *ia)
{
- int i;
- IEntry *ie, *last, *clean, *lastclean;
- u32int h;
+ IEntry d;
- h = hashbits(score, icache.bits);
- last = nil;
- for(ie = icache.heads[h]; ie != nil; ie = ie->next){
- if(ie->ia.type == ia->type && scorecmp(ie->score, score)==0){
- if(last != nil)
- last->next = ie->next;
- else
- icache.heads[h] = ie->next;
- trace(TraceLump, "icachealloc hit");
- ie->rac = 1;
- return ie;
- }
- last = ie;
- }
+ if(icachelookup(score, type, ia) >= 0)
+ return 0;
- h = icache.unused;
- if(h < icache.entries){
- ie = &icache.base[h++];
- icache.unused = h;
- trace(TraceLump, "icachealloc unused");
- goto Found;
- }
+ addstat(StatIcacheFill, 1);
+ if(loadientry(mainindex, score, type, &d) < 0)
+ return -1;
- if((ie = icache.free) != nil){
- icache.free = ie->next;
- goto Found;
- }
+ insertscore(score, &d.ia, IEClean);
+ *ia = d.ia;
+ return 0;
+}
- h = icache.stolen;
- for(i=0;; i++){
- h++;
- if(h >= icache.size)
- h = 0;
- if(i == icache.size){
- trace(TraceLump, "icachealloc sleep");
- addstat(StatIcacheStall, 1);
- while(icache.ndirty == icache.entries){
- /*
- * This is a bit suspect. Kickicache will wake up the
- * icachewritecoord, but if all the index entries are for
- * unflushed disk blocks, icachewritecoord won't be
- * able to do much. It always rewakes everyone when
- * it thinks it is done, though, so at least we'll go around
- * the while loop again. Also, if icachewritecoord sees
- * that the disk state hasn't change at all since the last
- * time around, it kicks the disk. This needs to be
- * rethought, but it shouldn't deadlock anymore.
- */
- kickicache();
- rsleep(&icache.full);
- }
- addstat(StatIcacheStall, -1);
- i = 0;
- }
- lastclean = nil;
- clean = nil;
- last = nil;
- for(ie=icache.heads[h]; ie; last=ie, ie=ie->next){
- if(!ie->dirty){
- clean = ie;
- lastclean = last;
- }
- }
- if(clean){
- if(lastclean)
- lastclean->next = clean->next;
- else
- icache.heads[h] = clean->next;
- clean->next = nil;
- icache.stolen = h;
- ie = clean;
- trace(TraceLump, "icachealloc steal");
- goto Found;
- }
- }
+int
+lookupscore(u8int score[VtScoreSize], int type, IAddr *ia)
+{
+ int ms, ret;
+
+ ms = msec();
+ ret = lookupscore_untimed(score, type, ia);
+ ms = msec() - ms;
+ addstat2(StatIcacheRead, 1, StatIcacheReadTime, ms);
+ return ret;
+}
+
+u32int
+hashbits(u8int *sc, int bits)
+{
+ u32int v;
-Found:
- ie->ia = *ia;
- scorecp(ie->score, score);
- ie->rac = 0;
- return ie;
+ v = (sc[0] << 24) | (sc[1] << 16) | (sc[2] << 8) | sc[3];
+ if(bits < 32)
+ v >>= (32 - bits);
+ return v;
}
+ulong
+icachedirtyfrac(void)
+{
+ return (vlong)icache.ndirty*IcacheFrac / icache.nentries;
+}
+
+/*
+ * Return a singly-linked list of dirty index entries.
+ * with 32-bit hash numbers between lo and hi
+ * and address < limit.
+ */
IEntry*
icachedirty(u32int lo, u32int hi, u64int limit)
{
- int i;
u32int h;
IEntry *ie, *dirty;
dirty = nil;
trace(TraceProc, "icachedirty enter");
qlock(&icache.lock);
- for(i=0; i<icache.size; i++)
- for(ie = icache.heads[i]; ie; ie=ie->next)
- if(ie->dirty && ie->ia.addr != 0 && ie->ia.addr < limit){
+ for(ie = icache.dirty.next; ie != &icache.dirty; ie=ie->next){
+ if(ie->state == IEDirty && ie->ia.addr < limit){
h = hashbits(ie->score, 32);
if(lo <= h && h <= hi){
ie->nextdirty = dirty;
dirty = ie;
}
}
+ }
qunlock(&icache.lock);
trace(TraceProc, "icachedirty exit");
if(dirty == nil)
@@ -366,36 +507,49 @@ icachedirty(u32int lo, u32int hi, u64int limit)
return dirty;
}
+
+/*
+ * The singly-linked non-circular list of index entries ie
+ * has been written to disk. Move them to the clean list.
+ */
void
icacheclean(IEntry *ie)
{
- trace(TraceProc, "icachedirty enter");
+ IEntry *next;
+
+ trace(TraceProc, "icacheclean enter");
qlock(&icache.lock);
- for(; ie; ie=ie->nextdirty){
+ for(; ie; ie=next){
+ assert(ie->state == IEDirty);
+ next = ie->nextdirty;
+ ie->nextdirty = nil;
+ popout(ie); /* from icache.dirty */
icache.ndirty--;
- ie->dirty = 0;
+ ie->state = IEClean;
+ pushfirst(&icache.clean, ie);
}
setstat(StatIcacheDirty, icache.ndirty);
rwakeupall(&icache.full);
qunlock(&icache.lock);
- trace(TraceProc, "icachedirty exit");
+ trace(TraceProc, "icacheclean exit");
}
void
emptyicache(void)
{
int i;
- IEntry *ie, **lie;
+ IEntry *ie;
+ ISum *s;
qlock(&icache.lock);
- for(i=0; i<icache.size; i++)
- for(lie=&icache.heads[i]; (ie=*lie); ){
- if(ie->dirty == 0){
- *lie = ie->next;
- ie->next = icache.free;
- icache.free = ie;
- }else
- lie = &ie->next;
+ while((ie = evictlru()) != nil)
+ pushfirst(&icache.free, ie);
+ for(i=0; i<icache.nsum; i++){
+ s = icache.sum[i];
+ qlock(&s->lock);
+ sumclear(s);
+ qunlock(&s->lock);
}
qunlock(&icache.lock);
}
+
diff --git a/src/cmd/venti/srv/icachewrite.c b/src/cmd/venti/srv/icachewrite.c
index 49344c9f..0805001c 100644
--- a/src/cmd/venti/srv/icachewrite.c
+++ b/src/cmd/venti/srv/icachewrite.c
@@ -45,6 +45,16 @@ initicachewrite(void)
vtproc(delaykickroundproc, &iwrite.round);
}
+static u64int
+ie2diskaddr(Index *ix, ISect *is, IEntry *ie)
+{
+ u64int bucket, addr;
+
+ bucket = hashbits(ie->score, 32)/ix->div;
+ addr = is->blockbase + ((bucket - is->start) << is->blocklog);
+ return addr;
+}
+
static IEntry*
nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
{
@@ -55,13 +65,13 @@ nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
bsize = 1<<is->blocklog;
iefirst = *pie;
- addr = is->blockbase + ((u64int)(hashbits(iefirst->score, 32) / ix->div - is->start) << is->blocklog);
+ addr = ie2diskaddr(ix, is, iefirst);
nbuf = 0;
- for(l=&iefirst->nextdirty; (ie=*l)!=nil; l=&(*l)->nextdirty){
- naddr = is->blockbase + ((u64int)(hashbits(ie->score, 32) / ix->div - is->start) << is->blocklog);
+ for(l = &iefirst->nextdirty; (ie = *l) != nil; l = &(*l)->nextdirty){
+ naddr = ie2diskaddr(ix, is, ie);
if(naddr - addr >= Bufsize)
break;
- nbuf = naddr-addr;
+ nbuf = naddr - addr;
}
nbuf += bsize;
@@ -75,7 +85,7 @@ nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
static int
icachewritesect(Index *ix, ISect *is, u8int *buf)
{
- int err, h, bsize, t;
+ int err, i, werr, h, bsize, t;
u32int lo, hi;
u64int addr, naddr;
uint nbuf, off;
@@ -89,29 +99,32 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
else
hi = is->stop * ix->div - 1;
- trace(TraceProc, "icachewritesect enter %ud %ud %llud", lo, hi, iwrite.as.aa);
+ trace(TraceProc, "icachewritesect enter %ud %ud %llud",
+ lo, hi, iwrite.as.aa);
iedirty = icachedirty(lo, hi, iwrite.as.aa);
iedirty = iesort(iedirty);
- bsize = 1<<is->blocklog;
+ bsize = 1 << is->blocklog;
err = 0;
while(iedirty){
disksched();
- while((t=icachesleeptime) == SleepForever){
+ while((t = icachesleeptime) == SleepForever){
sleep(1000);
disksched();
}
if(t < minicachesleeptime)
t = minicachesleeptime;
- sleep(t);
+ if(t > 0)
+ sleep(t);
trace(TraceProc, "icachewritesect nextchunk");
chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf);
- trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux", addr, nbuf);
+ trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux",
+ addr, nbuf);
if(readpart(is->part, addr, buf, nbuf) < 0){
- /* XXX more details here */
- fprint(2, "icachewriteproc readpart: %r\n");
+ fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
+ "readpart: %r\n", argv0, is->part->name, addr);
err = -1;
continue;
}
@@ -120,31 +133,34 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
addstat(StatIsectRead, 1);
for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){
- again:
- naddr = is->blockbase + ((u64int)(hashbits(ie->score, 32) / ix->div - is->start) << is->blocklog);
+again:
+ naddr = ie2diskaddr(ix, is, ie);
off = naddr - addr;
if(off+bsize > nbuf){
- fprint(2, "whoops! addr=0x%llux nbuf=%ud addr+nbuf=0x%llux naddr=0x%llux\n",
- addr, nbuf, addr+nbuf, naddr);
+ fprint(2, "%s: whoops! addr=0x%llux nbuf=%ud "
+ "addr+nbuf=0x%llux naddr=0x%llux\n",
+ argv0, addr, nbuf, addr+nbuf, naddr);
assert(off+bsize <= nbuf);
}
unpackibucket(&ib, buf+off, is->bucketmagic);
if(okibucket(&ib, is) < 0){
- fprint(2, "bad bucket XXX\n");
+ fprint(2, "%s: bad bucket XXX\n", argv0);
goto skipit;
}
- trace(TraceProc, "icachewritesect add %V at 0x%llux", ie->score, naddr);
+ trace(TraceProc, "icachewritesect add %V at 0x%llux",
+ ie->score, naddr);
h = bucklook(ie->score, ie->ia.type, ib.data, ib.n);
if(h & 1){
h ^= 1;
packientry(ie, &ib.data[h]);
}else if(ib.n < is->buckmax){
- memmove(&ib.data[h+IEntrySize], &ib.data[h], ib.n*IEntrySize - h);
+ memmove(&ib.data[h + IEntrySize], &ib.data[h],
+ ib.n*IEntrySize - h);
ib.n++;
packientry(ie, &ib.data[h]);
}else{
- fprint(2, "bucket overflow XXX\n");
- skipit:
+ fprint(2, "%s: bucket overflow XXX\n", argv0);
+skipit:
err = -1;
*l = ie->nextdirty;
ie = *l;
@@ -154,33 +170,29 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
break;
}
packibucket(&ib, buf+off, is->bucketmagic);
- /* XXX
- * This is not quite right - it's good that we
- * update the cached block (if any) here, but
- * since the block doesn't get written until writepart
- * below, we also need to make sure that the cache
- * doesn't load the stale block before we write it to
- * disk below. We could lock the disk cache during
- * the writepart, but that's pretty annoying.
- * Another possibility would be never to cache
- * index partition blocks. The hit rate on those is
- * miniscule anyway.
- */
- if((b = _getdblock(is->part, naddr, ORDWR, 0)) != nil){
- memmove(b->data, buf+off, bsize);
- putdblock(b);
- }
}
diskaccess(1);
trace(TraceProc, "icachewritesect writepart", addr, nbuf);
- if(writepart(is->part, addr, buf, nbuf) < 0 || flushpart(is->part) < 0){
- /* XXX more details here */
- fprint(2, "icachewriteproc writepart: %r\n");
+ werr = 0;
+ if(writepart(is->part, addr, buf, nbuf) < 0 || flushpart(is->part) < 0)
+ werr = -1;
+
+ for(i=0; i<nbuf; i+=bsize){
+ if((b = _getdblock(is->part, addr+i, ORDWR, 0)) != nil){
+ memmove(b->data, buf+i, bsize);
+ putdblock(b);
+ }
+ }
+
+ if(werr < 0){
+ fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
+ "writepart: %r\n", argv0, is->part->name, addr);
err = -1;
continue;
}
+
addstat(StatIsectWriteBytes, nbuf);
addstat(StatIsectWrite, 1);
icacheclean(chunk);
diff --git a/src/cmd/venti/srv/ifile.c b/src/cmd/venti/srv/ifile.c
index e5d5460a..36d96b94 100644
--- a/src/cmd/venti/srv/ifile.c
+++ b/src/cmd/venti/srv/ifile.c
@@ -2,46 +2,57 @@
#include "dat.h"
#include "fns.h"
+static char vcmagic[] = "venti config\n";
+
+enum {
+ Maxconfig = 8 * 1024,
+ Maglen = sizeof vcmagic - 1,
+};
+
int
readifile(IFile *f, char *name)
{
- int m;
Part *p;
ZBlock *b;
u8int *z;
-
+
p = initpart(name, OREAD);
if(p == nil)
return -1;
- b = alloczblock(8192, 1, 0);
+ b = alloczblock(Maxconfig+1, 1, 0);
if(b == nil){
seterr(EOk, "can't alloc for %s: %R", name);
return -1;
}
if(p->size > PartBlank){
/*
- * this is likely a real venti partition, in which case
- * we're looking for the config file stored as 8k at end of PartBlank.
+ * this is likely a real venti partition, in which case we're
+ * looking for the config file stored as 8k at end of PartBlank.
*/
- if(readpart(p, PartBlank-8192, b->data, 8192) < 0){
+ if(readpart(p, PartBlank-Maxconfig, b->data, Maxconfig) < 0){
seterr(EOk, "can't read %s: %r", name);
freezblock(b);
freepart(p);
return -1;
}
- m = 5+1+6+1;
- if(memcmp(b->data, "venti config\n", m) != 0){
+ b->data[Maxconfig] = '\0';
+ if(memcmp(b->data, vcmagic, Maglen) != 0){
seterr(EOk, "bad venti config magic in %s", name);
freezblock(b);
freepart(p);
return -1;
}
- b->data += m;
- b->len -= m;
- z = memchr(b->data, 0, b->len);
+ /*
+ * if we change b->data+b->_size, freezblock
+ * will blow an assertion, so don't.
+ */
+ b->data += Maglen;
+ b->_size -= Maglen;
+ b->len -= Maglen;
+ z = memchr(b->data, '\0', b->len);
if(z)
b->len = z - b->data;
- }else if(p->size > 8192){
+ }else if(p->size > Maxconfig){
seterr(EOk, "config file is too large");
freepart(p);
freezblock(b);
diff --git a/src/cmd/venti/srv/index.c b/src/cmd/venti/srv/index.c
index a5ffeef9..dd49e055 100644
--- a/src/cmd/venti/srv/index.c
+++ b/src/cmd/venti/srv/index.c
@@ -596,6 +596,25 @@ print("want arena %d for %llux\n", l, a);
return ix->arenas[l];
}
+/*
+ * convert an arena index to the bounds of the containing arena group.
+ */
+Arena*
+amapitoag(Index *ix, u64int a, u64int *gstart, u64int *glimit, int *g)
+{
+ u64int aa;
+ Arena *arena;
+
+ arena = amapitoa(ix, a, &aa);
+ if(arena == nil)
+ return nil;
+ if(arenatog(arena, aa, gstart, glimit, g) < 0)
+ return nil;
+ *gstart += a - aa;
+ *glimit += a - aa;
+ return arena;
+}
+
int
iaddrcmp(IAddr *ia1, IAddr *ia2)
{
diff --git a/src/cmd/venti/srv/lump.c b/src/cmd/venti/srv/lump.c
index 1db62717..206d5e06 100644
--- a/src/cmd/venti/srv/lump.c
+++ b/src/cmd/venti/srv/lump.c
@@ -7,7 +7,7 @@ int queuewrites = 0;
int writestodevnull = 0;
int verifywrites = 0;
-static Packet *readilump(Lump *u, IAddr *ia, u8int *score, int rac);
+static Packet *readilump(Lump *u, IAddr *ia, u8int *score);
/*
* Some of this logic is duplicated in hdisk.c
@@ -19,7 +19,6 @@ readlump(u8int *score, int type, u32int size, int *cached)
Packet *p;
IAddr ia;
u32int n;
- int rac;
trace(TraceLump, "readlump enter");
/*
@@ -49,7 +48,7 @@ readlump(u8int *score, int type, u32int size, int *cached)
if(cached)
*cached = 0;
- if(lookupscore(score, type, &ia, &rac) < 0){
+ if(lookupscore(score, type, &ia) < 0){
/* ZZZ place to check for someone trying to guess scores */
seterr(EOk, "no block with score %V/%d exists", score, type);
@@ -64,7 +63,7 @@ readlump(u8int *score, int type, u32int size, int *cached)
}
trace(TraceLump, "readlump readilump");
- p = readilump(u, &ia, score, rac);
+ p = readilump(u, &ia, score);
putlump(u);
trace(TraceLump, "readlump exit");
@@ -134,9 +133,8 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms)
Packet *old;
IAddr ia;
int ok;
- int rac;
- if(lookupscore(u->score, u->type, &ia, &rac) == 0){
+ if(lookupscore(u->score, u->type, &ia) == 0){
if(verifywrites == 0){
/* assume the data is here! */
packetfree(p);
@@ -149,7 +147,7 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms)
* if the read fails,
* assume it was corrupted data and store the block again
*/
- old = readilump(u, &ia, u->score, rac);
+ old = readilump(u, &ia, u->score);
if(old != nil){
ok = 0;
if(packetcmp(p, old) != 0){
@@ -176,7 +174,7 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms)
ok = storeclump(mainindex, flat, u->score, u->type, creator, &ia);
freezblock(flat);
if(ok == 0)
- ok = insertscore(u->score, &ia, 1);
+ ok = insertscore(u->score, &ia, IEDirty);
if(ok == 0)
insertlump(u, p);
else
@@ -193,39 +191,14 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms)
return ok;
}
-static void
-lreadahead(u64int a, Arena *arena, u64int aa, int n)
-{
- u8int buf[ClumpSize];
- Clump cl;
- IAddr ia;
-
- while(n > 0) {
- if (aa >= arena->memstats.used)
- break;
- if(readarena(arena, aa, buf, ClumpSize) < ClumpSize)
- break;
- if(unpackclump(&cl, buf, arena->clumpmagic) < 0)
- break;
- ia.addr = a;
- ia.type = cl.info.type;
- ia.size = cl.info.uncsize;
- ia.blocks = (cl.info.size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
- insertscore(cl.info.score, &ia, 0);
- a += ClumpSize + cl.info.size;
- aa += ClumpSize + cl.info.size;
- n--;
- }
-}
-
static Packet*
-readilump(Lump *u, IAddr *ia, u8int *score, int rac)
+readilump(Lump *u, IAddr *ia, u8int *score)
{
Arena *arena;
ZBlock *zb;
Packet *p, *pp;
Clump cl;
- u64int a, aa;
+ u64int aa;
u8int sc[VtScoreSize];
trace(TraceLump, "readilump enter");
@@ -258,13 +231,6 @@ readilump(Lump *u, IAddr *ia, u8int *score, int rac)
return nil;
}
- if(rac == 0) {
- trace(TraceLump, "readilump readahead");
- a = ia->addr + ClumpSize + cl.info.size;
- aa += ClumpSize + cl.info.size;
- lreadahead(a, arena, aa, 20);
- }
-
trace(TraceLump, "readilump success");
p = zblock2packet(zb, cl.info.uncsize);
freezblock(zb);
diff --git a/src/cmd/venti/srv/stats.c b/src/cmd/venti/srv/stats.c
index 3a66bf6d..5ee4d91f 100644
--- a/src/cmd/venti/srv/stats.c
+++ b/src/cmd/venti/srv/stats.c
@@ -60,6 +60,9 @@ Statdesc statdesc[NStat] =
{ "index cache flushes", },
{ "index cache stalls", },
{ "index cache read time", },
+ { "index cache lookups" },
+ { "index cache summary hits" },
+ { "index cache summary prefetches" },
{ "bloom filter hits", },
{ "bloom filter misses", },
@@ -81,6 +84,9 @@ Statdesc statdesc[NStat] =
{ "sum reads", },
{ "sum read bytes", },
+
+ { "cig loads" },
+ { "cig load time" },
};
QLock statslock;
diff --git a/src/cmd/venti/srv/syncindex.c b/src/cmd/venti/srv/syncindex.c
index 72d45f18..fb3c4ce2 100644
--- a/src/cmd/venti/srv/syncindex.c
+++ b/src/cmd/venti/srv/syncindex.c
@@ -56,13 +56,7 @@ threadmain(int argc, char *argv[])
if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
initdcache(bcmem);
initlumpcache(1*1024*1024, 1024/8);
- icmem = u64log2(icmem / (sizeof(IEntry)+sizeof(IEntry*)) / ICacheDepth);
- if(icmem < 4)
- icmem = 4;
- if(1) fprint(2, "initialize %d bytes of index cache for %d index entries\n",
- (sizeof(IEntry)+sizeof(IEntry*)) * (1 << icmem) * ICacheDepth,
- (1 << icmem) * ICacheDepth);
- initicache(icmem, ICacheDepth);
+ initicache(icmem);
initicachewrite();
if(mainindex->bloom)
startbloomproc(mainindex->bloom);
diff --git a/src/cmd/venti/srv/syncindex0.c b/src/cmd/venti/srv/syncindex0.c
index 6a008bad..98f6adf1 100644
--- a/src/cmd/venti/srv/syncindex0.c
+++ b/src/cmd/venti/srv/syncindex0.c
@@ -101,7 +101,7 @@ syncarenaindex(Index *ix, Arena *arena, u32int clump, u64int a, int fix, int *pf
}
flush = 1;
trace(TraceProc, "syncarenaindex insert %V", ci->score);
- insertscore(ci->score, &ia, 1);
+ insertscore(ci->score, &ia, IEDirty);
}
if(0 && clump / 1000 != (clump + n) / 1000)
diff --git a/src/cmd/venti/srv/venti.c b/src/cmd/venti/srv/venti.c
index fab83782..4d59dfc6 100644
--- a/src/cmd/venti/srv/venti.c
+++ b/src/cmd/venti/srv/venti.c
@@ -18,7 +18,7 @@ static void ventiserver(void*);
void
usage(void)
{
- fprint(2, "usage: venti [-Ldrs] [-a ventiaddr] [-c config] "
+ fprint(2, "usage: venti [-Ldrsw] [-a ventiaddr] [-c config] "
"[-h httpaddr] [-B blockcachesize] [-C cachesize] [-I icachesize] [-W webroot]\n");
threadexitsall("usage");
}
@@ -73,6 +73,9 @@ threadmain(int argc, char *argv[])
case 's':
nofork = 1;
break;
+ case 'w': /* compatibility with old venti */
+ queuewrites = 1;
+ break;
case 'W':
webroot = EARGF(usage());
break;
@@ -103,9 +106,6 @@ threadmain(int argc, char *argv[])
if(configfile == nil)
configfile = "venti.conf";
- if(initarenasum() < 0)
- fprint(2, "warning: can't initialize arena summing process: %r");
-
fprint(2, "conf...");
if(initventi(configfile, &config) < 0)
sysfatal("can't init server: %r");
@@ -143,13 +143,7 @@ threadmain(int argc, char *argv[])
mem, mem / (8 * 1024));
initlumpcache(mem, mem / (8 * 1024));
- icmem = u64log2(icmem / (sizeof(IEntry)+sizeof(IEntry*)) / ICacheDepth);
- if(icmem < 4)
- icmem = 4;
- if(0) fprint(2, "initialize %d bytes of index cache for %d index entries\n",
- (sizeof(IEntry)+sizeof(IEntry*)) * (1 << icmem) * ICacheDepth,
- (1 << icmem) * ICacheDepth);
- initicache(icmem, ICacheDepth);
+ initicache(icmem);
initicachewrite();
/*
@@ -179,6 +173,9 @@ threadmain(int argc, char *argv[])
}
}
+ if(initarenasum() < 0)
+ fprint(2, "warning: can't initialize arena summing process: %r");
+
fprint(2, "announce %s...", vaddr);
ventisrv = vtlisten(vaddr);
if(ventisrv == nil)
@@ -272,5 +269,3 @@ ventiserver(void *v)
flushicache();
threadexitsall(0);
}
-
-
diff --git a/src/cmd/venti/srv/www/stats.js b/src/cmd/venti/srv/www/stats.js
index 76e9f276..64de5bbb 100644
--- a/src/cmd/venti/srv/www/stats.js
+++ b/src/cmd/venti/srv/www/stats.js
@@ -38,6 +38,10 @@ graphname = new Array(
"icache dirty %",
"arg=icachehit&graph=pctdiff&arg2=icachelookup&max=100",
"icache hit %",
+ "arg=scachehit&graph=pctdiff&arg2=icachelookup&max=100",
+ "scache hit %",
+ "arg=icachemiss&graph=pctdiff&arg2=icachelookup&max=100",
+ "icache miss %",
"arg=icachelookuptime&graph=divdiff&arg2=icachelookup",
"icache lookup time",
"arg=icacheprefetch&graph=diff",
@@ -75,6 +79,8 @@ graphname = new Array(
"fresh write RPC time",
"arg=rpcwriteoldtime&graph=divdiff&arg2=rpcwriteold",
"dup write RPC time",
+ "arg=cigloadtime&graph=divdiff&arg2=cigload",
+ "cig load time",
"arg=sumreadbyte&graph=diff",
"checksum bytes/second",
@@ -118,8 +124,11 @@ column1 = new Array(
"!icache",
"arg=icachedirty&graph=pct&arg2=icachesize&max=100",
"arg=icachehit&graph=pctdiff&arg2=icachelookup&max=100",
+ "arg=scachehit&graph=pctdiff&arg2=icachelookup&max=100",
+ "arg=icachemiss&graph=pctdiff&arg2=icachelookup&max=100",
"arg=icachewrite&graph=diff",
"arg=icacheprefetch&graph=diff",
+ "arg=scacheprefetch&graph=diff",
"!dcache",
"arg=dcachedirty&graph=pct&arg2=dcachesize&max=100",
@@ -154,6 +163,7 @@ column2 = new Array(
"arg=rpcreaduncachedtime&graph=divdiff&arg2=rpcreaduncached",
"arg=rpcwritenewtime&graph=divdiff&arg2=rpcwritenew",
"arg=rpcwriteoldtime&graph=divdiff&arg2=rpcwriteold",
+ "arg=cigloadtime&graph=divdiff&arg2=cigload",
"END"
)