diff options
author | Russ Cox <rsc@swtch.com> | 2007-09-25 09:47:31 -0400 |
---|---|---|
committer | Russ Cox <rsc@swtch.com> | 2007-09-25 09:47:31 -0400 |
commit | 7a400ee957a0815287af806e18ef90dd18b47f82 (patch) | |
tree | 023076fb829f630384f2f394eb9577a81fdca59e /src/cmd/venti | |
parent | 25a4e89fa907ed5a5f5d84eccfb66180007d9c68 (diff) | |
download | plan9port-7a400ee957a0815287af806e18ef90dd18b47f82.tar.gz plan9port-7a400ee957a0815287af806e18ef90dd18b47f82.tar.bz2 plan9port-7a400ee957a0815287af806e18ef90dd18b47f82.zip |
venti: new icache
Diffstat (limited to 'src/cmd/venti')
-rw-r--r-- | src/cmd/venti/srv/arena.c | 195 | ||||
-rw-r--r-- | src/cmd/venti/srv/checkindex.c | 11 | ||||
-rw-r--r-- | src/cmd/venti/srv/conv.c | 8 | ||||
-rw-r--r-- | src/cmd/venti/srv/dat.h | 46 | ||||
-rw-r--r-- | src/cmd/venti/srv/fns.h | 11 | ||||
-rw-r--r-- | src/cmd/venti/srv/hdisk.c | 10 | ||||
-rw-r--r-- | src/cmd/venti/srv/httpd.c | 8 | ||||
-rw-r--r-- | src/cmd/venti/srv/icache.c | 708 | ||||
-rw-r--r-- | src/cmd/venti/srv/icachewrite.c | 92 | ||||
-rw-r--r-- | src/cmd/venti/srv/ifile.c | 35 | ||||
-rw-r--r-- | src/cmd/venti/srv/index.c | 19 | ||||
-rw-r--r-- | src/cmd/venti/srv/lump.c | 50 | ||||
-rw-r--r-- | src/cmd/venti/srv/stats.c | 6 | ||||
-rw-r--r-- | src/cmd/venti/srv/syncindex.c | 8 | ||||
-rw-r--r-- | src/cmd/venti/srv/syncindex0.c | 2 | ||||
-rw-r--r-- | src/cmd/venti/srv/venti.c | 21 | ||||
-rw-r--r-- | src/cmd/venti/srv/www/stats.js | 10 |
17 files changed, 813 insertions, 427 deletions
diff --git a/src/cmd/venti/srv/arena.c b/src/cmd/venti/srv/arena.c index 7cd2eb73..c576e9aa 100644 --- a/src/cmd/venti/srv/arena.c +++ b/src/cmd/venti/srv/arena.c @@ -16,6 +16,7 @@ static int loadarena(Arena *arena); static CIBlock *getcib(Arena *arena, int clump, int writing, CIBlock *rock); static void putcib(Arena *arena, CIBlock *cib); static void sumproc(void *); +static void loadcig(Arena *arena); static QLock sumlock; static Rendez sumwait; @@ -137,14 +138,23 @@ readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n) CIBlock *cib, r; int i; - for(i = 0; i < n; i++){ + /* + * because the clump blocks are laid out + * in reverse order at the end of the arena, + * it can be a few percent faster to read + * the clumps backwards, which reads the + * disk blocks forwards. + */ + for(i = n-1; i >= 0; i--){ cib = getcib(arena, clump + i, 0, &r); - if(cib == nil) - break; + if(cib == nil){ + n = i; + continue; + } unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]); putcib(arena, cib); } - return i; + return n; } /* @@ -349,7 +359,28 @@ writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int start, u64int *pa) if(c->info.size < c->info.uncsize) arena->memstats.cclumps++; - clump = arena->memstats.clumps++; + clump = arena->memstats.clumps; + if(clump % ArenaCIGSize == 0){ + if(arena->cig == nil){ + loadcig(arena); + if(arena->cig == nil) + goto NoCIG; + } + /* add aa as start of next cig */ + if(clump/ArenaCIGSize != arena->ncig){ + fprint(2, "bad arena cig computation %s: writing clump %d but %d cigs\n", + arena->name, clump, arena->ncig); + arena->ncig = -1; + vtfree(arena->cig); + arena->cig = nil; + goto NoCIG; + } + arena->cig = vtrealloc(arena->cig, (arena->ncig+1)*sizeof arena->cig[0]); + arena->cig[arena->ncig++].offset = aa; + } +NoCIG: + arena->memstats.clumps++; + if(arena->memstats.clumps == 0) sysfatal("clumps wrapped"); arena->wtime = now(); @@ -752,3 +783,157 @@ putcib(Arena *arena, CIBlock *cib) putdblock(cib->data); cib->data = nil; } + + +/* + * For index entry readahead purposes, the arenas are + * broken into smaller subpieces, called clump info groups + * or cigs. Each cig has ArenaCIGSize clumps (ArenaCIGSize + * is chosen to make the index entries take up about half + * a megabyte). The index entries do not contain enough + * information to determine what the clump index is for + * a given address in an arena. That info is needed both for + * figuring out which clump group an address belongs to + * and for prefetching a clump group's index entries from + * the arena table of contents. The first time clump groups + * are accessed, we scan the entire arena table of contents + * (which might be 10s of megabytes), recording the data + * offset of each clump group. + */ + +/* + * load clump info group information by scanning entire toc. + */ +static void +loadcig(Arena *arena) +{ + u32int i, j, ncig, nci; + ArenaCIG *cig; + ClumpInfo *ci; + u64int offset; + int ms; + + if(arena->cig || arena->ncig < 0) + return; + +// fprint(2, "loadcig %s\n", arena->name); + + ncig = (arena->memstats.clumps+ArenaCIGSize-1) / ArenaCIGSize; + if(ncig == 0){ + arena->cig = vtmalloc(1); + arena->ncig = 0; + return; + } + + ms = msec(); + cig = vtmalloc(ncig*sizeof cig[0]); + ci = vtmalloc(ArenaCIGSize*sizeof ci[0]); + offset = 0; + for(i=0; i<ncig; i++){ + nci = readclumpinfos(arena, i*ArenaCIGSize, ci, ArenaCIGSize); + cig[i].offset = offset; + for(j=0; j<nci; j++) + offset += ClumpSize + ci[j].size; + if(nci < ArenaCIGSize){ + if(i != ncig-1){ + vtfree(ci); + vtfree(cig); + arena->ncig = -1; + fprint(2, "loadcig %s: got %ud cigs, expected %ud\n", arena->name, i+1, ncig); + goto out; + } + } + } + vtfree(ci); + + arena->ncig = ncig; + arena->cig = cig; + +out: + ms = msec() - ms; + addstat2(StatCigLoad, 1, StatCigLoadTime, ms); +} + +/* + * convert arena address into arena group + data boundaries. + */ +int +arenatog(Arena *arena, u64int addr, u64int *gstart, u64int *glimit, int *g) +{ + int r, l, m; + + qlock(&arena->lock); + if(arena->cig == nil) + loadcig(arena); + if(arena->cig == nil || arena->ncig == 0){ + qunlock(&arena->lock); + return -1; + } + + l = 1; + r = arena->ncig - 1; + while(l <= r){ + m = (r + l) / 2; + if(arena->cig[m].offset <= addr) + l = m + 1; + else + r = m - 1; + } + l--; + + *g = l; + *gstart = arena->cig[l].offset; + if(l+1 < arena->ncig) + *glimit = arena->cig[l+1].offset; + else + *glimit = arena->memstats.used; + qunlock(&arena->lock); + return 0; +} + +/* + * load the clump info for group g into the index entries. + */ +int +asumload(Arena *arena, int g, IEntry *entries, int nentries) +{ + int i, base, limit; + u64int addr; + ClumpInfo ci; + IEntry *ie; + + if(nentries < ArenaCIGSize){ + fprint(2, "asking for too few entries\n"); + return -1; + } + + qlock(&arena->lock); + if(arena->cig == nil) + loadcig(arena); + if(arena->cig == nil || arena->ncig == 0 || g >= arena->ncig){ + qunlock(&arena->lock); + return -1; + } + + addr = 0; + base = g*ArenaCIGSize; + limit = base + ArenaCIGSize; + if(base > arena->memstats.clumps) + base = arena->memstats.clumps; + ie = entries; + for(i=base; i<limit; i++){ + if(readclumpinfo(arena, i, &ci) < 0) + break; + if(ci.type != VtCorruptType){ + scorecp(ie->score, ci.score); + ie->ia.type = ci.type; + ie->ia.size = ci.uncsize; + ie->ia.blocks = (ci.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog; + ie->ia.addr = addr; + ie++; + } + addr += ClumpSize + ci.size; + } + qunlock(&arena->lock); + return ie - entries; +} diff --git a/src/cmd/venti/srv/checkindex.c b/src/cmd/venti/srv/checkindex.c index ef0e4ec9..ca955730 100644 --- a/src/cmd/venti/srv/checkindex.c +++ b/src/cmd/venti/srv/checkindex.c @@ -8,7 +8,7 @@ static void phdr(DBlock *eb) { static int did; - + if(!did){ did = 1; print("# diff actual correct\n"); @@ -168,7 +168,7 @@ checkbloom(Bloom *b1, Bloom *b2, int fix) { u32int *a1, *a2; int i, n, extra, missing; - + if(b1==nil && b2==nil) return 0; if(b1==nil || b2==nil){ @@ -188,13 +188,14 @@ checkbloom(Bloom *b1, Bloom *b2, int fix) missing = 0; for(i=BloomHeadSize/4; i<n; i++){ if(a1[i] != a2[i]){ -print("%.8ux/%.8ux.", a1[i], a2[i]); - extra += countbits(a1[i] & ~a2[i]); +// print("%.8ux/%.8ux.", a1[i], a2[i]); + extra += countbits(a1[i] & ~a2[i]); missing += countbits(a2[i] & ~a1[i]); } } if(extra || missing) - fprint(2, "bloom filter: %d spurious bits, %d missing bits\n", extra, missing); + fprint(2, "bloom filter: %d spurious bits, %d missing bits\n", + extra, missing); else fprint(2, "bloom filter: correct\n"); if(!fix && missing){ diff --git a/src/cmd/venti/srv/conv.c b/src/cmd/venti/srv/conv.c index 2a0a0bc1..f72511b1 100644 --- a/src/cmd/venti/srv/conv.c +++ b/src/cmd/venti/srv/conv.c @@ -581,9 +581,9 @@ unpackientry(IEntry *ie, u8int *buf) scorecp(ie->score, p); p += VtScoreSize; - ie->wtime = U32GET(p); + /* ie->wtime = U32GET(p); */ p += U32Size; - ie->train = U16GET(p); + /* ie->train = U16GET(p); */ p += U16Size; if(p - buf != IEntryAddrOff) sysfatal("unpackentry bad IEntryAddrOff amount"); @@ -613,9 +613,9 @@ packientry(IEntry *ie, u8int *buf) scorecp(p, ie->score); p += VtScoreSize; - U32PUT(p, ie->wtime); + U32PUT(p, 0); /* wtime */ p += U32Size; - U16PUT(p, ie->train); + U16PUT(p, 0); /* train */ p += U16Size; U64PUT(p, ie->ia.addr, t32); p += U64Size; diff --git a/src/cmd/venti/srv/dat.h b/src/cmd/venti/srv/dat.h index 07be62f4..e0a9e18a 100644 --- a/src/cmd/venti/srv/dat.h +++ b/src/cmd/venti/srv/dat.h @@ -3,6 +3,7 @@ typedef struct AMap AMap; typedef struct AMapN AMapN; typedef struct Arena Arena; typedef struct AState AState; +typedef struct ArenaCIG ArenaCIG; typedef struct ArenaHead ArenaHead; typedef struct ArenaPart ArenaPart; typedef struct ArenaTail ArenaTail; @@ -28,8 +29,10 @@ typedef struct ZBlock ZBlock; typedef struct Round Round; typedef struct Bloom Bloom; -#define TWID32 ((u32int)~(u32int)0) -#define TWID64 ((u64int)~(u64int)0) +#pragma incomplete IEStream + +#define TWID32 ((u32int)~(u32int)0) +#define TWID64 ((u64int)~(u64int)0) #define TWID8 ((u8int)~(u8int)0) enum @@ -44,7 +47,6 @@ enum IndexBase = 1024*1024, /* initial address to use in an index */ MaxIo = 64*1024, /* max size of a single read or write operation */ ICacheBits = 16, /* default bits for indexing icache */ - ICacheDepth = 4, /* default depth of an icache hash chain */ MaxAMap = 2*1024, /* max. allowed arenas in an address mapping; must be < 32*1024 */ /* @@ -147,6 +149,8 @@ enum DirtyArenaCib, DirtyArenaTrailer, DirtyMax, + + ArenaCIGSize = 10*1024, // about 0.5 MB worth of IEntry. VentiZZZZZZZZ }; @@ -371,6 +375,14 @@ struct Arena u32int ctime; /* first time a block was written */ u32int wtime; /* last time a block was written */ u32int clumpmagic; + + ArenaCIG *cig; + int ncig; +}; + +struct ArenaCIG +{ + u64int offset; // from arena base }; /* @@ -505,14 +517,20 @@ struct IAddr */ struct IEntry { - u8int score[VtScoreSize]; - IEntry *next; /* next in hash chain */ - IEntry *nextdirty; /* next in dirty chain */ - u32int wtime; /* last write time */ - u16int train; /* relative train containing the most recent ref; 0 if no ref, 1 if in same car */ - u8int rac; /* read ahead count */ - u8int dirty; /* is dirty */ - IAddr ia; + /* on disk data - 32 bytes*/ + u8int score[VtScoreSize]; + IAddr ia; + + IEntry *nexthash; + IEntry *nextdirty; + IEntry *next; + IEntry *prev; + u8int state; +}; +enum { + IEClean = 0, + IEDirty = 1, + IESummary = 2, }; /* @@ -607,6 +625,9 @@ enum StatIcacheFlush, StatIcacheStall, StatIcacheReadTime, + StatIcacheLookup, + StatScacheHit, + StatScachePrefetch, StatBloomHit, StatBloomMiss, @@ -628,6 +649,9 @@ enum StatSumRead, StatSumReadBytes, + + StatCigLoad, + StatCigLoadTime, NStat }; diff --git a/src/cmd/venti/srv/fns.h b/src/cmd/venti/srv/fns.h index cf529953..05a3e937 100644 --- a/src/cmd/venti/srv/fns.h +++ b/src/cmd/venti/srv/fns.h @@ -6,8 +6,11 @@ void addstat(int, int); void addstat2(int, int, int, int); ZBlock *alloczblock(u32int size, int zeroed, uint alignment); Arena *amapitoa(Index *index, u64int a, u64int *aa); +Arena *amapitoag(Index *index, u64int a, u64int *gstart, u64int *glimit, int *g); u64int arenadirsize(Arena *arena, u32int clumps); +int arenatog(Arena *arena, u64int aa, u64int *gstart, u64int *glimit, int *g); void arenaupdate(Arena *arena, u32int size, u8int *score); +int asumload(Arena *arena, int g, IEntry *entries, int maxentries); void backsumarena(Arena *arena); void binstats(long (*fn)(Stats *s0, Stats *s1, void*), void *arg, long t0, long t1, Statbin *bin, int nbin); int bloominit(Bloom*, vlong, uchar*); @@ -64,6 +67,7 @@ int iaddrcmp(IAddr *ia1, IAddr *ia2); IEntry* icachedirty(u32int, u32int, u64int); ulong icachedirtyfrac(void); void icacheclean(IEntry*); +int icachelookup(u8int *score, int type, IAddr *ia); int ientrycmp(const void *vie1, const void *vie2); char *ifileline(IFile *f); int ifilename(IFile *f, char *dst); @@ -76,7 +80,7 @@ ArenaPart *initarenapart(Part *part); int initarenasum(void); void initbloomfilter(Index*); void initdcache(u32int mem); -void initicache(int bits, int depth); +void initicache(u32int mem); void initicachewrite(void); IEStream *initiestream(Part *part, u64int off, u64int clumps, u32int size); ISect *initisect(Part *part); @@ -87,7 +91,7 @@ Part* initpart(char *name, int mode); void initround(Round*, char*, int); int initventi(char *config, Config *conf); void insertlump(Lump *lump, Packet *p); -int insertscore(u8int *score, IAddr *ia, int write); +int insertscore(u8int *score, IAddr *ia, int state); void kickdcache(void); void kickicache(void); void kickround(Round*, int wait); @@ -97,8 +101,7 @@ DBlock *loadibucket(Index *index, u8int *score, ISect **is, u32int *buck, IBucke int loadientry(Index *index, u8int *score, int type, IEntry *ie); void logerr(int severity, char *fmt, ...); Lump *lookuplump(u8int *score, int type); -int _lookupscore(u8int *score, int type, IAddr *ia, int *rac); -int lookupscore(u8int *score, int type, IAddr *ia, int *rac); +int lookupscore(u8int *score, int type, IAddr *ia); int maparenas(AMap *am, Arena **arenas, int n, char *what); void markbloomfilter(Bloom*, u8int*); uint msec(void); diff --git a/src/cmd/venti/srv/hdisk.c b/src/cmd/venti/srv/hdisk.c index 266218b1..8cf937d1 100644 --- a/src/cmd/venti/srv/hdisk.c +++ b/src/cmd/venti/srv/hdisk.c @@ -547,7 +547,7 @@ debugread(HConnect *c, u8int *score) Lump *u; IAddr ia; IEntry ie; - int i, rac; + int i; Arena *arena; u64int aa; ZBlock *zb; @@ -561,7 +561,7 @@ debugread(HConnect *c, u8int *score) } hprint(&c->hout, "<h2>index search %V</h2><pre>\n", score); - if(_lookupscore(score, -1, &ia, nil) < 0) + if(icachelookup(score, -1, &ia) < 0) hprint(&c->hout, " icache: not found\n"); else hprint(&c->hout, " icache: addr=%#llx size=%d type=%d blocks=%d\n", @@ -585,12 +585,12 @@ debugread(HConnect *c, u8int *score) hprint(&c->hout, " -cache"); putlump(u); - if(lookupscore(score, type, &ia, &rac) < 0){ + if(lookupscore(score, type, &ia) < 0){ hprint(&c->hout, " -lookup\n"); continue; } - hprint(&c->hout, "\n lookupscore: addr=%#llx size=%d blocks=%d rac=%d\n", - ia.addr, ia.size, ia.blocks, rac); + hprint(&c->hout, "\n lookupscore: addr=%#llx size=%d blocks=%d\n", + ia.addr, ia.size, ia.blocks); arena = amapitoa(mainindex, ia.addr, &aa); if(arena == nil){ diff --git a/src/cmd/venti/srv/httpd.c b/src/cmd/venti/srv/httpd.c index 6b7f20fa..51d8b9a1 100644 --- a/src/cmd/venti/srv/httpd.c +++ b/src/cmd/venti/srv/httpd.c @@ -895,7 +895,7 @@ static char* graphname[] = "icachehit", "icachemiss", - "icachelookup", + "icacheread", "icachewrite", "icachefill", "icacheprefetch", @@ -904,6 +904,9 @@ static char* graphname[] = "icacheflush", "icachestall", "icachelookuptime", + "icachelookup", + "scachehit", + "scacheprefetch", "bloomhit", "bloommiss", @@ -925,6 +928,9 @@ static char* graphname[] = "sumread", "sumreadbyte", + + "cigload", + "cigloadtime", }; static int diff --git a/src/cmd/venti/srv/icache.c b/src/cmd/venti/srv/icache.c index 32cbb5d3..384fd2c1 100644 --- a/src/cmd/venti/srv/icache.c +++ b/src/cmd/venti/srv/icache.c @@ -2,236 +2,421 @@ #include "dat.h" #include "fns.h" +int icacheprefetch = 1; + typedef struct ICache ICache; +typedef struct IHash IHash; +typedef struct ISum ISum; + struct ICache { - QLock lock; /* locks hash table & all associated data */ + QLock lock; Rendez full; - IEntry **heads; /* heads of all the hash chains */ - int bits; /* bits to use for indexing heads */ - u32int size; /* number of heads; == 1 << bits, should be < entries */ - IEntry *base; /* all allocated hash table entries */ - IEntry *free; - u32int entries; /* elements in base */ - IEntry *dirty; /* chain of dirty elements */ - u32int ndirty; + IHash *hash; + IEntry *entries; + int nentries; + IEntry free; + IEntry clean; + IEntry dirty; u32int maxdirty; - u32int unused; /* index of first unused element in base */ - u32int stolen; /* last head from which an element was stolen */ + u32int ndirty; - Arena *last[4]; - Arena *lastload; - int nlast; + ISum **sum; + int nsum; + IHash *shash; + IEntry *sentries; + int nsentries; }; -int icacheprefetch = 1; - static ICache icache; -static IEntry *icachealloc(IAddr *ia, u8int *score); - /* - * bits is the number of bits in the icache hash table - * depth is the average depth - * memory usage is about (1<<bits) * depth * sizeof(IEntry) + (1<<bits) * sizeof(IEntry*) + * Hash table of IEntries */ -void -initicache(int bits, int depth) + +struct IHash { - icache.bits = bits; - icache.size = 1 << bits; - icache.entries = depth * icache.size; - icache.maxdirty = icache.entries/2; - icache.base = MKNZ(IEntry, icache.entries); - icache.heads = MKNZ(IEntry*, icache.size); - icache.full.l = &icache.lock; - setstat(StatIcacheSize, icache.entries); -} + int bits; + u32int size; + IEntry **table; +}; -ulong -icachedirtyfrac(void) +static IHash* +mkihash(int size1) { - return (vlong)icache.ndirty*IcacheFrac / icache.entries; + u32int size; + int bits; + IHash *ih; + + bits = 0; + size = 1; + while(size < size1){ + bits++; + size <<= 1; + } + + ih = vtmallocz(sizeof(IHash)+size*sizeof(ih->table[0])); + ih->table = (IEntry**)(ih+1); + ih->bits = bits; + ih->size = size; + return ih; } -u32int -hashbits(u8int *sc, int bits) +static IEntry* +ihashlookup(IHash *ih, u8int score[VtScoreSize], int type) { - u32int v; - - v = (sc[0] << 24) | (sc[1] << 16) | (sc[2] << 8) | sc[3]; - if(bits < 32) - v >>= (32 - bits); - return v; + u32int h; + IEntry *ie; + + h = hashbits(score, ih->bits); + for(ie=ih->table[h]; ie; ie=ie->nexthash) + if((type == -1 || type == ie->ia.type) && scorecmp(score, ie->score) == 0) + return ie; + return nil; } static void -loadarenaclumps(Arena *arena, u64int aa) +ihashdelete(IHash *ih, IEntry *ie, char *what) { - ulong i; - ClumpInfo ci; - IAddr ia; - - for(i=0; i<arena->memstats.clumps; i++){ - if(readclumpinfo(arena, i, &ci) < 0) - break; - ia.type = ci.type; - ia.size = ci.uncsize; - ia.blocks = (ci.size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog; - ia.addr = aa; - aa += ClumpSize + ci.size; - if(ia.type != VtCorruptType) - insertscore(ci.score, &ia, 0); - } + u32int h; + IEntry **l; + + h = hashbits(ie->score, ih->bits); + for(l=&ih->table[h]; *l; l=&(*l)->nexthash) + if(*l == ie){ + *l = ie->nexthash; + return; + } + fprint(2, "warning: %s %V not found in ihashdelete\n", what, ie->score); } -int -_lookupscore(u8int *score, int type, IAddr *ia, int *rac) +static void +ihashinsert(IHash *ih, IEntry *ie) { u32int h; - IEntry *ie, *last; - - qlock(&icache.lock); - h = hashbits(score, icache.bits); - last = nil; - for(ie = icache.heads[h]; ie != nil; ie = ie->next){ - if((ie->ia.type == type || type == -1) && scorecmp(ie->score, score)==0){ - if(last != nil) - last->next = ie->next; - else - icache.heads[h] = ie->next; - addstat(StatIcacheHit, 1); - if(rac) - ie->rac = 1; - trace(TraceLump, "lookupscore incache"); - ie->next = icache.heads[h]; - icache.heads[h] = ie; - - *ia = ie->ia; - if(rac) - *rac = ie->rac; - qunlock(&icache.lock); - return 0; - } - last = ie; - } - addstat(StatIcacheMiss, 1); - qunlock(&icache.lock); - return -1; + + h = hashbits(ie->score, ih->bits); + ie->nexthash = ih->table[h]; + ih->table[h] = ie; } /* -ZZZ need to think about evicting the correct IEntry, -and writing back the wtime. - * look up data score in the index cache - * if this fails, pull it in from the disk index table, if it exists. - * - * must be called with the lump for this score locked + * IEntry lists. */ -int -lookupscore(u8int *score, int type, IAddr *ia, int *rac) + +static IEntry* +popout(IEntry *ie) { - IEntry d, *ie; - u32int h; - u64int aa; - Arena *load; - int i, ret; - uint ms; + if(ie->prev == nil && ie->next == nil) + return ie; + ie->prev->next = ie->next; + ie->next->prev = ie->prev; + ie->next = nil; + ie->prev = nil; + return ie; +} - aa = 0; - ms = msec(); - - trace(TraceLump, "lookupscore %V.%d", score, type); +static IEntry* +poplast(IEntry *list) +{ + if(list->prev == list) + return nil; + return popout(list->prev); +} - ret = 0; - if(_lookupscore(score, type, ia, rac) < 0){ - if(loadientry(mainindex, score, type, &d) < 0){ - ret = -1; - goto out; - } +static IEntry* +pushfirst(IEntry *list, IEntry *ie) +{ + popout(ie); + ie->prev = list; + ie->next = list->next; + ie->prev->next = ie; + ie->next->prev = ie; + return ie; +} - /* failed in cache but found on disk - fill cache. */ - trace(TraceLump, "lookupscore loaded"); - addstat(StatIcacheFill, 1); +/* + * Arena summary cache. + */ +struct ISum +{ + QLock lock; + IEntry *entries; + int nentries; + int loaded; + u64int addr; + u64int limit; + Arena *arena; + int g; +}; - /* - * no one else can load an entry for this score, - * since we have this score's lump's lock. - */ - qlock(&icache.lock); - - /* - * If we notice that all the hits are coming from one arena, - * load the table of contents for that arena into the cache. - */ - load = nil; - h = hashbits(score, icache.bits); - ie = icachealloc(&d.ia, score); - if(icacheprefetch){ - icache.last[icache.nlast++%nelem(icache.last)] = amapitoa(mainindex, ie->ia.addr, &aa); - aa = ie->ia.addr - aa; /* compute base addr of arena */ - for(i=0; i<nelem(icache.last); i++) - if(icache.last[i] != icache.last[0]) - break; - if(i==nelem(icache.last) && icache.lastload != icache.last[0]){ - load = icache.last[0]; - icache.lastload = load; +static ISum* +scachelookup(u64int addr) +{ + int i; + ISum *s; + + for(i=0; i<icache.nsum; i++){ + s = icache.sum[i]; + if(s->addr <= addr && addr < s->limit){ + if(i > 0){ + memmove(icache.sum+1, icache.sum, i*sizeof icache.sum[0]); + icache.sum[0] = s; } + return s; } + } + return nil; +} + +static void +sumclear(ISum *s) +{ + int i; + + for(i=0; i<s->nentries; i++) + ihashdelete(icache.shash, &s->entries[i], "scache"); + s->nentries = 0; + s->loaded = 0; + s->addr = 0; + s->limit = 0; + s->arena = nil; + s->g = 0; +} + +static ISum* +scacheevict(void) +{ + ISum *s; + int i; - ie->next = icache.heads[h]; - icache.heads[h] = ie; - - *ia = ie->ia; - *rac = ie->rac; - - qunlock(&icache.lock); - if(load){ - trace(TraceProc, "preload 0x%llux", aa); - loadarenaclumps(load, aa); + for(i=icache.nsum-1; i>=0; i--){ + s = icache.sum[i]; + if(canqlock(&s->lock)){ + if(i > 0){ + memmove(icache.sum+1, icache.sum, i*sizeof icache.sum[0]); + icache.sum[0] = s; + } + sumclear(s); + return s; } } + return nil; +} -out: - ms = msec() - ms; - addstat2(StatIcacheRead, 1, StatIcacheReadTime, ms); +static void +scachehit(u64int addr) +{ + scachelookup(addr); /* for move-to-front */ +} - return ret; +static void +scachesetup(ISum *s, u64int addr) +{ + u64int addr0, limit; + int g; + + s->arena = amapitoag(mainindex, addr, &addr0, &limit, &g); + s->addr = addr0; + s->limit = limit; + s->g = g; +} + +static void +scacheload(ISum *s) +{ + int i, n; + + s->loaded = 1; + n = asumload(s->arena, s->g, s->entries, ArenaCIGSize); + /* + * n can be less then ArenaCIGSize, either if the clump group + * is the last in the arena and is only partially filled, or if there + * are corrupt clumps in the group -- those are not returned. + */ + for(i=0; i<n; i++){ + s->entries[i].ia.addr += s->addr; + ihashinsert(icache.shash, &s->entries[i]); + } +//fprint(2, "%T scacheload %s %d - %d entries\n", s->arena->name, s->g, n); + addstat(StatScachePrefetch, n); + s->nentries = n; +} + +static ISum* +scachemiss(u64int addr) +{ + ISum *s; + + s = scachelookup(addr); + if(s == nil){ + /* first time: make an entry in the cache but don't populate it yet */ + s = scacheevict(); + if(s == nil) + return nil; + scachesetup(s, addr); + qunlock(&s->lock); + return nil; + } + + /* second time: load from disk */ + qlock(&s->lock); + if(s->loaded || !icacheprefetch){ + qunlock(&s->lock); + return nil; + } + + return s; /* locked */ } /* - * insert a new element in the hash table. + * Index cache. */ -int -insertscore(u8int *score, IAddr *ia, int write) + +void +initicache(u32int mem0) { - IEntry *ie, se; - u32int h; + u32int mem; + int i, entries, scache; + + icache.full.l = &icache.lock; - trace(TraceLump, "insertscore enter"); - if(write) - addstat(StatIcacheWrite, 1); - else - addstat(StatIcachePrefetch, 1); + mem = mem0; + entries = mem / (sizeof(IEntry)+sizeof(IEntry*)); + scache = (entries/8) / ArenaCIGSize; + entries -= entries/8; + if(scache < 4) + scache = 4; + if(scache > 16) + scache = 16; + if(entries < 1000) + entries = 1000; +fprint(2, "icache %,d bytes = %,d entries; %d scache\n", mem0, entries, scache); + + icache.clean.prev = icache.clean.next = &icache.clean; + icache.dirty.prev = icache.dirty.next = &icache.dirty; + icache.free.prev = icache.free.next = &icache.free; + + icache.hash = mkihash(entries); + icache.nentries = entries; + setstat(StatIcacheSize, entries); + icache.entries = vtmallocz(entries*sizeof icache.entries[0]); + icache.maxdirty = entries / 2; + for(i=0; i<entries; i++) + pushfirst(&icache.free, &icache.entries[i]); + + icache.nsum = scache; + icache.sum = vtmallocz(scache*sizeof icache.sum[0]); + icache.sum[0] = vtmallocz(scache*sizeof icache.sum[0][0]); + icache.nsentries = scache * ArenaCIGSize; + icache.sentries = vtmallocz(scache*ArenaCIGSize*sizeof icache.sentries[0]); + icache.shash = mkihash(scache*ArenaCIGSize); + for(i=0; i<scache; i++){ + icache.sum[i] = icache.sum[0] + i; + icache.sum[i]->entries = icache.sentries + i*ArenaCIGSize; + } +} - qlock(&icache.lock); - h = hashbits(score, icache.bits); - ie = icachealloc(ia, score); - if(write){ +static IEntry* +evictlru(void) +{ + IEntry *ie; + + ie = poplast(&icache.clean); + if(ie == nil) + return nil; + ihashdelete(icache.hash, ie, "evictlru"); + return ie; +} + +static void +icacheinsert(u8int score[VtScoreSize], IAddr *ia, int state) +{ + IEntry *ie; + + if((ie = poplast(&icache.free)) == nil && (ie = evictlru()) == nil){ + addstat(StatIcacheStall, 1); + while((ie = poplast(&icache.free)) == nil && (ie = evictlru()) == nil){ + // Could safely return here if state == IEClean. + // But if state == IEDirty, have to wait to make + // sure we don't lose an index write. + // Let's wait all the time. + flushdcache(); + kickicache(); + rsleep(&icache.full); + } + addstat(StatIcacheStall, -1); + } + + memmove(ie->score, score, VtScoreSize); + ie->state = state; + ie->ia = *ia; + if(state == IEClean){ + addstat(StatIcachePrefetch, 1); + pushfirst(&icache.clean, ie); + }else{ + addstat(StatIcacheWrite, 1); + assert(state == IEDirty); icache.ndirty++; setstat(StatIcacheDirty, icache.ndirty); delaykickicache(); - ie->dirty = 1; + pushfirst(&icache.dirty, ie); } - ie->next = icache.heads[h]; - icache.heads[h] = ie; + ihashinsert(icache.hash, ie); +} + +int +icachelookup(u8int score[VtScoreSize], int type, IAddr *ia) +{ + IEntry *ie; - se = *ie; + qlock(&icache.lock); + addstat(StatIcacheLookup, 1); + if((ie = ihashlookup(icache.hash, score, type)) != nil){ + *ia = ie->ia; + if(ie->state == IEClean) + pushfirst(&icache.clean, ie); + addstat(StatIcacheHit, 1); + qunlock(&icache.lock); + return 0; + } + + if((ie = ihashlookup(icache.shash, score, type)) != nil){ + *ia = ie->ia; + icacheinsert(score, &ie->ia, IEClean); + scachehit(ie->ia.addr); + addstat(StatScacheHit, 1); + qunlock(&icache.lock); + return 0; + } + addstat(StatIcacheMiss, 1); qunlock(&icache.lock); - if(write && icache.ndirty >= icache.maxdirty) + return -1; +} + +int +insertscore(u8int score[VtScoreSize], IAddr *ia, int state) +{ + ISum *toload; + + qlock(&icache.lock); + icacheinsert(score, ia, state); + if(state == IEClean) + toload = scachemiss(ia->addr); + else{ + assert(state == IEDirty); + toload = nil; + } + qunlock(&icache.lock); + if(toload){ + scacheload(toload); + qunlock(&toload->lock); + } + + if(icache.ndirty >= icache.maxdirty) kickicache(); /* @@ -240,125 +425,81 @@ insertscore(u8int *score, IAddr *ia, int write) * the lump, meaning any searches for this block * will hit in the lump cache until after we return. */ - markbloomfilter(mainindex->bloom, score); + if(state == IEDirty) + markbloomfilter(mainindex->bloom, score); return 0; } -/* - * allocate a index cache entry which hasn't been used in a while. - * must be called with icache.lock locked - * if the score is already in the table, update the entry. - */ -static IEntry * -icachealloc(IAddr *ia, u8int *score) +static int +lookupscore_untimed(u8int score[VtScoreSize], int type, IAddr *ia) { - int i; - IEntry *ie, *last, *clean, *lastclean; - u32int h; + IEntry d; - h = hashbits(score, icache.bits); - last = nil; - for(ie = icache.heads[h]; ie != nil; ie = ie->next){ - if(ie->ia.type == ia->type && scorecmp(ie->score, score)==0){ - if(last != nil) - last->next = ie->next; - else - icache.heads[h] = ie->next; - trace(TraceLump, "icachealloc hit"); - ie->rac = 1; - return ie; - } - last = ie; - } + if(icachelookup(score, type, ia) >= 0) + return 0; - h = icache.unused; - if(h < icache.entries){ - ie = &icache.base[h++]; - icache.unused = h; - trace(TraceLump, "icachealloc unused"); - goto Found; - } + addstat(StatIcacheFill, 1); + if(loadientry(mainindex, score, type, &d) < 0) + return -1; - if((ie = icache.free) != nil){ - icache.free = ie->next; - goto Found; - } + insertscore(score, &d.ia, IEClean); + *ia = d.ia; + return 0; +} - h = icache.stolen; - for(i=0;; i++){ - h++; - if(h >= icache.size) - h = 0; - if(i == icache.size){ - trace(TraceLump, "icachealloc sleep"); - addstat(StatIcacheStall, 1); - while(icache.ndirty == icache.entries){ - /* - * This is a bit suspect. Kickicache will wake up the - * icachewritecoord, but if all the index entries are for - * unflushed disk blocks, icachewritecoord won't be - * able to do much. It always rewakes everyone when - * it thinks it is done, though, so at least we'll go around - * the while loop again. Also, if icachewritecoord sees - * that the disk state hasn't change at all since the last - * time around, it kicks the disk. This needs to be - * rethought, but it shouldn't deadlock anymore. - */ - kickicache(); - rsleep(&icache.full); - } - addstat(StatIcacheStall, -1); - i = 0; - } - lastclean = nil; - clean = nil; - last = nil; - for(ie=icache.heads[h]; ie; last=ie, ie=ie->next){ - if(!ie->dirty){ - clean = ie; - lastclean = last; - } - } - if(clean){ - if(lastclean) - lastclean->next = clean->next; - else - icache.heads[h] = clean->next; - clean->next = nil; - icache.stolen = h; - ie = clean; - trace(TraceLump, "icachealloc steal"); - goto Found; - } - } +int +lookupscore(u8int score[VtScoreSize], int type, IAddr *ia) +{ + int ms, ret; + + ms = msec(); + ret = lookupscore_untimed(score, type, ia); + ms = msec() - ms; + addstat2(StatIcacheRead, 1, StatIcacheReadTime, ms); + return ret; +} + +u32int +hashbits(u8int *sc, int bits) +{ + u32int v; -Found: - ie->ia = *ia; - scorecp(ie->score, score); - ie->rac = 0; - return ie; + v = (sc[0] << 24) | (sc[1] << 16) | (sc[2] << 8) | sc[3]; + if(bits < 32) + v >>= (32 - bits); + return v; } +ulong +icachedirtyfrac(void) +{ + return (vlong)icache.ndirty*IcacheFrac / icache.nentries; +} + +/* + * Return a singly-linked list of dirty index entries. + * with 32-bit hash numbers between lo and hi + * and address < limit. + */ IEntry* icachedirty(u32int lo, u32int hi, u64int limit) { - int i; u32int h; IEntry *ie, *dirty; dirty = nil; trace(TraceProc, "icachedirty enter"); qlock(&icache.lock); - for(i=0; i<icache.size; i++) - for(ie = icache.heads[i]; ie; ie=ie->next) - if(ie->dirty && ie->ia.addr != 0 && ie->ia.addr < limit){ + for(ie = icache.dirty.next; ie != &icache.dirty; ie=ie->next){ + if(ie->state == IEDirty && ie->ia.addr < limit){ h = hashbits(ie->score, 32); if(lo <= h && h <= hi){ ie->nextdirty = dirty; dirty = ie; } } + } qunlock(&icache.lock); trace(TraceProc, "icachedirty exit"); if(dirty == nil) @@ -366,36 +507,49 @@ icachedirty(u32int lo, u32int hi, u64int limit) return dirty; } + +/* + * The singly-linked non-circular list of index entries ie + * has been written to disk. Move them to the clean list. + */ void icacheclean(IEntry *ie) { - trace(TraceProc, "icachedirty enter"); + IEntry *next; + + trace(TraceProc, "icacheclean enter"); qlock(&icache.lock); - for(; ie; ie=ie->nextdirty){ + for(; ie; ie=next){ + assert(ie->state == IEDirty); + next = ie->nextdirty; + ie->nextdirty = nil; + popout(ie); /* from icache.dirty */ icache.ndirty--; - ie->dirty = 0; + ie->state = IEClean; + pushfirst(&icache.clean, ie); } setstat(StatIcacheDirty, icache.ndirty); rwakeupall(&icache.full); qunlock(&icache.lock); - trace(TraceProc, "icachedirty exit"); + trace(TraceProc, "icacheclean exit"); } void emptyicache(void) { int i; - IEntry *ie, **lie; + IEntry *ie; + ISum *s; qlock(&icache.lock); - for(i=0; i<icache.size; i++) - for(lie=&icache.heads[i]; (ie=*lie); ){ - if(ie->dirty == 0){ - *lie = ie->next; - ie->next = icache.free; - icache.free = ie; - }else - lie = &ie->next; + while((ie = evictlru()) != nil) + pushfirst(&icache.free, ie); + for(i=0; i<icache.nsum; i++){ + s = icache.sum[i]; + qlock(&s->lock); + sumclear(s); + qunlock(&s->lock); } qunlock(&icache.lock); } + diff --git a/src/cmd/venti/srv/icachewrite.c b/src/cmd/venti/srv/icachewrite.c index 49344c9f..0805001c 100644 --- a/src/cmd/venti/srv/icachewrite.c +++ b/src/cmd/venti/srv/icachewrite.c @@ -45,6 +45,16 @@ initicachewrite(void) vtproc(delaykickroundproc, &iwrite.round); } +static u64int +ie2diskaddr(Index *ix, ISect *is, IEntry *ie) +{ + u64int bucket, addr; + + bucket = hashbits(ie->score, 32)/ix->div; + addr = is->blockbase + ((bucket - is->start) << is->blocklog); + return addr; +} + static IEntry* nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf) { @@ -55,13 +65,13 @@ nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf) bsize = 1<<is->blocklog; iefirst = *pie; - addr = is->blockbase + ((u64int)(hashbits(iefirst->score, 32) / ix->div - is->start) << is->blocklog); + addr = ie2diskaddr(ix, is, iefirst); nbuf = 0; - for(l=&iefirst->nextdirty; (ie=*l)!=nil; l=&(*l)->nextdirty){ - naddr = is->blockbase + ((u64int)(hashbits(ie->score, 32) / ix->div - is->start) << is->blocklog); + for(l = &iefirst->nextdirty; (ie = *l) != nil; l = &(*l)->nextdirty){ + naddr = ie2diskaddr(ix, is, ie); if(naddr - addr >= Bufsize) break; - nbuf = naddr-addr; + nbuf = naddr - addr; } nbuf += bsize; @@ -75,7 +85,7 @@ nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf) static int icachewritesect(Index *ix, ISect *is, u8int *buf) { - int err, h, bsize, t; + int err, i, werr, h, bsize, t; u32int lo, hi; u64int addr, naddr; uint nbuf, off; @@ -89,29 +99,32 @@ icachewritesect(Index *ix, ISect *is, u8int *buf) else hi = is->stop * ix->div - 1; - trace(TraceProc, "icachewritesect enter %ud %ud %llud", lo, hi, iwrite.as.aa); + trace(TraceProc, "icachewritesect enter %ud %ud %llud", + lo, hi, iwrite.as.aa); iedirty = icachedirty(lo, hi, iwrite.as.aa); iedirty = iesort(iedirty); - bsize = 1<<is->blocklog; + bsize = 1 << is->blocklog; err = 0; while(iedirty){ disksched(); - while((t=icachesleeptime) == SleepForever){ + while((t = icachesleeptime) == SleepForever){ sleep(1000); disksched(); } if(t < minicachesleeptime) t = minicachesleeptime; - sleep(t); + if(t > 0) + sleep(t); trace(TraceProc, "icachewritesect nextchunk"); chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf); - trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux", addr, nbuf); + trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux", + addr, nbuf); if(readpart(is->part, addr, buf, nbuf) < 0){ - /* XXX more details here */ - fprint(2, "icachewriteproc readpart: %r\n"); + fprint(2, "%s: part %s addr 0x%llux: icachewritesect " + "readpart: %r\n", argv0, is->part->name, addr); err = -1; continue; } @@ -120,31 +133,34 @@ icachewritesect(Index *ix, ISect *is, u8int *buf) addstat(StatIsectRead, 1); for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){ - again: - naddr = is->blockbase + ((u64int)(hashbits(ie->score, 32) / ix->div - is->start) << is->blocklog); +again: + naddr = ie2diskaddr(ix, is, ie); off = naddr - addr; if(off+bsize > nbuf){ - fprint(2, "whoops! addr=0x%llux nbuf=%ud addr+nbuf=0x%llux naddr=0x%llux\n", - addr, nbuf, addr+nbuf, naddr); + fprint(2, "%s: whoops! addr=0x%llux nbuf=%ud " + "addr+nbuf=0x%llux naddr=0x%llux\n", + argv0, addr, nbuf, addr+nbuf, naddr); assert(off+bsize <= nbuf); } unpackibucket(&ib, buf+off, is->bucketmagic); if(okibucket(&ib, is) < 0){ - fprint(2, "bad bucket XXX\n"); + fprint(2, "%s: bad bucket XXX\n", argv0); goto skipit; } - trace(TraceProc, "icachewritesect add %V at 0x%llux", ie->score, naddr); + trace(TraceProc, "icachewritesect add %V at 0x%llux", + ie->score, naddr); h = bucklook(ie->score, ie->ia.type, ib.data, ib.n); if(h & 1){ h ^= 1; packientry(ie, &ib.data[h]); }else if(ib.n < is->buckmax){ - memmove(&ib.data[h+IEntrySize], &ib.data[h], ib.n*IEntrySize - h); + memmove(&ib.data[h + IEntrySize], &ib.data[h], + ib.n*IEntrySize - h); ib.n++; packientry(ie, &ib.data[h]); }else{ - fprint(2, "bucket overflow XXX\n"); - skipit: + fprint(2, "%s: bucket overflow XXX\n", argv0); +skipit: err = -1; *l = ie->nextdirty; ie = *l; @@ -154,33 +170,29 @@ icachewritesect(Index *ix, ISect *is, u8int *buf) break; } packibucket(&ib, buf+off, is->bucketmagic); - /* XXX - * This is not quite right - it's good that we - * update the cached block (if any) here, but - * since the block doesn't get written until writepart - * below, we also need to make sure that the cache - * doesn't load the stale block before we write it to - * disk below. We could lock the disk cache during - * the writepart, but that's pretty annoying. - * Another possibility would be never to cache - * index partition blocks. The hit rate on those is - * miniscule anyway. - */ - if((b = _getdblock(is->part, naddr, ORDWR, 0)) != nil){ - memmove(b->data, buf+off, bsize); - putdblock(b); - } } diskaccess(1); trace(TraceProc, "icachewritesect writepart", addr, nbuf); - if(writepart(is->part, addr, buf, nbuf) < 0 || flushpart(is->part) < 0){ - /* XXX more details here */ - fprint(2, "icachewriteproc writepart: %r\n"); + werr = 0; + if(writepart(is->part, addr, buf, nbuf) < 0 || flushpart(is->part) < 0) + werr = -1; + + for(i=0; i<nbuf; i+=bsize){ + if((b = _getdblock(is->part, addr+i, ORDWR, 0)) != nil){ + memmove(b->data, buf+i, bsize); + putdblock(b); + } + } + + if(werr < 0){ + fprint(2, "%s: part %s addr 0x%llux: icachewritesect " + "writepart: %r\n", argv0, is->part->name, addr); err = -1; continue; } + addstat(StatIsectWriteBytes, nbuf); addstat(StatIsectWrite, 1); icacheclean(chunk); diff --git a/src/cmd/venti/srv/ifile.c b/src/cmd/venti/srv/ifile.c index e5d5460a..36d96b94 100644 --- a/src/cmd/venti/srv/ifile.c +++ b/src/cmd/venti/srv/ifile.c @@ -2,46 +2,57 @@ #include "dat.h" #include "fns.h" +static char vcmagic[] = "venti config\n"; + +enum { + Maxconfig = 8 * 1024, + Maglen = sizeof vcmagic - 1, +}; + int readifile(IFile *f, char *name) { - int m; Part *p; ZBlock *b; u8int *z; - + p = initpart(name, OREAD); if(p == nil) return -1; - b = alloczblock(8192, 1, 0); + b = alloczblock(Maxconfig+1, 1, 0); if(b == nil){ seterr(EOk, "can't alloc for %s: %R", name); return -1; } if(p->size > PartBlank){ /* - * this is likely a real venti partition, in which case - * we're looking for the config file stored as 8k at end of PartBlank. + * this is likely a real venti partition, in which case we're + * looking for the config file stored as 8k at end of PartBlank. */ - if(readpart(p, PartBlank-8192, b->data, 8192) < 0){ + if(readpart(p, PartBlank-Maxconfig, b->data, Maxconfig) < 0){ seterr(EOk, "can't read %s: %r", name); freezblock(b); freepart(p); return -1; } - m = 5+1+6+1; - if(memcmp(b->data, "venti config\n", m) != 0){ + b->data[Maxconfig] = '\0'; + if(memcmp(b->data, vcmagic, Maglen) != 0){ seterr(EOk, "bad venti config magic in %s", name); freezblock(b); freepart(p); return -1; } - b->data += m; - b->len -= m; - z = memchr(b->data, 0, b->len); + /* + * if we change b->data+b->_size, freezblock + * will blow an assertion, so don't. + */ + b->data += Maglen; + b->_size -= Maglen; + b->len -= Maglen; + z = memchr(b->data, '\0', b->len); if(z) b->len = z - b->data; - }else if(p->size > 8192){ + }else if(p->size > Maxconfig){ seterr(EOk, "config file is too large"); freepart(p); freezblock(b); diff --git a/src/cmd/venti/srv/index.c b/src/cmd/venti/srv/index.c index a5ffeef9..dd49e055 100644 --- a/src/cmd/venti/srv/index.c +++ b/src/cmd/venti/srv/index.c @@ -596,6 +596,25 @@ print("want arena %d for %llux\n", l, a); return ix->arenas[l]; } +/* + * convert an arena index to the bounds of the containing arena group. + */ +Arena* +amapitoag(Index *ix, u64int a, u64int *gstart, u64int *glimit, int *g) +{ + u64int aa; + Arena *arena; + + arena = amapitoa(ix, a, &aa); + if(arena == nil) + return nil; + if(arenatog(arena, aa, gstart, glimit, g) < 0) + return nil; + *gstart += a - aa; + *glimit += a - aa; + return arena; +} + int iaddrcmp(IAddr *ia1, IAddr *ia2) { diff --git a/src/cmd/venti/srv/lump.c b/src/cmd/venti/srv/lump.c index 1db62717..206d5e06 100644 --- a/src/cmd/venti/srv/lump.c +++ b/src/cmd/venti/srv/lump.c @@ -7,7 +7,7 @@ int queuewrites = 0; int writestodevnull = 0; int verifywrites = 0; -static Packet *readilump(Lump *u, IAddr *ia, u8int *score, int rac); +static Packet *readilump(Lump *u, IAddr *ia, u8int *score); /* * Some of this logic is duplicated in hdisk.c @@ -19,7 +19,6 @@ readlump(u8int *score, int type, u32int size, int *cached) Packet *p; IAddr ia; u32int n; - int rac; trace(TraceLump, "readlump enter"); /* @@ -49,7 +48,7 @@ readlump(u8int *score, int type, u32int size, int *cached) if(cached) *cached = 0; - if(lookupscore(score, type, &ia, &rac) < 0){ + if(lookupscore(score, type, &ia) < 0){ /* ZZZ place to check for someone trying to guess scores */ seterr(EOk, "no block with score %V/%d exists", score, type); @@ -64,7 +63,7 @@ readlump(u8int *score, int type, u32int size, int *cached) } trace(TraceLump, "readlump readilump"); - p = readilump(u, &ia, score, rac); + p = readilump(u, &ia, score); putlump(u); trace(TraceLump, "readlump exit"); @@ -134,9 +133,8 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms) Packet *old; IAddr ia; int ok; - int rac; - if(lookupscore(u->score, u->type, &ia, &rac) == 0){ + if(lookupscore(u->score, u->type, &ia) == 0){ if(verifywrites == 0){ /* assume the data is here! */ packetfree(p); @@ -149,7 +147,7 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms) * if the read fails, * assume it was corrupted data and store the block again */ - old = readilump(u, &ia, u->score, rac); + old = readilump(u, &ia, u->score); if(old != nil){ ok = 0; if(packetcmp(p, old) != 0){ @@ -176,7 +174,7 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms) ok = storeclump(mainindex, flat, u->score, u->type, creator, &ia); freezblock(flat); if(ok == 0) - ok = insertscore(u->score, &ia, 1); + ok = insertscore(u->score, &ia, IEDirty); if(ok == 0) insertlump(u, p); else @@ -193,39 +191,14 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms) return ok; } -static void -lreadahead(u64int a, Arena *arena, u64int aa, int n) -{ - u8int buf[ClumpSize]; - Clump cl; - IAddr ia; - - while(n > 0) { - if (aa >= arena->memstats.used) - break; - if(readarena(arena, aa, buf, ClumpSize) < ClumpSize) - break; - if(unpackclump(&cl, buf, arena->clumpmagic) < 0) - break; - ia.addr = a; - ia.type = cl.info.type; - ia.size = cl.info.uncsize; - ia.blocks = (cl.info.size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog; - insertscore(cl.info.score, &ia, 0); - a += ClumpSize + cl.info.size; - aa += ClumpSize + cl.info.size; - n--; - } -} - static Packet* -readilump(Lump *u, IAddr *ia, u8int *score, int rac) +readilump(Lump *u, IAddr *ia, u8int *score) { Arena *arena; ZBlock *zb; Packet *p, *pp; Clump cl; - u64int a, aa; + u64int aa; u8int sc[VtScoreSize]; trace(TraceLump, "readilump enter"); @@ -258,13 +231,6 @@ readilump(Lump *u, IAddr *ia, u8int *score, int rac) return nil; } - if(rac == 0) { - trace(TraceLump, "readilump readahead"); - a = ia->addr + ClumpSize + cl.info.size; - aa += ClumpSize + cl.info.size; - lreadahead(a, arena, aa, 20); - } - trace(TraceLump, "readilump success"); p = zblock2packet(zb, cl.info.uncsize); freezblock(zb); diff --git a/src/cmd/venti/srv/stats.c b/src/cmd/venti/srv/stats.c index 3a66bf6d..5ee4d91f 100644 --- a/src/cmd/venti/srv/stats.c +++ b/src/cmd/venti/srv/stats.c @@ -60,6 +60,9 @@ Statdesc statdesc[NStat] = { "index cache flushes", }, { "index cache stalls", }, { "index cache read time", }, + { "index cache lookups" }, + { "index cache summary hits" }, + { "index cache summary prefetches" }, { "bloom filter hits", }, { "bloom filter misses", }, @@ -81,6 +84,9 @@ Statdesc statdesc[NStat] = { "sum reads", }, { "sum read bytes", }, + + { "cig loads" }, + { "cig load time" }, }; QLock statslock; diff --git a/src/cmd/venti/srv/syncindex.c b/src/cmd/venti/srv/syncindex.c index 72d45f18..fb3c4ce2 100644 --- a/src/cmd/venti/srv/syncindex.c +++ b/src/cmd/venti/srv/syncindex.c @@ -56,13 +56,7 @@ threadmain(int argc, char *argv[]) if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem); initdcache(bcmem); initlumpcache(1*1024*1024, 1024/8); - icmem = u64log2(icmem / (sizeof(IEntry)+sizeof(IEntry*)) / ICacheDepth); - if(icmem < 4) - icmem = 4; - if(1) fprint(2, "initialize %d bytes of index cache for %d index entries\n", - (sizeof(IEntry)+sizeof(IEntry*)) * (1 << icmem) * ICacheDepth, - (1 << icmem) * ICacheDepth); - initicache(icmem, ICacheDepth); + initicache(icmem); initicachewrite(); if(mainindex->bloom) startbloomproc(mainindex->bloom); diff --git a/src/cmd/venti/srv/syncindex0.c b/src/cmd/venti/srv/syncindex0.c index 6a008bad..98f6adf1 100644 --- a/src/cmd/venti/srv/syncindex0.c +++ b/src/cmd/venti/srv/syncindex0.c @@ -101,7 +101,7 @@ syncarenaindex(Index *ix, Arena *arena, u32int clump, u64int a, int fix, int *pf } flush = 1; trace(TraceProc, "syncarenaindex insert %V", ci->score); - insertscore(ci->score, &ia, 1); + insertscore(ci->score, &ia, IEDirty); } if(0 && clump / 1000 != (clump + n) / 1000) diff --git a/src/cmd/venti/srv/venti.c b/src/cmd/venti/srv/venti.c index fab83782..4d59dfc6 100644 --- a/src/cmd/venti/srv/venti.c +++ b/src/cmd/venti/srv/venti.c @@ -18,7 +18,7 @@ static void ventiserver(void*); void usage(void) { - fprint(2, "usage: venti [-Ldrs] [-a ventiaddr] [-c config] " + fprint(2, "usage: venti [-Ldrsw] [-a ventiaddr] [-c config] " "[-h httpaddr] [-B blockcachesize] [-C cachesize] [-I icachesize] [-W webroot]\n"); threadexitsall("usage"); } @@ -73,6 +73,9 @@ threadmain(int argc, char *argv[]) case 's': nofork = 1; break; + case 'w': /* compatibility with old venti */ + queuewrites = 1; + break; case 'W': webroot = EARGF(usage()); break; @@ -103,9 +106,6 @@ threadmain(int argc, char *argv[]) if(configfile == nil) configfile = "venti.conf"; - if(initarenasum() < 0) - fprint(2, "warning: can't initialize arena summing process: %r"); - fprint(2, "conf..."); if(initventi(configfile, &config) < 0) sysfatal("can't init server: %r"); @@ -143,13 +143,7 @@ threadmain(int argc, char *argv[]) mem, mem / (8 * 1024)); initlumpcache(mem, mem / (8 * 1024)); - icmem = u64log2(icmem / (sizeof(IEntry)+sizeof(IEntry*)) / ICacheDepth); - if(icmem < 4) - icmem = 4; - if(0) fprint(2, "initialize %d bytes of index cache for %d index entries\n", - (sizeof(IEntry)+sizeof(IEntry*)) * (1 << icmem) * ICacheDepth, - (1 << icmem) * ICacheDepth); - initicache(icmem, ICacheDepth); + initicache(icmem); initicachewrite(); /* @@ -179,6 +173,9 @@ threadmain(int argc, char *argv[]) } } + if(initarenasum() < 0) + fprint(2, "warning: can't initialize arena summing process: %r"); + fprint(2, "announce %s...", vaddr); ventisrv = vtlisten(vaddr); if(ventisrv == nil) @@ -272,5 +269,3 @@ ventiserver(void *v) flushicache(); threadexitsall(0); } - - diff --git a/src/cmd/venti/srv/www/stats.js b/src/cmd/venti/srv/www/stats.js index 76e9f276..64de5bbb 100644 --- a/src/cmd/venti/srv/www/stats.js +++ b/src/cmd/venti/srv/www/stats.js @@ -38,6 +38,10 @@ graphname = new Array( "icache dirty %", "arg=icachehit&graph=pctdiff&arg2=icachelookup&max=100", "icache hit %", + "arg=scachehit&graph=pctdiff&arg2=icachelookup&max=100", + "scache hit %", + "arg=icachemiss&graph=pctdiff&arg2=icachelookup&max=100", + "icache miss %", "arg=icachelookuptime&graph=divdiff&arg2=icachelookup", "icache lookup time", "arg=icacheprefetch&graph=diff", @@ -75,6 +79,8 @@ graphname = new Array( "fresh write RPC time", "arg=rpcwriteoldtime&graph=divdiff&arg2=rpcwriteold", "dup write RPC time", + "arg=cigloadtime&graph=divdiff&arg2=cigload", + "cig load time", "arg=sumreadbyte&graph=diff", "checksum bytes/second", @@ -118,8 +124,11 @@ column1 = new Array( "!icache", "arg=icachedirty&graph=pct&arg2=icachesize&max=100", "arg=icachehit&graph=pctdiff&arg2=icachelookup&max=100", + "arg=scachehit&graph=pctdiff&arg2=icachelookup&max=100", + "arg=icachemiss&graph=pctdiff&arg2=icachelookup&max=100", "arg=icachewrite&graph=diff", "arg=icacheprefetch&graph=diff", + "arg=scacheprefetch&graph=diff", "!dcache", "arg=dcachedirty&graph=pct&arg2=dcachesize&max=100", @@ -154,6 +163,7 @@ column2 = new Array( "arg=rpcreaduncachedtime&graph=divdiff&arg2=rpcreaduncached", "arg=rpcwritenewtime&graph=divdiff&arg2=rpcwritenew", "arg=rpcwriteoldtime&graph=divdiff&arg2=rpcwriteold", + "arg=cigloadtime&graph=divdiff&arg2=cigload", "END" ) |