diff options
Diffstat (limited to 'src/cmd/venti/srv/mirrorarenas.c')
-rw-r--r-- | src/cmd/venti/srv/mirrorarenas.c | 464 |
1 files changed, 464 insertions, 0 deletions
diff --git a/src/cmd/venti/srv/mirrorarenas.c b/src/cmd/venti/srv/mirrorarenas.c new file mode 100644 index 00000000..253b4edb --- /dev/null +++ b/src/cmd/venti/srv/mirrorarenas.c @@ -0,0 +1,464 @@ +/* + * Mirror one arena partition onto another. + * Be careful to copy only new data. + */ + +#include "stdinc.h" +#include "dat.h" +#include "fns.h" + +Channel *writechan; + +typedef struct Write Write; +struct Write +{ + uchar *p; + int n; + uvlong o; + int error; +}; + +Part *src; +Part *dst; +int force; +int verbose; +char *status; +uvlong astart, aend; + +void +usage(void) +{ + fprint(2, "usage: mirrorarenas [-v] src dst [ranges]\n"); + threadexitsall("usage"); +} + +int +ereadpart(Part *p, u64int offset, u8int *buf, u32int count) +{ + if(readpart(p, offset, buf, count) != count){ + print("%T readpart %s at %#llux+%ud: %r\n", p->name, offset, count); + return -1; + } + return 0; +} + +int +ewritepart(Part *p, u64int offset, u8int *buf, u32int count) +{ + if(writepart(p, offset, buf, count) != count){ + print("%T writepart %s at %#llux+%ud: %r\n", p->name, offset, count); + return -1; + } + return 0; +} + +/* + * Extra proc to do writes to dst, so that we can overlap reading + * src with writing dst during copy. This is an easy factor of two + * (almost) in performance. + */ +static void +writeproc(void *v) +{ + Write *w; + + USED(v); + while((w = recvp(writechan)) != nil){ + if(w->n == 0) + continue; + if(ewritepart(dst, w->o, w->p, w->n) < 0) + w->error = 1; + } +} + +int +copy(uvlong start, uvlong end, char *what, DigestState *ds) +{ + int i, n; + uvlong o; + static uchar tmp[2][1024*1024]; + Write w[2]; + + assert(start <= end); + assert(astart <= start && start < aend); + assert(astart <= end && end <= aend); + + if(verbose && start != end) + print("%T copy %,llud-%,llud %s\n", start, end, what); + + i = 0; + memset(w, 0, sizeof w); + for(o=start; o<end; o+=n){ + if(w[i].error) + goto error; + n = sizeof tmp[i]; + if(o+n > end) + n = end - o; + if(ereadpart(src, o, tmp[i], n) < 0) + goto error; + w[i].p = tmp[i]; + w[i].o = o; + w[i].n = n; + w[i].error = 0; + sendp(writechan, &w[i]); + if(ds) + sha1(tmp[i], n, nil, ds); + i = 1-i; + } + if(w[i].error) + goto error; + + /* + * wait for queued write to finish + */ + w[i].p = nil; + w[i].o = 0; + w[i].n = 0; + w[i].error = 0; + sendp(writechan, &w[i]); + i = 1-i; + if(w[i].error) + return -1; + return 0; + +error: + /* + * sync with write proc + */ + w[i].p = nil; + w[i].o = 0; + w[i].n = 0; + w[i].error = 0; + sendp(writechan, &w[i]); + return -1; +} + +/* single-threaded, for reference */ +int +copy1(uvlong start, uvlong end, char *what, DigestState *ds) +{ + int n; + uvlong o; + static uchar tmp[1024*1024]; + + assert(start <= end); + assert(astart <= start && start < aend); + assert(astart <= end && end <= aend); + + if(verbose && start != end) + print("%T copy %,llud-%,llud %s\n", start, end, what); + + for(o=start; o<end; o+=n){ + n = sizeof tmp; + if(o+n > end) + n = end - o; + if(ereadpart(src, o, tmp, n) < 0) + return -1; + if(ds) + sha1(tmp, n, nil, ds); + if(ewritepart(dst, o, tmp, n) < 0) + return -1; + } + return 0; +} + +int +asha1(Part *p, uvlong start, uvlong end, DigestState *ds) +{ + int n; + uvlong o; + static uchar tmp[1024*1024]; + + if(start == end) + return 0; + assert(start < end); + + if(verbose) + print("%T sha1 %,llud-%,llud\n", start, end); + + for(o=start; o<end; o+=n){ + n = sizeof tmp; + if(o+n > end) + n = end - o; + if(ereadpart(p, o, tmp, n) < 0) + return -1; + sha1(tmp, n, nil, ds); + } + return 0; +} + +uvlong +rdown(uvlong a, int b) +{ + return a-a%b; +} + +uvlong +rup(uvlong a, int b) +{ + if(a%b == 0) + return a; + return a+b-a%b; +} + +void +mirror(Arena *sa, Arena *da) +{ + vlong v, si, di, end; + int clumpmax, blocksize; + static uchar buf[MaxIoSize]; + ArenaHead h; + DigestState xds, *ds; + vlong shaoff, base; + + base = sa->base; + blocksize = sa->blocksize; + end = sa->base + sa->size; + + astart = base - blocksize; + aend = end + blocksize; + + shaoff = 0; + + if(force){ + copy(astart, aend, "all", nil); + return; + } + + if(verbose) + print("%T %s (%,llud-%,llud)\n", sa->name, astart, aend); + + if(sa->diskstats.sealed && da->diskstats.sealed && scorecmp(da->score, zeroscore) != 0){ + if(scorecmp(sa->score, da->score) == 0) + return; + print("%T arena %s: sealed score mismatch %V vs %V\n", sa->name, sa->score, da->score); + status = "errors"; + return; + } + if(da->diskstats.sealed && scorecmp(da->score, zeroscore) != 0){ + print("%T arena %s: dst is sealed, src is not\n", sa->name); + status = "errors"; + return; + } + if(sa->diskstats.used < da->diskstats.used){ + print("%T arena %s: src used %,lld < dst used %,lld\n", sa->name, sa->diskstats.used, da->diskstats.used); + status = "errors"; + return; + } + + if(da->clumpmagic != sa->clumpmagic){ + /* + * Write this now to reduce the window in which + * the head and tail disagree about clumpmagic. + */ + da->clumpmagic = sa->clumpmagic; + memset(buf, 0, sizeof buf); + packarena(da, buf); + if(ewritepart(dst, end, buf, blocksize) < 0) + return; + } + + memset(&h, 0, sizeof h); + h.version = da->version; + strcpy(h.name, da->name); + h.blocksize = da->blocksize; + h.size = da->size + 2*da->blocksize; + h.clumpmagic = da->clumpmagic; + memset(buf, 0, sizeof buf); + packarenahead(&h, buf); + if(ewritepart(dst, base - blocksize, buf, blocksize) < 0) + return; + + ds = nil; + if(sa->diskstats.sealed && scorecmp(sa->score, zeroscore) != 0){ + /* start sha1 state with header */ + memset(&xds, 0, sizeof xds); + ds = &xds; + sha1(buf, blocksize, nil, ds); + shaoff = base; + } + + if(sa->diskstats.used != da->diskstats.used){ + di = base+rdown(da->diskstats.used, blocksize); + si = base+rup(sa->diskstats.used, blocksize); + if(ds && asha1(dst, shaoff, di, ds) < 0) + return; + if(copy(di, si, "data", ds) < 0) + return; + shaoff = si; + } + + clumpmax = sa->clumpmax; + di = end - da->diskstats.clumps/clumpmax * blocksize; + si = end - (sa->diskstats.clumps+clumpmax-1)/clumpmax * blocksize; + + if(sa->diskstats.sealed){ + /* + * might be a small hole between the end of the + * data and the beginning of the directory. + */ + v = base+rup(sa->diskstats.used, blocksize); + if(ds && asha1(dst, shaoff, v, ds) < 0) + return; + if(copy(v, si, "hole", ds) < 0) + return; + shaoff = si; + } + + if(da->diskstats.clumps != sa->diskstats.clumps){ + if(ds && asha1(dst, shaoff, si, ds) < 0) + return; + if(copy(si, di, "directory", ds) < 0) /* si < di because clumpinfo blocks grow down */ + return; + shaoff = di; + } + + da->ctime = sa->ctime; + da->wtime = sa->wtime; + da->diskstats = sa->diskstats; + da->diskstats.sealed = 0; + + memset(buf, 0, sizeof buf); + packarena(da, buf); + if(ewritepart(dst, end, buf, blocksize) < 0) + return; + + if(ds){ + asha1(dst, shaoff, end, ds); + da->diskstats.sealed = 1; + memset(buf, 0, sizeof buf); + packarena(da, buf); + sha1(buf, blocksize, da->score, ds); + if(scorecmp(sa->score, da->score) == 0){ + if(verbose) + print("%T arena %s: %V\n", sa->name, da->score); + scorecp(buf+blocksize-VtScoreSize, da->score); + if(ewritepart(dst, end, buf, blocksize) < 0) + return; + }else{ + print("%T arena %s: sealing dst: score mismatch: %V vs %V\n", sa->name, sa->score, da->score); + memset(&xds, 0, sizeof xds); + asha1(dst, base-blocksize, end, &xds); + sha1(buf, blocksize, da->score, &xds); + print("%T reseal: %V\n", da->score); + status = "errors"; + } + } +} + +void +mirrormany(ArenaPart *sp, ArenaPart *dp, char *range) +{ + int i, lo, hi; + char *s, *t; + Arena *sa, *da; + + if(range == nil){ + for(i=0; i<sp->narenas; i++){ + sa = sp->arenas[i]; + da = dp->arenas[i]; + mirror(sa, da); + } + return; + } + if(strcmp(range, "none") == 0) + return; + + for(s=range; *s; s=t){ + t = strchr(s, ','); + if(t) + *t++ = 0; + else + t = s+strlen(s); + if(*s == '-') + lo = 0; + else + lo = strtol(s, &s, 0); + hi = lo; + if(*s == '-'){ + s++; + if(*s == 0) + hi = sp->narenas-1; + else + hi = strtol(s, &s, 0); + } + if(*s != 0){ + print("%T bad arena range: %s\n", s); + continue; + } + for(i=lo; i<=hi; i++){ + sa = sp->arenas[i]; + da = dp->arenas[i]; + mirror(sa, da); + } + } +} + + +void +threadmain(int argc, char **argv) +{ + int i; + Arena *sa, *da; + ArenaPart *s, *d; + char *ranges; + + ventifmtinstall(); + + ARGBEGIN{ + case 'F': + force = 1; + break; + case 'v': + verbose++; + break; + default: + usage(); + }ARGEND + + if(argc != 2 && argc != 3) + usage(); + ranges = nil; + if(argc == 3) + ranges = argv[2]; + + if((src = initpart(argv[0], OREAD)) == nil) + sysfatal("initpart %s: %r", argv[0]); + if((dst = initpart(argv[1], ORDWR)) == nil) + sysfatal("initpart %s: %r", argv[1]); + if((s = initarenapart(src)) == nil) + sysfatal("initarenapart %s: %r", argv[0]); + for(i=0; i<s->narenas; i++) + delarena(s->arenas[i]); + if((d = initarenapart(dst)) == nil) + sysfatal("loadarenapart %s: %r", argv[1]); + for(i=0; i<d->narenas; i++) + delarena(d->arenas[i]); + + /* + * The arena geometries must match or all bets are off. + */ + if(s->narenas != d->narenas) + sysfatal("arena count mismatch: %d vs %d", s->narenas, d->narenas); + for(i=0; i<s->narenas; i++){ + sa = s->arenas[i]; + da = d->arenas[i]; + if(sa->version != da->version) + sysfatal("arena %d: version mismatch: %d vs %d", i, sa->version, da->version); + if(sa->blocksize != da->blocksize) + sysfatal("arena %d: blocksize mismatch: %d vs %d", i, sa->blocksize, da->blocksize); + if(sa->size != da->size) + sysfatal("arena %d: size mismatch: %,lld vs %,lld", i, sa->size, da->size); + if(strcmp(sa->name, da->name) != 0) + sysfatal("arena %d: name mismatch: %s vs %s", i, sa->name, da->name); + } + + /* + * Mirror one arena at a time. + */ + writechan = chancreate(sizeof(void*), 0); + vtproc(writeproc, nil); + mirrormany(s, d, ranges); + sendp(writechan, nil); + threadexitsall(status); +} |