diff options
Diffstat (limited to 'src/cmd/fossil/disk.c')
-rw-r--r-- | src/cmd/fossil/disk.c | 406 |
1 files changed, 406 insertions, 0 deletions
diff --git a/src/cmd/fossil/disk.c b/src/cmd/fossil/disk.c new file mode 100644 index 00000000..3ae43fb6 --- /dev/null +++ b/src/cmd/fossil/disk.c @@ -0,0 +1,406 @@ +#include "stdinc.h" +#include "dat.h" +#include "fns.h" +#include "error.h" + +static void diskThread(void *a); + +enum { + /* + * disable measurement since it gets alignment faults on BG + * and the guts used to be commented out. + */ + Timing = 0, /* flag */ + QueueSize = 100, /* maximum block to queue */ +}; + +struct Disk { + VtLock *lk; + int ref; + + int fd; + Header h; + + VtRendez *flow; + VtRendez *starve; + VtRendez *flush; + VtRendez *die; + + int nqueue; + + Block *cur; /* block to do on current scan */ + Block *next; /* blocks to do next scan */ +}; + +/* keep in sync with Part* enum in dat.h */ +static char *partname[] = { + [PartError] "error", + [PartSuper] "super", + [PartLabel] "label", + [PartData] "data", + [PartVenti] "venti", +}; + +Disk * +diskAlloc(int fd) +{ + u8int buf[HeaderSize]; + Header h; + Disk *disk; + + if(pread(fd, buf, HeaderSize, HeaderOffset) < HeaderSize){ + vtSetError("short read: %r"); + vtOSError(); + return nil; + } + + if(!headerUnpack(&h, buf)){ + vtSetError("bad disk header"); + return nil; + } + disk = vtMemAllocZ(sizeof(Disk)); + disk->lk = vtLockAlloc(); + disk->starve = vtRendezAlloc(disk->lk); + disk->flow = vtRendezAlloc(disk->lk); + disk->flush = vtRendezAlloc(disk->lk); + disk->fd = fd; + disk->h = h; + + disk->ref = 2; + vtThread(diskThread, disk); + + return disk; +} + +void +diskFree(Disk *disk) +{ + diskFlush(disk); + + /* kill slave */ + vtLock(disk->lk); + disk->die = vtRendezAlloc(disk->lk); + vtWakeup(disk->starve); + while(disk->ref > 1) + vtSleep(disk->die); + vtUnlock(disk->lk); + vtRendezFree(disk->flow); + vtRendezFree(disk->starve); + vtRendezFree(disk->die); + vtLockFree(disk->lk); + close(disk->fd); + vtMemFree(disk); +} + +static u32int +partStart(Disk *disk, int part) +{ + switch(part){ + default: + assert(0); + case PartSuper: + return disk->h.super; + case PartLabel: + return disk->h.label; + case PartData: + return disk->h.data; + } +} + + +static u32int +partEnd(Disk *disk, int part) +{ + switch(part){ + default: + assert(0); + case PartSuper: + return disk->h.super+1; + case PartLabel: + return disk->h.data; + case PartData: + return disk->h.end; + } +} + +int +diskReadRaw(Disk *disk, int part, u32int addr, uchar *buf) +{ + ulong start, end; + u64int offset; + int n, nn; + + start = partStart(disk, part); + end = partEnd(disk, part); + + if(addr >= end-start){ + vtSetError(EBadAddr); + return 0; + } + + offset = ((u64int)(addr + start))*disk->h.blockSize; + n = disk->h.blockSize; + while(n > 0){ + nn = pread(disk->fd, buf, n, offset); + if(nn < 0){ + vtOSError(); + return 0; + } + if(nn == 0){ + vtSetError("eof reading disk"); + return 0; + } + n -= nn; + offset += nn; + buf += nn; + } + return 1; +} + +int +diskWriteRaw(Disk *disk, int part, u32int addr, uchar *buf) +{ + ulong start, end; + u64int offset; + int n; + + start = partStart(disk, part); + end = partEnd(disk, part); + + if(addr >= end - start){ + vtSetError(EBadAddr); + return 0; + } + + offset = ((u64int)(addr + start))*disk->h.blockSize; + n = pwrite(disk->fd, buf, disk->h.blockSize, offset); + if(n < 0){ + vtOSError(); + return 0; + } + if(n < disk->h.blockSize) { + vtSetError("short write"); + return 0; + } + + return 1; +} + +static void +diskQueue(Disk *disk, Block *b) +{ + Block **bp, *bb; + + vtLock(disk->lk); + while(disk->nqueue >= QueueSize) + vtSleep(disk->flow); + if(disk->cur == nil || b->addr > disk->cur->addr) + bp = &disk->cur; + else + bp = &disk->next; + + for(bb=*bp; bb; bb=*bp){ + if(b->addr < bb->addr) + break; + bp = &bb->ionext; + } + b->ionext = bb; + *bp = b; + if(disk->nqueue == 0) + vtWakeup(disk->starve); + disk->nqueue++; + vtUnlock(disk->lk); +} + + +void +diskRead(Disk *disk, Block *b) +{ + assert(b->iostate == BioEmpty || b->iostate == BioLabel); + blockSetIOState(b, BioReading); + diskQueue(disk, b); +} + +void +diskWrite(Disk *disk, Block *b) +{ + assert(b->nlock == 1); + assert(b->iostate == BioDirty); + blockSetIOState(b, BioWriting); + diskQueue(disk, b); +} + +void +diskWriteAndWait(Disk *disk, Block *b) +{ + int nlock; + + /* + * If b->nlock > 1, the block is aliased within + * a single thread. That thread is us. + * DiskWrite does some funny stuff with VtLock + * and blockPut that basically assumes b->nlock==1. + * We humor diskWrite by temporarily setting + * nlock to 1. This needs to be revisited. + */ + nlock = b->nlock; + if(nlock > 1) + b->nlock = 1; + diskWrite(disk, b); + while(b->iostate != BioClean) + vtSleep(b->ioready); + b->nlock = nlock; +} + +int +diskBlockSize(Disk *disk) +{ + return disk->h.blockSize; /* immuttable */ +} + +int +diskFlush(Disk *disk) +{ + Dir dir; + + vtLock(disk->lk); + while(disk->nqueue > 0) + vtSleep(disk->flush); + vtUnlock(disk->lk); + + /* there really should be a cleaner interface to flush an fd */ + nulldir(&dir); + if(dirfwstat(disk->fd, &dir) < 0){ + vtOSError(); + return 0; + } + return 1; +} + +u32int +diskSize(Disk *disk, int part) +{ + return partEnd(disk, part) - partStart(disk, part); +} + +static uintptr +mypc(int x) +{ + return getcallerpc(&x); +} + +static char * +disk2file(Disk *disk) +{ + static char buf[256]; + + if (fd2path(disk->fd, buf, sizeof buf) < 0) + strncpy(buf, "GOK", sizeof buf); + return buf; +} + +static void +diskThread(void *a) +{ + Disk *disk = a; + Block *b; + uchar *buf, *p; + double t; + int nio; + + vtThreadSetName("disk"); + +//fprint(2, "diskThread %d\n", getpid()); + + buf = vtMemAlloc(disk->h.blockSize); + + vtLock(disk->lk); + if (Timing) { + nio = 0; + t = -nsec(); + } + for(;;){ + while(disk->nqueue == 0){ + if (Timing) { + t += nsec(); + if(nio >= 10000){ + fprint(2, "disk: io=%d at %.3fms\n", + nio, t*1e-6/nio); + nio = 0; + t = 0; + } + } + if(disk->die != nil) + goto Done; + vtSleep(disk->starve); + if (Timing) + t -= nsec(); + } + assert(disk->cur != nil || disk->next != nil); + + if(disk->cur == nil){ + disk->cur = disk->next; + disk->next = nil; + } + b = disk->cur; + disk->cur = b->ionext; + vtUnlock(disk->lk); + + /* + * no one should hold onto blocking in the + * reading or writing state, so this lock should + * not cause deadlock. + */ +if(0)fprint(2, "fossil: diskThread: %d:%d %x\n", getpid(), b->part, b->addr); + bwatchLock(b); + vtLock(b->lk); + b->pc = mypc(0); + assert(b->nlock == 1); + switch(b->iostate){ + default: + abort(); + case BioReading: + if(!diskReadRaw(disk, b->part, b->addr, b->data)){ + fprint(2, "fossil: diskReadRaw failed: %s: " + "score %V: part=%s block %ud: %r\n", + disk2file(disk), b->score, + partname[b->part], b->addr); + blockSetIOState(b, BioReadError); + }else + blockSetIOState(b, BioClean); + break; + case BioWriting: + p = blockRollback(b, buf); + /* NB: ctime result ends with a newline */ + if(!diskWriteRaw(disk, b->part, b->addr, p)){ + fprint(2, "fossil: diskWriteRaw failed: %s: " + "score %V: date %s part=%s block %ud: %r\n", + disk2file(disk), b->score, + ctime(time(0)), + partname[b->part], b->addr); + break; + } + if(p != buf) + blockSetIOState(b, BioClean); + else + blockSetIOState(b, BioDirty); + break; + } + + blockPut(b); /* remove extra reference, unlock */ + vtLock(disk->lk); + disk->nqueue--; + if(disk->nqueue == QueueSize-1) + vtWakeup(disk->flow); + if(disk->nqueue == 0) + vtWakeup(disk->flush); + if(Timing) + nio++; + } +Done: +//fprint(2, "diskThread done\n"); + disk->ref--; + vtWakeup(disk->die); + vtUnlock(disk->lk); + vtMemFree(buf); +} |