aboutsummaryrefslogtreecommitdiff
path: root/src/libdiskfs
diff options
context:
space:
mode:
Diffstat (limited to 'src/libdiskfs')
-rw-r--r--src/libdiskfs/COPYRIGHT27
-rw-r--r--src/libdiskfs/block.c51
-rw-r--r--src/libdiskfs/cache.c311
-rw-r--r--src/libdiskfs/disk.c39
-rw-r--r--src/libdiskfs/ext2.c742
-rw-r--r--src/libdiskfs/ext2.h167
-rw-r--r--src/libdiskfs/fat.c11
-rw-r--r--src/libdiskfs/ffs.c791
-rw-r--r--src/libdiskfs/ffs.h281
-rw-r--r--src/libdiskfs/file.c99
-rw-r--r--src/libdiskfs/fsys.c114
-rw-r--r--src/libdiskfs/kfs.c11
-rw-r--r--src/libdiskfs/mkfile31
-rw-r--r--src/libdiskfs/venti.c163
-rw-r--r--src/libdiskfs/vfile.c35
15 files changed, 2873 insertions, 0 deletions
diff --git a/src/libdiskfs/COPYRIGHT b/src/libdiskfs/COPYRIGHT
new file mode 100644
index 00000000..2af95d83
--- /dev/null
+++ b/src/libdiskfs/COPYRIGHT
@@ -0,0 +1,27 @@
+This software was developed as part of a project at MIT:
+ $PLAN9/src/libdiskfs/*
+ $PLAN9/include/diskfs.h
+ $PLAN9/src/cmd/vbackup/*
+
+Copyright (c) 2005 Russ Cox,
+ Massachusetts Institute of Technology
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/src/libdiskfs/block.c b/src/libdiskfs/block.c
new file mode 100644
index 00000000..1c73ed34
--- /dev/null
+++ b/src/libdiskfs/block.c
@@ -0,0 +1,51 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <diskfs.h>
+
+/*
+ * Write a hex and ASCII dump of bb's data to file descriptor 2,
+ * preceded by a line containing desc.  Each output row covers 16 bytes;
+ * positions past the end of the data are padded with blanks.
+ */
+void
+blockdump(Block *bb, char *desc)
+{
+	uchar *p, *ep;
+	int i;
+	Biobuf b;
+
+	Binit(&b, 2, OWRITE);
+
+	Bprint(&b, "%s\n", desc);
+
+	p = bb->data;
+	ep = bb->data + bb->len;
+
+	while(p < ep){
+		for(i=0; i<16; i++){
+			if(p+i < ep)
+				Bprint(&b, "%.2ux ", p[i]);
+			else
+				/* same width as "%.2ux " so the ASCII column stays aligned */
+				Bprint(&b, "   ");
+			if(i==7)
+				Bprint(&b, "- ");
+		}
+		Bprint(&b, " ");
+		for(i=0; i<16; i++){
+			if(p+i < ep)
+				/* 0x7F (DEL) is a control character; show it as '.' */
+				Bprint(&b, "%c", p[i] >= 0x20 && p[i] < 0x7F ? p[i] : '.');
+			else
+				Bprint(&b, " ");
+		}
+		p += 16;
+		Bprint(&b, "\n");
+	}
+	/* b lives on the stack: flush it now or the buffered dump is lost */
+	Bterm(&b);
+}
+
+/*
+ * Release a block through its _close method.
+ * A nil block is ignored; a block without a _close method is fatal.
+ */
+void
+blockput(Block *b)
+{
+	if(b != nil){
+		if(b->_close == nil){
+			fprint(2, "no blockPut\n");
+			abort();
+		}
+		(*b->_close)(b);
+	}
+}
diff --git a/src/libdiskfs/cache.c b/src/libdiskfs/cache.c
new file mode 100644
index 00000000..cdef865e
--- /dev/null
+++ b/src/libdiskfs/cache.c
@@ -0,0 +1,311 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+
+/*
+ * Disk cache. Caches by offset, so higher levels have
+ * to deal with alignment issues (if we get asked for the
+ * blocks at offsets 0 and 1, we'll do two reads).
+ */
+
+typedef struct DiskCache DiskCache;
+typedef struct DiskCacheBlock DiskCacheBlock;
+
+/*
+ * A caching Disk layered on top of subdisk.  The embedded Disk must stay
+ * the first member: the method implementations cast Disk* to DiskCache*.
+ */
+struct DiskCache
+{
+ Disk disk; /* methods filled in by diskcache(); &disk is what callers get */
+ Disk *subdisk; /* underlying disk that cache misses are read from */
+ DiskCacheBlock **h; /* hash table of cached blocks, chained through next */
+ DiskCacheBlock *lruhead; /* head of the list of unreferenced blocks */
+ DiskCacheBlock *lrutail; /* tail of that list; getlru() recycles from here */
+ int nhash; /* number of buckets in h */
+ int blocksize; /* size in bytes of each cached block */
+ Lock lk; /* held while the hash table and lru list are changed */
+};
+
+/*
+ * One cache slot.  diskcache() allocates ncache of these and
+ * initially places all of them on the lru list.
+ */
+struct DiskCacheBlock
+{
+ Block block; /* diskcache() sets block._close to diskcacheblockclose */
+ Block *subblock; /* data read from dc->subdisk; nil while the slot is empty */
+ Lock lk; /* NOTE(review): not referenced by the code visible in this file */
+ int ref; /* outstanding readers; the slot is on the lru list while this is 0 */
+ DiskCache *dc; /* owning cache */
+ DiskCacheBlock *next; /* next block in the same hash bucket */
+ DiskCacheBlock *lrunext; /* neighbours on the lru list */
+ DiskCacheBlock *lruprev;
+ u64int offset; /* disk offset cached here; ~0 when not in the hash table */
+};
+
+/*
+ * Enter b in the hash table under offset.
+ * b must not already be hashed (its offset must be ~0).
+ */
+static void
+addtohash(DiskCache *d, DiskCacheBlock *b, u64int offset)
+{
+	DiskCacheBlock **bucket;
+
+	if(b->offset == ~(u64int)0){
+		bucket = &d->h[offset % d->nhash];
+		b->offset = offset;
+		b->next = *bucket;
+		*bucket = b;
+		return;
+	}
+	fprint(2, "bad offset in addtohash\n");
+}
+
+/*
+ * Unlink b from its hash chain and mark it unhashed (offset ~0).
+ * Blocks that are not hashed are left alone.
+ */
+static void
+delfromhash(DiskCache *d, DiskCacheBlock *b)
+{
+	DiskCacheBlock **link;
+
+	if(b->offset == ~(u64int)0)
+		return;
+
+	link = &d->h[b->offset % d->nhash];
+	while(*link != nil){
+		if(*link == b){
+			*link = b->next;
+			b->offset = ~(u64int)0;
+			return;
+		}
+		link = &(*link)->next;
+	}
+	fprint(2, "delfromhash: didn't find in hash table\n");
+}
+
+/*
+ * Insert b at the head of the lru list.
+ */
+static void
+putmru(DiskCache *d, DiskCacheBlock *b)
+{
+	DiskCacheBlock *oldhead;
+
+	oldhead = d->lruhead;
+	b->lrunext = oldhead;
+	b->lruprev = nil;
+	if(oldhead != nil)
+		oldhead->lruprev = b;
+	else
+		d->lrutail = b;
+	d->lruhead = b;
+}
+
+/*
+ * Append b at the tail of the lru list, making it the next block
+ * that getlru will hand out.
+ */
+static void
+putlru(DiskCache *d, DiskCacheBlock *b)
+{
+	DiskCacheBlock *oldtail;
+
+	oldtail = d->lrutail;
+	b->lrunext = nil;
+	b->lruprev = oldtail;
+	if(oldtail != nil)
+		oldtail->lrunext = b;
+	else
+		d->lruhead = b;
+	d->lrutail = b;
+}
+
+/*
+ * Unlink b from the lru list, fixing up the head and tail pointers.
+ */
+static void
+delfromlru(DiskCache *d, DiskCacheBlock *b)
+{
+	DiskCacheBlock *before, *after;
+
+	before = b->lruprev;
+	after = b->lrunext;
+
+	if(before == nil)
+		d->lruhead = after;
+	else
+		before->lrunext = after;
+
+	if(after == nil)
+		d->lrutail = before;
+	else
+		after->lruprev = before;
+}
+
+/*
+ * Take the block at the tail of the lru list out of the list and the
+ * hash table and release whatever data it was holding.
+ * Returns nil if the lru list is empty.
+ */
+static DiskCacheBlock*
+getlru(DiskCache *d)
+{
+	DiskCacheBlock *victim;
+
+	victim = d->lrutail;
+	if(victim == nil)
+		return nil;
+
+	delfromlru(d, victim);
+	delfromhash(d, victim);
+	blockput(victim->subblock);
+	victim->subblock = nil;
+	return victim;
+}
+
+/*
+ * Look up the cached block for offset; nil if it is not in the hash table.
+ */
+static DiskCacheBlock*
+findblock(DiskCache *d, u64int offset)
+{
+	DiskCacheBlock *b;
+
+	b = d->h[offset % d->nhash];
+	while(b != nil && b->offset != offset)
+		b = b->next;
+	return b;
+}
+
+/*
+ * Return the cache block for the blocksize-byte block at offset, reading
+ * it from the underlying disk on a miss.  The returned block carries a
+ * reference that diskcacheblockclose drops.  Returns nil if every cache
+ * block is in use or the read fails.
+ */
+static DiskCacheBlock*
+diskcachereadbig(DiskCache *d, u64int offset)
+{
+ Block *b;
+ DiskCacheBlock *dcb;
+
+ lock(&d->lk);
+ dcb = findblock(d, offset);
+ if(dcb){
+//fprint(2, "found %llud in cache %p\n", (uvlong)offset, dcb);
+ /* unreferenced blocks sit on the lru list; take this one off on first use */
+ if(dcb->ref++ == 0)
+ delfromlru(d, dcb);
+ unlock(&d->lk);
+ return dcb;
+ }
+
+ /* miss: recycle the block at the tail of the lru list */
+ dcb = getlru(d);
+ unlock(&d->lk);
+ if(dcb == nil){
+ fprint(2, "diskcacheread: all blocks in use\n");
+ return nil;
+ }
+
+ /*
+ * The lock is not held during the read.
+ * NOTE(review): a second caller missing on the same offset in this
+ * window is not detected below; confirm callers cannot race here.
+ */
+ b = diskread(d->subdisk, d->blocksize, offset);
+ lock(&d->lk);
+ if(b == nil){
+ /* read failed: give the unused slot back to the lru list */
+ putlru(d, dcb);
+ dcb = nil;
+ }else{
+//fprint(2, "read %llud from disk %p\n", (uvlong)offset, dcb);
+ dcb->subblock = b;
+ dcb->ref++;
+ addtohash(d, dcb, offset);
+ }
+ unlock(&d->lk);
+ return dcb;
+}
+
+/*
+ * _close method for blocks handed out by diskcacheread: drop the
+ * reference on the cache slot, return the slot to the lru list when
+ * the last reference goes away, and free the Block wrapper.
+ */
+static void
+diskcacheblockclose(Block *bb)
+{
+	DiskCacheBlock *dcb;
+	DiskCache *dc;
+
+	dcb = bb->priv;
+	dc = dcb->dc;
+
+	lock(&dc->lk);
+	dcb->ref--;
+	if(dcb->ref == 0)
+		putmru(dc, dcb);
+	unlock(&dc->lk);
+	free(bb);
+}
+
+/*
+ * _read method of the cache: return len bytes at offset as a freshly
+ * allocated Block whose data points into the cached big block.
+ * The request must not cross a blocksize boundary.  The result is
+ * shortened if the underlying block holds fewer bytes than asked for.
+ * Returns nil on allocation failure, read failure or a bad request.
+ */
+static Block*
+diskcacheread(Disk *dd, u32int len, u64int offset)
+{
+ int frag, dlen;
+ DiskCache *d = (DiskCache*)dd;
+ DiskCacheBlock *dcb;
+ Block *b;
+
+ if(offset/d->blocksize != (offset+len-1)/d->blocksize){
+ fprint(2, "diskBigRead: request for block crossing big block boundary\n");
+ return nil;
+ }
+
+ b = mallocz(sizeof(Block), 1);
+ if(b == nil)
+ return nil;
+
+ /* frag is the position of the request inside its big block */
+ frag = offset%d->blocksize;
+
+ dcb = diskcachereadbig(d, offset-frag);
+ if(dcb == nil){
+ free(b);
+ return nil;
+ }
+ b->priv = dcb;
+ b->_close = diskcacheblockclose;
+ b->data = dcb->subblock->data+frag;
+
+ dlen = dcb->subblock->len;
+ if(frag+len >= dlen){
+ if(frag >= dlen){
+ /* request starts past the data we have; blockput drops the cache reference and frees b */
+ blockput(b);
+ return nil;
+ }
+ len = dlen-frag;
+ }
+ b->len = len;
+//fprint(2, "offset %llud at pointer %p %lux\n", (uvlong)offset, b->data, *(ulong*)(b->data+4));
+ return b;
+}
+
+/*
+ * It's okay to remove these from the hash table.
+ * Either the block is in use by someone or it is on
+ * the lru list. If it's in use it will get put on the lru
+ * list once the refs go away.
+ */
+static int
+diskcachesync(Disk *dd)
+{
+	DiskCache *d;
+	DiskCacheBlock *b, *following;
+	int bucket;
+
+	d = (DiskCache*)dd;
+
+	/* forget every cached offset so later reads go back to the disk */
+	lock(&d->lk);
+	for(bucket=0; bucket<d->nhash; bucket++){
+		b = d->h[bucket];
+		while(b != nil){
+			following = b->next;
+			b->next = nil;
+			b->offset = ~(u64int)0;
+			b = following;
+		}
+		d->h[bucket] = nil;
+	}
+	unlock(&d->lk);
+	return disksync(d->subdisk);
+}
+
+/*
+ * _close method of the cache: close the underlying disk, release the
+ * data held by the blocks on the lru list and free the cache itself.
+ */
+static void
+diskcacheclose(Disk *dd)
+{
+	DiskCache *d;
+	DiskCacheBlock *b;
+
+	d = (DiskCache*)dd;
+	diskclose(d->subdisk);
+	b = d->lruhead;
+	while(b != nil){
+		blockput(b->subblock);
+		b = b->lrunext;
+	}
+	free(d);
+}
+
+/*
+ * Report whether n is prime, by trial division.  Needn't be fast.
+ * Returns 1 for primes and 0 for everything else, including n < 2.
+ */
+static int
+isprime(int n)
+{
+	int i;
+
+	if(n < 2)
+		return 0;
+	/* i <= n/i is the same test as i*i <= n but cannot overflow */
+	for(i=2; i<=n/i; i++)
+		if(n%i == 0)
+			return 0;
+	return 1;
+}
+
+/*
+ * Create a cache of ncache blocks of blocksize bytes in front of subdisk.
+ * The cache header, the block headers and the hash table are carved out
+ * of one allocation, so diskcacheclose releases them with a single free.
+ * Returns the new Disk, or nil if an argument is zero or memory cannot
+ * be allocated.
+ */
+Disk*
+diskcache(Disk *subdisk, uint blocksize, uint ncache)
+{
+	int nhash;
+	uint i;
+	DiskCache *d;
+	DiskCacheBlock *b;
+
+	/*
+	 * Both values end up as divisors (offset%blocksize in diskcacheread,
+	 * offset%nhash in the hash functions); zero would fault on first use.
+	 */
+	if(blocksize == 0 || ncache == 0){
+		werrstr("diskcache: bad blocksize %ud or ncache %ud", blocksize, ncache);
+		return nil;
+	}
+
+	/* use the largest prime not exceeding ncache as the hash table size */
+	nhash = ncache;
+	while(nhash > 1 && !isprime(nhash))
+		nhash--;
+	d = mallocz(sizeof(DiskCache)+ncache*sizeof(DiskCacheBlock)+nhash*sizeof(DiskCacheBlock*), 1);
+	if(d == nil)
+		return nil;
+
+	/* layout: DiskCache, then ncache DiskCacheBlocks, then nhash bucket pointers */
+	b = (DiskCacheBlock*)&d[1];
+	d->h = (DiskCacheBlock**)&b[ncache];
+	d->nhash = nhash;
+	d->blocksize = blocksize;
+	d->subdisk = subdisk;
+	d->disk._read = diskcacheread;
+	d->disk._sync = diskcachesync;
+	d->disk._close = diskcacheclose;
+
+	/* every block starts out empty, unhashed and available on the lru list */
+	for(i=0; i<ncache; i++){
+		b[i].block._close = diskcacheblockclose;
+		b[i].offset = ~(u64int)0;
+		b[i].dc = d;
+		putlru(d, &b[i]);
+	}
+
+	return &d->disk;
+}
diff --git a/src/libdiskfs/disk.c b/src/libdiskfs/disk.c
new file mode 100644
index 00000000..001a19de
--- /dev/null
+++ b/src/libdiskfs/disk.c
@@ -0,0 +1,39 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <diskfs.h>
+
+/*
+ * Read count bytes at offset from disk through its _read method.
+ * Returns nil for a nil disk or a disk without a _read method.
+ */
+Block*
+diskread(Disk *disk, u32int count, u64int offset)
+{
+	if(disk == nil)
+		return nil;
+
+	if(disk->_read != nil)
+		return (*disk->_read)(disk, count, offset);
+
+	werrstr("no disk read dispatch function");
+	return nil;
+}
+
+/*
+ * Call disk's _sync method, if it has one.
+ * A nil disk or a missing method counts as success (0).
+ */
+int
+disksync(Disk *disk)
+{
+	if(disk != nil && disk->_sync != nil)
+		return (*disk->_sync)(disk);
+	return 0;
+}
+
+/*
+ * Close disk through its _close method.
+ * A nil disk is ignored; a disk without a _close method is fatal.
+ */
+void
+diskclose(Disk *disk)
+{
+	if(disk != nil){
+		if(disk->_close == nil){
+			fprint(2, "no diskClose\n");
+			abort();
+		}
+		(*disk->_close)(disk);
+	}
+}
diff --git a/src/libdiskfs/ext2.c b/src/libdiskfs/ext2.c
new file mode 100644
index 00000000..17039c0f
--- /dev/null
+++ b/src/libdiskfs/ext2.c
@@ -0,0 +1,742 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <sunrpc.h>
+#include <nfs3.h>
+#include <diskfs.h>
+#include "ext2.h"
+
+#define debug 1
+
+static int ext2sync(Fsys*);
+static void ext2close(Fsys*);
+static Block* ext2blockread(Fsys*, u64int);
+
+static Nfs3Status ext2root(Fsys*, Nfs3Handle*);
+static Nfs3Status ext2getattr(Fsys*, SunAuthUnix *au, Nfs3Handle*, Nfs3Attr*);
+static Nfs3Status ext2lookup(Fsys*, SunAuthUnix *au, Nfs3Handle*, char*, Nfs3Handle*);
+static Nfs3Status ext2readfile(Fsys*, SunAuthUnix *au, Nfs3Handle*, u32int, u64int, uchar**, u32int*, u1int*);
+static Nfs3Status ext2readlink(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char **link);
+static Nfs3Status ext2readdir(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int, u64int, uchar**, u32int*, u1int*);
+static Nfs3Status ext2access(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr);
+
+/*
+ * Open the ext2 file system on disk.  Allocates the Fsys and its private
+ * Ext2 state, installs the ext2 methods and reads the superblock via
+ * ext2sync.  Returns nil (after freeing both structures) if the
+ * superblock cannot be read or is not ext2.
+ * NOTE(review): no _close method is installed here; ext2close is only
+ * reached on the error path below.  Confirm whether Fsys has a _close
+ * slot that should point at it.
+ */
+Fsys*
+fsysopenext2(Disk *disk)
+{
+ Ext2 *fs;
+ Fsys *fsys;
+
+ fsys = emalloc(sizeof(Fsys));
+ fs = emalloc(sizeof(Ext2));
+ fs->disk = disk;
+ fsys->priv = fs;
+ fs->fsys = fsys;
+ fsys->type = "ext2";
+ fsys->_readblock = ext2blockread;
+ fsys->_sync = ext2sync;
+ fsys->_root = ext2root;
+ fsys->_getattr = ext2getattr;
+ fsys->_access = ext2access;
+ fsys->_lookup = ext2lookup;
+ fsys->_readfile = ext2readfile;
+ fsys->_readlink = ext2readlink;
+ fsys->_readdir = ext2readdir;
+
+ /* ext2sync loads the superblock and fills in the geometry in fs */
+ if(ext2sync(fsys) < 0)
+ goto error;
+
+ return fsys;
+
+error:
+ ext2close(fsys);
+ return nil;
+}
+
+/*
+ * Free the Fsys and its private Ext2 state.  The disk is not closed.
+ */
+static void
+ext2close(Fsys *fsys)
+{
+	free(fsys->priv);
+	free(fsys);
+}
+
+/*
+ * Return the descriptor of block group i.  The descriptor points into
+ * the disk block stored in *pb, which the caller must blockput when done.
+ * Returns nil (leaving *pb alone) if i is out of range or the read fails.
+ */
+static Group*
+ext2group(Ext2 *fs, u32int i, Block **pb)
+{
+	Block *b;
+	u64int blk;
+	uint slot;
+
+	if(i >= fs->ngroup)
+		return nil;
+
+	blk = fs->groupaddr + i/fs->descperblock;
+	b = diskread(fs->disk, fs->blocksize, blk*fs->blocksize);
+	if(b == nil)
+		return nil;
+
+	slot = i%fs->descperblock;
+	*pb = b;
+	return (Group*)(b->data + slot*GroupSize);
+}
+
+/*
+ * Read file system block vbno, but only if the block bitmap of its
+ * group marks it allocated.  Returns nil for block numbers that are out
+ * of range or below firstblock, for unallocated blocks and for read
+ * errors.  The caller must blockput the result.
+ */
+static Block*
+ext2blockread(Fsys *fsys, u64int vbno)
+{
+	Block *bitb;
+	Group *g;
+	Block *gb;
+	uchar *bits;
+	u32int bno, boff, bitblock;
+	int allocated;
+	Ext2 *fs;
+
+	fs = fsys->priv;
+	if(vbno >= fs->nblock)
+		return nil;
+	bno = vbno;
+	if(bno != vbno)
+		return nil;
+
+	/* blocks below firstblock are not covered by any group bitmap */
+	if(bno < fs->firstblock)
+		return nil;
+
+	bno -= fs->firstblock;
+	if((g = ext2group(fs, bno/fs->blockspergroup, &gb)) == nil){
+		if(debug)
+			fprint(2, "loading group: %r...");
+		return nil;
+	}
+	/* g points into gb: copy the one field we need and release gb now */
+	bitblock = g->bitblock;
+	blockput(gb);
+
+	if((bitb = diskread(fs->disk, fs->blocksize, (u64int)bitblock*fs->blocksize)) == nil){
+		if(debug)
+			fprint(2, "loading bitblock: %r...");
+		return nil;
+	}
+	bits = bitb->data;
+	boff = bno%fs->blockspergroup;
+	/* a bit that lies beyond the bitmap block counts as unallocated */
+	allocated = (boff>>3) < bitb->len && (bits[boff>>3] & (1<<(boff&7))) != 0;
+	/* the bitmap is no longer needed whichever way the test went */
+	blockput(bitb);
+	if(!allocated){
+		if(debug)
+			fprint(2, "block %d not allocated...", bno);
+		return nil;
+	}
+
+	bno += fs->firstblock;
+	return diskread(fs->disk, fs->blocksize, (u64int)bno*fs->blocksize);
+}
+
+/*
+ * Read the block whose number bno was taken from an inode or an
+ * indirect block.  size is accepted but not used here.
+ * NOTE(review): firstblock is added before calling ext2blockread, which
+ * range-checks its argument against nblock and firstblock and reads at
+ * bno*blocksize; confirm that stored block pointers really are relative
+ * to firstblock.
+ */
+static Block*
+ext2datablock(Ext2 *fs, u32int bno, int size)
+{
+ return ext2blockread(fs->fsys, bno+fs->firstblock);
+}
+
+/*
+ * Return the block holding logical block bno of the file described by
+ * ino, following the direct pointers or the single, double or triple
+ * indirect blocks as needed.  size is passed through to ext2datablock
+ * for the final data block.  Returns nil if a block on the way cannot be
+ * read or bno is beyond what triple indirection can address.
+ * The caller must blockput the result.
+ */
+static Block*
+ext2fileblock(Ext2 *fs, Inode *ino, u32int bno, int size)
+{
+	int ppb, depth;
+	Block *b;
+	u32int *a;
+	u32int obno, addr;
+	u64int span;
+
+	obno = bno;
+	if(bno < NDIRBLOCKS){
+		if(debug)
+			fprint(2, "fileblock %d -> %d...",
+				bno, ino->block[bno]);
+		return ext2datablock(fs, ino->block[bno], size);
+	}
+	bno -= NDIRBLOCKS;
+	ppb = fs->blocksize/4;	/* block pointers per indirect block */
+
+	/*
+	 * An indirect tree of the given depth covers span = ppb^depth
+	 * file blocks; find the tree that bno falls in.  span is 64-bit
+	 * so that ppb^3 cannot overflow.
+	 */
+	span = 1;
+	for(depth=1; depth<=3; depth++){
+		span *= ppb;
+		if(bno >= span){
+			bno -= span;
+			continue;
+		}
+
+		/* INDBLOCK, DINDBLOCK and TINDBLOCK are consecutive slots */
+		addr = ino->block[INDBLOCK+depth-1];
+		while(depth-- > 0){
+			/*
+			 * bno stays the index within this tree; only addr
+			 * moves down a level, so each level picks its own
+			 * digit of bno in base ppb.
+			 */
+			span /= ppb;
+			b = ext2datablock(fs, addr, fs->blocksize);
+			if(b == nil)
+				return nil;
+			a = (u32int*)b->data;
+			addr = a[(bno/span)%ppb];
+			blockput(b);
+		}
+		return ext2datablock(fs, addr, size);
+	}
+
+	/* %llud needs a 64-bit argument */
+	fprint(2, "ext2fileblock %llud: too big\n", (uvlong)obno);
+	return nil;
+}
+
+/*
+ * Check the superblock's magic number.
+ * Returns 0 if it is ext2's, otherwise sets the error string and returns -1.
+ */
+static int
+checksuper(Super *super)
+{
+	if(super->magic == SUPERMAGIC)
+		return 0;
+
+	werrstr("bad magic 0x%ux wanted 0x%ux", super->magic, SUPERMAGIC);
+	return -1;
+}
+
+/*
+ * (Re)read the superblock and derive the file system geometry kept in
+ * the Ext2 and Fsys structures.  Returns 0 on success, or -1 with the
+ * error string set if the superblock cannot be read, has the wrong
+ * magic number or describes an impossible geometry.
+ */
+static int
+ext2sync(Fsys *fsys)
+{
+	int i;
+	Group *g;
+	Block *b;
+	Super *super;
+	Ext2 *fs;
+	Disk *disk;
+
+	fs = fsys->priv;
+	disk = fs->disk;
+	if((b = diskread(disk, SBSIZE, SBOFF)) == nil)
+		goto error;
+	super = (Super*)b->data;
+	if(checksuper(super) < 0)
+		goto error;
+
+	/*
+	 * These fields come straight off the disk and are used below and
+	 * elsewhere as a shift count and as divisors, so refuse values that
+	 * would overflow the shift or divide by zero.  ext2 block sizes
+	 * top out at 64K, i.e. MINBLOCKSIZE<<6.
+	 */
+	if(super->logblocksize > 6){
+		werrstr("bad log block size %ud", super->logblocksize);
+		goto error;
+	}
+	if(super->blockspergroup == 0 || super->inospergroup == 0){
+		werrstr("bad group geometry: %ud blocks, %ud inodes per group",
+			super->blockspergroup, super->inospergroup);
+		goto error;
+	}
+
+	fs->blocksize = MINBLOCKSIZE<<super->logblocksize;
+	fs->nblock = super->nblock;
+	fs->ngroup = (super->nblock+super->blockspergroup-1)
+		/ super->blockspergroup;
+	fs->inospergroup = super->inospergroup;
+	fs->blockspergroup = super->blockspergroup;
+	fs->inosperblock = fs->blocksize / InodeSize;
+	/*
+	 * The group descriptors start in the block after the one holding
+	 * the superblock: block 2 when blocks are SBOFF bytes, else block 1.
+	 */
+	if(fs->blocksize == SBOFF)
+		fs->groupaddr = 2;
+	else
+		fs->groupaddr = 1;
+	fs->descperblock = fs->blocksize / GroupSize;
+	fs->firstblock = super->firstdatablock;
+	blockput(b);
+
+	fsys->blocksize = fs->blocksize;
+	fsys->nblock = fs->nblock;
+	fprint(2, "ext2 %d %d-byte blocks, first data block %d, %d groups of %d\n",
+		fs->nblock, fs->blocksize, fs->firstblock, fs->ngroup, fs->blockspergroup);
+
+	/* disabled debugging aid: list the bitmap block of every group */
+	if(0){
+		for(i=0; i<fs->ngroup; i++)
+			if((g = ext2group(fs, i, &b)) != nil){
+				fprint(2, "grp %d: bitblock=%d\n", i, g->bitblock);
+				blockput(b);
+			}
+	}
+	return 0;
+
+error:
+	/* b is nil if the read itself failed; blockput ignores nil */
+	blockput(b);
+	return -1;
+}
+
+/*
+ * Build the NFS handle for inode number ino: its low 32 bits,
+ * stored as four bytes, most significant first.
+ */
+static void
+mkhandle(Nfs3Handle *h, u64int ino)
+{
+	u32int v;
+	int k;
+
+	v = ino;
+	for(k=3; k>=0; k--){
+		h->h[k] = v;
+		v >>= 8;
+	}
+	h->len = 4;
+}
+
+/*
+ * Assemble a 32-bit value from four bytes, most significant first.
+ */
+static u32int
+byte2u32(uchar *p)
+{
+	/* widen before shifting: p[0]<<24 as an int would shift into the sign bit */
+	return ((u32int)p[0]<<24) | ((u32int)p[1]<<16) | ((u32int)p[2]<<8) | (u32int)p[3];
+}
+
+/*
+ * Decode the inode number in handle h and copy that inode from disk
+ * into *ino.  If pinum is not nil the inode number is stored there too.
+ * Returns Nfs3ErrBadHandle for a malformed handle or an inode number
+ * outside the file system, Nfs3ErrIo if a disk read fails.
+ */
+static Nfs3Status
+handle2ino(Ext2 *fs, Nfs3Handle *h, u32int *pinum, Inode *ino)
+{
+	u32int grp;
+	uint ioff;
+	u32int inum;
+	u32int addr;
+	Block *gb, *b;
+	Group *g;
+
+	if(h->len != 4)
+		return Nfs3ErrBadHandle;
+	inum = byte2u32(h->h);
+	if(pinum)
+		*pinum = inum;
+	/* inode numbers start at 1; 0 would wrap around in inum-1 below */
+	if(inum == 0)
+		return Nfs3ErrBadHandle;
+	grp = (inum-1) / fs->inospergroup;
+	if(grp >= fs->ngroup)
+		return Nfs3ErrBadHandle;
+	ioff = (inum-1) % fs->inospergroup;
+	if((g = ext2group(fs, grp, &gb)) == nil)
+		return Nfs3ErrIo;
+	/* g points into gb, so read inodeaddr before releasing the block */
+	addr = g->inodeaddr + ioff/fs->inosperblock;
+	blockput(gb);
+	if((b = diskread(fs->disk, fs->blocksize, (u64int)addr*fs->blocksize)) == nil)
+		return Nfs3ErrIo;
+	*ino = ((Inode*)b->data)[ioff%fs->inosperblock];
+	blockput(b);
+	return Nfs3Ok;
+}
+
+/*
+ * Store the handle of the root directory (inode ROOTINODE) in *h.
+ */
+static Nfs3Status
+ext2root(Fsys *fsys, Nfs3Handle *h)
+{
+	USED(fsys);
+	mkhandle(h, ROOTINODE);
+	return Nfs3Ok;
+}
+
+/*
+ * Translate on-disk inode ino (number inum) into NFS attributes.
+ * Returns Nfs3ErrBadHandle if the mode holds a file type that has no
+ * NFS equivalent (IFWHT or anything unrecognised).
+ */
+static Nfs3Status
+ino2attr(Ext2 *fs, Inode *ino, u32int inum, Nfs3Attr *attr)
+{
+ u32int rdev;
+
+ attr->type = -1;
+ switch(ino->mode&IFMT){
+ case IFIFO:
+ attr->type = Nfs3FileFifo;
+ break;
+ case IFCHR:
+ attr->type = Nfs3FileChar;
+ break;
+ case IFDIR:
+ attr->type = Nfs3FileDir;
+ break;
+ case IFBLK:
+ attr->type = Nfs3FileBlock;
+ break;
+ case IFREG:
+ attr->type = Nfs3FileReg;
+ break;
+ case IFLNK:
+ attr->type = Nfs3FileSymlink;
+ break;
+ case IFSOCK:
+ attr->type = Nfs3FileSocket;
+ break;
+ case IFWHT:
+ default:
+ return Nfs3ErrBadHandle;
+ }
+
+ /* keep the permission, set-id and sticky bits; the type was handled above */
+ attr->mode = ino->mode&07777;
+ attr->nlink = ino->nlink;
+ attr->uid = ino->uid;
+ attr->gid = ino->gid;
+ attr->size = ino->size;
+ /* NOTE(review): assumes ino->nblock counts blocksize-byte units; confirm */
+ attr->used = ino->nblock*fs->blocksize;
+ if(attr->type==Nfs3FileBlock || attr->type==Nfs3FileChar){
+ /* device files keep their device number in the first block pointer */
+ rdev = ino->block[0];
+ attr->major = (rdev>>8)&0xFF;
+ attr->minor = rdev & 0xFFFF00FF;
+ }else{
+ attr->major = 0;
+ attr->minor = 0;
+ }
+ attr->fsid = 0;
+ attr->fileid = inum;
+ /* the inode stores whole seconds only */
+ attr->atime.sec = ino->atime;
+ attr->atime.nsec = 0;
+ attr->mtime.sec = ino->mtime;
+ attr->mtime.nsec = 0;
+ attr->ctime.sec = ino->ctime;
+ attr->ctime.nsec = 0;
+ return Nfs3Ok;
+}
+
+/*
+ * Report whether gid is one of the groups listed in au.
+ */
+static int
+ingroup(SunAuthUnix *au, uint gid)
+{
+	int i;
+
+	i = 0;
+	while(i < au->ng){
+		if(au->g[i] == gid)
+			return 1;
+		i++;
+	}
+	return 0;
+}
+
+/*
+ * Check that the caller described by au has all of the permission bits
+ * in need on inode ino, picking the owner, group or other bits of the
+ * mode as appropriate.  Everything is allowed when allowall is set.
+ */
+static Nfs3Status
+inoperm(Inode *ino, SunAuthUnix *au, int need)
+{
+	int have;
+
+	if(allowall)
+		return Nfs3Ok;
+
+	if(ino->uid == au->uid)
+		have = (ino->mode&0777) >> 6;
+	else if(ino->gid == au->gid || ingroup(au, ino->gid))
+		have = (ino->mode&0777) >> 3;
+	else
+		have = ino->mode&0777;
+
+	if((have&need) == need)
+		return Nfs3Ok;
+	return Nfs3ErrNotOwner;	/* really EPERM */
+}
+
+/*
+ * Return the attributes of the file named by handle h.
+ */
+static Nfs3Status
+ext2getattr(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, Nfs3Attr *attr)
+{
+	Inode ino;
+	u32int inum;
+	Nfs3Status ok;
+	Ext2 *fs;
+
+	USED(au);	/* anyone can getattr */
+
+	fs = fsys->priv;
+	ok = handle2ino(fs, h, &inum, &ino);
+	if(ok == Nfs3Ok)
+		ok = ino2attr(fs, &ino, inum, attr);
+	return ok;
+}
+
+/*
+ * Report in *got which of the access rights in want the caller au has
+ * on the file named by h, and return the file's attributes in *attr.
+ * Only read, lookup and execute are ever granted.
+ * NOTE(review): unlike inoperm, this does not consult allowall; confirm
+ * that is intended.
+ */
+static Nfs3Status
+ext2access(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr)
+{
+ int have;
+ Inode ino;
+ u32int inum;
+ Ext2 *fs;
+ Nfs3Status ok;
+
+ fs = fsys->priv;
+ if((ok = handle2ino(fs, h, &inum, &ino)) != Nfs3Ok)
+ return ok;
+
+ /* shift the owner or group bits of the mode down to the low three bits */
+ have = ino.mode&0777;
+ if(ino.uid == au->uid)
+ have >>= 6;
+ else if(ino.gid == au->gid || ingroup(au, ino.gid))
+ have >>= 3;
+
+ *got = 0;
+ if((want&Nfs3AccessRead) && (have&AREAD))
+ *got |= Nfs3AccessRead;
+ /* the execute bit means lookup on directories and execute on everything else */
+ if((want&Nfs3AccessLookup) && (ino.mode&IFMT)==IFDIR && (have&AEXEC))
+ *got |= Nfs3AccessLookup;
+ if((want&Nfs3AccessExecute) && (ino.mode&IFMT)!=IFDIR && (have&AEXEC))
+ *got |= Nfs3AccessExecute;
+
+ return ino2attr(fs, &ino, inum, attr);
+}
+
+/*
+ * Look up name in the directory named by handle h and store the handle
+ * of the matching entry in *nh.  The caller needs execute permission on
+ * the directory.  Returns Nfs3ErrNoEnt if no entry matches.
+ */
+static Nfs3Status
+ext2lookup(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char *name, Nfs3Handle *nh)
+{
+ u32int nblock;
+ u32int i;
+ uchar *p, *ep;
+ Dirent *de;
+ Inode ino;
+ Block *b;
+ Ext2 *fs;
+ Nfs3Status ok;
+ int len, want;
+
+ fs = fsys->priv;
+ if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+ return ok;
+
+ if((ino.mode&IFMT) != IFDIR)
+ return Nfs3ErrNotDir;
+
+ if((ok = inoperm(&ino, au, AEXEC)) != Nfs3Ok)
+ return ok;
+
+ len = strlen(name);
+ nblock = (ino.size+fs->blocksize-1) / fs->blocksize;
+ if(debug) fprint(2, "%d blocks in dir...", nblock);
+ for(i=0; i<nblock; i++){
+ /* want is only a size hint; the scan below is bounded by b->len */
+ if(i==nblock-1)
+ want = ino.size % fs->blocksize;
+ else
+ want = fs->blocksize;
+ b = ext2fileblock(fs, &ino, i, want);
+ if(b == nil){
+ /* an unreadable or missing block is skipped, not treated as an error */
+ if(debug) fprint(2, "empty block...");
+ continue;
+ }
+ p = b->data;
+ ep = p+b->len;
+ /* walk the variable-length records; any malformed one ends the scan of this block */
+ while(p < ep){
+ de = (Dirent*)p;
+ if(de->reclen == 0){
+ if(debug)
+ fprint(2, "reclen 0 at offset %d of %d\n", (int)(p-b->data), b->len);
+ break;
+ }
+ /* p now points at the next record; de still points at the current one */
+ p += de->reclen;
+ if(p > ep){
+ if(debug)
+ fprint(2, "bad len %d at offset %d of %d\n", de->reclen, (int)(p-b->data), b->len);
+ break;
+ }
+ /* inode 0 marks an unused entry */
+ if(de->ino == 0)
+ continue;
+ /* 4+2+2 is the fixed header in front of the name */
+ if(4+2+2+de->namlen > de->reclen){
+ if(debug)
+ fprint(2, "bad namelen %d at offset %d of %d\n", de->namlen, (int)(p-b->data), b->len);
+ break;
+ }
+ if(de->namlen == len && memcmp(de->name, name, len) == 0){
+ mkhandle(nh, de->ino);
+ blockput(b);
+ return Nfs3Ok;
+ }
+ }
+ blockput(b);
+ }
+ return Nfs3ErrNoEnt;
+}
+
+/*
+ * Read entries of the directory named by h, starting at the byte offset
+ * cookie, and pack as many as fit into count bytes.  On success *pdata
+ * is a malloc'ed buffer holding *pcount bytes of packed entries (0 and 0
+ * when cookie is at or past the end of the directory), and *peof is set
+ * once the last entry of the directory has been returned.
+ * The caller needs read permission on the directory.
+ */
+static Nfs3Status
+ext2readdir(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count, u64int cookie, uchar **pdata, u32int *pcount, u1int *peof)
+{
+	u32int nblock;
+	u32int i;
+	int off, done;
+	uchar *data, *dp, *dep, *p, *ep, *ndp;
+	Dirent *de;
+	Inode ino;
+	Block *b;
+	Ext2 *fs;
+	Nfs3Status ok;
+	Nfs3Entry e;
+	int want;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+		return ok;
+
+	if((ino.mode&IFMT) != IFDIR)
+		return Nfs3ErrNotDir;
+
+	if((ok = inoperm(&ino, au, AREAD)) != Nfs3Ok)
+		return ok;
+
+	if(cookie >= ino.size){
+		/* nothing left to return: say so, as ext2readfile does */
+		*peof = 1;
+		*pcount = 0;
+		*pdata = 0;
+		return Nfs3Ok;
+	}
+
+	dp = malloc(count);
+	data = dp;
+	if(dp == nil)
+		return Nfs3ErrNoMem;
+	dep = dp+count;
+	*peof = 0;
+	nblock = (ino.size+fs->blocksize-1) / fs->blocksize;
+	/* the cookie is a byte offset into the directory: block index plus offset in block */
+	i = cookie/fs->blocksize;
+	off = cookie%fs->blocksize;
+	done = 0;
+	for(; i<nblock && !done; i++){
+		if(i==nblock-1)
+			want = ino.size % fs->blocksize;
+		else
+			want = fs->blocksize;
+		b = ext2fileblock(fs, &ino, i, want);
+		if(b == nil)
+			continue;
+		p = b->data;
+		ep = p+b->len;
+		memset(&e, 0, sizeof e);
+		while(p < ep){
+			de = (Dirent*)p;
+			if(de->reclen == 0){
+				if(debug) fprint(2, "reclen 0 at offset %d of %d\n", (int)(p-b->data), b->len);
+				break;
+			}
+			/* p now points at the next record; de still points at the current one */
+			p += de->reclen;
+			if(p > ep){
+				if(debug) fprint(2, "reclen %d at offset %d of %d\n", de->reclen, (int)(p-b->data), b->len);
+				break;
+			}
+			if(de->ino == 0){
+				if(debug) fprint(2, "zero inode\n");
+				continue;
+			}
+			if(4+2+2+de->namlen > de->reclen){
+				if(debug) fprint(2, "bad namlen %d reclen %d at offset %d of %d\n", de->namlen, de->reclen, (int)(p-b->data), b->len);
+				break;
+			}
+			/*
+			 * e.name points straight into the block, so the name is
+			 * only usable if a NUL follows it.  Do not look for that
+			 * NUL beyond the end of the block.
+			 */
+			if((uchar*)de->name+de->namlen >= ep || de->name[de->namlen] != 0){
+				if(debug) fprint(2, "bad name %d %.*s\n", de->namlen, de->namlen, de->name);
+				continue;
+			}
+			if(debug) print("%s/%d ", de->name, (int)de->ino);
+			/* skip the entries in front of the cookie in the first block */
+			if((uchar*)de - b->data < off)
+				continue;
+			e.fileid = de->ino;
+			e.name = de->name;
+			/* the cookie of an entry is the offset of the record after it */
+			e.cookie = (u64int)i*fs->blocksize + (p - b->data);
+			if(nfs3entrypack(dp, dep, &ndp, &e) < 0){
+				/* reply buffer is full; stop here without claiming eof */
+				done = 1;
+				break;
+			}
+			dp = ndp;
+		}
+		off = 0;
+		blockput(b);
+	}
+	/*
+	 * The loop also bumps i after a block in which we ran out of room,
+	 * so i==nblock alone does not mean everything was returned.
+	 */
+	if(i==nblock && !done)
+		*peof = 1;
+
+	*pcount = dp - data;
+	*pdata = data;
+	return Nfs3Ok;
+}
+
+/*
+ * Read up to count bytes at offset from the file named by h.  A read
+ * never crosses a file system block boundary, so fewer bytes than asked
+ * for may be returned.  On success *pdata is a malloc'ed buffer (0 when
+ * nothing is returned), *pcount its length and *peof tells whether the
+ * end of the file was reached.  The caller needs read permission.
+ */
+static Nfs3Status
+ext2readfile(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count,
+	u64int offset, uchar **pdata, u32int *pcount, u1int *peof)
+{
+	uchar *data;
+	Block *b;
+	Ext2 *fs;
+	int off, want, fragcount;
+	Inode ino;
+	Nfs3Status ok;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+		return ok;
+
+	if((ok = inoperm(&ino, au, AREAD)) != Nfs3Ok)
+		return ok;
+
+	if(offset >= ino.size){
+		*pdata = 0;
+		*pcount = 0;
+		*peof = 1;
+		return Nfs3Ok;
+	}
+	if(count == 0){
+		/*
+		 * Answer empty reads here: offset+count-1 below would step
+		 * back into the previous block and turn the request into a
+		 * whole-block read.
+		 */
+		*pdata = 0;
+		*pcount = 0;
+		*peof = 0;
+		return Nfs3Ok;
+	}
+	if(offset+count > ino.size)
+		count = ino.size-offset;
+	/* clip the read at the end of the block it starts in */
+	if(offset/fs->blocksize != (offset+count-1)/fs->blocksize)
+		count = fs->blocksize - offset%fs->blocksize;
+
+	data = malloc(count);
+	if(data == nil)
+		return Nfs3ErrNoMem;
+
+	/* size hint for ext2fileblock: what we need, rounded up to whole blocks */
+	want = offset%fs->blocksize+count;
+	if(want%fs->blocksize)
+		want += fs->blocksize - want%fs->blocksize;
+
+	b = ext2fileblock(fs, &ino, offset/fs->blocksize, want);
+	if(b == nil){
+		/* BUG: distinguish sparse file from I/O error */
+		memset(data, 0, count);
+	}else{
+		off = offset%fs->blocksize;
+		fragcount = count;	/* need signed variable */
+		if(off+fragcount > b->len){
+			fragcount = b->len - off;
+			if(fragcount < 0)
+				fragcount = 0;
+		}
+		if(fragcount > 0)
+			memmove(data, b->data+off, fragcount);
+		count = fragcount;
+		blockput(b);
+	}
+	*peof = (offset+count == ino.size);
+	*pcount = count;
+	*pdata = data;
+	return Nfs3Ok;
+}
+
+/*
+ * Return the target of the symbolic link named by h as a malloc'ed,
+ * NUL-terminated string in *link.  Targets longer than 1024 bytes are
+ * rejected with Nfs3ErrIo.  The caller needs read permission.
+ * NOTE(review): the inode's type is not checked here; confirm callers
+ * only pass handles of symbolic links.
+ */
+static Nfs3Status
+ext2readlink(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char **link)
+{
+ Ext2 *fs;
+ Nfs3Status ok;
+ int len;
+ Inode ino;
+ Block *b;
+
+ fs = fsys->priv;
+ if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+ return ok;
+ if((ok = inoperm(&ino, au, AREAD)) != Nfs3Ok)
+ return ok;
+
+ if(ino.size > 1024)
+ return Nfs3ErrIo;
+ len = ino.size;
+
+ /* a nonzero block count means the target is stored in a data block */
+ if(ino.nblock != 0){
+ /* BUG: assumes symlink fits in one block */
+ b = ext2fileblock(fs, &ino, 0, len);
+ if(b == nil)
+ return Nfs3ErrIo;
+ /* a NUL inside the first len bytes means the stored size is wrong */
+ if(memchr(b->data, 0, len) != nil){
+ blockput(b);
+ return Nfs3ErrIo;
+ }
+ *link = malloc(len+1);
+ if(*link == 0){
+ blockput(b);
+ return Nfs3ErrNoMem;
+ }
+ memmove(*link, b->data, len);
+ (*link)[len] = 0;
+ blockput(b);
+ return Nfs3Ok;
+ }
+
+ /* otherwise the target is stored in the inode's block pointer array itself */
+ if(len > sizeof ino.block)
+ return Nfs3ErrIo;
+
+ *link = malloc(len+1);
+ if(*link == 0)
+ return Nfs3ErrNoMem;
+ memmove(*link, ino.block, ino.size);
+ (*link)[len] = 0;
+ return Nfs3Ok;
+}
+
diff --git a/src/libdiskfs/ext2.h b/src/libdiskfs/ext2.h
new file mode 100644
index 00000000..29bf60b5
--- /dev/null
+++ b/src/libdiskfs/ext2.h
@@ -0,0 +1,167 @@
+typedef struct Super Super;
+typedef struct Group Group;
+typedef struct Inode Inode;
+typedef struct Dirent Dirent;
+typedef struct Ext2 Ext2;
+
+/*
+ * Constants describing the ext2 on-disk layout.
+ */
+enum
+{
+ BYTESPERSEC = 512,
+
+ SBOFF = 1024, /* byte offset of the superblock on the disk */
+ SBSIZE = 1024, /* number of superblock bytes read */
+
+ SUPERMAGIC = 0xEF53, /* expected value of Super.magic */
+ MINBLOCKSIZE = 1024, /* block size is MINBLOCKSIZE<<Super.logblocksize */
+ MAXBLOCKSIZE = 4096,
+ ROOTINODE = 2, /* inode number of the root directory */
+ FIRSTINODE = 11,
+ VALIDFS = 0x0001,
+ ERRORFS = 0x0002,
+
+ /* indices into Inode.block */
+ NDIRBLOCKS = 12, /* entries 0..NDIRBLOCKS-1 point directly at data blocks */
+ INDBLOCK = NDIRBLOCKS, /* single indirect block */
+ DINDBLOCK = INDBLOCK+1, /* double indirect block */
+ TINDBLOCK = DINDBLOCK+1, /* triple indirect block */
+ NBLOCKS = TINDBLOCK+1, /* number of entries in Inode.block */
+
+ NAMELEN = 255, /* size of Dirent.name */
+
+ /* permissions in Inode.mode */
+ IEXEC = 00100,
+ IWRITE = 0200,
+ IREAD = 0400,
+ ISVTX = 01000,
+ ISGID = 02000,
+ ISUID = 04000,
+
+ /* type in Inode.mode */
+ IFMT = 0170000, /* mask selecting the type bits */
+ IFIFO = 0010000,
+ IFCHR = 0020000,
+ IFDIR = 0040000,
+ IFBLK = 0060000,
+ IFREG = 0100000,
+ IFLNK = 0120000,
+ IFSOCK = 0140000,
+ IFWHT = 0160000,
+};
+
+#define DIRLEN(namlen) (((namlen)+8+3)&~3)
+
+
+/*
+ * Super block on-disk format.
+ * The fields add up to SBSIZE (1024) bytes; the structure is overlaid
+ * directly on the bytes read from the disk.
+ */
+struct Super
+{
+ u32int ninode; /* Inodes count */
+ u32int nblock; /* Blocks count */
+ u32int rblockcount; /* Reserved blocks count */
+ u32int freeblockcount; /* Free blocks count */
+ u32int freeinodecount; /* Free inodes count */
+ u32int firstdatablock; /* First Data Block */
+ u32int logblocksize; /* Block size, as a shift applied to MINBLOCKSIZE */
+ u32int logfragsize; /* Fragment size */
+ u32int blockspergroup; /* # Blocks per group */
+ u32int fragpergroup; /* # Fragments per group */
+ u32int inospergroup; /* # Inodes per group */
+ u32int mtime; /* Mount time */
+ u32int wtime; /* Write time */
+ u16int mntcount; /* Mount count */
+ u16int maxmntcount; /* Maximal mount count */
+ u16int magic; /* Magic signature */
+ u16int state; /* File system state */
+ u16int errors; /* Behaviour when detecting errors */
+ u16int pad;
+ u32int lastcheck; /* time of last check */
+ u32int checkinterval; /* max. time between checks */
+ u32int creatoros; /* OS */
+ u32int revlevel; /* Revision level */
+ u16int defresuid; /* Default uid for reserved blocks */
+ u16int defresgid; /* Default gid for reserved blocks */
+ u32int reserved[235]; /* Padding to the end of the block */
+};
+
+/*
+ * Block group on-disk format.
+ * Descriptors are packed GroupSize bytes apart in the group descriptor blocks.
+ */
+struct Group
+{
+ u32int bitblock; /* Blocks bitmap block */
+ u32int inodebitblock; /* Inodes bitmap block */
+ u32int inodeaddr; /* Inodes table block */
+ u16int freeblockscount; /* Free blocks count */
+ u16int freeinodescount; /* Free inodes count */
+ u16int useddirscount; /* Directories count */
+ u16int pad;
+ u32int reserved[3];
+};
+enum
+{
+ GroupSize = 32 /* size in bytes of one on-disk Group */
+};
+
+/*
+ * Structure of an inode on the disk.
+ * Inodes are packed InodeSize bytes apart in the inode table blocks.
+ */
+struct Inode
+{
+ u16int mode; /* File mode: type (IFMT) and permission bits */
+ u16int uid; /* Owner Uid */
+ u32int size; /* Size in bytes */
+ u32int atime; /* Access time */
+ u32int ctime; /* Creation time */
+ u32int mtime; /* Modification time */
+ u32int dtime; /* Deletion Time */
+ u16int gid; /* Group Id */
+ u16int nlink; /* Links count */
+ u32int nblock; /* Blocks count */
+ u32int flags; /* File flags */
+ u32int osd1;
+ u32int block[NBLOCKS];/* Pointers to blocks: NDIRBLOCKS direct, then INDBLOCK, DINDBLOCK, TINDBLOCK */
+ u32int version; /* File version (for NFS) */
+ u32int fileacl; /* File ACL */
+ u32int diracl; /* Directory ACL */
+ u32int faddr; /* Fragment address */
+ uchar osd2[12];
+};
+enum
+{
+ InodeSize = 128 /* size in bytes of one on-disk Inode */
+};
+
+/*
+ * Directory entry on-disk structure.
+ * Entries are variable length: reclen gives the distance to the next
+ * entry and only the first namlen bytes of name are meaningful.
+ */
+struct Dirent
+{
+ u32int ino; /* Inode number; 0 marks an unused entry */
+ u16int reclen; /* Directory entry length */
+ u8int namlen; /* Name length */
+ u8int pad;
+ char name[NAMELEN]; /* File name */
+};
+enum
+{
+ MinDirentSize = 4+2+1+1, /* size of the fixed fields in front of name */
+};
+
+/*
+ * In-core fs info.
+ * Filled in by ext2sync from the superblock.
+ */
+struct Ext2
+{
+ uint blocksize; /* bytes per file system block */
+ uint nblock; /* total number of blocks */
+ uint ngroup; /* number of block groups */
+ uint inospergroup; /* inodes per block group */
+ uint blockspergroup; /* blocks per block group */
+ uint inosperblock; /* inodes that fit in one block */
+ uint groupaddr; /* block number of the first group descriptor block */
+ uint descperblock; /* group descriptors that fit in one block */
+ uint firstblock; /* Super.firstdatablock */
+ Disk *disk; /* disk the file system is read from */
+ Fsys *fsys; /* the Fsys whose priv points back at this structure */
+};
+
diff --git a/src/libdiskfs/fat.c b/src/libdiskfs/fat.c
new file mode 100644
index 00000000..4d12512c
--- /dev/null
+++ b/src/libdiskfs/fat.c
@@ -0,0 +1,11 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+
+/*
+ * Placeholder for FAT support: no file system is ever recognised,
+ * so this always returns nil.
+ */
+Fsys*
+fsysopenfat(Disk *disk)
+{
+ USED(disk);
+ return nil;
+}
+
diff --git a/src/libdiskfs/ffs.c b/src/libdiskfs/ffs.c
new file mode 100644
index 00000000..2342171f
--- /dev/null
+++ b/src/libdiskfs/ffs.c
@@ -0,0 +1,791 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <sunrpc.h>
+#include <nfs3.h>
+#include <diskfs.h>
+#include "ffs.h"
+
+#define checkcg 0
+#define debug 0
+
+static int checkfsblk(Fsblk*);
+static int checkcgblk(Cgblk*);
+static Block *ffsblockread(Fsys*, u64int);
+static int ffssync(Fsys*);
+static void ffsclose(Fsys*);
+
+static Nfs3Status ffsroot(Fsys*, Nfs3Handle*);
+static Nfs3Status ffsgetattr(Fsys*, SunAuthUnix *au, Nfs3Handle*, Nfs3Attr*);
+static Nfs3Status ffslookup(Fsys*, SunAuthUnix *au, Nfs3Handle*, char*, Nfs3Handle*);
+static Nfs3Status ffsreadfile(Fsys*, SunAuthUnix *au, Nfs3Handle*, u32int, u64int, uchar**, u32int*, u1int*);
+static Nfs3Status ffsreadlink(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char **link);
+static Nfs3Status ffsreaddir(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int, u64int, uchar**, u32int*, u1int*);
+static Nfs3Status ffsaccess(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr);
+
+/*
+ * Try to open disk as an FFS file system.
+ * Allocates the Fsys and its private Ffs state, installs the
+ * FFS method table, and reads the super block via ffssync.
+ * Returns nil (after freeing everything) if the super block
+ * cannot be read or has the wrong magic number.
+ */
+Fsys*
+fsysopenffs(Disk *disk)
+{
+	Ffs *fs;
+	Fsys *fsys;
+
+	fsys = emalloc(sizeof(Fsys));
+	fs = emalloc(sizeof(Ffs));
+	fs->disk = disk;
+	fsys->priv = fs;
+	fsys->type = "ffs";
+	fsys->_readblock = ffsblockread;
+	fsys->_sync = ffssync;
+	fsys->_root = ffsroot;
+	fsys->_getattr = ffsgetattr;
+	fsys->_access = ffsaccess;
+	fsys->_lookup = ffslookup;
+	fsys->_readfile = ffsreadfile;
+	fsys->_readlink = ffsreadlink;
+	fsys->_readdir = ffsreaddir;
+	/* without this, fsysclose aborts with "no fsysClose" */
+	fsys->_close = ffsclose;
+
+	if(ffssync(fsys) < 0){
+		ffsclose(fsys);
+		return nil;
+	}
+	return fsys;
+}
+
+/*
+ * Read the cylinder group block for group i and check its magic.
+ * On success the containing Block is stored in *pb (the caller
+ * must blockput it) and a pointer into its data is returned.
+ * Returns nil if i is out of range, the read fails, or the
+ * magic number is wrong.
+ */
+static Cgblk*
+ffscylgrp(Ffs *fs, int i, Block **pb)
+{
+	Block *blk;
+	Cgblk *hdr;
+	u64int addr;
+
+	if(i >= fs->ncg)
+		return nil;
+
+	addr = (u64int)fs->cg[i].cgblkno*fs->blocksize;
+	blk = diskread(fs->disk, fs->blocksize, addr);
+	if(blk == nil)
+		return nil;
+	hdr = (Cgblk*)blk->data;
+	if(checkcgblk(hdr) >= 0){
+		*pb = blk;
+		return hdr;
+	}
+	fprint(2, "checkcgblk %d %lud: %r\n", i, (ulong)fs->cg[i].cgblkno);
+	blockput(blk);
+	return nil;
+}
+
+/*
+ * (Re)load the file system geometry from the super block.
+ * Reads SBSIZE bytes at SBOFF, checks the magic number, copies the
+ * sizes into the Ffs and Fsys structures, and computes the block
+ * addresses of each cylinder group's pieces.
+ * Returns 0 on success, -1 on a read failure or bad super block.
+ */
+static int
+ffssync(Fsys *fsys)
+{
+ int i;
+ Block *b, *cgb;
+ Cgblk *cgblk;
+ Cylgrp *cg;
+ Disk *disk;
+ Ffs *fs;
+ Fsblk *fsblk;
+
+ fs = fsys->priv;
+ disk = fs->disk;
+
+ /*
+ * Read super block.
+ */
+ if((b = diskread(disk, SBSIZE, SBOFF)) == nil)
+ goto error;
+ fsblk = (Fsblk*)b->data;
+ if(checkfsblk(fsblk) < 0)
+ goto error;
+
+ fs->blocksize = fsblk->blocksize;
+ /* round the fragment count up to whole blocks */
+ fs->nblock = (fsblk->nfrag+fsblk->fragsperblock-1) / fsblk->fragsperblock;
+ fs->fragsize = fsblk->fragsize;
+ fs->fragspergroup = fsblk->fragspergroup;
+ fs->fragsperblock = fsblk->fragsperblock;
+ fs->inosperblock = fsblk->inosperblock;
+ fs->inospergroup = fsblk->inospergroup;
+
+ fs->nfrag = fsblk->nfrag;
+ fs->ndfrag = fsblk->ndfrag;
+ /* bytes per cylinder group divided by the block size */
+ fs->blockspergroup = (u64int)fsblk->cylspergroup *
+ fsblk->secspercyl * BYTESPERSEC / fsblk->blocksize;
+ fs->ncg = fsblk->ncg;
+
+ fsys->blocksize = fs->blocksize;
+ fsys->nblock = fs->nblock;
+
+ if(0) fprint(2, "ffs %d %d-byte blocks, %d cylinder groups\n",
+ fs->nblock, fs->blocksize, fs->ncg);
+
+ /*
+ * The group table is allocated only on the first call.
+ * NOTE(review): if ncg were larger on a later sync the table
+ * would be too small -- confirm that cannot happen.
+ */
+ if(fs->cg == nil)
+ fs->cg = emalloc(fs->ncg*sizeof(Cylgrp));
+ for(i=0; i<fs->ncg; i++){
+ cg = &fs->cg[i];
+ cg->bno = fs->blockspergroup*i + fsblk->cgoffset * (i & ~fsblk->cgmask);
+ /* the super block gives fragment offsets; convert to block offsets */
+ cg->cgblkno = cg->bno + fsblk->cfragno/fs->fragsperblock;
+ cg->ibno = cg->bno + fsblk->ifragno/fs->fragsperblock;
+ cg->dbno = cg->bno + fsblk->dfragno/fs->fragsperblock;
+
+ /* checkcg is #defined to 0 in this file, so this validation is compiled out */
+ if(checkcg){
+ if((cgb = diskread(disk, fs->blocksize, (u64int)cg->cgblkno*fs->blocksize)) == nil)
+ goto error;
+
+ cgblk = (Cgblk*)cgb->data;
+ if(checkcgblk(cgblk) < 0){
+ blockput(cgb);
+ goto error;
+ }
+ if(cgblk->nfrag % fs->fragsperblock && i != fs->ncg-1){
+ werrstr("fractional number of blocks in non-last cylinder group %d", cgblk->nfrag);
+ blockput(cgb);
+ goto error;
+ }
+ // cg->nfrag = cgblk->nfrag;
+ // cg->nblock = (cgblk->nfrag+fs->fragsperblock-1) / fs->fragsperblock;
+ // fprint(2, "cg #%d: cgblk %lud, %d blocks, %d inodes\n", cgblk->num, (ulong)cg->cgblkno, cg->nblock, cg->nino);
+ }
+ }
+ blockput(b);
+ return 0;
+
+error:
+ /* NOTE(review): b is nil here when the first diskread failed; this relies on blockput accepting nil -- confirm */
+ blockput(b);
+ return -1;
+}
+
+/*
+ * Release the cylinder group table, the private Ffs state,
+ * and the Fsys itself.  The underlying disk is not closed here.
+ */
+static void
+ffsclose(Fsys *fsys)
+{
+	Ffs *ffs;
+
+	ffs = fsys->priv;
+	free(ffs->cg);	/* free(nil) is a no-op */
+	free(ffs);
+	free(fsys);
+}
+
+/*
+ * Validate a super block: returns 0 if its magic number is
+ * FSMAGIC, otherwise sets the error string and returns -1.
+ */
+static int
+checkfsblk(Fsblk *super)
+{
+	if(super->magic == FSMAGIC)
+		return 0;
+	werrstr("bad super block");
+	return -1;
+}
+
+/*
+ * Validate a cylinder group block: returns 0 if its magic number
+ * is CGMAGIC, otherwise sets the error string and returns -1.
+ */
+static int
+checkcgblk(Cgblk *cg)
+{
+	if(cg->magic == CGMAGIC)
+		return 0;
+	werrstr("bad cylinder group block");
+	return -1;
+}
+
+/*
+ * Read block #bno from the disk, zeroing unused data.
+ * If there is no data whatsoever, it's okay to return nil.
+ *
+ * The cylinder group's free-fragment map decides what is unused:
+ * a set bit marks a fragment whose contents are replaced by zeros,
+ * and a block whose fragments are all marked is not read at all.
+ * nil is also returned if bno lies beyond the last cylinder group
+ * or a disk read fails.
+ */
+/* counts blocks skipped because every fragment was marked in the map */
+int nskipx;
+static Block*
+ffsblockread(Fsys *fsys, u64int bno)
+{
+ u32int i, o;
+ u8int *fmap;
+ int frag, fsize, avail;
+ Block *b;
+// Cylgrp *cg;
+ Cgblk *cgblk;
+ Ffs *fs;
+
+ fs = fsys->priv;
+ /* i is the cylinder group, o the block's index within that group */
+ i = bno / fs->blockspergroup;
+ o = bno % fs->blockspergroup;
+ if(i >= fs->ncg)
+ return nil;
+// cg = &fs->cg[i];
+
+// if(o >= cg->nblock)
+// return nil;
+
+ if((cgblk = ffscylgrp(fs, i, &b)) == nil)
+ return nil;
+
+ /* the map has one bit per fragment, so block o owns frag consecutive bits */
+ fmap = (u8int*)cgblk+cgblk->fmapoff;
+ frag = fs->fragsperblock;
+ switch(frag){
+ default:
+ sysfatal("bad frag");
+ case 8:
+ avail = fmap[o];
+ break;
+ case 4:
+ avail = (fmap[o>>1] >> ((o&1)*4)) & 0xF;
+ break;
+ case 2:
+ avail = (fmap[o>>2] >> ((o&3)*2)) & 0x3;
+ break;
+ case 1:
+ avail = (fmap[o>>3] >> (o&7)) & 0x1;
+ break;
+ }
+ blockput(b);
+
+ /* every fragment marked: nothing worth reading */
+ if(avail == ((1<<frag)-1))
+{
+nskipx++;
+ return nil;
+}
+ if((b = diskread(fs->disk, fs->blocksize, bno*fs->blocksize)) == nil){
+ fprint(2, "diskread failed!!!\n");
+ return nil;
+ }
+
+ /* zero each fragment whose bit is set in the map */
+ fsize = fs->fragsize;
+ for(i=0; i<frag; i++)
+ if(avail & (1<<i))
+ memset(b->data + fsize*i, 0, fsize);
+ return b;
+}
+
+/*
+ * Read data starting at fragment number bno.  At least one full
+ * fragment is read even if size is smaller.  Fragment 0 means
+ * "no block" and yields nil, as do out-of-range fragment numbers,
+ * I/O errors, and short reads.  The caller must blockput the result.
+ */
+static Block*
+ffsdatablock(Ffs *fs, u32int bno, int size)
+{
+	int nbytes;
+	u64int addr;
+	Block *blk;
+
+	if(bno == 0)
+		return nil;
+
+	nbytes = size < fs->fragsize ? fs->fragsize : size;
+
+	if(bno >= fs->nfrag){
+		fprint(2, "ffs: request for block %#lux; nfrag %#x\n", (ulong)bno, fs->nfrag);
+		return nil;
+	}
+	addr = (u64int)bno*fs->fragsize;
+	blk = diskread(fs->disk, nbytes, addr);
+	if(blk == nil){
+		fprint(2, "ffs: disk i/o at %#llux for %#ux: %r\n", addr, nbytes);
+		return nil;
+	}
+	if(blk->len >= nbytes)
+		return blk;
+
+	fprint(2, "ffs: disk i/o at %#llux for %#ux got %#ux\n", addr, nbytes,
+		blk->len);
+	blockput(blk);
+	return nil;
+}
+
+/*
+ * Return the data for logical block bno of the file described by
+ * ino, reading size bytes of it (ffsdatablock rounds small sizes
+ * up to one fragment).  Returns nil for holes, I/O errors, and
+ * block numbers beyond what the inode can address.
+ *
+ * The first NDADDR blocks are addressed directly by ino->db.
+ * ino->ib[0] is a single-indirect block, ib[1] double-indirect,
+ * ib[2] triple-indirect: each level multiplies the number of
+ * addressable blocks by ppb, the number of 4-byte pointers that
+ * fit in one file system block.
+ */
+static Block*
+ffsfileblock(Ffs *fs, Inode *ino, u32int bno, int size)
+{
+	int level, ppb;
+	u32int addr, lbn;
+	u64int span;
+	Block *b;
+	u32int *a;
+
+	if(bno < NDADDR){
+		if(debug) fprint(2, "ffsfileblock %lud: direct %#lux\n", (ulong)bno, (ulong)ino->db[bno]);
+		return ffsdatablock(fs, ino->db[bno], size);
+	}
+	lbn = bno;	/* remembered for messages */
+	bno -= NDADDR;
+	ppb = fs->blocksize/4;
+	if(ppb <= 0)
+		return nil;
+
+	/*
+	 * Find which indirect pointer covers bno.  span is the number
+	 * of file blocks reachable through ib[level]; on exit bno is
+	 * the offset within that subtree.
+	 */
+	span = ppb;
+	for(level=0; level<NIADDR; level++){
+		if(bno < span)
+			break;
+		bno -= span;
+		span *= ppb;
+	}
+	if(level == NIADDR){
+		fprint(2, "ffsfileblock %lud: too big\n", (ulong)lbn);
+		return nil;
+	}
+
+	/*
+	 * Walk down the pointer blocks.  At each step span becomes
+	 * the number of file blocks covered by one pointer in the
+	 * block being read.
+	 */
+	addr = ino->ib[level];
+	for(;;){
+		if(debug) fprint(2, "ffsfileblock %lud: indirect %#lux\n", (ulong)lbn, (ulong)addr);
+		span /= ppb;
+		b = ffsdatablock(fs, addr, fs->blocksize);
+		if(b == nil)
+			return nil;
+		a = (u32int*)b->data;
+		addr = a[bno/span];
+		bno %= span;
+		blockput(b);
+		if(level-- == 0)
+			break;
+	}
+	if(debug) fprint(2, "ffsfileblock: indirect fetch %#lux size %d\n", (ulong)addr, size);
+	return ffsdatablock(fs, addr, size);
+}
+
+/*
+ * NFS handles are 4-byte inode number.
+ * The low 32 bits of ino are stored most significant byte first.
+ */
+static void
+mkhandle(Nfs3Handle *h, u64int ino)
+{
+	int k;
+
+	for(k=0; k<4; k++)
+		h->h[k] = ino >> (8*(3-k));
+	h->len = 4;
+}
+
+/*
+ * Decode a 32-bit value stored most significant byte first.
+ * Each byte is widened to u32int before shifting so that a
+ * top byte >= 0x80 is not shifted into the sign bit of an int.
+ */
+static u32int
+byte2u32(uchar *p)
+{
+	return ((u32int)p[0]<<24) | ((u32int)p[1]<<16) | ((u32int)p[2]<<8) | (u32int)p[3];
+}
+
+/*
+ * Decode the inode number from NFS handle h and copy that inode
+ * from disk into *ino.  If pinum is non-nil the inode number is
+ * stored there too (even if the later disk read fails).
+ * Returns Nfs3ErrBadHandle for a handle of the wrong length or an
+ * inode number past the last cylinder group, Nfs3ErrIo if the
+ * inode block cannot be read, and Nfs3Ok otherwise.
+ */
+static Nfs3Status
+handle2ino(Ffs *fs, Nfs3Handle *h, u32int *pinum, Inode *ino)
+{
+ int i;
+ u32int ioff;
+ u32int inum;
+ Block *b;
+ Cylgrp *cg;
+
+ if(h->len != 4)
+ return Nfs3ErrBadHandle;
+ inum = byte2u32(h->h);
+ if(pinum)
+ *pinum = inum;
+ if(debug) print("inum %d...", (int)inum);
+
+ /* fetch inode from disk */
+ /* i is the cylinder group, ioff the inode's index within it */
+ i = inum / fs->inospergroup;
+ ioff = inum % fs->inospergroup;
+ if(debug)print("cg %d off %d...", i, (int)ioff);
+ if(i >= fs->ncg)
+ return Nfs3ErrBadHandle;
+ cg = &fs->cg[i];
+/*
+ if(ioff >= cg->nino)
+ return Nfs3ErrBadHandle;
+*/
+
+ if(debug) print("cg->ibno %d...", cg->ibno);
+ /* read the one block of the group's inode area that holds this inode */
+ if((b = diskread(fs->disk, fs->blocksize,
+ (cg->ibno+ioff/fs->inosperblock)*(vlong)fs->blocksize)) == nil)
+ return Nfs3ErrIo;
+ /* NOTE(review): b->len is not checked before indexing into the block -- confirm diskread never returns short here */
+ *ino = ((Inode*)b->data)[ioff%fs->inosperblock];
+ blockput(b);
+
+ return Nfs3Ok;
+}
+
+/*
+ * Produce the handle for the root directory, which is
+ * always inode ROOTINODE.
+ */
+static Nfs3Status
+ffsroot(Fsys *fsys, Nfs3Handle *h)
+{
+	USED(fsys);
+	mkhandle(h, ROOTINODE);
+	return Nfs3Ok;
+}
+
+/*
+ * Translate an on-disk inode into NFS attributes.
+ * Returns Nfs3ErrBadHandle (leaving attr->type set to -1) for
+ * whiteouts and unrecognized file types, Nfs3Ok otherwise.
+ */
+static Nfs3Status
+ino2attr(Ffs *fs, Inode *ino, u32int inum, Nfs3Attr *attr)
+{
+	u32int dev;
+
+	attr->type = -1;
+	switch(ino->mode&IFMT){
+	case IFIFO:	attr->type = Nfs3FileFifo;	break;
+	case IFCHR:	attr->type = Nfs3FileChar;	break;
+	case IFDIR:	attr->type = Nfs3FileDir;	break;
+	case IFBLK:	attr->type = Nfs3FileBlock;	break;
+	case IFREG:	attr->type = Nfs3FileReg;	break;
+	case IFLNK:	attr->type = Nfs3FileSymlink;	break;
+	case IFSOCK:	attr->type = Nfs3FileSocket;	break;
+	case IFWHT:
+	default:
+		return Nfs3ErrBadHandle;
+	}
+
+	attr->mode = ino->mode&07777;
+	attr->nlink = ino->nlink;
+	attr->uid = ino->uid;
+	attr->gid = ino->gid;
+	attr->size = ino->size;
+	attr->used = ino->nblock*fs->blocksize;
+
+	/* device nodes keep their device number in the first direct block slot */
+	attr->major = 0;
+	attr->minor = 0;
+	if(attr->type==Nfs3FileBlock || attr->type==Nfs3FileChar){
+		dev = ino->db[0];
+		attr->major = (dev>>8)&0xFF;
+		attr->minor = dev & 0xFFFF00FF;
+	}
+
+	attr->fsid = 0;
+	attr->fileid = inum;
+	attr->atime.sec = ino->atime;
+	attr->atime.nsec = ino->atimensec;
+	attr->mtime.sec = ino->mtime;
+	attr->mtime.nsec = ino->mtimensec;
+	attr->ctime.sec = ino->ctime;
+	attr->ctime.nsec = ino->ctimensec;
+	return Nfs3Ok;
+}
+
+/*
+ * Report whether gid appears in the caller's list of
+ * supplementary groups: 1 if so, 0 if not.
+ */
+static int
+ingroup(SunAuthUnix *au, uint gid)
+{
+	int k;
+
+	for(k=0; k<au->ng; k++){
+		if(au->g[k] != gid)
+			continue;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Check that the caller au holds all the permission bits in need
+ * (AREAD, AEXEC, ...) on the inode.  The owner's bits are used if
+ * the uid matches, else the group's if any of the caller's groups
+ * match, else the bits for everyone else.
+ */
+static Nfs3Status
+inoperm(Inode *ino, SunAuthUnix *au, int need)
+{
+	int bits;
+
+	bits = ino->mode&0777;
+	if(ino->uid == au->uid)
+		bits >>= 6;
+	else if(ino->gid == au->gid || ingroup(au, ino->gid))
+		bits >>= 3;
+
+	if((bits&need) == need)
+		return Nfs3Ok;
+	return Nfs3ErrNotOwner;	/* really EPERM */
+}
+
+/*
+ * Fetch the attributes of the file named by handle h.
+ * No permission check is made: anyone can getattr.
+ */
+static Nfs3Status
+ffsgetattr(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, Nfs3Attr *attr)
+{
+	Inode ino;
+	u32int inum;
+	Ffs *fs;
+	Nfs3Status st;
+
+	fs = fsys->priv;
+	st = handle2ino(fs, h, &inum, &ino);
+	if(st != Nfs3Ok)
+		return st;
+
+	USED(au);	/* anyone can getattr */
+
+	return ino2attr(fs, &ino, inum, attr);
+}
+
+/*
+ * Report which of the access rights in want the caller holds on
+ * the file named by h, storing them in *got, and fill in attr.
+ * Only read, lookup (directories) and execute (non-directories)
+ * are ever granted; the file system is read-only from here.
+ */
+static Nfs3Status
+ffsaccess(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr)
+{
+	int bits, isdir;
+	Inode ino;
+	u32int inum, granted;
+	Ffs *fs;
+	Nfs3Status st;
+
+	fs = fsys->priv;
+	st = handle2ino(fs, h, &inum, &ino);
+	if(st != Nfs3Ok)
+		return st;
+
+	/* pick the owner, group, or other permission bits */
+	bits = ino.mode&0777;
+	if(ino.uid == au->uid)
+		bits >>= 6;
+	else if(ino.gid == au->gid || ingroup(au, ino.gid))
+		bits >>= 3;
+
+	isdir = (ino.mode&IFMT)==IFDIR;
+	granted = 0;
+	if((want&Nfs3AccessRead) && (bits&AREAD))
+		granted |= Nfs3AccessRead;
+	if((want&Nfs3AccessLookup) && isdir && (bits&AEXEC))
+		granted |= Nfs3AccessLookup;
+	if((want&Nfs3AccessExecute) && !isdir && (bits&AEXEC))
+		granted |= Nfs3AccessExecute;
+	*got = granted;
+
+	return ino2attr(fs, &ino, inum, attr);
+}
+
+/*
+ * Look up name in the directory named by handle h and store the
+ * handle of the matching entry in *nh.
+ * Returns Nfs3ErrNotDir if h is not a directory, a permission
+ * error if the caller lacks execute permission on it, and
+ * Nfs3ErrNoEnt if no entry matches.  Directory blocks that cannot
+ * be read are skipped; a malformed entry ends the scan of its block.
+ */
+static Nfs3Status
+ffslookup(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char *name, Nfs3Handle *nh)
+{
+	u32int nblock;
+	u32int i;
+	uchar *p, *ep;
+	Dirent *de;
+	Inode ino;
+	Block *b;
+	Ffs *fs;
+	Nfs3Status ok;
+	int len, want;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+		return ok;
+
+	if((ino.mode&IFMT) != IFDIR)
+		return Nfs3ErrNotDir;
+
+	if((ok = inoperm(&ino, au, AEXEC)) != Nfs3Ok)
+		return ok;
+
+	len = strlen(name);
+	nblock = (ino.size+fs->blocksize-1) / fs->blocksize;
+	for(i=0; i<nblock; i++){
+		/*
+		 * The last block may be partial.  If the size is an exact
+		 * multiple of the block size the remainder is 0, which
+		 * means the last block is full, not empty.
+		 */
+		want = fs->blocksize;
+		if(i==nblock-1 && ino.size%fs->blocksize != 0)
+			want = ino.size % fs->blocksize;
+		b = ffsfileblock(fs, &ino, i, want);
+		if(b == nil)
+			continue;
+		p = b->data;
+		ep = p+b->len;
+		while(p < ep){
+			de = (Dirent*)p;
+			if(de->reclen == 0){
+				if(debug)
+					fprint(2, "reclen 0 at offset %d of %d\n", (int)(p-b->data), b->len);
+				break;
+			}
+			/* p now points at the next entry */
+			p += de->reclen;
+			if(p > ep){
+				if(debug)
+					fprint(2, "bad len %d at offset %d of %d\n", de->reclen, (int)(p-b->data), b->len);
+				break;
+			}
+			if(de->ino == 0)
+				continue;
+			/* 4+2+2 is the fixed entry header: ino, reclen, type, namlen */
+			if(4+2+2+de->namlen > de->reclen){
+				if(debug)
+					fprint(2, "bad namelen %d at offset %d of %d\n", de->namlen, (int)(p-b->data), b->len);
+				break;
+			}
+			if(de->namlen == len && memcmp(de->name, name, len) == 0){
+				mkhandle(nh, de->ino);
+				blockput(b);
+				return Nfs3Ok;
+			}
+		}
+		blockput(b);
+	}
+	return Nfs3ErrNoEnt;
+}
+
+/*
+ * Read directory entries from the directory named by h, starting
+ * at byte offset cookie, packing as many as fit into a freshly
+ * malloc'ed buffer of count bytes.  The buffer is returned in
+ * *pdata (the caller frees it) and its used length in *pcount.
+ * Each entry's cookie is the directory offset of the entry after
+ * it, so passing the last cookie back resumes the listing.
+ * *peof is set to 1 only when the whole directory has been
+ * scanned, not when the buffer filled up first.
+ */
+static Nfs3Status
+ffsreaddir(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count, u64int cookie, uchar **pdata, u32int *pcount, u1int *peof)
+{
+	u32int nblock;
+	u32int i;
+	int off, done;
+	uchar *data, *dp, *dep, *p, *ep, *ndp;
+	Dirent *de;
+	Inode ino;
+	Block *b;
+	Ffs *fs;
+	Nfs3Status ok;
+	Nfs3Entry e;
+	int want;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+		return ok;
+
+	if((ino.mode&IFMT) != IFDIR)
+		return Nfs3ErrNotDir;
+
+	if((ok = inoperm(&ino, au, AREAD)) != Nfs3Ok)
+		return ok;
+
+	if(cookie >= ino.size){
+		/* nothing left: say so rather than leaving *peof unset */
+		*peof = 1;
+		*pcount = 0;
+		*pdata = 0;
+		return Nfs3Ok;
+	}
+
+	dp = malloc(count);
+	data = dp;
+	if(dp == nil)
+		return Nfs3ErrNoMem;
+	dep = dp+count;
+	*peof = 0;
+	nblock = (ino.size+fs->blocksize-1) / fs->blocksize;
+	i = cookie/fs->blocksize;
+	off = cookie%fs->blocksize;
+	done = 0;
+	for(; i<nblock && !done; i++){
+		/*
+		 * The last block may be partial.  A remainder of 0 means
+		 * the last block is full, not empty.
+		 */
+		want = fs->blocksize;
+		if(i==nblock-1 && ino.size%fs->blocksize != 0)
+			want = ino.size % fs->blocksize;
+		b = ffsfileblock(fs, &ino, i, want);
+		if(b == nil)
+			continue;
+		p = b->data;
+		ep = p+b->len;
+		memset(&e, 0, sizeof e);
+		while(p < ep){
+			de = (Dirent*)p;
+			if(de->reclen == 0){
+				if(debug) fprint(2, "reclen 0 at offset %d of %d\n", (int)(p-b->data), b->len);
+				break;
+			}
+			/* p now points at the next entry */
+			p += de->reclen;
+			if(p > ep){
+				if(debug) fprint(2, "reclen %d at offset %d of %d\n", de->reclen, (int)(p-b->data), b->len);
+				break;
+			}
+			if(de->ino == 0){
+				if(debug) fprint(2, "zero inode\n");
+				continue;
+			}
+			/* 4+2+2 is the fixed entry header: ino, reclen, type, namlen */
+			if(4+2+2+de->namlen > de->reclen){
+				if(debug) fprint(2, "bad namlen %d reclen %d at offset %d of %d\n", de->namlen, de->reclen, (int)(p-b->data), b->len);
+				break;
+			}
+			/* names must be NUL-terminated since e.name is used as a C string */
+			if(de->name[de->namlen] != 0){
+				if(debug) fprint(2, "bad name %d %.*s\n", de->namlen, de->namlen, de->name);
+				continue;
+			}
+			if(debug) print("%s/%d ", de->name, (int)de->ino);
+			/* skip entries before the resume point in the first block */
+			if((uchar*)de - b->data < off)
+				continue;
+			e.fileid = de->ino;
+			e.name = de->name;
+			e.cookie = (u64int)i*fs->blocksize + (p - b->data);
+			if(nfs3entrypack(dp, dep, &ndp, &e) < 0){
+				/* reply buffer is full */
+				done = 1;
+				break;
+			}
+			dp = ndp;
+		}
+		off = 0;
+		blockput(b);
+	}
+	/*
+	 * The loop ends either because every block was scanned or
+	 * because the buffer filled.  Testing i==nblock is not enough:
+	 * the loop increment runs even after done is set, so a buffer
+	 * that fills in the last block would look like end of file.
+	 */
+	if(!done)
+		*peof = 1;
+
+	*pcount = dp - data;
+	*pdata = data;
+	return Nfs3Ok;
+}
+
+/*
+ * Read up to count bytes at offset from the file named by h.
+ * The data is returned in a malloc'ed buffer *pdata (the caller
+ * frees it) with its length in *pcount.  A single call never
+ * crosses a file system block boundary, so fewer bytes than
+ * requested may be returned.  *peof is set when the data returned
+ * reaches the end of the file.
+ */
+static Nfs3Status
+ffsreadfile(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count,
+ u64int offset, uchar **pdata, u32int *pcount, u1int *peof)
+{
+ uchar *data;
+ Block *b;
+ Ffs *fs;
+ int off, want, fragcount;
+ Inode ino;
+ Nfs3Status ok;
+
+ fs = fsys->priv;
+ if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+ return ok;
+
+ if((ok = inoperm(&ino, au, AREAD)) != Nfs3Ok)
+ return ok;
+
+ if(offset >= ino.size){
+ *pdata = 0;
+ *pcount = 0;
+ *peof = 1;
+ return Nfs3Ok;
+ }
+ /* trim the request to the end of the file ... */
+ if(offset+count > ino.size)
+ count = ino.size-offset;
+ /* ... and to the end of the block containing offset */
+ if(offset/fs->blocksize != (offset+count-1)/fs->blocksize)
+ count = fs->blocksize - offset%fs->blocksize;
+
+ data = malloc(count);
+ if(data == nil)
+ return Nfs3ErrNoMem;
+
+ /* bytes needed from the start of the block, rounded up to whole fragments */
+ want = offset%fs->blocksize+count;
+ if(want%fs->fragsize)
+ want += fs->fragsize - want%fs->fragsize;
+
+ b = ffsfileblock(fs, &ino, offset/fs->blocksize, want);
+ if(b == nil){
+ /* BUG: distinguish sparse file from I/O error */
+ memset(data, 0, count);
+ }else{
+ off = offset%fs->blocksize;
+ fragcount = count; /* need signed variable */
+ /* return only what the block actually holds */
+ if(off+fragcount > b->len){
+ fragcount = b->len - off;
+ if(fragcount < 0)
+ fragcount = 0;
+ }
+ if(fragcount > 0)
+ memmove(data, b->data+off, fragcount);
+ count = fragcount;
+ blockput(b);
+ }
+ *peof = (offset+count == ino.size);
+ *pcount = count;
+ *pdata = data;
+ return Nfs3Ok;
+}
+
+/*
+ * Read the target of the symbolic link named by h into a
+ * malloc'ed, NUL-terminated string stored in *link (the caller
+ * frees it).  Links longer than 1024 bytes are rejected with
+ * Nfs3ErrIo.
+ * NOTE(review): the inode type is not checked here, so a handle for
+ * a non-symlink is not rejected -- confirm callers guarantee it.
+ */
+static Nfs3Status
+ffsreadlink(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char **link)
+{
+ Ffs *fs;
+ Nfs3Status ok;
+ int len;
+ Inode ino;
+ Block *b;
+
+ fs = fsys->priv;
+ if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+ return ok;
+ if((ok = inoperm(&ino, au, AREAD)) != Nfs3Ok)
+ return ok;
+
+ if(ino.size > 1024)
+ return Nfs3ErrIo;
+ len = ino.size;
+
+ /* the inode owns data blocks: the target is stored in the first one */
+ if(ino.nblock != 0){
+ /* BUG: assumes symlink fits in one block */
+ b = ffsfileblock(fs, &ino, 0, len);
+ if(b == nil)
+ return Nfs3ErrIo;
+ /* a NUL inside the target is treated as corruption */
+ if(memchr(b->data, 0, len) != nil){
+ blockput(b);
+ return Nfs3ErrIo;
+ }
+ *link = malloc(len+1);
+ if(*link == 0){
+ blockput(b);
+ return Nfs3ErrNoMem;
+ }
+ memmove(*link, b->data, len);
+ (*link)[len] = 0;
+ blockput(b);
+ return Nfs3Ok;
+ }
+
+ /* no data blocks: the target is stored in the inode's block pointer arrays */
+ if(len > sizeof ino.db + sizeof ino.ib)
+ return Nfs3ErrIo;
+
+ *link = malloc(len+1);
+ if(*link == 0)
+ return Nfs3ErrNoMem;
+ /* the copy may run from db on into ib, which follows it in the struct */
+ memmove(*link, ino.db, ino.size);
+ (*link)[len] = 0;
+ return Nfs3Ok;
+}
diff --git a/src/libdiskfs/ffs.h b/src/libdiskfs/ffs.h
new file mode 100644
index 00000000..479ff65f
--- /dev/null
+++ b/src/libdiskfs/ffs.h
@@ -0,0 +1,281 @@
+/*
+ * An FFS file system is a sequence of cylinder groups.
+ *
+ * Each cylinder group is laid out as follows:
+ *
+ * fs superblock (Fsblk)
+ * cylinder group block (Cgblk)
+ * inodes
+ * data
+ *
+ * The location of the fs superblock in the first cylinder
+ * group is known. The rest of the info about cylinder group
+ * layout can be derived from the super block.
+ */
+
+#define daddr_t u32int
+#define time_t u32int
+
+typedef struct Cgblk Cgblk;
+typedef struct Cylgrp Cylgrp;
+typedef struct Cylsum Cylsum;
+typedef struct Ffs Ffs;
+typedef struct Fsblk Fsblk;
+typedef struct Inode Inode;
+typedef struct Dirent Dirent;
+
+/*
+ * Constants describing the FFS on-disk format.
+ */
+enum
+{
+ BYTESPERSEC = 512,
+
+ /* constants for Fsblk */
+ FSMAXMNTLEN = 512,
+ FSNOCSPTRS = 128 / sizeof(void*) - 3, /* sized so Fsblk's pointer members total 128 bytes */
+ FSMAXSNAP = 20,
+ FSMAGIC = 0x011954,
+ FSCHECKSUM = 0x7c269d38,
+
+ /* Fsblk.inodefmt */
+ FS42INODEFMT = -1,
+ FS44INODEFMT = 2,
+
+ /* offset and size of first boot block */
+ BBOFF = 0,
+ BBSIZE = 8192,
+
+ /* offset and size of first super block */
+ SBOFF = BBOFF+BBSIZE,
+ SBSIZE = 8192,
+
+ /* minimum block size */
+ MINBSIZE = 4096,
+
+ /* maximum fragments per block */
+ MAXFRAG = 8,
+
+ /* constants for Cgblk */
+ CGMAGIC = 0x090255,
+
+ /* inode-related */
+ ROOTINODE = 2,
+ WHITEOUT = 1,
+
+ NDADDR = 12, /* number of direct block pointers in an inode (Inode.db) */
+ NIADDR = 3, /* number of indirect block pointers in an inode (Inode.ib) */
+
+ /* permissions in Inode.mode */
+ IEXEC = 00100,
+ IWRITE = 0200,
+ IREAD = 0400,
+ ISVTX = 01000,
+ ISGID = 02000,
+ ISUID = 04000,
+
+ /* type in Inode.mode */
+ IFMT = 0170000,
+ IFIFO = 0010000,
+ IFCHR = 0020000,
+ IFDIR = 0040000,
+ IFBLK = 0060000,
+ IFREG = 0100000,
+ IFLNK = 0120000,
+ IFSOCK = 0140000,
+ IFWHT = 0160000,
+
+ /* type in Dirent.type */
+ DTUNKNOWN = 0,
+ DTFIFO = 1,
+ DTCHR = 2,
+ DTDIR = 4,
+ DTBLK = 6,
+ DTREG = 8,
+ DTLNK = 10,
+ DTSOCK = 12,
+ DTWHT = 14,
+};
+
+/*
+ * Summary counters embedded in Fsblk (cstotal) and Cgblk (csum).
+ * NOTE(review): field meanings are inferred from the names; nothing
+ * in this excerpt reads them -- confirm.
+ */
+struct Cylsum
+{
+ u32int ndir; /* presumably number of directories */
+ u32int nbfree; /* presumably number of free blocks */
+ u32int nifree; /* presumably number of free inodes */
+ u32int nffree; /* presumably number of free fragments */
+};
+
+/*
+ * FFS super block, overlaid directly on the bytes read from disk
+ * at SBOFF.  Most fields are present only to keep the layout
+ * right; the ones the code uses are commented.
+ */
+struct Fsblk
+{
+ u32int unused0;
+ u32int unused1;
+ daddr_t sfragno; /* fragment address of super block in file system */
+ daddr_t cfragno; /* fragment address of cylinder block in file system */
+ daddr_t ifragno; /* fragment offset of inode blocks in file system */
+ daddr_t dfragno; /* fragment offset of data blocks in cg */
+ u32int cgoffset; /* block (maybe fragment?) offset of Cgblk in cylinder */
+ u32int cgmask;
+ time_t time;
+ u32int nfrag; /* number of blocks in fs * fragsperblock */
+ u32int ndfrag;
+ u32int ncg; /* number of cylinder groups in fs */
+ u32int blocksize; /* block size in fs */
+ u32int fragsize; /* frag size in fs */
+ u32int fragsperblock; /* fragments per block: blocksize / fragsize */
+ u32int minfree; /* ignored by us */
+ u32int rotdelay; /* ... */
+ u32int rps;
+ u32int bmask;
+ u32int fmask;
+ u32int bshift;
+ u32int fshift;
+ u32int maxcontig;
+ u32int maxbpg;
+ u32int fragshift;
+ u32int fsbtodbshift;
+ u32int sbsize; /* size of super block */
+ u32int unused2; /* more stuff we don't use ... */
+ u32int unused3;
+ u32int nindir;
+ u32int inosperblock; /* inodes per block */
+ u32int nspf;
+ u32int optim;
+ u32int npsect;
+ u32int interleave;
+ u32int trackskew;
+ u32int id[2];
+ daddr_t csaddr; /* blk addr of cyl grp summary area */
+ u32int cssize; /* size of cyl grp summary area */
+ u32int cgsize; /* cylinder group size */
+ u32int trackspercyl; /* tracks per cylinder */
+ u32int secspertrack; /* sectors per track */
+ u32int secspercyl; /* sectors per cylinder */
+ u32int ncyl; /* cylinders in fs */
+ u32int cylspergroup; /* cylinders per group */
+ u32int inospergroup; /* inodes per group */
+ u32int fragspergroup; /* data blocks per group * fragperblock */
+ Cylsum cstotal; /* more unused... */
+ u8int fmod;
+ u8int clean;
+ u8int ronly;
+ u8int flags;
+ char fsmnt[FSMAXMNTLEN];
+ u32int cgrotor;
+ /* the next four members are pointers; FSNOCSPTRS is chosen so that together they occupy 128 bytes */
+ void* ocsp[FSNOCSPTRS];
+ u8int* contigdirs;
+ Cylsum* csp;
+ u32int* maxcluster;
+ u32int cpc;
+ u16int opostbl[16][8];
+ u32int snapinum[FSMAXSNAP];
+ u32int avgfilesize;
+ u32int avgfpdir;
+ u32int sparecon[26];
+ u32int pendingblocks;
+ u32int pendinginodes;
+ u32int contigsumsize;
+ u32int maxsymlinklen;
+ u32int inodefmt; /* format of on-disk inodes */
+ u64int maxfilesize; /* maximum representable file size */
+ u64int qbmask;
+ u64int qfmask;
+ u32int state;
+ u32int postblformat;
+ u32int nrpos;
+ u32int postbloff;
+ u32int rotbloff;
+ u32int magic; /* FS_MAGIC */
+};
+
+/*
+ * Cylinder group block for a file system.
+ * Overlaid directly on the bytes read from disk; the offset
+ * fields (imapoff, fmapoff, ...) are byte offsets from the start
+ * of this structure, as used for fmapoff in ffsblockread.
+ */
+struct Cgblk
+{
+ u32int unused0;
+ u32int magic; /* CGMAGIC */
+ u32int time; /* time last written */
+ u32int num; /* we are cg #cgnum */
+ u16int ncyl; /* number of cylinders in gp */
+ u16int nino; /* number of inodes */
+ u32int nfrag; /* number of fragments */
+ Cylsum csum;
+ u32int rotor;
+ u32int frotor;
+ u32int irotor;
+ u32int frsum[MAXFRAG]; /* counts of available frags */
+ u32int btotoff;
+ u32int boff;
+ u32int imapoff; /* offset to used inode map */
+ u32int fmapoff; /* offset to free fragment map */
+ u32int nextfrag; /* next free fragment */
+ u32int csumoff;
+ u32int clusteroff;
+ u32int ncluster;
+ u32int sparecon[13];
+};
+
+/*
+ * In-core location of one cylinder group, computed by ffssync
+ * from the super block.
+ */
+struct Cylgrp
+{
+ /* these are block numbers not fragment numbers */
+ u32int bno; /* disk block address of start of cg */
+ u32int ibno; /* disk block address of first inode */
+ u32int dbno; /* disk block address of first data */
+ u32int cgblkno; /* disk block address of the group's Cgblk */
+};
+
+/*
+ * this is the on-disk structure
+ * handle2ino copies it straight out of an inode block.
+ */
+struct Inode
+{
+ u16int mode; /* file type (IFMT bits) and permissions */
+ u16int nlink;
+ u32int unused;
+ u64int size; /* file size in bytes */
+ u32int atime;
+ u32int atimensec;
+ u32int mtime;
+ u32int mtimensec;
+ u32int ctime;
+ u32int ctimensec;
+ /* rdev is db[0] */
+ u32int db[NDADDR]; /* direct block pointers (fragment numbers); also holds short symlink targets */
+ u32int ib[NIADDR]; /* indirect block pointers (fragment numbers) */
+ u32int flags;
+ u32int nblock;
+ u32int gen;
+ u32int uid;
+ u32int gid;
+ u32int spare[2];
+};
+
+/*
+ * On-disk directory entry.  Entries are variable length: reclen
+ * gives the distance to the next entry and namlen the length of
+ * the name, which extends past the one byte declared here.
+ */
+struct Dirent
+{
+ u32int ino; /* inode number; 0 marks an unused entry */
+ u16int reclen; /* total length of this entry in bytes */
+ u8int type; /* one of the DT* constants */
+ u8int namlen; /* length of name, not counting the terminating NUL */
+ char name[1];
+};
+
+/*
+ * main file system structure
+ * Filled in from the super block by ffssync.
+ */
+struct Ffs
+{
+ int blocksize; /* bytes per block */
+ int nblock; /* number of blocks, rounded up from nfrag */
+ int fragsize; /* bytes per fragment */
+ int fragsperblock;
+ int inosperblock;
+ int blockspergroup;
+ int fragspergroup;
+ int inospergroup;
+
+ u32int nfrag; /* total fragments; upper bound for fragment numbers */
+ u32int ndfrag;
+
+ int ncg; /* number of cylinder groups */
+ Cylgrp *cg; /* ncg entries */
+
+ Disk *disk;
+};
+
diff --git a/src/libdiskfs/file.c b/src/libdiskfs/file.c
new file mode 100644
index 00000000..b72f053d
--- /dev/null
+++ b/src/libdiskfs/file.c
@@ -0,0 +1,99 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+
+/*
+ * A Disk backed by an open file descriptor.
+ * disk must stay the first member: the Disk* handed to the
+ * callbacks is cast back to DiskFile*.
+ */
+typedef struct DiskFile DiskFile;
+struct DiskFile
+{
+ Disk disk;
+ int fd; /* file opened read-only by diskopenfile */
+};
+
+int nfilereads;
+
+/*
+ * Read exactly ulen bytes at offset, retrying after short reads.
+ * Stops early at end of file or on error and returns the number
+ * of bytes actually read (possibly 0).  Bumps nfilereads once
+ * per call.
+ */
+static long
+preadn(int fd, void *vdata, u32int ulen, u64int offset)
+{
+	long got, left;
+	uchar *p;
+
+	nfilereads++;
+	p = vdata;
+	for(left = ulen; left > 0; left -= got){
+		got = pread(fd, p, left, offset);
+		if(got <= 0)
+			break;
+		p += got;
+		offset += got;
+	}
+	return p-(uchar*)vdata;
+}
+
+/*
+ * Release a block made by diskfileread.  The header and data
+ * were allocated together, so one free releases both.
+ */
+static void
+diskfileblockput(Block *b)
+{
+ free(b);
+}
+
+uvlong nreadx;
+/*
+ * Read len bytes at offset from the file into a new Block.
+ * The block may be shorter than len (b->len says how much was
+ * read); nil is returned if nothing could be read or memory ran
+ * out.  nreadx accumulates the number of bytes requested.
+ */
+static Block*
+diskfileread(Disk *dd, u32int len, u64int offset)
+{
+ int n;
+ Block *b;
+ DiskFile *d = (DiskFile*)dd;
+
+ /* header and data share one allocation; the data follows the header */
+ b = mallocz(sizeof(Block)+len, 1);
+ if(b == nil)
+ return nil;
+ b->data = (uchar*)&b[1];
+nreadx += len;
+ n = preadn(d->fd, b->data, len, offset);
+ if(n <= 0){
+ free(b);
+ return nil;
+ }
+ b->_close = diskfileblockput;
+ b->len = n;
+ return b;
+}
+
+/*
+ * Nothing is cached for a file-backed disk, so sync does
+ * nothing and always succeeds.
+ */
+static int
+diskfilesync(Disk *dd)
+{
+ USED(dd);
+ return 0;
+}
+
+/*
+ * Close the file descriptor and free the DiskFile.
+ */
+static void
+diskfileclose(Disk *dd)
+{
+ DiskFile *d = (DiskFile*)dd;
+
+ close(d->fd);
+ free(d);
+}
+
+/*
+ * Open the named file read-only and wrap it in a Disk.
+ * Returns nil if the file cannot be opened or memory runs out.
+ */
+Disk*
+diskopenfile(char *file)
+{
+	int fd;
+	DiskFile *df;
+
+	fd = open(file, OREAD);
+	if(fd < 0)
+		return nil;
+	df = mallocz(sizeof(DiskFile), 1);
+	if(df != nil){
+		df->disk._read = diskfileread;
+		df->disk._sync = diskfilesync;
+		df->disk._close = diskfileclose;
+		df->fd = fd;
+		return &df->disk;
+	}
+	close(fd);
+	return nil;
+}
diff --git a/src/libdiskfs/fsys.c b/src/libdiskfs/fsys.c
new file mode 100644
index 00000000..3875e50b
--- /dev/null
+++ b/src/libdiskfs/fsys.c
@@ -0,0 +1,114 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <sunrpc.h>
+#include <nfs3.h>
+#include <diskfs.h>
+
+int allowall;
+
+/*
+ * File system probes, tried in this order by fsysopen.
+ * Each returns nil if the disk does not hold its format.
+ */
+static Fsys *(*opentab[])(Disk*) =
+{
+ fsysopenffs,
+ fsysopenkfs,
+ fsysopenext2,
+ fsysopenfat,
+};
+
+/*
+ * Open the file system on disk by trying each known format in
+ * turn; the first probe that succeeds wins.  Returns nil if no
+ * format recognizes the disk.
+ * The disk is recorded in the Fsys here, since fsyssync passes
+ * fsys->disk to disksync.
+ */
+Fsys*
+fsysopen(Disk *disk)
+{
+	int i;
+	Fsys *fsys;
+
+	for(i=0; i<nelem(opentab); i++){
+		fsys = (*opentab[i])(disk);
+		if(fsys != nil){
+			fsys->disk = disk;
+			return fsys;
+		}
+	}
+	return nil;
+}
+
+/*
+ * Read block blockno through the file system's own reader.
+ * Returns nil with the error string set if the file system
+ * provides no reader.
+ */
+Block*
+fsysreadblock(Fsys *fsys, u64int blockno)
+{
+	if(fsys->_readblock != nil)
+		return (*fsys->_readblock)(fsys, blockno);
+	werrstr("no read dispatch function");
+	return nil;
+}
+
+/*
+ * Sync the underlying disk, then the file system if it has a
+ * sync method.  Returns -1 if either step fails, 0 otherwise.
+ */
+int
+fsyssync(Fsys *fsys)
+{
+	if(disksync(fsys->disk) < 0)
+		return -1;
+	if(fsys->_sync != nil)
+		return (*fsys->_sync)(fsys);
+	return 0;
+}
+
+/*
+ * Close the file system.  A file system without a close method
+ * is a programming error, so complain and abort.
+ */
+void
+fsysclose(Fsys *fsys)
+{
+	if(fsys->_close == nil){
+		fprint(2, "no fsysClose\n");
+		abort();
+	}
+	(*fsys->_close)(fsys);
+}
+
+/*
+ * Dispatch to the file system's root method;
+ * Nfs3ErrNxio if it has none.
+ */
+Nfs3Status
+fsysroot(Fsys *fsys, Nfs3Handle *h)
+{
+	if(fsys->_root != nil)
+		return (*fsys->_root)(fsys, h);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Dispatch to the file system's lookup method;
+ * Nfs3ErrNxio if it has none.
+ */
+Nfs3Status
+fsyslookup(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char *name, Nfs3Handle *nh)
+{
+	if(fsys->_lookup != nil)
+		return (*fsys->_lookup)(fsys, au, h, name, nh);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Dispatch to the file system's getattr method;
+ * Nfs3ErrNxio if it has none.
+ */
+Nfs3Status
+fsysgetattr(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, Nfs3Attr *attr)
+{
+	if(fsys->_getattr != nil)
+		return (*fsys->_getattr)(fsys, au, h, attr);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Dispatch to the file system's readdir method;
+ * Nfs3ErrNxio if it has none.
+ */
+Nfs3Status
+fsysreaddir(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count, u64int cookie, uchar **e, u32int *ne, u1int *peof)
+{
+	if(fsys->_readdir != nil)
+		return (*fsys->_readdir)(fsys, au, h, count, cookie, e, ne, peof);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Dispatch to the file system's readfile method;
+ * Nfs3ErrNxio if it has none.
+ */
+Nfs3Status
+fsysreadfile(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count, u64int offset, uchar **data, u32int *pcount, uchar *peof)
+{
+	if(fsys->_readfile != nil)
+		return (*fsys->_readfile)(fsys, au, h, count, offset, data, pcount, peof);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Dispatch to the file system's readlink method;
+ * Nfs3ErrNxio if it has none.
+ */
+Nfs3Status
+fsysreadlink(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char **plink)
+{
+	if(fsys->_readlink != nil)
+		return (*fsys->_readlink)(fsys, au, h, plink);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Dispatch to the file system's access method;
+ * Nfs3ErrNxio if it has none.
+ */
+Nfs3Status
+fsysaccess(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr)
+{
+	if(fsys->_access != nil)
+		return (*fsys->_access)(fsys, au, h, want, got, attr);
+	return Nfs3ErrNxio;
+}
diff --git a/src/libdiskfs/kfs.c b/src/libdiskfs/kfs.c
new file mode 100644
index 00000000..51d7c5e3
--- /dev/null
+++ b/src/libdiskfs/kfs.c
@@ -0,0 +1,11 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+
+/*
+ * KFS support is a stub: the disk is ignored and nil is
+ * always returned, so a KFS file system is never recognized.
+ */
+Fsys*
+fsysopenkfs(Disk *disk)
+{
+ USED(disk);
+ return nil;
+}
+
diff --git a/src/libdiskfs/mkfile b/src/libdiskfs/mkfile
new file mode 100644
index 00000000..61538da1
--- /dev/null
+++ b/src/libdiskfs/mkfile
@@ -0,0 +1,31 @@
+<$PLAN9/src/mkhdr
+
+LIB=libdiskfs.a
+
+FSOFILES=\
+ ext2.$O\
+ fat.$O\
+ ffs.$O\
+ kfs.$O\
+
+DISKOFILES=\
+ cache.$O\
+ file.$O\
+ venti.$O\
+
+OFILES=\
+ block.$O\
+ disk.$O\
+ fsys.$O\
+ $DISKOFILES\
+ $FSOFILES\
+
+HFILES=\
+ fs.h
+
+<$PLAN9/src/mksyslib
+
+CFLAGS=$CFLAGS
+
+%.acid: %.$O %.c
+ $CC $CFLAGS -a $stem.c >$stem.acid
diff --git a/src/libdiskfs/venti.c b/src/libdiskfs/venti.c
new file mode 100644
index 00000000..ba314388
--- /dev/null
+++ b/src/libdiskfs/venti.c
@@ -0,0 +1,163 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+#include <venti.h>
+
+extern void vtlibthread(void);
+
+/*
+ * A Disk whose contents are a single file stored in Venti.
+ * disk must stay the first member: the Disk* handed to the
+ * callbacks is cast back to DiskVenti*.
+ */
+typedef struct DiskVenti DiskVenti;
+struct DiskVenti
+{
+ Disk disk;
+ VtEntry e; /* entry describing the file holding the disk image */
+ VtCache *c; /* cache through which all blocks are fetched */
+};
+
+int nfilereads;
+
+/*
+ * This part is like file.c but doesn't require storing the root block
+ * in the cache permanently and doesn't care about locking since
+ * all the blocks are read-only. Perhaps at some point this functionality
+ * should go into libvac in some form.
+ */
+/*
+ * Break data block number bn of the file described by e into one
+ * index per level of pointer blocks: index[0] selects the data
+ * block's score within the lowest pointer block, index[1] selects
+ * that pointer block within the one above it, and so on.
+ * Levels that are not needed are left zero.
+ * Returns the number of levels needed to reach bn, or -1 if the
+ * entry's pointer block size is unusable or bn needs more than
+ * VtPointerDepth levels.
+ */
+static int
+vtfileindices(VtEntry *e, u32int bn, int *index)
+{
+	int i, np;
+
+	/* index is a pointer here, so the size must be spelled out */
+	memset(index, 0, VtPointerDepth*sizeof(int));
+
+	/* np is the number of scores that fit in one pointer block */
+	np = e->psize/VtScoreSize;
+	if(np <= 0){
+		werrstr("bad pointer block size %d", (int)e->psize);
+		return -1;
+	}
+	for(i=0; bn > 0; i++){
+		if(i >= VtPointerDepth){
+			werrstr("bad block number %lud", (ulong)bn);
+			return -1;
+		}
+		index[i] = bn % np;
+		bn /= np;
+	}
+	return i;
+}
+
+/*
+ * Fetch data block bn of the file described by e, walking down
+ * from the entry's top block through the pointer blocks.
+ * Returns the block (the caller must vtblockput it) or nil if bn
+ * is out of range for the entry's depth or a fetch fails.
+ */
+static VtBlock*
+_vtfileblock(VtCache *c, VtEntry *e, u32int bn)
+{
+ VtBlock *b, *bb;
+ int i, d, index[VtPointerDepth+1], t;
+
+ i = vtfileindices(e, bn, index);
+ if(i < 0)
+ return nil;
+ /* d is the number of pointer levels in this file; bn must not need more */
+ d = (e->type&VtTypeDepthMask);
+ if(i > d){
+ werrstr("bad address %d > %d (%x %x)", i, d, e->type, e->flags);
+ return nil;
+ }
+
+//fprint(2, "vtread %V\n", e->score);
+ b = vtcacheglobal(c, e->score, e->type);
+ if(b == nil)
+ return nil;
+
+ /* at each level follow the score picked by index[i]; the child's type is one level shallower */
+ for(i=d-1; i>=0; i--){
+ t = VtDataType+i;
+//fprint(2, "vtread %V\n", b->data+index[i]*VtScoreSize);
+ bb = vtcacheglobal(c, b->data+index[i]*VtScoreSize, t);
+ vtblockput(b);
+ if(bb == nil)
+ return nil;
+ b = bb;
+ }
+ return b;
+}
+
+/*
+ * Release a block made by diskventiread: drop the reference to
+ * the Venti block it points into, then free the header.
+ */
+static void
+diskventiblockput(Block *b)
+{
+ vtblockput(b->priv);
+ free(b);
+}
+
+/*
+ * Read up to len bytes at offset from the Venti-backed disk.
+ * The returned Block points directly into the cached Venti block,
+ * so a read never extends past the end of the Venti data block
+ * containing offset; b->len says how many bytes are available.
+ * Returns nil if the Venti block cannot be fetched or memory
+ * runs out.
+ */
+static Block*
+diskventiread(Disk *dd, u32int len, u64int offset)
+{
+ DiskVenti *d = (DiskVenti*)dd;
+ VtBlock *vb;
+ Block *b;
+ int frag;
+
+nfilereads++;
+ vb = _vtfileblock(d->c, &d->e, offset/d->e.dsize);
+ if(vb == nil)
+ return nil;
+
+ b = mallocz(sizeof(Block), 1);
+ if(b == nil){
+ vtblockput(vb);
+ return nil;
+ }
+
+ /* keep vb referenced until the Block is released by diskventiblockput */
+ b->priv = vb;
+ b->_close = diskventiblockput;
+ /* frag is the offset within the Venti block */
+ frag = offset%d->e.dsize;
+ b->data = (uchar*)vb->data + frag;
+ b->len = d->e.dsize - frag;
+ if(b->len > len)
+ b->len = len;
+ return b;
+}
+
+/*
+ * Free the DiskVenti.  The cache it was opened with is not
+ * touched here.
+ */
+static void
+diskventiclose(Disk *dd)
+{
+ DiskVenti *d = (DiskVenti*)dd;
+ free(d);
+}
+
+/*
+ * Open the disk image whose Venti root block has the given score.
+ * The root must have a power-of-two block size of at least 512 and
+ * must lead to a directory block whose first entry is a plain
+ * data file; that entry becomes the disk.
+ * Returns nil with the error string set on any failure.
+ */
+Disk*
+diskopenventi(VtCache *c, uchar score[VtScoreSize])
+{
+ DiskVenti *d;
+ VtEntry e;
+ VtRoot root;
+ VtBlock *b;
+
+ if((b = vtcacheglobal(c, score, VtRootType)) == nil)
+ goto Err;
+ if(vtrootunpack(&root, b->data) < 0)
+ goto Err;
+ if(root.blocksize < 512 || (root.blocksize&(root.blocksize-1))){
+ werrstr("bad blocksize %d", root.blocksize);
+ goto Err;
+ }
+ vtblockput(b);
+
+ /* the root's score names a directory block; take its first entry */
+ if((b = vtcacheglobal(c, root.score, VtDirType)) == nil)
+ goto Err;
+ if(vtentryunpack(&e, b->data, 0) < 0)
+ goto Err;
+ vtblockput(b);
+ /* b is cleared so the Err path below does not release it twice */
+ b = nil;
+ if((e.type&VtTypeBaseMask) != VtDataType){
+ werrstr("not a single file");
+ goto Err;
+ }
+
+ d = mallocz(sizeof(DiskVenti), 1);
+ if(d == nil)
+ goto Err;
+
+ /* no _sync method is installed for Venti-backed disks */
+ d->disk._read = diskventiread;
+ d->disk._close = diskventiclose;
+ d->e = e;
+ d->c = c;
+ return &d->disk;
+
+Err:
+ if(b)
+ vtblockput(b);
+ return nil;
+}
+
diff --git a/src/libdiskfs/vfile.c b/src/libdiskfs/vfile.c
new file mode 100644
index 00000000..72ca0446
--- /dev/null
+++ b/src/libdiskfs/vfile.c
@@ -0,0 +1,35 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+#include <venti.h>
+
+extern void vtLibThread(void);
+
+/*
+ * NOTE(review): this file looks like an early sketch of venti.c.
+ * It is not listed in the mkfile's object lists and the function
+ * bodies below are pseudo-code, not C.  TvCache and Entry are not
+ * declared anywhere in this excerpt -- confirm whether the file
+ * should be kept.
+ */
+typedef struct DiskVenti DiskVenti;
+struct DiskVenti
+{
+ TvCache *c;
+ Entry e;
+};
+
+/*
+ * NOTE(review): the body is an outline in plain words, not
+ * compilable C; venti.c's diskopenventi appears to be the real
+ * implementation -- confirm.
+ */
+Disk*
+diskOpenVenti(TvCache *c, uchar score[VtScoreSize])
+{
+ vtLibThread();
+
+ fetch vtroot
+ fetch dir block
+ copy e
+}
+
+/*
+ * NOTE(review): the body is an outline in plain words, not
+ * compilable C; venti.c's diskventiread appears to be the real
+ * implementation -- confirm.
+ */
+Block*
+diskVentiRead(Disk *dd, u32int len, u64int offset)
+{
+ DiskVenti *d = (DiskVenti*)dd;
+
+ make offset list
+ walk down blocks
+ return the one
+}
+
+