From be7cbb4ef2cb02aa9ac48c02dc1ee585a8e49043 Mon Sep 17 00:00:00 2001 From: rsc Date: Tue, 12 Jul 2005 15:24:18 +0000 Subject: venti, now with documentation! --- man/man1/venti.1 | 149 ++++++++++++++++ man/man3/venti-cache.3 | 198 ++++++++++++++++++++++ man/man3/venti-client.3 | 194 +++++++++++++++++++++ man/man3/venti-conn.3 | 188 +++++++++++++++++++++ man/man3/venti-fcall.3 | 273 ++++++++++++++++++++++++++++++ man/man3/venti-file.3 | 324 +++++++++++++++++++++++++++++++++++ man/man3/venti-log.3 | 133 +++++++++++++++ man/man3/venti-mem.3 | 67 ++++++++ man/man3/venti-packet.3 | 266 +++++++++++++++++++++++++++++ man/man3/venti-server.3 | 121 +++++++++++++ man/man3/venti-zero.3 | 56 ++++++ man/man3/venti.3 | 75 +++++++++ man/man7/venti.7 | 439 ++++++++++++++++++++++++++++++++++++++++++++++++ man/man7/venti.conf.7 | 360 +++++++++++++++++++++++++++++++++++++++ 14 files changed, 2843 insertions(+) create mode 100644 man/man1/venti.1 create mode 100644 man/man3/venti-cache.3 create mode 100644 man/man3/venti-client.3 create mode 100644 man/man3/venti-conn.3 create mode 100644 man/man3/venti-fcall.3 create mode 100644 man/man3/venti-file.3 create mode 100644 man/man3/venti-log.3 create mode 100644 man/man3/venti-mem.3 create mode 100644 man/man3/venti-packet.3 create mode 100644 man/man3/venti-server.3 create mode 100644 man/man3/venti-zero.3 create mode 100644 man/man3/venti.3 create mode 100644 man/man7/venti.7 create mode 100644 man/man7/venti.conf.7 (limited to 'man') diff --git a/man/man1/venti.1 b/man/man1/venti.1 new file mode 100644 index 00000000..a40eebf9 --- /dev/null +++ b/man/man1/venti.1 @@ -0,0 +1,149 @@ +.TH VENTI 1 +.SH NAME +read, write, copy \- simple Venti clients +.SH SYNOPSIS +.B venti/read +[ +.B -h +.I host +] +[ +.B -t +.I type +] +.I score +.br +.B venti/write +[ +.B -z +] +[ +.B -h +.I host +] +[ +.B -t +.I type +] +.br +.B venti/copy +[ +.B -fir +] +[ +.B -t +.I type +] +.I srchost +.I dsthost +.I score +.SH DESCRIPTION +Venti is a 
SHA1-addressed block storage server. +See +.IR venti (7) +for a full introduction. +.PP +.I Read +reads a block with the given +.I score +and numeric +.I type +from the server +.I host +and prints the block to standard output. +If the +.B -h +option is omitted, +.I read +consults the environment variable +.B $venti +for the name of the Venti server. +If the +.B -t +option is omitted, +.I read +will try each type, one at a time, until it finds +one that works. +It prints the corresponding +.B read +.B -t +command to standard error +to indicate the type of the block. +.PP +.I Write +writes at most 56 kilobytes of data from standard input +to the server +.I host +and prints the resulting score to standard output. +If the +.B -t +option is omitted, +.I write +uses type 0, +denoting a data block. +If the +.B -z +option is given, +.I write +truncates the block before writing it to the server. +.PP +.I Copy +expects +.I score +to be the score of a +.B VtRoot +block. +It copies the entire tree of blocks reachable from +the root block from the server +.I srchost +to the server +.IR dsthost . +.PP +The +.B -f +option causes +.I copy +to run in `fast' mode, +assuming that if a block already exists on the +destination Venti server, all its children also +exist and need not be checked. +.PP +The +.B -i +and +.B -r +options control +.IR copy 's +behavior upon encountering errors while reading +from srchost. +.I Copy +always prints information to standard error +about each read error. +By default, +.I copy +immediately exits after printing the first error. +If the +.B -i +option is given, read errors are ignored. +This is dangerous behavior because it breaks the +assumption made by `fast' mode. +If the +.B -r +option is given, +.I copy +replaces pointers to unreadable blocks with +pointers to the zero block. +It writes the new root score to standard output. 
+.SH SOURCE +.B \*9/src/cmd/venti/cmd +.SH SEE ALSO +.IR vac (1), +.IR vbackup (1), +.IR venti (3), +.IR vacfs (4), +.IR vnfs (4), +.IR venti (7), +.IR venti (8) +.SH BUGS +There should be programs to read and write +streams and directories. diff --git a/man/man3/venti-cache.3 b/man/man3/venti-cache.3 new file mode 100644 index 00000000..bdd18ec4 --- /dev/null +++ b/man/man3/venti-cache.3 @@ -0,0 +1,198 @@ +.TH VENTI-CACHE 3 +.SH NAME +VtBlock, VtCache, +vtblockcopy, +vtblockdirty, +vtblockduplock, +vtblockput, +vtblockwrite, +vtcachealloc, +vtcacheallocblock, +vtcacheblocksize, +vtcachefree, +vtcacheglobal, +vtcachelocal, +vtcachesetwrite, +vtglobaltolocal, +vtlocaltoglobal \- Venti block cache +.SH SYNOPSIS +.ft L +#include <u.h> +.br +#include <libc.h> +.br +#include <venti.h> +.ta +\w'\fLxxxx 'u +.PP +typedef struct VtBlock +{ + uchar *data; + uchar type; + uchar score[VtScoreSize]; + u32int addr; + ... +} VtBlock; +.ta +\w'\fLVtBlock* 'u +\w'\fLxxxxxxxx'u +.PP +.B +VtCache* vtcachealloc(VtConn *z, int blocksize, ulong nblocks, int mode); +.PP +.B +void vtcachefree(VtCache *c); +.PP +.B +u32int vtcacheblocksize(VtCache *c); +.PP +.B +u32int vtglobaltolocal(uchar score[VtScoreSize]) +.br +.B +void vtlocaltoglobal(u32int local, uchar score[VtScoreSize]) +.PP +.B +VtBlock* vtcacheallocblock(VtCache *c, int type); +.PP +.B +VtBlock* vtcachelocal(VtCache *c, u32int addr, int type); +.PP +.B +VtBlock* vtcacheglobal(VtCache *c, uchar score[VtScoreSize], int type); +.PP +.B +void vtblockput(VtBlock *b); +.PP +.B +void vtblockduplock(VtBlock *b); +.PP +.B +int vtblockwrite(VtBlock *b); +.PP +.B +void vtcachesetwrite(VtCache *c, +.br +.B + int (*write)(VtConn*, uchar[VtScoreSize], uint, uchar*, int)); +.PP +.B +VtBlock* vtblockcopy(VtBlock *b); +.PP +.B +int vtblockdirty(VtBlock *b); +.SH DESCRIPTION +These functions provide access to a simple in-memory +cache of blocks already stored on a Venti server +and blocks that will eventually be stored on a Venti server. 
+.PP +.I Vtcachealloc +allocates a new cache using the client connection +.I z +(see +.IR venti-conn (3) +and +.IR venti-client (3)), +with room for +.I nblocks +blocks of maximum block size +.IR blocksize . +.PP +.I Vtcachefree +frees a cache and all the associated blocks. +.PP +.I Vtcacheblocksize +returns the cache's block size. +.PP +XXX global vs local blocks +.PP +.I Vtcacheallocblock +allocates a new local block with the given +.IR type . +.PP +.I Vtcachelocal +retrieves the local block at address +.I addr +from the cache. +The given +.I type +must match the type of the block found at +.IR addr . +.PP +.I Vtcacheglobal +retrieves the block with the given +.I score +and +.I type +from the cache, consulting the Venti server +if necessary. +If passed a local score, +.I vtcacheglobal +behaves as +.IR vtcachelocal . +.PP +The block references returned by +.IR vtcacheallocblock , +.IR vtcachelocal , +and +.I vtcacheglobal +must be released when no longer needed. +.I Vtblockput +releases such a reference. +.PP +It is occasionally convenient to have multiple variables +refer to the same block. +.I Vtblockduplock +increments the block's reference count so that +an extra +.I vtblockput +will be required in order to release the block. +.PP +.I Vtblockwrite +writes a local block to the Venti server, +changing the block to a global block. +It calls the cache's +.I write +function +to write the block to the server. +The default +.I write +function is +.I vtwrite +(see +.IR venti-client (3)); +.I vtcachesetwrite +sets it. +.I Vtcachesetwrite +is used by clients to install replacement functions +that run writes in the background or perform other +additional processing. +.PP +.I Vtblockcopy +copies a block in preparation for modifying its contents. +The old block may be a local or global block, +but the new block will be a local block. +.PP +The cache only evicts global blocks. +Local blocks can only leave the cache via +.IR vtblockwrite , +which turns them into global blocks, making them candidates for +eviction. 
+.PP +If a new cache block must be allocated (for +.IR vtcacheallocblock , +.IR vtcachelocal , +.IR vtcacheglobal , +or +.IR vtblockcopy ), +but the cache is filled (with local blocks and blocks that +have not yet been released with +.IR vtblockput ), +the library prints the score and reference count of +every block in the cache and then aborts. +A full cache indicates either that the cache is too small, +or, more commonly, that cache blocks are being leaked. +.SH SOURCE +.B \*9/src/libventi +.SH SEE ALSO +.IR venti (1), +.IR venti (3), +.IR venti-client (3), +.IR venti-conn (3), +.IR venti-file (3) diff --git a/man/man3/venti-client.3 b/man/man3/venti-client.3 new file mode 100644 index 00000000..ec18fc4e --- /dev/null +++ b/man/man3/venti-client.3 @@ -0,0 +1,194 @@ +.TH VENTI-CLIENT 3 +.SH NAME +vtconnect, vthello, vtread, vtwrite, vtreadpacket, vtwritepacket, vtsync, vtping, vtrpc, ventidoublechecksha1 \- Venti client +.SH SYNOPSIS +.ft L +#include +.br +#include +.br +#include +.ta +\w'\fLextern int 'u +\w'\fLxxxxxxxx'u +.PP +.B +Packet* vtrpc(VtConn *z, Packet *p) +.PP +.B +int vthello(VtConn *z) +.PP +.B +int vtconnect(VtConn *z) +.PP +.B +int vtread(VtConn *z, uchar score[VtScoreSize], +.br +.B + uint type, uchar *buf, int n) +.PP +.B +int vtwrite(VtConn *z, uchar score[VtScoreSize], +.br +.B + uint type, uchar *buf, int n) +.PP +.B +Packet* vtreadpacket(VtConn *z, uchar score[VtScoreSize], +.br +.B + uint type, int n) +.PP +.B +int vtwritepacket(VtConn *z, uchar score[VtScoreSize], +.br +.B + uint type, Packet *p) +.PP +.B +int vtsync(VtConn *z) +.PP +.B +int vtping(VtConn *z) +.PP +.B +extern int ventidoublechecksha1; /* default 1 */ +.SH DESCRIPTION +These routines execute the client side of the +.IR venti (7) +protocol. +.PP +.I Vtrpc +executes a single Venti RPC transaction, sending the request +packet +.IR p +and then waiting for and returning the response packet. +.I Vtrpc +will set the tag in the packet. +.I Vtrpc +frees +.IR p , +even on error. 
+.I Vtrpc +is typically called only indirectly, via the functions below. +.PP +.I Vthello +executes a +.B hello +transaction +(see +.IR venti (7)), setting +.IB z -> sid +to the name used by the server. +.I Vthello +is typically called only indirectly, via +.IR vtconnect . +.PP +.I Vtconnect +calls +.I vtversion +(see +.IR venti-conn (3)) +and +.IR vthello , +in that order, returning success only +if both succeed. +This sequence (calling +.I vtversion +and then +.IR vthello ) +must be done before the functions below can be called. +.PP +.I Vtread +reads the block with the given +.I score +and +.I type +from the server, +writes the returned data +to +.IR buf , +and returns the number of bytes retrieved. +If the stored block has size larger than +.IR n , +.I vtread +does not modify +.I buf +and +returns an error. +.PP +.I Vtwrite +writes the +.I n +bytes in +.I buf +with type +.IR type , +setting +.IR score . +.PP +.I Vtreadpacket +and +.I vtwritepacket +are like +.I vtread +and +.I vtwrite +but return or accept the block contents in the +form of a +.BR Packet . +They avoid making a copy of the data. +.PP +.I Vtsync +causes the server to flush all pending write requests +to disk before returning. +.PP +.I Vtping +executes a ping transaction with the server. +.PP +By default, +.I vtread +and +.I vtreadpacket +check that the SHA1 hash of the returned data +matches the requested +.IR score , +and +.I vtwrite +and +.I vtwritepacket +check that the returned +.I score +matches the SHA1 hash of the written data. +Setting +.I ventidoublechecksha1 +to zero disables these extra checks, +mainly for benchmarking purposes. +Doing so in production code is not recommended. +.PP +These functions can be called from multiple threads +or procs simultaneously to issue requests +in parallel. +Programs that issue requests from multiple threads +in the same proc should start separate procs running +.I vtsendproc +and +.I vtrecvproc +as described in +.IR venti-conn (3). 
+.SH SOURCE +.B \*9/src/libventi +.SH SEE ALSO +.IR venti (1), +.IR venti (3), +.IR venti-conn (3), +.IR venti-packet (3), +.IR venti (7) +.SH DIAGNOSTICS +.I Vtrpc +and +.I vtreadpacket +return nil on error. +The other routines return \-1 on error. +.PP +.I Vtwrite +returns 0 on success, +meaning it wrote the entire block. diff --git a/man/man3/venti-conn.3 b/man/man3/venti-conn.3 new file mode 100644 index 00000000..bc2de00d --- /dev/null +++ b/man/man3/venti-conn.3 @@ -0,0 +1,188 @@ +.TH VENTI-CONN 3 +.SH NAME +VtConn, vtconn, vtdial, vtfreeconn, vtsend, vtrecv, vtversion, +vtdebug, vthangup \- Venti network connections +.SH SYNOPSIS +.PP +.ft L +#include <u.h> +.br +#include <libc.h> +.br +#include <venti.h> +.PP +.ft L +.nf +.ta +\w'\fL 'u +typedef struct VtConn { + int debug; + char *version; + char *uid; + char *sid; + char addr[256]; + ... +} VtConn; +.PP +.ta \w'\fLextern int 'u +.B +VtConn* vtconn(int infd, int outfd) +.PP +.B +VtConn* vtdial(char *addr) +.PP +.B +int vtversion(VtConn *z) +.PP +.B +int vtsend(VtConn *z, Packet *p) +.PP +.B +Packet* vtrecv(VtConn *z) +.PP +.B +void vtrecvproc(void *z) +.PP +.B +void vtsendproc(void *z) +.PP +.B +void vtdebug(VtConn *z, char *fmt, ...) +.PP +.B +void vthangup(VtConn *z) +.PP +.B +void vtfreeconn(VtConn *z) +.PP +.B +extern int chattyventi; /* default 0 */ +.SH DESCRIPTION +A +.B VtConn +structure represents a connection to a Venti server +(when used by a client) or to a client (when used by a server). +.PP +.I Vtconn +initializes a new connection structure using file descriptors +.I infd +and +.I outfd +(which may be the same) +for reading and writing. +.I Vtdial +dials the given network address +(see +.IR dial (3)) +and returns a corresponding connection. +It returns nil if the connection cannot be established. +.PP +.I Vtversion +exchanges version information with the remote side +as described in +.IR venti (7). +The negotiated version is stored in +.IB z -> version \fR. 
+.PP +.I Vtsend +writes a packet +(see +.IR venti-packet (3)) +on the connection +.IR z . +The packet +.IR p +should be a formatted Venti message as might +be returned by +.IR vtfcallpack ; +.I vtsend +will add the two-byte length field +(see +.IR venti (7)) +at the beginning. +.I Vtsend +frees +.IR p , +even on error. +.PP +.I Vtrecv +reads a packet from the connection +.IR z . +Analogous to +.IR vtsend , +the data read from the connection must start with +a two-byte length, but the returned packet will omit it. +.PP +By default, +.I vtsend +and +.I vtrecv +block until the packet can be written to or read from the network. +In a threaded program +(see +.IR thread (3)), +this may not be desirable. +If the caller arranges for +.IR vtsendproc +and +.IR vtrecvproc +to run in their own procs +(typically by calling +.IR proccreate ), +then +.I vtsend +and +.I vtrecv +will yield the proc in which they are run +to other threads when waiting on the network. +The +.B void* +argument to +.I vtsendproc +and +.I vtrecvproc +must be the connection structure +.IR z . +.PP +.I Vtdebug +prints the formatted message to standard error +when +.IB z -> debug +is set. Otherwise it is a no-op. +.PP +.I Vthangup +hangs up a connection. +It closes the associated file descriptors +and shuts down send and receive procs if they have been +started. +Future calls to +.IR vtrecv +or +.IR vtsend +will return errors. +Additional calls to +.I vthangup +will have no effect. +.PP +.I Vtfreeconn +frees the connection structure, hanging it up first +if necessary. +.PP +If the global variable +.I chattyventi +is set, the library prints all Venti RPCs to standard error +as they are sent or received. +.SH SOURCE +.B \*9/src/libventi +.SH SEE ALSO +.IR venti (1), +.IR venti (3), +.IR venti-client (3), +.IR venti-packet (3), +.IR venti-server (3), +.IR venti (7) +.SH DIAGNOSTICS +Routines that return pointers return nil on error. +Routines returning integers return 0 on success, \-1 on error. 
+All routines set +.I errstr +on error. diff --git a/man/man3/venti-fcall.3 b/man/man3/venti-fcall.3 new file mode 100644 index 00000000..e5bdfa9e --- /dev/null +++ b/man/man3/venti-fcall.3 @@ -0,0 +1,273 @@ +.TH VENTI-FCALL 3 +.SH NAME +VtEntry, VtFcall, VtRoot, +vtentrypack, +vtentryunpack, +vtfcallclear, +vtfcallfmt, +vtfcallpack, +vtfcallunpack, +vtfromdisktype, +vttodisktype, +vtgetstring, +vtputstring, +vtrootpack, +vtrootunpack, +vtparsescore, +vtscorefmt \- Venti external data representation +.SH SYNOPSIS +.PP +.ft L +#include +.br +#include +.br +#include +.ta +\w'\fLxxxx'u +.PP +.ft L +.nf +enum +{ + VtEntrySize = 40, + VtRootSize = 300, + VtRootVersion = 2, + VtScoreSize = 20, +}; +.PP +.ft L +.nf +typedef struct VtEntry +{ + ulong gen; /* generation number */ + ushort psize; /* pointer block size */ + ushort dsize; /* data block size */ + uchar type; + uchar flags; + uvlong size; + uchar score[VtScoreSize]; +} VtEntry; +.PP +.ft L +.nf +typedef struct VtRoot +{ + char name[128]; + char type[128]; + uchar score[VtScoreSize]; /* to a Dir block */ + ushort blocksize; /* maximum block size */ + uchar prev[VtScoreSize]; /* previous root block */ +} VtRoot; +.ta +\w'\fLPacket* 'u +.PP +.B +void vtentrypack(VtEntry *e, uchar *buf, int index) +.br +.B +int vtentryunpack(VtEntry *e, uchar *buf, int index) +.PP +.B +Packet* vtfcallpack(VtFcall *f) +.br +.B +int vtfcallunpack(VtFcall *f, Packet *p) +.PP +.B +void vtfcallclear(VtFcall *f) +.PP +.B +uint vttodisktype(uint type) +.br +.B +uint vtfromdisktype(uint type) +.PP +.B +int vtputstring(Packet *p, char *s) +.br +.B +int vtgetstring(Packet *p, char **s) +.PP +.B +void vtrootpack(VtRoot *r, uchar *buf) +.br +.B +int vtrootunpack(VtRoot *r, uchar *buf) +.PP +.B +int vtparsescore(char *s, char **prefix, uchar score[VtScoreSize]) +.PP +.B +int vtfcallfmt(Fmt *fmt) +.B +int vtscorefmt(Fmt *fmt) +.SH DESCRIPTION +These routines convert between C representations of Venti +structures and serialized representations 
used on disk and +on the network. +.PP +.I Vtentrypack +converts a +.B VtEntry +structure describing a Venti file +(see +.IR venti (7)) +into a 40-byte +.RB ( VtEntrySize ) +structure at +.IB buf + index *40 \fR. +.I Vtentryunpack +does the reverse conversion. +.PP +.I Vtfcallpack +converts a +.B VtFcall +structure describing a Venti protocol message +(see +.IR venti (7)) +into a packet. +.I Vtfcallunpack +does the reverse conversion. +.PP +The fields in a +.B VtFcall +are named after the protocol fields described in +.IR venti (7), +except that the +.B type +field is renamed +.BR blocktype . +The +.B msgtype +field holds the one-byte message type: +.BR VtThello , +.BR VtRhello , +and so on. +.PP +.I Vtfcallclear +frees the strings +.IB f ->error \fR, +.IB f ->version \fR, +.IB f ->uid \fR, +.IB f ->sid \fR, +the buffers +.IB f ->crypto +and +.IB f ->codec \fR, +and the packet +.IB f ->data \fR. +.PP +The block type enumeration defined in +.B <venti.h> +(presented in +.IR venti (7)) +differs from the one used on disk and in the network +protocol. +The disk and network representation uses different +constants and does not distinguish between +.BI VtDataType+ n +and +.BI VtDirType+ n +blocks. +.I Vttodisktype +converts a +.B <venti.h> +enumeration value to the disk value; +.I vtfromdisktype +converts a disk value to the enumeration value. +The +.B VtFcall +field +.B blocktype +is an enumeration value +.RI ( vtfcallpack +and +.I vtfcallunpack +convert to and from the disk values used in packets +automatically), +so most programs will not need to call these functions. +.PP +.I Vtputstring +appends the Venti protocol representation of the string +.I s +to the packet +.IR p . +.I Vtgetstring +reads a string from the packet, returning a pointer to a copy +of the string in +.BI * s \fR. +The copy must be freed by the caller. +These functions are used by +.I vtfcallpack +and +.IR vtfcallunpack ; +most programs will not need to call them directly. 
+.PP +.I Vtrootpack +converts a +.B VtRoot +structure describing a Venti file tree +into the 300-byte +.RB ( VtRootSize ) +buffer pointed to by +.IR buf . +.I Vtrootunpack +does the reverse conversion. +.PP +.I Vtparsescore +parses the 40-digit hexadecimal string +.IR s , +writing its value +into +.IR score . +If the hexadecimal string is prefixed with +a text label followed by a colon, a copy of that +label is returned in +.BI * prefix \fR. +If +.I prefix +is nil, the label is ignored. +.PP +.I Vtfcallfmt +and +.I vtscorefmt +are +.IR print (3) +formatters to print +.B VtFcall +structures and scores. +.I Vtfcallfmt +assumes that +.I vtscorefmt +is installed as +.BR %V . +.SH SOURCE +.B \*9/src/libventi +.SH SEE ALSO +.IR venti (1), +.IR venti (3), +.IR venti (7) +.SH DIAGNOSTICS +.IR Vtentrypack , +.IR vtrootpack , +and +.I vtfcallclear +cannot fail. +.PP +.IR Vtentryunpack , +.IR vtfcallunpack , +.IR vtrootunpack , +.IR vtputstring , +.IR vtgetstring , +and +.I vtparsescore +return 0 on success, \-1 on error. +.PP +.I Vtfcallpack +returns a packet on success, nil on error. +.PP +.I Vttodisktype +and +.I vtfromdisktype +return +.B VtCorruptType +(255) +when presented with invalid input. 
diff --git a/man/man3/venti-file.3 b/man/man3/venti-file.3 new file mode 100644 index 00000000..dfb7bf41 --- /dev/null +++ b/man/man3/venti-file.3 @@ -0,0 +1,324 @@ +.TH VENTI-FILE 3 +.SH NAME +VtFile, +vtfileopenroot, +vtfilecreateroot, +vtfileopen, +vtfilecreate, +vtfileblock, +vtfileread, +vtfilewrite, +vtfileflush, +vtfileincref, +vtfileclose, +vtfilegetentry, +vtfilesetentry, +vtfileblockscore, +vtfilegetdirsize, +vtfilesetdirsize, +vtfileunlock, +vtfilelock, +vtfilelock2, +vtfileflushbefore, +vtfiletruncate, +vtfilegetsize, +vtfilesetsize, +vtfileremove \- Venti files +.SH SYNOPSIS +.ta +\w'\fLVtBlock* 'u +.PP +.B +VtFile* vtfilecreateroot(VtCache *c, int psize, int dsize, int type); +.PP +.B +VtFile* vtfileopenroot(VtCache *c, VtEntry *e); +.PP +.B +VtFile* vtfileopen(VtFile *f, u32int n, int mode); +.PP +.B +VtFile* vtfilecreate(VtFile *f, int psize, int dsize, int dir); +.PP +.B +void vtfileincref(VtFile *f); +.PP +.B +void vtfileclose(VtFile *f); +.PP +.B +int vtfileremove(VtFile *f); +.PP +.B +VtBlock* vtfileblock(VtFile *f, u32int n, int mode); +.PP +.B +long vtfileread(VtFile *f, void *buf, long n, vlong offset); +.PP +.B +long vtfilewrite(VtFile *f, void *buf, long n, vlong offset); +.PP +.B +int vtfileflush(VtFile *f); +.PP +.B +int vtfileflushbefore(VtFile *f, vlong offset); +.PP +.B +int vtfiletruncate(VtFile *f); +.PP +.B +uvlong vtfilegetsize(VtFile *f); +.PP +.B +int vtfilesetsize(VtFile *f, vlong size); +.PP +.B +u32int vtfilegetdirsize(VtFile *f); +.PP +.B +int vtfilesetdirsize(VtFile *f, u32int size); +.PP +.B +int vtfilegetentry(VtFile *f, VtEntry *e); +.PP +.B +int vtfilesetentry(VtFile *f, VtEntry *e); +.PP +.B +int vtfileblockscore(VtFile *f, u32int n, uchar score[VtScoreSize]); +.PP +.B +int vtfilelock(VtFile *f, int mode); +.PP +.B +int vtfilelock2(VtFile *f, VtFile *ff, int mode); +.PP +.B +void vtfileunlock(VtFile *f); +.SH DESCRIPTION +These routines provide a simple interface to create and +manipulate Venti file trees (see +.IR venti 
(7)). +.PP +.I Vtfilecreateroot +creates a new Venti file. +.I Type +must be either +.B VtDataType +or +.BR VtDirType , +specifying a data or directory file. +.I Dsize +is the block size to use for leaf (data or directory) blocks in the hash tree; +.I psize +is the block size to use for intermediate (pointer) blocks. +.PP +.I Vtfileopenroot +opens an existing Venti file described by +.IR e . +.PP +.I Vtfileopen +opens the Venti file described by the +.IR n th +entry in the directory +.IR f . +.I Mode +should be one of +.IR VtOREAD , +.IR VtOWRITE , +or +.IR VtORDWR , +indicating how the returned file is to be used. +The +.IR VtOWRITE +and +.IR VtORDWR +modes can only be used if +.IR f +is open with mode +.IR VtORDWR . +.PP +.I Vtfilecreate +creates a new file in the directory +.I f +with block type +.I type +and block sizes +.I dsize +and +.I psize +(see +.I vtfilecreateroot +above). +.PP +Each file has an associated reference count +and holds a reference to its parent in the file tree. +.I Vtfileincref +increments this reference count. +.I Vtfileclose +decrements the reference count. +If there are no other references, +.I vtfileclose +releases the reference to +.IR f 's +parent and then frees the in-memory structure +.IR f . +The data stored in +.I f +is still accessible by reopening it. +.PP +.I Vtfileremove +removes the file +.I f +from its parent directory. +It also acts as +.IR vtfileclose , +releasing the reference to +.I f +and potentially freeing the structure. +.PP +.I Vtfileblock +returns the +.IR n th +block in the file +.IR f . +If there are not +.I n +blocks in the file and +.I mode +is +.BR VtOREAD , +.I vtfileblock +returns nil. +If the mode is +.B VtOWRITE +or +.BR VtORDWR , +.I vtfileblock +grows the file as needed and then returns the block. +.PP +.I Vtfileread +reads at most +.I n +bytes at offset +.I offset +from +.I f +into memory at +.IR buf . +It returns the number of bytes read. 
+.PP +.I Vtfilewrite +writes the +.I n +bytes in memory at +.I buf +into the file +.I f +at offset +.IR offset . +It returns the number of bytes written, +or \-1 on error. +Writing fewer bytes than requested will only happen +if an error is encountered. +.PP +.I Vtfilewrite +writes to an in-memory copy of the data blocks +(see +.IR venti-cache (3)) +instead of writing directly to Venti. +.I Vtfileflush +writes all copied blocks associated with +.I f +to the Venti server. +.I Vtfileflushbefore +flushes only those blocks corresponding to data in the file before +byte +.IR offset . +Loops that call +.I vtfilewrite +should call +.I vtfileflushbefore +regularly to avoid filling the block cache with dirty blocks. +.PP +.I Vtfiletruncate +changes the file +.I f +to have zero length. +.PP +.I Vtfilegetsize +returns the length (in bytes) of file +.IR f . +.PP +.I Vtfilesetsize +sets the length (in bytes) of file +.IR f . +.PP +.I Vtfilegetdirsize +returns the length (in directory entries) +of the directory +.IR f . +.PP +.I Vtfilesetdirsize +sets the length (in directory entries) +of the directory +.IR f . +.PP +.I Vtfilegetentry +fills +.I e +with an entry that can be passed to +.IR vtfileopenroot +to reopen +.I f +at a later time. +.PP +.I Vtfilesetentry +sets the entry associated with +.I f +to be +.IR e . +.PP +.I Vtfileblockscore +returns in +.I score +the score of the +.IR n th +block in the file +.IR f . +.PP +Venti files are locked and unlocked +via +.I vtfilelock +and +.I vtfileunlock +to moderate concurrent access. +Only one thread at a time\(emthe one that has the file locked\(emcan +read or modify the file. +The functions that return files +.RI ( vtfilecreateroot , +.IR vtfileopenroot , +.IR vtfilecreate , +and +.IR vtfileopen ) +return them unlocked. +When files are passed to any of the functions documented in +this manual page, it is the caller's responsibility to ensure that +they are already locked. 
+.PP +Internally, a file is locked by locking the +block that contains its directory entry. +When two files in the same +directory both need to be locked, +.I vtfilelock2 +must be used. +It locks both its arguments, taking special care +not to deadlock if their entries are stored +in the same directory block. +.SH SOURCE +.B \*9/src/libventi/file.c +.SH SEE ALSO +.IR venti (1), +.IR venti-cache (3), +.IR venti-conn (3), +.IR venti-client (3) diff --git a/man/man3/venti-log.3 b/man/man3/venti-log.3 new file mode 100644 index 00000000..bc4efe76 --- /dev/null +++ b/man/man3/venti-log.3 @@ -0,0 +1,133 @@ +.TH VENTI-LOG 3 +.SH NAME +VtLog, +VtLogChunk, +vtlog, +vtlogclose, +vtlogdump, +vtlognames, +vtlogopen, +vtlogprint, +vtlogremove, +ventilogging \- Venti logs +.SH SYNOPSIS +.ft L +#include <u.h> +.br +#include <libc.h> +.br +#include <venti.h> +.ta +\w'\fLVtLog* 'u +.PP +.B +VtLog* vtlogopen(char *name, uint size); +.PP +.B +void vtlogprint(VtLog *log, char *fmt, ...); +.PP +.B +void vtlogclose(VtLog *log); +.PP +.B +void vtlog(char *name, char *fmt, ...); +.PP +.B +void vtlogremove(char *name); +.PP +.B +char** vtlognames(int *n); +.PP +.B +void vtlogdump(int fd, VtLog *log); +.PP +.B +extern int ventilogging; /* default 0 */ +.PP +.B +extern char *VtServerLog; /* "libventi/server" */ +.SH DESCRIPTION +These routines provide an in-memory circular log +structure used by the Venti library and the Venti server +to record events for debugging purposes. +The logs have textual names represented as UTF strings. +.PP +.I Vtlogopen +returns a reference to the log named +.IR name . +If a log with that name does not exist and +.I size +is non-zero, a new log capable of holding at +least +.I size +bytes is allocated and returned. +.I Vtlogclose +releases the reference returned by +.IR vtlogopen . +.PP +.I Vtlogprint +writes to +.IR log , +which must be open. +.PP +.I Vtlog +is a convenient packaging of +.I vtlogopen +followed by +.I vtlogprint +and +.IR vtlogclose . 
+.PP +.I Vtlogremove +removes the log with the given +.IR name , +freeing any associated storage. +.PP +.I Vtlognames +returns a list of the names of all the logs. +The length of the list is returned in +.BI * n \fR. +The list +should be freed +by calling +.I vtfree +on the returned pointer. +The strings in the list will be freed by this call as well. +(It is an error to call +.I vtfree +on any of the strings in the list.) +.PP +.I Vtlogdump +prints +.IR log , +which must be open, to the file descriptor +.IR fd . +.PP +If +.I ventilogging +is set to zero (the default), +.I vtlognames +and +.I vtlogdump +can inspect existing logs, but +.I vtlogopen +always returns nil +and +.I vtlog +is a no-op. +The other functions are no-ops when +passed nil log structures. +.PP +The server library +(see +.IR venti-conn (3) +and +.IR venti-server (3)) +writes debugging information to the log named +.IR VtServerLog , +which defaults to the string +.LR libventi/server . +.SH SOURCE +.B \*9/src/libventi +.SH SEE ALSO +.IR venti (3) diff --git a/man/man3/venti-mem.3 b/man/man3/venti-mem.3 new file mode 100644 index 00000000..46b2bb31 --- /dev/null +++ b/man/man3/venti-mem.3 @@ -0,0 +1,67 @@ +.TH VENTI-MEM 3 +.SH NAME +vtbrk, +vtmalloc, +vtmallocz, +vtrealloc, +vtstrdup, +vtfree \- error-checking memory allocators +.SH SYNOPSIS +.ft L +#include +.br +#include +.br +#include +.ta +\w'\fLvoid* 'u +.PP +.B +void* vtbrk(int size) +.PP +.B +void* vtmalloc(int size) +.PP +.B +void* vtmallocz(int size) +.PP +.B +void* vtrealloc(void *ptr, int size) +.PP +.B +char* vtstrdup(char *s) +.PP +.B +void vtfree(void *ptr) +.SH DESCRIPTION +These routines allocate and free memory. +On failure, they print an error message and call +.IR sysfatal (3). +They do not return. +.PP +.I Vtbrk +returns a pointer to a new block of at least +.I size +bytes. +The block cannot be freed. 
+.PP +.IR Vtmalloc , +.IR vtrealloc , +and +.I vtstrdup +are like +.IR malloc , +.IR realloc , +and +.IR strdup , +but, as noted above, do not return on error. +.I Vtmallocz +is like +.I vtmalloc +but zeros the block before returning it. +Memory allocated with all four should be freed with +.I vtfree +when no longer needed. +.SH SOURCE +.B \*9/src/libventi +.SH SEE ALSO +.IR venti (3) diff --git a/man/man3/venti-packet.3 b/man/man3/venti-packet.3 new file mode 100644 index 00000000..7d5a518f --- /dev/null +++ b/man/man3/venti-packet.3 @@ -0,0 +1,266 @@ +.TH VENTI-PACKET 3 +.SH NAME +Packet, packetalloc, packetfree, packetforeign, packetdup, +packetsplit, packetconsume, packettrim, packetheader, +packettrailer, packetprefix, packetappend, packetconcat, +packetpeek, packetcopy, packetfragments, +packetsize, packetasize, packetcompact, packetcmp, +packetstats, packetsha1 \- zero-copy network buffers +.SH SYNOPSIS +.ft L +#include +.br +#include +.br +#include +.ta +\w'\fLPacket* 'u +\w'\fLxxxx'u +.PP +.B +.PP +.B +Packet* packetalloc(void); +.PP +.B +void packetfree(Packet *p) +.PP +.B +Packet* packetforeign(uchar *buf, int n, +.br +.B + void (*free)(void *a), void *a) +.PP +.B +Packet* packetdup(Packet *p, int offset, int n) +.PP +.B +Packet* packetsplit(Packet *p, int n) +.PP +.B +int packetconsume(Packet *p, uchar *buf, int n) +.PP +.B +int packettrim(Packet *p, int offset, int n) +.PP +.B +uchar* packetheader(Packet *p, int n) +.PP +.B +uchar* packettrailer(Packet *p, int n) +.PP +.B +void packetprefix(Packet *p, uchar *buf, int n) +.PP +.B +void packetappend(Packet *p, uchar *buf, int n) +.PP +.B +void packetconcat(Packet *p, Packet *q) +.PP +.B +uchar* packetpeek(Packet *p, uchar *buf, int offset, int n) +.PP +.B +int packetcopy(Packet *p, uchar *buf, int offset, int n) +.PP +.B +int packetfragments(Packet *p, IOchunk *io, int nio, +.br +.B + int offset) +.PP +.B +uint packetsize(Packet *p) +.PP +.B +uint packetasize(Packet *p) +.PP +.B +int packetcmp(Packet *p, 
Packet *q) +.PP +.B +void packetstats(void) +.PP +.B +void packetsha1(Packet *p, uchar sha1[20]) +.SH DESCRIPTION +A +.B Packet +is a list of blocks of data. +Each block is contiguous in memory, but the entire packet +may not be. +This representation helps avoid unnecessary memory copies. +.PP +.I Packetalloc +allocates an empty packet. +.PP +.I Packetappend +appends the +.I n +bytes at +.I buf +to the end of +.IR p . +.PP +.I Packetasize +returns the number of data bytes allocated to +.IR p . +This may be larger than the number of bytes stored +in +.IR p +because individual fragments may not be filled. +.PP +.I Packetcmp +compares the data sections of two packets as +.I memcmp +(see +.IR memory (3)) +would. +.PP +.I Packetconcat +removes all data from +.IR q , +appending it to +.IR p . +.PP +.I Packetconsume +removes +.I n +bytes from the beginning of +.IR p , +storing them into +.IR buf . +.PP +.I Packetcopy +copies +.I n +bytes at +.I offset +in +.I p +to +.IR buf . +.PP +.I Packetdup +creates a new packet initialized with +.I n +bytes from +.I offset +in +.IR p . +.PP +.I Packetforeign +allocates a packet containing `foreign' data: the +.I n +bytes pointed to by +.IR buf . +Once the bytes are no longer needed, they are freed by calling +.IB free ( a )\fR. +.PP +.I Packetfragments +initializes up to +.I nio +of the +.I io +structures with pointers to the data in +.IR p , +starting at +.IR offset . +It returns the total number of bytes represented +by the returned structures. +.I Packetfragments +initializes any unused +.I io +structures with nil pointer and zero length. +.PP +.I Packetfree +frees the packet +.IR p . +.PP +.I Packetheader +returns a pointer to the first +.I n +bytes of +.IR p , +making them contiguous in memory +if necessary. +.PP +.I Packetpeek +returns a pointer to the +.I n +bytes at +.I offset +in +.IR p . +If the requested bytes are already stored contiguously in memory, +the returned pointer points at the internal data storage for +.IR p . 
+Otherwise, the bytes are copied into +.IR buf , +and +.I packetpeek +returns +.IR buf . +.PP +.I Packetprefix +inserts a copy of the +.I n +bytes at +.I buf +at the beginning of +.IR p . +.PP +.I Packetsha1 +computes the SHA1 hash of the data contained in +.IR p . +.PP +.I Packetsize +returns the number of bytes of data contained in +.IR p . +.PP +.I Packetsplit +returns a new packet initialized with +.I n +bytes removed from the beginning of +.IR p . +.PP +.I Packetstats +prints run-time statistics to standard output. +.PP +.I Packettrailer +returns a pointer to the last +.I n +bytes of +.IR p , +making them contiguous in memory +if necessary. +.PP +.I Packettrim +removes +.I n +bytes at offset +.I offset +from packet +.IR p . +.SH SOURCE +.B \*9/src/libventi +.SH SEE ALSO +.IR venti (3) +.SH DIAGNOSTICS +These functions return errors only when passed +invalid inputs, +.IR e.g. , +requests for data at negative offsets or beyond the end of a packet. +.PP +Functions returning pointers return nil on error; +functions returning integers return \-1 on error. +Most functions returning integers return 0 on success. +The exceptions are +.I packetfragments +and +.IR packetcmp , +whose return values are described above. +.PP +When these functions run out of memory, they +print error messages and call +.IR sysfatal . +They do not return. diff --git a/man/man3/venti-server.3 b/man/man3/venti-server.3 new file mode 100644 index 00000000..9dd6f347 --- /dev/null +++ b/man/man3/venti-server.3 @@ -0,0 +1,121 @@ +.TH VENTI-SERVER 3 +.SH NAME +vtsrvhello, vtlisten, vtgetreq, vtrespond \- Venti server +.SH SYNOPSIS +.PP +.ft L +#include <u.h> +.br +#include <libc.h> +.br +#include <venti.h> +.ta +\w'\fLVtReq* 'u +.PP +.ft L +.nf +typedef struct VtReq +{ + VtFcall tx; + VtFcall rx; + ... 
+} VtReq; +.PP +.B +int vtsrvhello(VtConn *z) +.PP +.B +VtSrv* vtlisten(char *addr) +.PP +.B +VtReq* vtgetreq(VtSrv *srv) +.PP +.B +void vtrespond(VtReq *req) +.SH DESCRIPTION +These routines execute the server side of the +.IR venti (7) +protocol. +.PP +.I Vtsrvhello +executes the server side of the initial +.B hello +transaction. +It sets +.IB z -> uid +with the user name claimed by the other side. +Each new connection must be initialized by running +.I vtversion +and then +.IR vtsrvhello . +The framework below takes care of this detail automatically; +.I vtsrvhello +is provided for programs that do not use the functions below. +.PP +.IR Vtlisten , +.IR vtgetreq , +and +.I vtrespond +provide a simple framework for writing Venti servers. +.PP +.I Vtlisten +announces at the network address +.IR addr , +returning a fresh +.B VtSrv +structure representing the service. +.PP +.I Vtgetreq +waits for and returns +the next +.BR read , +.BR write , +.BR sync , +or +.B ping +request from any client connected to +the service +.IR srv . +.B Hello +and +.B goodbye +messages are handled internally and not returned to the client. +The interface does not distinguish between the +different clients that may be connected at any given time. +The request can be found in the +.I tx +field of the returned +.BR VtReq . +.PP +Once a request has been served and a response stored in +.IB r ->rx \fR, +the server should call +.IR vtrespond +to send the response to the client. +.I Vtrespond +frees the structure +.I r +as well as the packets +.IB r ->tx.data +and +.IB r ->rx.data \fR. +.SH EXAMPLE +.B \*9/src/venti/cmd +contains two simple Venti servers +.B ro.c +and +.B devnull.c +written using these routines. +.I Ro +is a read-only Venti proxy (it rejects +.B write +requests). +.I Devnull +is a write-only Venti server: it discards all +blocks written to it and returns error on all reads. 
+.SH SOURCE +.B \*9/src/libventi +.SH SEE ALSO +.IR venti (1), +.IR venti (3), +.IR venti-conn (3), +.IR venti-packet (3) diff --git a/man/man3/venti-zero.3 b/man/man3/venti-zero.3 new file mode 100644 index 00000000..c163b888 --- /dev/null +++ b/man/man3/venti-zero.3 @@ -0,0 +1,56 @@ +.TH VENTI-ZERO 3 +.SH NAME +vtzerotruncate, vtzeroextend, vtzeroscore \- Venti block truncation +.SH SYNOPSIS +.ft L +#include <u.h> +.br +#include <libc.h> +.br +#include <venti.h> +.ta +\w'\fLuint 'u +.PP +.B +uint vtzerotruncate(int type, uchar *buf, uint size) +.PP +.B +void vtzeroextend(int type, uchar *buf, uint size, uint newsize) +.PP +.B +extern uchar vtzeroscore[VtScoreSize]; +.SH DESCRIPTION +These utility functions compute how to truncate or replace +trailing zeros (for data blocks) or trailing zero scores +(for pointer blocks) to canonicalize the blocks before +storing them to Venti. +.PP +.I Vtzerotruncate +returns the size of the +.IR size -byte +buffer pointed to by +.I buf +ignoring trailing zeros or zero scores, +according to the block type +.IR type . +.PP +.I Vtzeroextend +pads +.I buf +with zeros or zero scores, +according to the block type +.IR type , +to grow it from +.I size +bytes to +.I newsize +bytes. +.PP +.I Vtzeroscore +is the score of the zero-length block. +.SH SOURCE +.B \*9/src/libventi/zero.c +.br +.B \*9/src/libventi/zeroscore.c +.SH SEE ALSO +.IR venti (1), +.IR venti (3) diff --git a/man/man3/venti.3 b/man/man3/venti.3 new file mode 100644 index 00000000..a2581e98 --- /dev/null +++ b/man/man3/venti.3 @@ -0,0 +1,75 @@ +.TH VENTI 3 +.SH NAME +xxx \- Venti storage server +.SH SYNOPSIS +.PP +.ft L +#include <u.h> +.br +#include <libc.h> +.br +#include <venti.h> +.SH DESCRIPTION +The Venti library provides support for writing Venti servers and clients. +This manual page describes general utility functions. +.PP +Other manual pages describe the library functions in detail. +.PP +.IR Venti-cache (3) +describes a simple in-memory block cache to help clients. 
+.PP +.IR Venti-conn (3) +describes routines for manipulating network connections +between Venti clients and servers. +.IR Venti-client (3) +and +.IR venti-server (3) +describe routines for writing clients +and servers on top of these. +.PP +.IR Venti-fcall (3) +describes the in-memory representation of Venti protocol messages +and data structures. +It also describes routines that convert between the C representation +and the network and disk representations. +.PP +.IR Venti-file (3) +describes routines for writing clients that manipulate +Venti file trees +(see +.IR venti (7)). +.PP +.IR Venti-log (3) +describes routines to access in-memory log buffers +as well as the logging that is done automatically by +the library. +.PP +.IR Venti-mem (3) +describes wrappers around the canonical +.IR malloc (3) +routines that abort on error. +.PP +.IR Venti-packet (3) +describes routines for +efficiently manipulating chains of +data buffers. +.PP +.IR Venti-zero (3) +describes routines to zero truncate and zero extend blocks +(see +.IR venti (7)). +.SH SOURCE +.B \*9/src/libventi +.SH SEE ALSO +.IR venti (1), +.IR venti-cache (3), +.IR venti-client (3), +.IR venti-fcall (3), +.IR venti-file (3), +.IR venti-log (3), +.IR venti-mem (3), +.IR venti-packet (3), +.IR venti-server (3), +.IR venti-zero (3), +.IR venti (7), +.IR venti (8) diff --git a/man/man7/venti.7 b/man/man7/venti.7 new file mode 100644 index 00000000..efab4e99 --- /dev/null +++ b/man/man7/venti.7 @@ -0,0 +1,439 @@ +.TH VENTI 7 +.SH NAME +venti \- archival storage server +.SH DESCRIPTION +Venti is a block storage server intended for archival data. +In a Venti server, the SHA1 hash of a block's contents acts +as the block identifier for read and write operations. +This approach enforces a write-once policy, preventing +accidental or malicious destruction of data. In addition, +duplicate copies of a block are coalesced, reducing the +consumption of storage and simplifying the implementation +of clients. 
+.PP +This manual page documents the basic concepts of +block storage using Venti as well as the Venti network protocol. +.PP +.IR Venti (1) +documents some simple clients. +.IR Vac (1), +.IR vbackup (1), +.IR vacfs (4), +and +.IR vnfs (4) +are more complex clients. +.PP +.IR Venti (3) +describes a C library interface for accessing +Venti servers and manipulating Venti data structures. +.PP +.IR Venti.conf (7) +describes the Venti server configuration file. +.PP +.IR Venti (8) +describes the programs used to run a Venti server. +.PP +.SS "Scores +The SHA1 hash that identifies a block is called its +.IR score . +The score of the zero-length block is called the +.IR "zero score" . +.PP +Scores may have an optional +.IB label : +prefix, typically used to +describe the format of the data. +For example, +.IR vac (1) +uses a +.B vac: +prefix, while +.IR vbackup (1) +uses prefixes corresponding to the file system +types: +.BR ext2: , +.BR ffs: , +and so on. +.SS "Files and Directories +Venti accepts blocks up to 56 kilobytes in size. +By convention, Venti clients use hash trees of blocks to +represent arbitrary-size data +.IR files . +The data to be stored is split into fixed-size +blocks and written to the server, producing a list +of scores. +The resulting list of scores is split into fixed-size pointer +blocks (using only an integral number of scores per block) +and written to the server, producing a smaller list +of scores. +The process continues, eventually ending with the +score for the hash tree's top-most block. +Each file stored this way is summarized by +a +.B VtEntry +structure recording the top-most score, the depth +of the tree, the data block size, and the pointer block size. +One or more +.B VtEntry +structures can be concatenated +and stored as a special file called a +.IR directory . +In this +manner, arbitrary trees of files can be constructed +and stored. 
+.PP +Scores passed between programs conventionally refer +to +.B VtRoot +blocks, which contain descriptive information +as well as the score of a block containing a small number +of +.BR VtEntries . +.SS "Block Types +To allow programs to traverse these structures without +needing to understand their higher-level meanings, +Venti tags each block with a type. The types are: +.PP +.nf +.ft L + VtDataType 000 \f1data\fL + VtDataType+1 001 \fRscores of \fPVtDataType\fR blocks\fL + VtDataType+2 002 \fRscores of \fPVtDataType+1\fR blocks\fL + \fR\&...\fL + VtDirType 010 VtEntry\fR structures\fL + VtDirType+1 011 \fRscores of \fLVtDirType\fR blocks\fL + VtDirType+2 012 \fRscores of \fLVtDirType+1\fR blocks\fL + \fR\&...\fL + VtRootType 020 VtRoot\fR structure\fL +.fi +.PP +The octal numbers listed are the type numbers used +by the commands below. +(For historical reasons, the type numbers used on +disk and on the wire are different from the above. +They do not distinguish +.BI VtDataType+ n +blocks from +.BI VtDirType+ n +blocks.) +.SS "Zero Truncation +To avoid storing the same short data blocks padded with +differing numbers of zeros, Venti clients working with fixed-size +blocks conventionally +`zero truncate' the blocks before writing them to the server. +For example, if a 1024-byte data block contains the +11-byte string +.RB ` hello " " world ' +followed by 1013 zero bytes, +a client would store only the 11-byte block. +When the client later read the block from the server, +it would append zeros to the end as necessary to +reach the expected size. +.PP +When truncating pointer blocks +.RB ( VtDataType+ \fIn +and +.BI VtDirType+ n +blocks), +trailing zero scores are removed +instead of trailing zero bytes. 
+.PP +Because of the truncation convention, +any file consisting entirely of zero bytes, +no matter what the length, will be represented by the zero score: +the data blocks contain all zeros and are thus truncated +to the empty block, and the pointer blocks contain all zero scores +and are thus also truncated to the empty block, +and so on up the hash tree. +.SS "Network Protocol +A Venti session begins when a +.I client +connects to the network address served by a Venti +.IR server ; +the conventional address is +.BI tcp! server !venti +(the +.B venti +port is 17034). +Both client and server begin by sending a version +string of the form +.BI venti- versions - comment \en \fR. +The +.I versions +field is a list of acceptable versions separated by +colons. +The protocol described here is version +.BR 02 . +The client is responsible for choosing a common +version and sending it in the +.B VtThello +message, described below. +.PP +After the initial version exchange, the client transmits +.I requests +.RI ( T-messages ) +to the server, which subsequently returns +.I replies +.RI ( R-messages ) +to the client. +The combined act of transmitting (receiving) a request +of a particular type, and receiving (transmitting) its reply +is called a +.I transaction +of that type. +.PP +Each message consists of a sequence of bytes. +Two-byte fields hold unsigned integers represented +in big-endian order (most significant byte first). +Data items of variable lengths are represented by +a one-byte field specifying a count, +.IR n , +followed by +.I n +bytes of data. +Text strings are represented similarly, +using a two-byte count with +the text itself stored as a UTF-8 encoded sequence +of Unicode characters (see +.IR utf (7)). +Text strings are not +.SM NUL\c +-terminated: +.I n +counts the bytes of UTF-8 data, which include no final +zero byte. +The +.SM NUL +character is illegal in text strings in the Venti protocol. +The maximum string length in Venti is 1024 bytes. 
+.PP +Each Venti message begins with a two-byte size field +specifying the length in bytes of the message, +not including the length field itself. +The next byte is the message type, one of the constants +in the enumeration in the include file +.BR <venti.h> . +The next byte is an identifying +.IR tag , +used to match responses with requests. +The remaining bytes are parameters of different sizes. +In the message descriptions, the number of bytes in a field +is given in brackets after the field name. +The notation +.IR parameter [ n ] +where +.I n +is not a constant represents a variable-length parameter: +.IR n [1] +followed by +.I n +bytes of data forming the +.IR parameter . +The notation +.IR string [ s ] +(using a literal +.I s +character) +is shorthand for +.IR s [2] +followed by +.I s +bytes of UTF-8 text. +The notation +.IR parameter [] +where +.I parameter +is the last field in the message represents a +variable-length field that comprises all remaining +bytes in the message. +.PP +All Venti RPC messages are prefixed with a field +.IR size [2] +giving the length of the message that follows +(not including the +.I size +field itself). 
+The message bodies are: +.ta \w'\fLVtTgoodbye 'u +.IP +.ne 2v +.B VtThello +.IR tag [1] +.IR version [ s ] +.IR uid [ s ] +.IR strength [1] +.IR crypto [ n ] +.IR codec [ n ] +.br +.B VtRhello +.IR tag [1] +.IR sid [ s ] +.IR rcrypto [1] +.IR rcodec [1] +.IP +.ne 2v +.B VtTping +.IR tag [1] +.br +.B VtRping +.IR tag [1] +.IP +.ne 2v +.B VtTread +.IR tag [1] +.IR score [20] +.IR type [1] +.IR pad [1] +.IR count [2] +.br +.B VtRread +.IR tag [1] +.IR data [] +.IP +.ne 2v +.B VtTwrite +.IR tag [1] +.IR type [1] +.IR pad [3] +.IR data [] +.br +.B VtRwrite +.IR tag [1] +.IR score [20] +.IP +.ne 2v +.B VtTsync +.IR tag [1] +.br +.B VtRsync +.IR tag [1] +.IP +.ne 2v +.B VtRerror +.IR tag [1] +.IR error [ s ] +.IP +.ne 2v +.B VtTgoodbye +.IR tag [1] +.PP +Each T-message has a one-byte +.I tag +field, chosen and used by the client to identify the message. +The server will echo the request's +.I tag +field in the reply. +Clients should arrange that no two outstanding +messages have the same tag field so that responses +can be distinguished. +.PP +The type of an R-message will either be one greater than +the type of the corresponding T-message or +.BR VtRerror , +indicating that the request failed. +In the latter case, the +.I error +field contains a string describing the reason for failure. +.PP +Venti connections must begin with a +.B hello +transaction. +The +.B VtThello +message contains the protocol +.I version +that the client has chosen to use. +The fields +.IR strength , +.IR crypto , +and +.IR codec +could be used to add authentication, encryption, +and compression to the Venti session +but are currently ignored. +The +.I rcrypto +and +.I rcodec +fields in the +.B VtRhello +response are similarly ignored. +The +.IR uid +and +.IR sid +fields are intended to be the identity +of the client and server but, given the lack of +authentication, should be treated only as advisory. +The initial +.B hello +should be the only +.B hello +transaction during the session. 
+.PP +The +.B ping +message has no effect and +is used mainly for debugging. +Servers should respond immediately to pings. +.PP +The +.B read +message requests a block with the given +.I score +and +.IR type . +Use +.I vttodisktype +and +.I vtfromdisktype +(see +.IR venti (3)) +to convert a block type enumeration value +.RB ( VtDataType , +etc.) +to the +.I type +used on disk and in the protocol. +The +.I count +field specifies the maximum expected size +of the block. +The +.I data +in the reply is the block's contents. +.PP +The +.B write +message writes a new block of the given +.I type +with contents +.I data +to the server. +The response includes the +.I score +to use to read the block, +which should be the SHA1 hash of +.IR data . +.PP +The Venti server may buffer written blocks in memory, +waiting until after responding to the +.B write +message before writing them to +permanent storage. +The server will delay the response to a +.B sync +message until after all blocks in earlier +.B write +messages have been written to permanent storage. +.PP +The +.B goodbye +message ends a session. There is no +.BR VtRgoodbye : +upon receiving the +.BR VtTgoodbye +message, the server terminates the connection. +.SH SEE ALSO +.IR venti (1), +.IR venti (3) diff --git a/man/man7/venti.conf.7 b/man/man7/venti.conf.7 new file mode 100644 index 00000000..000d8aa4 --- /dev/null +++ b/man/man7/venti.conf.7 @@ -0,0 +1,360 @@ +.TH VENTI.CONF 7 +.SH NAME +venti.conf \- venti configuration +.SH DESCRIPTION +Venti is a SHA1-addressed archival storage server. +See +.IR venti (7) +for a full introduction to the system. +This page documents the structure and operation of the server. +.PP +A venti server requires multiple disks or disk partitions, +each of which must be properly formatted before the server +can be run. 
+.SS Disk +The venti server maintains three disk structures, typically +stored on raw disk partitions: +the append-only +.IR "data log" , +which holds, in sequential order, +the contents of every block written to the server; +the +.IR index , +which helps locate a block in the data log given its score; +and optionally the +.IR "bloom filter" , +a concise summary of which scores are present in the index. +The data log is the primary storage. +To improve the robustness, it should be stored on +a device that provides RAID functionality. +The index and the bloom filter are optimizations +employed to access the data log efficiently and can be rebuilt +if lost or damaged. +.PP +The data log is logically split into sections called +.IR arenas , +typically sized for easy offline backup +(e.g., 500MB). +A data log may comprise many disks, each storing +one or more arenas. +Such disks are called +.IR "arena partitions" . +Arena partitions are filled in the order given in the configuration. +.PP +The index is logically split into block-sized pieces called +.IR buckets , +each of which is responsible for a particular range of scores. +An index may be split across many disks, each storing many buckets. +Such disks are called +.IR "index sections" . +.PP +The index must be sized so that no bucket is full. +When a bucket fills, the server must be shut down and +the index made larger. +Since scores appear random, each bucket will contain +approximately the same number of entries. +Index entries are 40 bytes long. Assuming that a typical block +being written to the server is 8192 bytes and compresses to 4096 +bytes, the active index is expected to be about 1% of +the active data log. +Storing smaller blocks increases the relative index footprint; +storing larger blocks decreases it. +To allow variation in both block size and the random distribution +of scores to buckets, the suggested index size is 5% of +the active data log. 
+.PP +The (optional) bloom filter is a large bitmap that is stored on disk but +also kept completely in memory while the venti server runs. +It helps the venti server efficiently detect scores that are +.I not +already stored in the index. +The bloom filter starts out zeroed. +Each score recorded in the bloom filter is hashed to choose +.I nhash +bits to set in the bloom filter. +A score is definitely not stored in the index if any of its +.I nhash +bits are not set. +The bloom filter thus has two parameters: +.I nhash +(maximum 32) +and the total bitmap size +(maximum 512MB, 2\s-2\u32\d\s+2 bits). +.PP +The bloom filter should be sized so that +.I nhash +\(mu +.I nblock +\(<= +0.7 \(mu +.IR b , +where +.I nblock +is the expected number of blocks stored on the server +and +.I b +is the bitmap size in bits. +The false positive rate of the bloom filter when sized +this way is approximately 2\s-2\u\-\fInhash\fR\d\s+2. +.I Nhash +less than 10 are not very useful; +.I nhash +greater than 24 are probably a waste of memory. +.I Fmtbloom +(see +.IR venti-fmt (8)) +can be given either +.I nhash +or +.IR nblock ; +if given +.IR nblock , +it will derive an appropriate +.IR nhash . +.SS Memory +Venti can make effective use of large amounts of memory +for various caches. +.PP +The +.I "lump cache +holds recently-accessed venti data blocks, which the server refers to as +.IR lumps . +The lump cache should be at least 1MB but can profitably be much larger. +The lump cache can be thought of as the level-1 cache: +read requests handled by the lump cache can +be served instantly. +.PP +The +.I "block cache +holds recently-accessed +.I disk +blocks from the arena partitions. +The block cache needs to be able to simultaneously hold two blocks +from each arena plus four blocks for the currently-filling arena. 
+The block cache can be thought of as the level-2 cache: +read requests handled by the block cache are slower than those +handled by the lump cache, since the lump data must be extracted +from the raw disk blocks and possibly decompressed, but no +disk accesses are necessary. +.PP +The +.I "index cache +holds recently-accessed or prefetched +index entries. +The index cache needs to be able to hold index entries +for three or four arenas, at least, in order for prefetching +to work properly. Each index entry is 50 bytes. +Assuming 500MB arenas of +128,000 blocks that are 4096 bytes each after compression, +the minimum index cache size is about 6MB. +The index cache can be thought of as the level-3 cache: +read requests handled by the index cache must still go +to disk to fetch the arena blocks, but the costly random +access to the index is avoided. +.PP +The size of the index cache determines how long venti +can sustain its `burst' write throughput, during which time +the only disk accesses on the critical path +are sequential writes to the arena partitions. +For example, if you want to be able to sustain 10MB/s +for an hour, you need enough index cache to hold entries +for 36GB of blocks. Assuming 8192-byte blocks, +you need room for almost five million index entries. +Since index entries are 50 bytes each, you need 250MB +of index cache. +If the background index update process can make a single +pass through the index in an hour, which is possible, +then you can sustain the 10MB/s indefinitely (at least until +the arenas are all filled). +.PP +The +.I "bloom filter +requires memory equal to its size on disk, +as discussed above. +.PP +A reasonable starting allocation is to +divide memory equally (in thirds) between +the bloom filter, the index cache, and the lump and block caches; +the third of memory allocated to the lump and block caches +should be split unevenly, with more (say, two thirds) +going to the block cache. 
+.SS Network +The venti server announces two network services, one +(conventionally TCP port +.BR venti , +17034) serving +the venti protocol as described in +.IR venti (7), +and one serving HTTP +(conventionally TCP port +.BR http , +80). +.PP +The venti web server provides the following +URLs for accessing status information: +.TP +.B /index +A summary of the usage of the arenas and index sections. +.TP +.B /xindex +An XML version of +.BR /index . +.TP +.B /storage +Brief storage totals. +.TP +.BI /set/ variable +The current integer value of +.IR variable . +Variables are: +.BR compress , +whether or not to compress blocks +(for debugging); +.BR logging , +whether to write entries to the debugging logs; +.BR stats , +whether to collect run-time statistics; +.BR icachesleeptime , +the time in milliseconds between successive updates +of megabytes of the index cache; +.BR arenasumsleeptime , +the time in milliseconds between reads while +checksumming an arena in the background. +The two sleep times should be (but are not) managed by venti; +they exist to provide more experience with their effects. +The other variables exist only for debugging and +performance measurement. +.TP +.BI /set/ variable / value +Set +.I variable +to +.IR value . +.TP +.BI /graph/ name / param / param / \fR... +A PNG image graphing the named run-time statistic over time. +The details of names and parameters are undocumented; +see +.B httpd.c +in the venti sources. +.TP +.B /log +A list of all debugging logs present in the server's memory. +.TP +.BI /log/ name +The contents of the debugging log with the given +.IR name . +.TP +.B /flushicache +Force venti to begin flushing the index cache to disk. +The request response will not be sent until the flush +has completed. +.TP +.B /flushdcache +Force venti to begin flushing the arena block cache to disk. +The request response will not be sent until the flush +has completed. 
+.PD +.PP +Requests for other files are served by consulting a +directory named in the configuration file +(see +.B webroot +below). +.SS Configuration File +A venti configuration file +enumerates the various index sections and +arenas that constitute a venti system. +The components are indicated by the name of the file, typically +a disk partition, in which they reside. The configuration +file is the only location that file names are used. Internally, +venti uses the names assigned when the components were formatted +with +.I fmtarenas +or +.I fmtisect +(see +.IR venti-fmt (8)). +In particular, only the configuration needs to be +changed if a component is moved to a different file. +.PP +The configuration file consists of lines in the form described below. +Lines starting with +.B # +are comments. +.TP +.BI index " name +Names the index for the system. +.TP +.BI arenas " file +.I File +is an arena partition, formatted using +.IR fmtarenas . +.TP +.BI isect " file +.I File +is an index section, formatted using +.IR fmtisect . +.PP +After formatting a venti system using +.IR fmtindex , +the order of arenas and index sections should not be changed. +Additional arenas can be appended to the configuration; +run +.I fmtindex +with the +.B -a +flag to update the index. +.PP +The configuration file also holds configuration parameters +for the venti server itself. +These are: +.TF httpaddr netaddr +.TP +.BI mem " size +lump cache size +.TP +.BI bcmem " size +block cache size +.TP +.BI icmem " size +index cache size +.TP +.BI addr " netaddr +network address to announce venti service +(default +.BR tcp!*!venti ) +.TP +.BI httpaddr " netaddr +network address to announce HTTP service +(default +.BR tcp!*!http ) +.TP +.B queuewrites +queue writes in memory +(default is not to queue) +.PD +See the server description in +.IR venti (8) +for explanations of these variables. 
+.SH EXAMPLE +.IP +.EX +index main +isect /tmp/disks/isect0 +isect /tmp/disks/isect1 +arenas /tmp/disks/arenas +mem 10M +bcmem 20M +icmem 30M +.EE +.SH "SEE ALSO" +.IR venti (8), +.IR venti-fmt (8) +.SH BUGS +Setting up a venti server is too complicated. +.PP +Venti should not require the user to decide how to +partition its memory usage. -- cgit v1.2.3