typedef struct Config		Config;
typedef struct AMap		AMap;
typedef struct AMapN		AMapN;
typedef struct Arena		Arena;
typedef struct ArenaHead	ArenaHead;
typedef struct ArenaPart	ArenaPart;
typedef struct CIBlock		CIBlock;
typedef struct Clump		Clump;
typedef struct ClumpInfo	ClumpInfo;
typedef struct IAddr		IAddr;
typedef struct IBucket		IBucket;
typedef struct ICache		ICache;
typedef struct IEStream		IEStream;
typedef struct IEntry		IEntry;
typedef struct IFile		IFile;
typedef struct ISect		ISect;
typedef struct Index		Index;
typedef struct Lump		Lump;
typedef struct DBlock		DBlock;
typedef struct Part		Part;
typedef struct Stats		Stats;
typedef struct ZBlock		ZBlock;

#define TWID32	((u32int)~(u32int)0)
#define TWID64	((u64int)~(u64int)0)
#define	TWID8	((u8int)~(u8int)0)

enum
{
	ABlockLog		= 9,		/* log2(512), the quantum for reading arenas */
	ANameSize		= 64,
	MaxDiskBlock		= 64*1024,	/* max. allowed size for a disk block */
	MaxIoSize		= 64*1024,	/* max. allowed size for a disk io operation */
	PartBlank		= 256*1024,	/* untouched section at beginning of partition */
	HeadSize		= 512,		/* size of a header after PartBlank */
	MinArenaSize		= 1*1024*1024,	/* smallest reasonable arena size */
	IndexBase		= 1024*1024,	/* initial address to use in an index */
	MaxIo			= 64*1024,	/* max size of a single read or write operation */
	ICacheBits		= 16,		/* default bits for indexing icache */
	ICacheDepth		= 4,		/* default depth of an icache hash chain */
	MaxAMap			= 2*1024,	/* max. allowed arenas in an address mapping; must be < 32*1024 */

	/*
	 * return codes from syncarena
	 */
	SyncDataErr	= 1 << 0,		/* problem reading the clump data */
	SyncCIErr	= 1 << 1,		/* found erroneous clump directory entries */
	SyncCIZero	= 1 << 2,		/* found unwritten clump directory entries */
	SyncFixErr	= 1 << 3,		/* error writing fixed data */
	SyncHeader	= 1 << 4,		/* altered header fields */

	/*
	 * error severity
	 */
	EOk			= 0,		/* error expected in normal operation */
	EStrange,				/* strange error that should be logged */
	ECorrupt,				/* corrupted data found in arenas */
	EICorrupt,				/* corrupted data found in index */
	EAdmin,					/* should be brought to administrators' attention */
	ECrash,					/* really bad internal error */
	EBug,					/* a limitation which should be fixed */
	EInconsist,				/* inconsistencies between index and arena */
	EMax,

	/*
	 * internal disk formats for the venti archival storage system
	 */
	/*
	 * magic numbers on disk
	 */
	ClumpMagic		= 0xd15cb10c,	/* clump header */
	ClumpFreeMagic		= 0,		/* free clump; terminates active clump log */

	ArenaPartMagic		= 0xa9e4a5e7,	/* arena partition header */
	ArenaMagic		= 0xf2a14ead,	/* arena trailer */
	ArenaHeadMagic		= 0xd15c4ead,	/* arena header */

	ISectMagic		= 0xd15c5ec7,	/* index header */

	ArenaPartVersion	= 3,
	ArenaVersion		= 4,
	IndexVersion1		= 1,
	IndexVersion2		= 2,
	ISectVersion		= 1,

	/*
	 * encodings of clumps on disk
	 */
	ClumpEErr		= 0,		/* can't happen */
	ClumpENone,				/* plain */
	ClumpECompress,				/* compressed */
	ClumpEMax,

	/*
	 * marker for corrupted data on disk
	 */
	VtTypeCorrupt		= VtMaxType,

	/*
	 * sizes in bytes on disk
	 */
	U8Size			= 1,
	U16Size			= 2,
	U32Size			= 4,
	U64Size			= 8,

	ArenaPartSize		= 4 * U32Size,
	ArenaSize		= 2 * U64Size + 6 * U32Size + ANameSize + U8Size,
	ArenaHeadSize		= U64Size + 3 * U32Size + ANameSize,
	ISectSize		= 7 * U32Size + 2 * ANameSize,
	ClumpInfoSize		= U8Size + 2 * U16Size + VtScoreSize,
	ClumpSize		= ClumpInfoSize + U8Size + 3 * U32Size,
	IBucketSize		= U32Size + U16Size,
	IEntrySize		= U64Size + U32Size + 2*U16Size + 2*U8Size + VtScoreSize,
	IEntryTypeOff		= VtScoreSize + U64Size + U32Size + 2 * U16Size,

	MaxClumpBlocks		=  (VtMaxLumpSize + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog,

	/*
	 * dirty flags - order controls disk write order
	 */
	DirtyArena		= 1,
	DirtyIndexSplit,
	DirtyIndex,
	DirtyIndexBitmap,
	DirtyArenaCib,
	DirtyArenaTrailer,
	DirtyMax,

	VentiZZZZZZZZ
};

/*
 * results of parsing and initializing a config file
 */
struct Config
{
	char		*index;			/* name of the index to initialize */
	int		naparts;		/* arena partitions initialized */
	ArenaPart	**aparts;
	int		nsects;			/* index sections initialized */
	ISect		**sects;
};

/*
 * a Part is the low level interface to files or disks.
 * there are two main types of partitions
 *	arena paritions, which some number of arenas, each in a sub-partition.
 *	index partition, which only have one subpartition.
 */
struct Part
{
	int		fd;			/* rock for accessing the disk */
	u64int		size;			/* size of the partiton */
	u32int		blocksize;		/* block size for reads and writes */
	char		*name;
	Channel		*writechan;		/* chan[dcache.nblock](DBlock*) */
};

/*
 * a cached block from the partition
 * yuck -- most of this is internal structure for the cache
 * all other routines should only use data
 */
struct DBlock
{
	u8int	*data;

	Part	*part;			/* partition in which cached */
	u64int	addr;			/* base address on the partition */
	u16int	size;			/* amount of data available, not amount allocated; should go away */
	u32int	dirty;
	u32int	dirtying;
	DBlock	*next;			/* doubly linked hash chains */
	DBlock	*prev;
	u32int	heap;			/* index in heap table */
	u32int	used;			/* last reference times */
	u32int	used2;
	u32int	ref;			/* reference count */
	QLock	lock;			/* for access to data only */
	Channel	writedonechan;	
	void*	chanbuf[1];		/* buffer for the chan! */
};

/*
 * a cached block from the partition
 * yuck -- most of this is internal structure for the cache
 * all other routines should only use data
 * double yuck -- this is mostly the same as a DBlock
 */
struct Lump
{
	Packet	*data;

	Part	*part;			/* partition in which cached */
	u8int	score[VtScoreSize];	/* score of packet */
	u8int	type;			/* type of packet */
	u16int	size;			/* amount of data allocated to hold packet */
	Lump	*next;			/* doubly linked hash chains */
	Lump	*prev;
	u32int	heap;			/* index in heap table */
	u32int	used;			/* last reference times */
	u32int	used2;
	u32int	ref;			/* reference count */
	QLock	lock;			/* for access to data only */
};

/*
 * mapping between names and address ranges
 */
struct AMap
{
	u64int		start;
	u64int		stop;
	char		name[ANameSize];
};

/*
 * an AMap along with a length
 */
struct AMapN
{
	int		n;
	AMap		*map;
};

/*
 * an ArenaPart is a partition made up of Arenas
 * it exists because most os's don't support many partitions,
 * and we want to have many different Arenas
 */
struct ArenaPart
{
	Part		*part;
	u64int		size;			/* size of underlying partition, rounded down to blocks */
	Arena		**arenas;
	u32int		tabbase;		/* base address of arena table on disk */
	u32int		tabsize;		/* max. bytes in arena table */

	/*
	 * fields stored on disk
	 */
	u32int		version;
	u32int		blocksize;		/* "optimal" block size for reads and writes */
	u32int		arenabase;		/* base address of first arena */

	/*
	 * stored in the arena mapping table on disk
	 */
	AMap		*map;
	int		narenas;
};

/*
 * info about one block in the clump info cache
 */
struct CIBlock
{
	u32int		block;			/* blocks in the directory */
	int		offset;			/* offsets of one clump in the data */
	DBlock		*data;
};

/*
 * an Arena is a log of Clumps, preceeded by an ArenaHeader,
 * and followed by a Arena, each in one disk block.
 * struct on disk is not always up to date, but should be self-consistent.
 * to sync after reboot, follow clumps starting at used until ClumpFreeMagic if found.
 * <struct name="Arena" type="Arena *">
 *	<field name="name" val="s->name" type="AName"/>
 *	<field name="version" val="s->version" type="U32int"/>
 *	<field name="partition" val="s->part->name" type="AName"/>
 *	<field name="blocksize" val="s->blocksize" type="U32int"/>
 *	<field name="start" val="s->base" type="U64int"/>
 *	<field name="stop" val="s->base+2*s->blocksize" type="U64int"/>
 *	<field name="created" val="s->ctime" type="U32int"/>
 *	<field name="modified" val="s->wtime" type="U32int"/>
 *	<field name="sealed" val="s->sealed" type="Sealed"/>
 *	<field name="score" val="s->score" type="Score"/>
 *	<field name="clumps" val="s->clumps" type="U32int"/>
 *	<field name="compressedclumps" val="s->cclumps" type="U32int"/>
 *	<field name="data" val="s->uncsize" type="U64int"/>
 *	<field name="compresseddata" val="s->used - s->clumps * ClumpSize" type="U64int"/>
 *	<field name="storage" val="s->used + s->clumps * ClumpInfoSize" type="U64int"/>
 * </struct>
 */
struct Arena
{
	QLock		lock;			/* lock for arena fields, writing to disk */
	Part		*part;			/* partition in which arena lives */
	int		blocksize;		/* size of block to read or write */
	u64int		base;			/* base address on disk */
	u64int		size;			/* total space in the arena */
	u64int		limit;			/* storage limit for clumps */
	u8int		score[VtScoreSize];	/* score of the entire sealed & summed arena */

	int		clumpmax;		/* ClumpInfos per block */
	CIBlock		cib;			/* dirty clump directory block */

	/*
	 * fields stored on disk
	 */
	u32int		version;
	char		name[ANameSize];	/* text label */
	u32int		clumps;			/* number of allocated clumps */
	u32int		cclumps;		/* clumps which are compressed; informational only */
	u32int		ctime;			/* first time a block was written */
	u32int		wtime;			/* last time a block was written */
	u64int		used;			/* number of bytes currently used */
	u64int		uncsize;		/* total of all clumps's uncsize; informational only */
	u8int		sealed;			/* arena all filled up? */
};

/*
 * redundant storage of some fields at the beginning of each arena
 */
struct ArenaHead
{
	u32int		version;
	char		name[ANameSize];
	u32int		blocksize;
	u64int		size;
};

/*
 * most interesting meta information for a clump.
 * stored in each clump's header and in the Arena's directory,
 * stored in reverse order just prior to the arena trailer
 */
struct ClumpInfo
{
	u8int		type;
	u16int		size;			/* size of disk data, not including header */
	u16int		uncsize;		/* size of uncompressed data */
	u8int		score[VtScoreSize];	/* score of the uncompressed data only */
};

/*
 * header for an immutable clump of data
 */
struct Clump
{
	ClumpInfo	info;
	u8int		encoding;
	u32int		creator;		/* initial client which wrote the block */
	u32int		time;			/* creation at gmt seconds since 1/1/1970 */
};

/*
 * index of all clumps according to their score
 * this is just a wrapper to tie together the index sections
 * <struct name="Index" type="Index *">
 *	<field name="name" val="s->name" type="AName"/>
 *	<field name="version" val="s->version" type="U32int"/>
 *	<field name="blocksize" val="s->blocksize" type="U32int"/>
 *	<field name="tabsize" val="s->tabsize" type="U32int"/>
 *	<field name="buckets" val="s->buckets" type="U32int"/>
 *	<field name="buckdiv" val="s->div" type="U32int"/>
 *	<field name="bitblocks" val="s->div" type="U32int"/>
 *	<field name="maxdepth" val="s->div" type="U32int"/>
 *	<field name="bitkeylog" val="s->div" type="U32int"/>
 *	<field name="bitkeymask" val="s->div" type="U32int"/>
 *	<array name="sect" val="&s->smap[i]" elems="s->nsects" type="Amap"/>
 *	<array name="amap" val="&s->amap[i]" elems="s->narenas" type="Amap"/>
 *	<array name="arena" val="s->arenas[i]" elems="s->narenas" type="Arena"/>
 * </struct>
 * <struct name="Amap" type="AMap *">
 *	<field name="name" val="s->name" type="AName"/>
 *	<field name="start" val="s->start" type="U64int"/>
 *	<field name="stop" val="s->stop" type="U64int"/>
 * </struct>
 */
struct Index
{
	u32int		div;			/* divisor for mapping score to bucket */
	u32int		buckets;		/* last bucket used in disk hash table */
	u32int		blocksize;
	u32int		tabsize;		/* max. bytes in index config */
	u32int		bitblocks;
	u32int		maxdepth;
	u32int		bitkeylog;
	u32int		bitkeymask;

	int		mapalloc;		/* first arena to check when adding a lump */
	Arena		**arenas;		/* arenas in the mapping */
	ISect		**sects;		/* sections which hold the buckets */

	/*
	 * fields stored in config file 
	 */
	u32int		version;
	char		name[ANameSize];	/* text label */
	int		nsects;
	AMap		*smap;			/* mapping of buckets to index sections */
	int		narenas;
	AMap		*amap;			/* mapping from index addesses to arenas */
};

/*
 * one part of the bucket storage for an index.
 * the index blocks are sequentially allocated
 * across all of the sections.
 */
struct ISect
{
	Part		*part;
	int		blocklog;		/* log2(blocksize) */
	int		buckmax;		/* max. entries in a index bucket */
	u32int		tabbase;		/* base address of index config table on disk */
	u32int		tabsize;		/* max. bytes in index config */

	/*
	 * fields stored on disk
	 */
	u32int		version;
	char		name[ANameSize];	/* text label */
	char		index[ANameSize];	/* index owning the section */
	u32int		blocksize;		/* size of hash buckets in index */
	u32int		blockbase;		/* address of start of on disk index table */
	u32int		blocks;			/* total blocks on disk; some may be unused */
	u32int		start;			/* first bucket in this section */
	u32int		stop;			/* limit of buckets in this section */
};

/*
 * externally interesting part of an IEntry
 */
struct IAddr
{
	u64int		addr;
	u16int		size;			/* uncompressed size */
	u8int		type;			/* type of block */
	u8int		blocks;			/* arena io quanta for Clump + data */
};

/*
 * entries in the index
 * kept in IBuckets in the disk index table,
 * cached in the memory ICache.
 */
struct IEntry
{
	u8int		score[VtScoreSize];
	IEntry		*next;			/* next in hash chain */
	u32int		wtime;			/* last write time */
	u16int		train;			/* relative train containing the most recent ref; 0 if no ref, 1 if in same car */
	u8int		rac;			/* read ahead count */
	IAddr		ia;
};

/*
 * buckets in the on disk index table
 */
struct IBucket
{
	u16int		n;			/* number of active indices */
	u32int		depth;		/* depth in version 2 (was overflow in v1) */
	u8int		*data;
};

/*
 * temporary buffers used by individual threads
 */
struct ZBlock
{
	u32int		len;
	u8int		*data;
};

/*
 * simple input buffer for a '\0' terminated text file
 */
struct IFile
{
	char		*name;				/* name of the file */
	ZBlock		*b;				/* entire contents of file */
	u32int		pos;				/* current position in the file */
};

/*
 * statistics about the operation of the server
 * mainly for performance monitoring and profiling.
 */
struct Stats
{
	QLock		lock;
	long		lumpwrites;		/* protocol block writes */
	long		lumpreads;		/* protocol block reads */
	long		lumphit;		/* lump cache hit */
	long		lumpmiss;		/* lump cache miss */
	long		clumpwrites;		/* clumps to disk */
	vlong		clumpbwrites;		/* clump data bytes to disk */
	vlong		clumpbcomp;		/* clump bytes compressed */
	long		clumpreads;		/* clumps from disk */
	vlong		clumpbreads;		/* clump data bytes from disk */
	vlong		clumpbuncomp;		/* clump bytes uncompressed */
	long		ciwrites;		/* clump directory to disk */
	long		cireads;		/* clump directory from disk */
	long		indexwrites;		/* index to disk */
	long		indexreads;		/* index from disk */
	long		indexwreads;		/* for writing a new entry */
	long		indexareads;		/* for allocating an overflow block */
	long		indexsplits;		/* index block splits */
	long		diskwrites;		/* total disk writes */
	long		diskreads;		/* total disk reads */
	vlong		diskbwrites;		/* total disk bytes written */
	vlong		diskbreads;		/* total disk bytes read */
	long		pchit;			/* partition cache hit */
	long		pcmiss;			/* partition cache miss */
	long		pcreads;		/* partition cache reads from disk */
	vlong		pcbreads;		/* partition cache bytes read */
	long		icinserts;		/* stores into index cache */
	long		iclookups;		/* index cache lookups */
	long		ichits;			/* hits in the cache */
	long		icfills;		/* successful fills from index */
	long		absorbedwrites;		/* disk writes absorbed by dcache */
	long		dirtydblocks;		/* blocks dirtied */
	long		dcacheflushes;		/* times dcache has flushed */
	long		dcacheflushwrites;	/* blocks written by those flushes */
};

extern	Index		*mainindex;
extern	u32int		maxblocksize;		/* max. block size used by any partition */
extern	int		paranoid;		/* should verify hashes on disk read */
extern	int		queuewrites;		/* put all lump writes on a queue and finish later */
extern	int		readonly;		/* only allowed to read the disk data */
extern	Stats		stats;
extern	u8int		zeroscore[VtScoreSize];