1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
|
typedef struct Arch Arch;
typedef struct BList BList;
typedef struct Block Block;
typedef struct Cache Cache;
typedef struct Disk Disk;
typedef struct Entry Entry;
typedef struct Fsck Fsck;
typedef struct Header Header;
typedef struct Label Label;
typedef struct Periodic Periodic;
typedef struct Snap Snap;
typedef struct Source Source;
typedef struct Super Super;
typedef struct WalkPtr WalkPtr;
#pragma incomplete Arch
#pragma incomplete BList
#pragma incomplete Cache
#pragma incomplete Disk
#pragma incomplete Periodic
#pragma incomplete Snap
/* tunable parameters - probably should not be constants */
enum {
/*
* estimate of bytes per dir entries - determines number
* of index entries in the block
*/
BytesPerEntry = 100,
/* don't allocate in block if more than this percentage full */
FullPercentage = 80,
FlushSize = 200, /* number of blocks to flush */
DirtyPercentage = 50, /* maximum percentage of dirty blocks */
};
enum {
Nowaitlock,
Waitlock,
MaxBlock = (1UL<<31),
};
enum {
HeaderMagic = 0x3776ae89,
HeaderVersion = 1,
HeaderOffset = 128*1024,
HeaderSize = 512,
SuperMagic = 0x2340a3b1,
SuperSize = 512,
SuperVersion = 1,
LabelSize = 14,
};
/* well known tags */
enum {
BadTag = 0, /* this tag should not be used */
RootTag = 1, /* root of fs */
EnumTag, /* root of a dir listing */
UserTag = 32, /* all other tags should be >= UserTag */
};
struct Super {
u16int version;
u32int epochLow;
u32int epochHigh;
u64int qid; /* next qid */
u32int active; /* root of active file system */
u32int next; /* root of next snapshot to archive */
u32int current; /* root of snapshot currently archiving */
uchar last[VtScoreSize]; /* last snapshot successfully archived */
char name[128]; /* label */
};
struct Fs {
Arch *arch; /* immutable */
Cache *cache; /* immutable */
int mode; /* immutable */
int noatimeupd; /* immutable */
int blockSize; /* immutable */
VtConn *z; /* immutable */
Snap *snap; /* immutable */
/* immutable; copy here & Fsys to ease error reporting */
char *name;
Periodic *metaFlush; /* periodically flushes metadata cached in files */
/*
* epoch lock.
* Most operations on the fs require a read lock of elk, ensuring that
* the current high and low epochs do not change under foot.
* This lock is mostly acquired via a call to fileLock or fileRlock.
* Deletion and creation of snapshots occurs under a write lock of elk,
* ensuring no file operations are occurring concurrently.
*/
RWLock elk; /* epoch lock */
u32int ehi; /* epoch high */
u32int elo; /* epoch low */
int halted; /* epoch lock is held to halt (console initiated) */
Source *source; /* immutable: root of sources */
File *file; /* immutable: root of files */
};
/*
* variant on VtEntry
* there are extra fields when stored locally
*/
struct Entry {
u32int gen; /* generation number */
ushort psize; /* pointer block size */
ushort dsize; /* data block size */
uchar depth; /* unpacked from flags */
uchar flags;
uvlong size;
uchar score[VtScoreSize];
u32int tag; /* tag for local blocks: zero if stored on Venti */
u32int snap; /* non-zero -> entering snapshot of given epoch */
uchar archive; /* archive this snapshot: only valid for snap != 0 */
};
/*
* This is called a `stream' in the fossil paper. There used to be Sinks too.
* We believe that Sources and Files are one-to-one.
*/
struct Source {
Fs *fs; /* immutable */
int mode; /* immutable */
int issnapshot; /* immutable */
u32int gen; /* immutable */
int dsize; /* immutable */
int dir; /* immutable */
Source *parent; /* immutable */
File *file; /* immutable; point back */
QLock lk;
int ref;
/*
* epoch for the source
* for ReadWrite sources, epoch is used to lazily notice
* sources that must be split from the snapshots.
* for ReadOnly sources, the epoch represents the minimum epoch
* along the chain from the root, and is used to lazily notice
* sources that have become invalid because they belong to an old
* snapshot.
*/
u32int epoch;
Block *b; /* block containing this source */
uchar score[VtScoreSize]; /* score of block containing this source */
u32int scoreEpoch; /* epoch of block containing this source */
int epb; /* immutable: entries per block in parent */
u32int tag; /* immutable: tag of parent */
u32int offset; /* immutable: entry offset in parent */
};
struct Header {
ushort version;
ushort blockSize;
ulong super; /* super blocks */
ulong label; /* start of labels */
ulong data; /* end of labels - start of data blocks */
ulong end; /* end of data blocks */
};
/*
* contains a one block buffer
* to avoid problems of the block changing underfoot
* and to enable an interface that supports unget.
*/
struct DirEntryEnum {
File *file;
u32int boff; /* block offset */
int i, n;
DirEntry *buf;
};
/* Block states */
enum {
BsFree = 0, /* available for allocation */
BsBad = 0xFF, /* something is wrong with this block */
/* bit fields */
BsAlloc = 1<<0, /* block is in use */
BsCopied = 1<<1,/* block has been copied (usually in preparation for unlink) */
BsVenti = 1<<2, /* block has been stored on Venti */
BsClosed = 1<<3,/* block has been unlinked on disk from active file system */
BsMask = BsAlloc|BsCopied|BsVenti|BsClosed,
};
/*
* block types
* more regular than Venti block types
* bit 3 -> block or data block
* bits 2-0 -> level of block
*/
enum {
BtData,
BtDir = 1<<3,
BtLevelMask = 7,
BtMax = 1<<4,
};
/* io states */
enum {
BioEmpty, /* label & data are not valid */
BioLabel, /* label is good */
BioClean, /* data is on the disk */
BioDirty, /* data is not yet on the disk */
BioReading, /* in process of reading data */
BioWriting, /* in process of writing data */
BioReadError, /* error reading: assume disk always handles write errors */
BioVentiError, /* error reading from venti (probably disconnected) */
BioMax
};
struct Label {
uchar type;
uchar state;
u32int tag;
u32int epoch;
u32int epochClose;
};
struct Block {
Cache *c;
int ref;
int nlock;
uintptr pc; /* pc that fetched this block from the cache */
QLock lk;
int part;
u32int addr;
uchar score[VtScoreSize]; /* score */
Label l;
uchar *dmap;
uchar *data;
/* the following is private; used by cache */
Block *next; /* doubly linked hash chains */
Block **prev;
u32int heap; /* index in heap table */
u32int used; /* last reference times */
u32int vers; /* version of dirty flag */
BList *uhead; /* blocks to unlink when this block is written */
BList *utail;
/* block ordering for cache -> disk */
BList *prior; /* list of blocks before this one */
Block *ionext;
int iostate;
Rendez ioready;
};
/* tree walker, for gc and archiver */
struct WalkPtr
{
uchar *data;
int isEntry;
int n;
int m;
Entry e;
uchar type;
u32int tag;
};
enum
{
DoClose = 1<<0,
DoClre = 1<<1,
DoClri = 1<<2,
DoClrp = 1<<3,
};
struct Fsck
{
/* filled in by caller */
int printblocks;
int useventi;
int flags;
int printdirs;
int printfiles;
int walksnapshots;
int walkfs;
Fs *fs;
int (*print)(char*, ...);
void (*clre)(Fsck*, Block*, int);
void (*clrp)(Fsck*, Block*, int);
void (*close)(Fsck*, Block*, u32int);
void (*clri)(Fsck*, char*, MetaBlock*, int, Block*);
/* used internally */
Cache *cache;
uchar *amap; /* all blocks seen so far */
uchar *emap; /* all blocks seen in this epoch */
uchar *xmap; /* all blocks in this epoch with parents in this epoch */
uchar *errmap; /* blocks with errors */
uchar *smap; /* walked sources */
int nblocks;
int bsize;
int walkdepth;
u32int hint; /* where the next root probably is */
int nseen;
int quantum;
int nclre;
int nclrp;
int nclose;
int nclri;
};
/* disk partitions; keep in sync with partname[] in disk.c */
enum {
PartError,
PartSuper,
PartLabel,
PartData,
PartVenti, /* fake partition */
};
extern int vtType[BtMax];
|