diff options
author | rsc <devnull@localhost> | 2005-07-12 15:23:36 +0000 |
---|---|---|
committer | rsc <devnull@localhost> | 2005-07-12 15:23:36 +0000 |
commit | a0d146edd7a7de6236a0d60baafeeb59f8452aae (patch) | |
tree | b55baa526d9f5adfc73246e6ee2fadf455e0b7a2 /src/cmd/venti/srv/whack.c | |
parent | 88bb285e3d87ec2508840af33f7e0af53ec3c13c (diff) | |
download | plan9port-a0d146edd7a7de6236a0d60baafeeb59f8452aae.tar.gz plan9port-a0d146edd7a7de6236a0d60baafeeb59f8452aae.tar.bz2 plan9port-a0d146edd7a7de6236a0d60baafeeb59f8452aae.zip |
return of venti
Diffstat (limited to 'src/cmd/venti/srv/whack.c')
-rw-r--r-- | src/cmd/venti/srv/whack.c | 331 |
1 files changed, 331 insertions, 0 deletions
diff --git a/src/cmd/venti/srv/whack.c b/src/cmd/venti/srv/whack.c new file mode 100644 index 00000000..ecd29033 --- /dev/null +++ b/src/cmd/venti/srv/whack.c @@ -0,0 +1,331 @@ +#include "stdinc.h" +#include "whack.h" + +typedef struct Huff Huff; +int compressblocks = 1; + +enum +{ + MaxFastLen = 9, + BigLenCode = 0x1f4, /* minimum code for large lenth encoding */ + BigLenBits = 9, + BigLenBase = 4, /* starting items to encode for big lens */ + + MinOffBits = 6, + MaxOffBits = MinOffBits + 8, + + MaxLen = 2051 /* max. length encodable in 24 bits */ +}; + +enum +{ + StatBytes, + StatOutBytes, + StatLits, + StatMatches, + StatLitBits, + StatOffBits, + StatLenBits, + + MaxStat +}; + +struct Huff +{ + short bits; /* length of the code */ + ulong encode; /* the code */ +}; + +static Huff lentab[MaxFastLen] = +{ + {2, 0x2}, /* 10 */ + {3, 0x6}, /* 110 */ + {5, 0x1c}, /* 11100 */ + {5, 0x1d}, /* 11101 */ + {6, 0x3c}, /* 111100 */ + {7, 0x7a}, /* 1111010 */ + {7, 0x7b}, /* 1111011 */ + {8, 0xf8}, /* 11111000 */ + {8, 0xf9}, /* 11111001 */ +}; + +static int thwmaxcheck; + +void +whackinit(Whack *tw, int level) +{ + thwmaxcheck = (1 << level); + thwmaxcheck -= thwmaxcheck >> 2; + if(thwmaxcheck < 2) + thwmaxcheck = 2; + else if(thwmaxcheck > 1024) + thwmaxcheck = 1024; + memset(tw, 0, sizeof *tw); + tw->begin = 2 * WhackMaxOff; +} + +/* + * find a string in the dictionary + */ +static int +whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now) +{ + ushort then, off, last; + int bestoff, bestlen, check; + uchar *s, *t; + + s = *ss; + if(esrc < s + MinMatch) + return -1; + if(s + MaxLen < esrc) + esrc = s + MaxLen; + + bestoff = 0; + bestlen = 0; + check = thwmaxcheck; + last = 0; + for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){ + off = now - then; + if(off <= last || off > WhackMaxOff) + break; + + /* + * don't need to check for the end because + * 1) s too close check above + */ + t = s - off; + if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){ + if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){ + t += 3; + for(s += 3; s < esrc; s++){ + if(*s != *t) + break; + t++; + } + if(s - *ss > bestlen){ + bestlen = s - *ss; + bestoff = off; + if(bestlen > thwmaxcheck) + break; + } + } + } + s = *ss; + last = off; + } + *ss += bestlen; + return bestoff; +} + +/* + * knuth vol. 3 multiplicative hashing + * each byte x chosen according to rules + * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4 + * with reasonable spread between the bytes & their complements + * + * the 3 byte value appears to be as almost good as the 4 byte value, + * and might be faster on some machines + */ +/* +#define hashit(c) ((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask) +*/ +#define hashit(c) (((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask) + +/* + * lz77 compression with single lookup in a hash table for each block + */ +int +whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats]) +{ + uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax; + ulong cont, code, wbits; + ushort now; + int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits; + + if(!compressblocks || n < MinMatch) + return -1; + + wdst = dst; + wdmax = dst + n; + + now = w->begin; + s = src; + w->data = s; + + cont = (s[0] << 16) | (s[1] << 8) | s[2]; + + esrc = s + n; + half = s + (n >> 1); + wnbits = 0; + wbits = 0; + lits = 0; + matches = 0; + offbits = 0; + lenbits = 0; + lithist = ~0; + while(s < esrc){ + h = hashit(cont); + + sss = s; + toff = whackmatch(w, &sss, esrc, h, now); + ss = sss; + + len = ss - s; + for(; wnbits >= 8; wnbits -= 8){ + if(wdst >= wdmax){ + w->begin = now; + return -1; + } + *wdst++ = wbits >> (wnbits - 8); + } + if(len < MinMatch){ + toff = *s; + lithist = (lithist << 1) | toff < 32 | toff > 127; + if(lithist & 0x1e){ + wbits = (wbits << 9) | toff; + wnbits += 9; + }else if(lithist & 1){ + toff = (toff + 64) & 0xff; + if(toff < 96){ + wbits = (wbits << 10) | toff; + wnbits += 10; + }else{ + wbits = (wbits << 11) | toff; + wnbits += 11; + } + }else{ + wbits = (wbits << 8) | toff; + wnbits += 8; + } + lits++; + + /* + * speed hack + * check for compression progress, bail if none achieved + */ + if(s > half){ + if(4 * (s - src) < 5 * lits){ + w->begin = now; + return -1; + } + half = esrc; + } + + if(s + MinMatch <= esrc){ + w->next[now & (WhackMaxOff - 1)] = w->hash[h]; + w->hash[h] = now; + if(s + MinMatch < esrc) + cont = (cont << 8) | s[MinMatch]; + } + now++; + s++; + continue; + } + + matches++; + + /* + * length of match + */ + if(len > MaxLen){ + len = MaxLen; + ss = s + len; + } + len -= MinMatch; + if(len < MaxFastLen){ + bits = lentab[len].bits; + wbits = (wbits << bits) | lentab[len].encode; + wnbits += bits; + lenbits += bits; + }else{ + code = BigLenCode; + bits = BigLenBits; + use = BigLenBase; + len -= MaxFastLen; + while(len >= use){ + len -= use; + code = (code + use) << 1; + use <<= (bits & 1) ^ 1; + bits++; + } + + wbits = (wbits << bits) | (code + len); + wnbits += bits; + lenbits += bits; + + for(; wnbits >= 8; wnbits -= 8){ + if(wdst >= wdmax){ + w->begin = now; + return -1; + } + *wdst++ = wbits >> (wnbits - 8); + } + } + + /* + * offset in history + */ + toff--; + for(bits = MinOffBits; toff >= (1 << bits); bits++) + ; + if(bits < MaxOffBits-1){ + wbits = (wbits << 3) | (bits - MinOffBits); + if(bits != MinOffBits) + bits--; + wnbits += bits + 3; + offbits += bits + 3; + }else{ + wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1)); + bits--; + wnbits += bits + 4; + offbits += bits + 4; + } + wbits = (wbits << bits) | toff & ((1 << bits) - 1); + + for(; s != ss; s++){ + if(s + MinMatch <= esrc){ + h = hashit(cont); + w->next[now & (WhackMaxOff - 1)] = w->hash[h]; + w->hash[h] = now; + if(s + MinMatch < esrc) + cont = (cont << 8) | s[MinMatch]; + } + now++; + } + } + + w->begin = now; + + stats[StatBytes] += esrc - src; + stats[StatLits] += lits; + stats[StatMatches] += matches; + stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits; + stats[StatOffBits] += offbits; + stats[StatLenBits] += lenbits; + + if(wnbits & 7){ + wbits <<= 8 - (wnbits & 7); + wnbits += 8 - (wnbits & 7); + } + for(; wnbits >= 8; wnbits -= 8){ + if(wdst >= wdmax) + return -1; + *wdst++ = wbits >> (wnbits - 8); + } + + stats[StatOutBytes] += wdst - dst; + + return wdst - dst; +} + +int +whackblock(uchar *dst, uchar *src, int ssize) +{ + Whack w; + ulong stats[MaxStat]; + int r; + + whackinit(&w, 6); + r = whack(&w, dst, src, ssize, stats); + return r; +} |