diff options
author | rsc <devnull@localhost> | 2003-11-23 18:19:18 +0000 |
---|---|---|
committer | rsc <devnull@localhost> | 2003-11-23 18:19:18 +0000 |
commit | b6afd33e2f23953f00c6fac6b5d45946a9113654 (patch) | |
tree | 300478ec8ac939d1b7aa0c1b987367a49a4e47ce /src/libflate/deflate.c | |
parent | 8a708fb239f4272ac7e4f16f437093c56b2cab39 (diff) | |
download | plan9port-b6afd33e2f23953f00c6fac6b5d45946a9113654.tar.gz plan9port-b6afd33e2f23953f00c6fac6b5d45946a9113654.tar.bz2 plan9port-b6afd33e2f23953f00c6fac6b5d45946a9113654.zip |
add libflate
Diffstat (limited to 'src/libflate/deflate.c')
-rw-r--r-- | src/libflate/deflate.c | 1359 |
1 files changed, 1359 insertions, 0 deletions
diff --git a/src/libflate/deflate.c b/src/libflate/deflate.c new file mode 100644 index 00000000..3a55af3f --- /dev/null +++ b/src/libflate/deflate.c @@ -0,0 +1,1359 @@ +#include <u.h> +#include <libc.h> +#include <flate.h> + +typedef struct Chain Chain; +typedef struct Chains Chains; +typedef struct Dyncode Dyncode; +typedef struct Huff Huff; +typedef struct LZblock LZblock; +typedef struct LZstate LZstate; + +enum +{ + /* + * deflate format paramaters + */ + DeflateUnc = 0, /* uncompressed block */ + DeflateFix = 1, /* fixed huffman codes */ + DeflateDyn = 2, /* dynamic huffman codes */ + + DeflateEob = 256, /* end of block code in lit/len book */ + DeflateMaxBlock = 64*1024-1, /* maximum size of uncompressed block */ + + DeflateMaxExp = 10, /* maximum expansion for a block */ + + LenStart = 257, /* start of length codes in litlen */ + Nlitlen = 288, /* number of litlen codes */ + Noff = 30, /* number of offset codes */ + Nclen = 19, /* number of codelen codes */ + + MaxOff = 32*1024, + MinMatch = 3, /* shortest match possible */ + MaxMatch = 258, /* longest match possible */ + + /* + * huffman code paramaters + */ + MaxLeaf = Nlitlen, + MaxHuffBits = 16, /* max bits in a huffman code */ + ChainMem = 2 * (MaxHuffBits - 1) * MaxHuffBits, + + /* + * coding of the lz parse + */ + LenFlag = 1 << 3, + LenShift = 4, /* leaves enough space for MinMatchMaxOff */ + MaxLitRun = LenFlag - 1, + + /* + * internal lz paramaters + */ + DeflateOut = 4096, /* output buffer size */ + BlockSize = 8192, /* attempted input read quanta */ + DeflateBlock = DeflateMaxBlock & ~(BlockSize - 1), + MinMatchMaxOff = 4096, /* max profitable offset for small match; + * assumes 8 bits for len, 5+10 for offset + * DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS + */ + HistSlop = 512, /* must be at lead MaxMatch */ + HistBlock = 64*1024, + HistSize = HistBlock + HistSlop, + + HashLog = 13, + HashSize = 1<<HashLog, + + MaxOffCode = 256, /* biggest offset looked up in direct table */ + + EstLitBits = 8, + EstLenBits = 4, + EstOffBits = 5, +}; + +/* + * knuth vol. 3 multiplicative hashing + * each byte x chosen according to rules + * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4 + * with reasonable spread between the bytes & their complements + * + * the 3 byte value appears to be as almost good as the 4 byte value, + * and might be faster on some machines + */ +/* +#define hashit(c) (((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) +*/ +#define hashit(c) ((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) + +/* + * lempel-ziv style compression state + */ +struct LZstate +{ + uchar hist[HistSize]; + ulong pos; /* current location in history buffer */ + ulong avail; /* data available after pos */ + int eof; + ushort hash[HashSize]; /* hash chains */ + ushort nexts[MaxOff]; + int now; /* pos in hash chains */ + int dot; /* dawn of time in history */ + int prevlen; /* lazy matching state */ + int prevoff; + int maxcheck; /* compressor tuning */ + + uchar obuf[DeflateOut]; + uchar *out; /* current position in the output buffer */ + uchar *eout; + ulong bits; /* bit shift register */ + int nbits; + int rbad; /* got an error reading the buffer */ + int wbad; /* got an error writing the buffer */ + int (*w)(void*, void*, int); + void *wr; + + ulong totr; /* total input size */ + ulong totw; /* total output size */ + int debug; +}; + +struct LZblock +{ + ushort parse[DeflateMaxBlock / 2 + 1]; + int lastv; /* value being constucted for parse */ + ulong litlencount[Nlitlen]; + ulong offcount[Noff]; + ushort *eparse; /* limit for parse table */ + int bytes; /* consumed from the input */ + int excost; /* cost of encoding extra len & off bits */ +}; + +/* + * huffman code table + */ +struct Huff +{ + short bits; /* length of the code */ + ushort encode; /* the code */ +}; + +/* + * encoding of dynamic huffman trees + */ +struct Dyncode +{ + int nlit; + int noff; + int nclen; + int ncode; + Huff codetab[Nclen]; + uchar codes[Nlitlen+Noff]; + uchar codeaux[Nlitlen+Noff]; +}; + +static int deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int)); +static int lzcomp(LZstate*, LZblock*, uchar*, ushort*, int finish); +static void wrblock(LZstate*, int, ushort*, ushort*, Huff*, Huff*); +static int bitcost(Huff*, ulong*, int); +static int huffcodes(Dyncode*, Huff*, Huff*); +static void wrdyncode(LZstate*, Dyncode*); +static void lzput(LZstate*, ulong bits, int nbits); +static void lzflushbits(LZstate*); +static void lzflush(LZstate *lz); +static void lzwrite(LZstate *lz, void *buf, int n); + +static int hufftabinit(Huff*, int, ulong*, int); +static int mkgzprecode(Huff*, ulong *, int, int); + +static int mkprecode(Huff*, ulong *, int, int, ulong*); +static void nextchain(Chains*, int); +static void leafsort(ulong*, ushort*, int, int); + +/* conversion from len to code word */ +static int lencode[MaxMatch]; + +/* + * conversion from off to code word + * off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7] +*/ +static int offcode[MaxOffCode]; +static int bigoffcode[256]; + +/* litlen code words LenStart-285 extra bits */ +static int litlenbase[Nlitlen-LenStart]; +static int litlenextra[Nlitlen-LenStart] = +{ +/* 257 */ 0, 0, 0, +/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, +/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, +/* 280 */ 4, 5, 5, 5, 5, 0, 0, 0 +}; + +/* offset code word extra bits */ +static int offbase[Noff]; +static int offextra[] = +{ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + 0, 0, +}; + +/* order code lengths */ +static int clenorder[Nclen] = +{ + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 +}; + +/* static huffman tables */ +static Huff litlentab[Nlitlen]; +static Huff offtab[Noff]; +static Huff hofftab[Noff]; + +/* bit reversal for brain dead endian swap in huffman codes */ +static uchar revtab[256]; +static ulong nlits; +static ulong nmatches; + +int +deflateinit(void) +{ + ulong bitcount[MaxHuffBits]; + int i, j, ci, n; + + /* byte reverse table */ + for(i=0; i<256; i++) + for(j=0; j<8; j++) + if(i & (1<<j)) + revtab[i] |= 0x80 >> j; + + /* static Litlen bit lengths */ + for(i=0; i<144; i++) + litlentab[i].bits = 8; + for(i=144; i<256; i++) + litlentab[i].bits = 9; + for(i=256; i<280; i++) + litlentab[i].bits = 7; + for(i=280; i<Nlitlen; i++) + litlentab[i].bits = 8; + + memset(bitcount, 0, sizeof(bitcount)); + bitcount[8] += 144 - 0; + bitcount[9] += 256 - 144; + bitcount[7] += 280 - 256; + bitcount[8] += Nlitlen - 280; + + if(!hufftabinit(litlentab, Nlitlen, bitcount, 9)) + return FlateInternal; + + /* static offset bit lengths */ + for(i = 0; i < Noff; i++) + offtab[i].bits = 5; + + memset(bitcount, 0, sizeof(bitcount)); + bitcount[5] = Noff; + + if(!hufftabinit(offtab, Noff, bitcount, 5)) + return FlateInternal; + + bitcount[0] = 0; + bitcount[1] = 0; + if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits)) + return FlateInternal; + + /* conversion tables for lens & offs to codes */ + ci = 0; + for(i = LenStart; i < 286; i++){ + n = ci + (1 << litlenextra[i - LenStart]); + litlenbase[i - LenStart] = ci; + for(; ci < n; ci++) + lencode[ci] = i; + } + /* patch up special case for len MaxMatch */ + lencode[MaxMatch-MinMatch] = 285; + litlenbase[285-LenStart] = MaxMatch-MinMatch; + + ci = 0; + for(i = 0; i < 16; i++){ + n = ci + (1 << offextra[i]); + offbase[i] = ci; + for(; ci < n; ci++) + offcode[ci] = i; + } + + ci = ci >> 7; + for(; i < 30; i++){ + n = ci + (1 << (offextra[i] - 7)); + offbase[i] = ci << 7; + for(; ci < n; ci++) + bigoffcode[ci] = i; + } + return FlateOk; +} + +static void +deflatereset(LZstate *lz, int level, int debug) +{ + memset(lz->nexts, 0, sizeof lz->nexts); + memset(lz->hash, 0, sizeof lz->hash); + lz->totr = 0; + lz->totw = 0; + lz->pos = 0; + lz->avail = 0; + lz->out = lz->obuf; + lz->eout = &lz->obuf[DeflateOut]; + lz->prevlen = MinMatch - 1; + lz->prevoff = 0; + lz->now = MaxOff + 1; + lz->dot = lz->now; + lz->bits = 0; + lz->nbits = 0; + lz->maxcheck = (1 << level); + lz->maxcheck -= lz->maxcheck >> 2; + if(lz->maxcheck < 2) + lz->maxcheck = 2; + else if(lz->maxcheck > 1024) + lz->maxcheck = 1024; + + lz->debug = debug; +} + +int +deflate(void *wr, int (*w)(void*, void*, int), void *rr, int (*r)(void*, void*, int), int level, int debug) +{ + LZstate *lz; + LZblock *lzb; + int ok; + + lz = malloc(sizeof *lz + sizeof *lzb); + if(lz == nil) + return FlateNoMem; + lzb = (LZblock*)&lz[1]; + + deflatereset(lz, level, debug); + lz->w = w; + lz->wr = wr; + lz->wbad = 0; + lz->rbad = 0; + lz->eof = 0; + ok = FlateOk; + while(!lz->eof || lz->avail){ + ok = deflateb(lz, lzb, rr, r); + if(ok != FlateOk) + break; + } + if(ok == FlateOk && lz->rbad) + ok = FlateInputFail; + if(ok == FlateOk && lz->wbad) + ok = FlateOutputFail; + free(lz); + return ok; +} + +static int +deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int)) +{ + Dyncode dyncode, hdyncode; + Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen]; + ulong litcount[Nlitlen]; + long nunc, ndyn, nfix, nhuff; + uchar *slop, *hslop; + ulong ep; + int i, n, m, mm, nslop; + + memset(lzb->litlencount, 0, sizeof lzb->litlencount); + memset(lzb->offcount, 0, sizeof lzb->offcount); + lzb->litlencount[DeflateEob]++; + + lzb->bytes = 0; + lzb->eparse = lzb->parse; + lzb->lastv = 0; + lzb->excost = 0; + + slop = &lz->hist[lz->pos]; + n = lz->avail; + while(n < DeflateBlock && (!lz->eof || lz->avail)){ + /* + * fill the buffer as much as possible, + * while leaving room for MaxOff history behind lz->pos, + * and not reading more than we can handle. + * + * make sure we read at least HistSlop bytes. + */ + if(!lz->eof){ + ep = lz->pos + lz->avail; + if(ep >= HistBlock) + ep -= HistBlock; + m = HistBlock - MaxOff - lz->avail; + if(m > HistBlock - n) + m = HistBlock - n; + if(m > (HistBlock + HistSlop) - ep) + m = (HistBlock + HistSlop) - ep; + if(m & ~(BlockSize - 1)) + m &= ~(BlockSize - 1); + + /* + * be nice to the caller: stop reads that are too small. + * can only get here when we've already filled the buffer some + */ + if(m < HistSlop){ + if(!m || !lzb->bytes) + return FlateInternal; + break; + } + + mm = (*r)(rr, &lz->hist[ep], m); + if(mm > 0){ + /* + * wrap data to end if we're read it from the beginning + * this way, we don't have to wrap searches. + * + * wrap reads past the end to the beginning. + * this way, we can guarantee minimum size reads. + */ + if(ep < HistSlop) + memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep); + else if(ep + mm > HistBlock) + memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock); + + lz->totr += mm; + n += mm; + lz->avail += mm; + }else{ + if(mm < 0) + lz->rbad = 1; + lz->eof = 1; + } + } + ep = lz->pos + lz->avail; + if(ep > HistSize) + ep = HistSize; + if(lzb->bytes + ep - lz->pos > DeflateMaxBlock) + ep = lz->pos + DeflateMaxBlock - lzb->bytes; + m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof); + lzb->bytes += m; + lz->pos = (lz->pos + m) & (HistBlock - 1); + lz->avail -= m; + } + if(lzb->lastv) + *lzb->eparse++ = lzb->lastv; + if(lzb->eparse > lzb->parse + nelem(lzb->parse)) + return FlateInternal; + nunc = lzb->bytes; + + if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits) + || !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits)) + return FlateInternal; + + ndyn = huffcodes(&dyncode, dlitlentab, dofftab); + if(ndyn < 0) + return FlateInternal; + ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen) + + bitcost(dofftab, lzb->offcount, Noff) + + lzb->excost; + + memset(litcount, 0, sizeof litcount); + + nslop = nunc; + if(nslop > &lz->hist[HistSize] - slop) + nslop = &lz->hist[HistSize] - slop; + + for(i = 0; i < nslop; i++) + litcount[slop[i]]++; + hslop = &lz->hist[HistSlop - nslop]; + for(; i < nunc; i++) + litcount[hslop[i]]++; + litcount[DeflateEob]++; + + if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits)) + return FlateInternal; + nhuff = huffcodes(&hdyncode, hlitlentab, hofftab); + if(nhuff < 0) + return FlateInternal; + nhuff += bitcost(hlitlentab, litcount, Nlitlen); + + nfix = bitcost(litlentab, lzb->litlencount, Nlitlen) + + bitcost(offtab, lzb->offcount, Noff) + + lzb->excost; + + lzput(lz, lz->eof && !lz->avail, 1); + + if(lz->debug){ + fprint(2, "block: bytes=%lud entries=%ld extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n", + nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff); + fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail); + } + + if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){ + lzput(lz, DeflateUnc, 2); + lzflushbits(lz); + + lzput(lz, nunc & 0xff, 8); + lzput(lz, (nunc >> 8) & 0xff, 8); + lzput(lz, ~nunc & 0xff, 8); + lzput(lz, (~nunc >> 8) & 0xff, 8); + lzflush(lz); + + lzwrite(lz, slop, nslop); + lzwrite(lz, &lz->hist[HistSlop], nunc - nslop); + }else if(ndyn < nfix && ndyn < nhuff){ + lzput(lz, DeflateDyn, 2); + + wrdyncode(lz, &dyncode); + wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab); + lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits); + }else if(nhuff < nfix){ + lzput(lz, DeflateDyn, 2); + + wrdyncode(lz, &hdyncode); + + m = 0; + for(i = nunc; i > MaxLitRun; i -= MaxLitRun) + lzb->parse[m++] = MaxLitRun; + lzb->parse[m++] = i; + + wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab); + lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits); + }else{ + lzput(lz, DeflateFix, 2); + + wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab); + lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits); + } + + if(lz->eof && !lz->avail){ + lzflushbits(lz); + lzflush(lz); + } + return FlateOk; +} + +static void +lzwrite(LZstate *lz, void *buf, int n) +{ + int nw; + + if(n && lz->w){ + nw = (*lz->w)(lz->wr, buf, n); + if(nw != n){ + lz->w = nil; + lz->wbad = 1; + }else + lz->totw += n; + } +} + +static void +lzflush(LZstate *lz) +{ + lzwrite(lz, lz->obuf, lz->out - lz->obuf); + lz->out = lz->obuf; +} + +static void +lzput(LZstate *lz, ulong bits, int nbits) +{ + bits = (bits << lz->nbits) | lz->bits; + for(nbits += lz->nbits; nbits >= 8; nbits -= 8){ + *lz->out++ = bits; + if(lz->out == lz->eout) + lzflush(lz); + bits >>= 8; + } + lz->bits = bits; + lz->nbits = nbits; +} + +static void +lzflushbits(LZstate *lz) +{ + if(lz->nbits) + lzput(lz, 0, 8 - (lz->nbits & 7)); +} + +/* + * write out a block of n samples, + * given lz encoding and counts for huffman tables + */ +static void +wrblock(LZstate *out, int litoff, ushort *soff, ushort *eoff, Huff *litlentab, Huff *offtab) +{ + ushort *off; + int i, run, offset, lit, len, c; + + if(out->debug > 2){ + for(off = soff; off < eoff; ){ + offset = *off++; + run = offset & MaxLitRun; + if(run){ + for(i = 0; i < run; i++){ + lit = out->hist[litoff & (HistBlock - 1)]; + litoff++; + fprint(2, "\tlit %.2ux %c\n", lit, lit); + } + if(!(offset & LenFlag)) + continue; + len = offset >> LenShift; + offset = *off++; + }else if(offset & LenFlag){ + len = offset >> LenShift; + offset = *off++; + }else{ + len = 0; + offset >>= LenShift; + } + litoff += len + MinMatch; + fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch); + } + } + + for(off = soff; off < eoff; ){ + offset = *off++; + run = offset & MaxLitRun; + if(run){ + for(i = 0; i < run; i++){ + lit = out->hist[litoff & (HistBlock - 1)]; + litoff++; + lzput(out, litlentab[lit].encode, litlentab[lit].bits); + } + if(!(offset & LenFlag)) + continue; + len = offset >> LenShift; + offset = *off++; + }else if(offset & LenFlag){ + len = offset >> LenShift; + offset = *off++; + }else{ + len = 0; + offset >>= LenShift; + } + litoff += len + MinMatch; + c = lencode[len]; + lzput(out, litlentab[c].encode, litlentab[c].bits); + c -= LenStart; + if(litlenextra[c]) + lzput(out, len - litlenbase[c], litlenextra[c]); + + if(offset < MaxOffCode) + c = offcode[offset]; + else + c = bigoffcode[offset >> 7]; + lzput(out, offtab[c].encode, offtab[c].bits); + if(offextra[c]) + lzput(out, offset - offbase[c], offextra[c]); + } +} + +/* + * look for the longest, closest string which matches + * the next prefix. the clever part here is looking for + * a string 1 longer than the previous best match. + * + * follows the recommendation of limiting number of chains + * which are checked. this appears to be the best heuristic. + */ +static int +lzmatch(int now, int then, uchar *p, uchar *es, ushort *nexts, uchar *hist, int runlen, int check, int *m) +{ + uchar *s, *t; + int ml, off, last; + + ml = check; + if(runlen >= 8) + check >>= 2; + *m = 0; + if(p + runlen >= es) + return runlen; + last = 0; + for(; check-- > 0; then = nexts[then & (MaxOff-1)]){ + off = (ushort)(now - then); + if(off <= last || off > MaxOff) + break; + s = p + runlen; + t = hist + (((p - hist) - off) & (HistBlock-1)); + t += runlen; + for(; s >= p; s--){ + if(*s != *t) + goto matchloop; + t--; + } + + /* + * we have a new best match. + * extend it to it's maximum length + */ + t += runlen + 2; + s += runlen + 2; + for(; s < es; s++){ + if(*s != *t) + break; + t++; + } + runlen = s - p; + *m = off - 1; + if(s == es || runlen > ml) + break; +matchloop:; + last = off; + } + return runlen; +} + +static int +lzcomp(LZstate *lz, LZblock *lzb, uchar *ep, ushort *parse, int finish) +{ + ulong cont, excost, *litlencount, *offcount; + uchar *p, *q, *s, *es; + ushort *nexts, *hash; + int v, i, h, runlen, n, now, then, m, prevlen, prevoff, maxdefer; + + litlencount = lzb->litlencount; + offcount = lzb->offcount; + nexts = lz->nexts; + hash = lz->hash; + now = lz->now; + + p = &lz->hist[lz->pos]; + if(lz->prevlen != MinMatch - 1) + p++; + + /* + * hash in the links for any hanging link positions, + * and calculate the hash for the current position. + */ + n = MinMatch; + if(n > ep - p) + n = ep - p; + cont = 0; + for(i = 0; i < n - 1; i++){ + m = now - ((MinMatch-1) - i); + if(m < lz->dot) + continue; + s = lz->hist + (((p - lz->hist) - (now - m)) & (HistBlock-1)); + + cont = (s[0] << 16) | (s[1] << 8) | s[2]; + h = hashit(cont); + prevoff = 0; + for(then = hash[h]; ; then = nexts[then & (MaxOff-1)]){ + v = (ushort)(now - then); + if(v <= prevoff || v >= (MinMatch-1) - i) + break; + prevoff = v; + } + if(then == (ushort)m) + continue; + nexts[m & (MaxOff-1)] = hash[h]; + hash[h] = m; + } + for(i = 0; i < n; i++) + cont = (cont << 8) | p[i]; + + /* + * now must point to the index in the nexts array + * corresponding to p's position in the history + */ + prevlen = lz->prevlen; + prevoff = lz->prevoff; + maxdefer = lz->maxcheck >> 2; + excost = 0; + v = lzb->lastv; + for(;;){ + es = p + MaxMatch; + if(es > ep){ + if(!finish || p >= ep) + break; + es = ep; + } + + h = hashit(cont); + runlen = lzmatch(now, hash[h], p, es, nexts, lz->hist, prevlen, lz->maxcheck, &m); + + /* + * back out of small matches too far in the past + */ + if(runlen == MinMatch && m >= MinMatchMaxOff){ + runlen = MinMatch - 1; + m = 0; + } + + /* + * record the encoding and increment counts for huffman trees + * if we get a match, defer selecting it until we check for + * a longer match at the next position. + */ + if(prevlen >= runlen && prevlen != MinMatch - 1){ + /* + * old match at least as good; use that one + */ + n = prevlen - MinMatch; + if(v || n){ + *parse++ = v | LenFlag | (n << LenShift); + *parse++ = prevoff; + }else + *parse++ = prevoff << LenShift; + v = 0; + + n = lencode[n]; + litlencount[n]++; + excost += litlenextra[n - LenStart]; + + if(prevoff < MaxOffCode) + n = offcode[prevoff]; + else + n = bigoffcode[prevoff >> 7]; + offcount[n]++; + excost += offextra[n]; + + runlen = prevlen - 1; + prevlen = MinMatch - 1; + nmatches++; + }else if(runlen == MinMatch - 1){ + /* + * no match; just put out the literal + */ + if(++v == MaxLitRun){ + *parse++ = v; + v = 0; + } + litlencount[*p]++; + nlits++; + runlen = 1; + }else{ + if(prevlen != MinMatch - 1){ + /* + * longer match now. output previous literal, + * update current match, and try again + */ + if(++v == MaxLitRun){ + *parse++ = v; + v = 0; + } + litlencount[p[-1]]++; + nlits++; + } + + prevoff = m; + + if(runlen < maxdefer){ + prevlen = runlen; + runlen = 1; + }else{ + n = runlen - MinMatch; + if(v || n){ + *parse++ = v | LenFlag | (n << LenShift); + *parse++ = prevoff; + }else + *parse++ = prevoff << LenShift; + v = 0; + + n = lencode[n]; + litlencount[n]++; + excost += litlenextra[n - LenStart]; + + if(prevoff < MaxOffCode) + n = offcode[prevoff]; + else + n = bigoffcode[prevoff >> 7]; + offcount[n]++; + excost += offextra[n]; + + prevlen = MinMatch - 1; + nmatches++; + } + } + + /* + * update the hash for the newly matched data + * this is constructed so the link for the old + * match in this position must be at the end of a chain, + * and will expire when this match is added, ie it will + * never be examined by the match loop. + * add to the hash chain only if we have the real hash data. + */ + for(q = p + runlen; p != q; p++){ + if(p + MinMatch <= ep){ + h = hashit(cont); + nexts[now & (MaxOff-1)] = hash[h]; + hash[h] = now; + if(p + MinMatch < ep) + cont = (cont << 8) | p[MinMatch]; + } + now++; + } + } + + /* + * we can just store away the lazy state and + * pick it up next time. the last block will have finish set + * so we won't have any pending matches + * however, we need to correct for how much we've encoded + */ + if(prevlen != MinMatch - 1) + p--; + + lzb->excost += excost; + lzb->eparse = parse; + lzb->lastv = v; + + lz->now = now; + lz->prevlen = prevlen; + lz->prevoff = prevoff; + + return p - &lz->hist[lz->pos]; +} + +/* + * make up the dynamic code tables, and return the number of bits + * needed to transmit them. + */ +static int +huffcodes(Dyncode *dc, Huff *littab, Huff *offtab) +{ + Huff *codetab; + uchar *codes, *codeaux; + ulong codecount[Nclen], excost; + int i, n, m, v, c, nlit, noff, ncode, nclen; + + codetab = dc->codetab; + codes = dc->codes; + codeaux = dc->codeaux; + + /* + * trim the sizes of the tables + */ + for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--) + ; + for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--) + ; + + /* + * make the code-length code + */ + for(i = 0; i < nlit; i++) + codes[i] = littab[i].bits; + for(i = 0; i < noff; i++) + codes[i + nlit] = offtab[i].bits; + + /* + * run-length compress the code-length code + */ + excost = 0; + c = 0; + ncode = nlit+noff; + for(i = 0; i < ncode; ){ + n = i + 1; + v = codes[i]; + while(n < ncode && v == codes[n]) + n++; + n -= i; + i += n; + if(v == 0){ + while(n >= 11){ + m = n; + if(m > 138) + m = 138; + codes[c] = 18; + codeaux[c++] = m - 11; + n -= m; + excost += 7; + } + if(n >= 3){ + codes[c] = 17; + codeaux[c++] = n - 3; + n = 0; + excost += 3; + } + } + while(n--){ + codes[c++] = v; + while(n >= 3){ + m = n; + if(m > 6) + m = 6; + codes[c] = 16; + codeaux[c++] = m - 3; + n -= m; + excost += 3; + } + } + } + + memset(codecount, 0, sizeof codecount); + for(i = 0; i < c; i++) + codecount[codes[i]]++; + if(!mkgzprecode(codetab, codecount, Nclen, 8)) + return -1; + + for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--) + ; + + dc->nlit = nlit; + dc->noff = noff; + dc->nclen = nclen; + dc->ncode = c; + + return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost; +} + +static void +wrdyncode(LZstate *out, Dyncode *dc) +{ + Huff *codetab; + uchar *codes, *codeaux; + int i, v, c; + + /* + * write out header, then code length code lengths, + * and code lengths + */ + lzput(out, dc->nlit-257, 5); + lzput(out, dc->noff-1, 5); + lzput(out, dc->nclen-4, 4); + + codetab = dc->codetab; + for(i = 0; i < dc->nclen; i++) + lzput(out, codetab[clenorder[i]].bits, 3); + + codes = dc->codes; + codeaux = dc->codeaux; + c = dc->ncode; + for(i = 0; i < c; i++){ + v = codes[i]; + lzput(out, codetab[v].encode, codetab[v].bits); + if(v >= 16){ + if(v == 16) + lzput(out, codeaux[i], 2); + else if(v == 17) + lzput(out, codeaux[i], 3); + else /* v == 18 */ + lzput(out, codeaux[i], 7); + } + } +} + +static int +bitcost(Huff *tab, ulong *count, int n) +{ + ulong tot; + int i; + + tot = 0; + for(i = 0; i < n; i++) + tot += count[i] * tab[i].bits; + return tot; +} + +static int +mkgzprecode(Huff *tab, ulong *count, int n, int maxbits) +{ + ulong bitcount[MaxHuffBits]; + int i, nbits; + + nbits = mkprecode(tab, count, n, maxbits, bitcount); + for(i = 0; i < n; i++){ + if(tab[i].bits == -1) + tab[i].bits = 0; + else if(tab[i].bits == 0){ + if(nbits != 0 || bitcount[0] != 1) + return 0; + bitcount[1] = 1; + bitcount[0] = 0; + nbits = 1; + tab[i].bits = 1; + } + } + if(bitcount[0] != 0) + return 0; + return hufftabinit(tab, n, bitcount, nbits); +} + +static int +hufftabinit(Huff *tab, int n, ulong *bitcount, int nbits) +{ + ulong code, nc[MaxHuffBits]; + int i, bits; + + code = 0; + for(bits = 1; bits <= nbits; bits++){ + code = (code + bitcount[bits-1]) << 1; + nc[bits] = code; + } + + for(i = 0; i < n; i++){ + bits = tab[i].bits; + if(bits){ + code = nc[bits]++ << (16 - bits); + if(code & ~0xffff) + return 0; + tab[i].encode = revtab[code >> 8] | (revtab[code & 0xff] << 8); + } + } + return 1; +} + + +/* + * this should be in a library + */ +struct Chain +{ + ulong count; /* occurances of everything in the chain */ + ushort leaf; /* leaves to the left of chain, or leaf value */ + char col; /* ref count for collecting unused chains */ + char gen; /* need to generate chains for next lower level */ + Chain *up; /* Chain up in the lists */ +}; + +struct Chains +{ + Chain *lists[(MaxHuffBits - 1) * 2]; + ulong leafcount[MaxLeaf]; /* sorted list of leaf counts */ + ushort leafmap[MaxLeaf]; /* map to actual leaf number */ + int nleaf; /* number of leaves */ + Chain chains[ChainMem]; + Chain *echains; + Chain *free; + char col; + int nlists; +}; + +/* + * fast, low space overhead algorithm for max depth huffman type codes + * + * J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical + * algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms + * and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer + * Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds., + * pp 12-21, Springer Verlag, New York, 1995. + */ +static int +mkprecode(Huff *tab, ulong *count, int n, int maxbits, ulong *bitcount) +{ + Chains cs; + Chain *c; + int i, m, em, bits; + + /* + * set up the sorted list of leaves + */ + m = 0; + for(i = 0; i < n; i++){ + tab[i].bits = -1; + tab[i].encode = 0; + if(count[i] != 0){ + cs.leafcount[m] = count[i]; + cs.leafmap[m] = i; + m++; + } + } + if(m < 2){ + if(m != 0){ + tab[cs.leafmap[0]].bits = 0; + bitcount[0] = 1; + }else + bitcount[0] = 0; + return 0; + } + cs.nleaf = m; + leafsort(cs.leafcount, cs.leafmap, 0, m); + + for(i = 0; i < m; i++) + cs.leafcount[i] = count[cs.leafmap[i]]; + + /* + * set up free list + */ + cs.free = &cs.chains[2]; + cs.echains = &cs.chains[ChainMem]; + cs.col = 1; + + /* + * initialize chains for each list + */ + c = &cs.chains[0]; + c->count = cs.leafcount[0]; + c->leaf = 1; + c->col = cs.col; + c->up = nil; + c->gen = 0; + cs.chains[1] = cs.chains[0]; + cs.chains[1].leaf = 2; + cs.chains[1].count = cs.leafcount[1]; + for(i = 0; i < maxbits-1; i++){ + cs.lists[i * 2] = &cs.chains[0]; + cs.lists[i * 2 + 1] = &cs.chains[1]; + } + + cs.nlists = 2 * (maxbits - 1); + m = 2 * m - 2; + for(i = 2; i < m; i++) + nextchain(&cs, cs.nlists - 2); + + bits = 0; + bitcount[0] = cs.nleaf; + for(c = cs.lists[cs.nlists - 1]; c != nil; c = c->up){ + m = c->leaf; + bitcount[bits++] -= m; + bitcount[bits] = m; + } + m = 0; + for(i = bits; i >= 0; i--) + for(em = m + bitcount[i]; m < em; m++) + tab[cs.leafmap[m]].bits = i; + + return bits; +} + +/* + * calculate the next chain on the list + * we can always toss out the old chain + */ +static void +nextchain(Chains *cs, int list) +{ + Chain *c, *oc; + int i, nleaf, sumc; + + oc = cs->lists[list + 1]; + cs->lists[list] = oc; + if(oc == nil) + return; + + /* + * make sure we have all chains needed to make sumc + * note it is possible to generate only one of these, + * use twice that value for sumc, and then generate + * the second if that preliminary sumc would be chosen. + * however, this appears to be slower on current tests + */ + if(oc->gen){ + nextchain(cs, list - 2); + nextchain(cs, list - 2); + oc->gen = 0; + } + + /* + * pick up the chain we're going to add; + * collect unused chains no free ones are left + */ + for(c = cs->free; ; c++){ + if(c >= cs->echains){ + cs->col++; + for(i = 0; i < cs->nlists; i++) + for(c = cs->lists[i]; c != nil; c = c->up) + c->col = cs->col; + c = cs->chains; + } + if(c->col != cs->col) + break; + } + + /* + * pick the cheapest of + * 1) the next package from the previous list + * 2) the next leaf + */ + nleaf = oc->leaf; + sumc = 0; + if(list > 0 && cs->lists[list-1] != nil) + sumc = cs->lists[list-2]->count + cs->lists[list-1]->count; + if(sumc != 0 && (nleaf >= cs->nleaf || cs->leafcount[nleaf] > sumc)){ + c->count = sumc; + c->leaf = oc->leaf; + c->up = cs->lists[list-1]; + c->gen = 1; + }else if(nleaf >= cs->nleaf){ + cs->lists[list + 1] = nil; + return; + }else{ + c->leaf = nleaf + 1; + c->count = cs->leafcount[nleaf]; + c->up = oc->up; + c->gen = 0; + } + cs->free = c + 1; + + cs->lists[list + 1] = c; + c->col = cs->col; +} + +static int +pivot(ulong *c, int a, int n) +{ + int j, pi, pj, pk; + + j = n/6; + pi = a + j; /* 1/6 */ + j += j; + pj = pi + j; /* 1/2 */ + pk = pj + j; /* 5/6 */ + if(c[pi] < c[pj]){ + if(c[pi] < c[pk]){ + if(c[pj] < c[pk]) + return pj; + return pk; + } + return pi; + } + if(c[pj] < c[pk]){ + if(c[pi] < c[pk]) + return pi; + return pk; + } + return pj; +} + +static void +leafsort(ulong *leafcount, ushort *leafmap, int a, int n) +{ + ulong t; + int j, pi, pj, pn; + + while(n > 1){ + if(n > 10){ + pi = pivot(leafcount, a, n); + }else + pi = a + (n>>1); + + t = leafcount[pi]; + leafcount[pi] = leafcount[a]; + leafcount[a] = t; + t = leafmap[pi]; + leafmap[pi] = leafmap[a]; + leafmap[a] = t; + pi = a; + pn = a + n; + pj = pn; + for(;;){ + do + pi++; + while(pi < pn && (leafcount[pi] < leafcount[a] || leafcount[pi] == leafcount[a] && leafmap[pi] > leafmap[a])); + do + pj--; + while(pj > a && (leafcount[pj] > leafcount[a] || leafcount[pj] == leafcount[a] && leafmap[pj] < leafmap[a])); + if(pj < pi) + break; + t = leafcount[pi]; + leafcount[pi] = leafcount[pj]; + leafcount[pj] = t; + t = leafmap[pi]; + leafmap[pi] = leafmap[pj]; + leafmap[pj] = t; + } + t = leafcount[a]; + leafcount[a] = leafcount[pj]; + leafcount[pj] = t; + t = leafmap[a]; + leafmap[a] = leafmap[pj]; + leafmap[pj] = t; + j = pj - a; + + n = n-j-1; + if(j >= n){ + leafsort(leafcount, leafmap, a, j); + a += j+1; + }else{ + leafsort(leafcount, leafmap, a + (j+1), n); + n = j; + } + } +} |