File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / rsync / zlib / deflate.h
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Fri Feb 17 15:09:30 2012 UTC (12 years, 4 months ago) by misho
Branches: rsync, MAIN
CVS tags: rsync3_0_9p0, RSYNC3_0_9, HEAD
rsync

    1: /* deflate.h -- internal compression state
    2:  * Copyright (C) 1995-2004 Jean-loup Gailly
    3:  * For conditions of distribution and use, see copyright notice in zlib.h
    4:  */
    5: 
    6: /* WARNING: this file should *not* be used by applications. It is
    7:    part of the implementation of the compression library and is
    8:    subject to change. Applications should only use zlib.h.
    9:  */
   10: 
   11: /* @(#) $Id: deflate.h,v 1.1.1.1 2012/02/17 15:09:30 misho Exp $ */
   12: 
   13: #ifndef DEFLATE_H
   14: #define DEFLATE_H
   15: 
   16: #include "zutil.h"
   17: 
   18: /* define NO_GZIP when compiling if you want to disable gzip header and
   19:    trailer creation by deflate().  NO_GZIP would be used to avoid linking in
   20:    the crc code when it is not needed.  For shared libraries, gzip encoding
   21:    should be left enabled. */
   22: #ifndef NO_GZIP
   23: #  define GZIP
   24: #endif
   25: 
   26: /* ===========================================================================
   27:  * Internal compression state.
   28:  */
   29: 
   30: #define LENGTH_CODES 29
   31: /* number of length codes, not counting the special END_BLOCK code */
   32: 
   33: #define LITERALS  256
   34: /* number of literal bytes 0..255 */
   35: 
   36: #define L_CODES (LITERALS+1+LENGTH_CODES)
   37: /* number of Literal or Length codes, including the END_BLOCK code */
   38: 
   39: #define D_CODES   30
   40: /* number of distance codes */
   41: 
   42: #define BL_CODES  19
   43: /* number of codes used to transfer the bit lengths */
   44: 
   45: #define HEAP_SIZE (2*L_CODES+1)
   46: /* maximum heap size */
   47: 
   48: #define MAX_BITS 15
   49: /* All codes must not exceed MAX_BITS bits */
   50: 
   51: #define INIT_STATE    42
   52: #define EXTRA_STATE   69
   53: #define NAME_STATE    73
   54: #define COMMENT_STATE 91
   55: #define HCRC_STATE   103
   56: #define BUSY_STATE   113
   57: #define FINISH_STATE 666
   58: /* Stream status */
   59: 
   60: 
   61: /* Data structure describing a single value and its code string. */
   62: typedef struct ct_data_s {
   63:     union {
   64:         ush  freq;       /* frequency count */
   65:         ush  code;       /* bit string */
   66:     } fc;
   67:     union {
   68:         ush  dad;        /* father node in Huffman tree */
   69:         ush  len;        /* length of bit string */
   70:     } dl;
   71: } FAR ct_data;
   72: 
   73: #define Freq fc.freq
   74: #define Code fc.code
   75: #define Dad  dl.dad
   76: #define Len  dl.len
   77: 
   78: typedef struct static_tree_desc_s  static_tree_desc;
   79: 
   80: typedef struct tree_desc_s {
   81:     ct_data *dyn_tree;           /* the dynamic tree */
   82:     int     max_code;            /* largest code with non zero frequency */
   83:     static_tree_desc *stat_desc; /* the corresponding static tree */
   84: } FAR tree_desc;
   85: 
   86: typedef ush Pos;
   87: typedef Pos FAR Posf;
   88: typedef unsigned IPos;
   89: 
   90: /* A Pos is an index in the character window. We use short instead of int to
   91:  * save space in the various tables. IPos is used only for parameter passing.
   92:  */
   93: 
   94: typedef struct internal_state {
   95:     z_streamp strm;      /* pointer back to this zlib stream */
   96:     int   status;        /* as the name implies */
   97:     Bytef *pending_buf;  /* output still pending */
   98:     ulg   pending_buf_size; /* size of pending_buf */
   99:     Bytef *pending_out;  /* next pending byte to output to the stream */
  100:     uInt   pending;      /* nb of bytes in the pending buffer */
  101:     int   wrap;          /* bit 0 true for zlib, bit 1 true for gzip */
  102:     gz_headerp  gzhead;  /* gzip header information to write */
  103:     uInt   gzindex;      /* where in extra, name, or comment */
  104:     Byte  method;        /* STORED (for zip only) or DEFLATED */
  105:     int   last_flush;    /* value of flush param for previous deflate call */
  106: 
  107:                 /* used by deflate.c: */
  108: 
  109:     uInt  w_size;        /* LZ77 window size (32K by default) */
  110:     uInt  w_bits;        /* log2(w_size)  (8..16) */
  111:     uInt  w_mask;        /* w_size - 1 */
  112: 
  113:     Bytef *window;
  114:     /* Sliding window. Input bytes are read into the second half of the window,
  115:      * and move to the first half later to keep a dictionary of at least wSize
  116:      * bytes. With this organization, matches are limited to a distance of
  117:      * wSize-MAX_MATCH bytes, but this ensures that IO is always
  118:      * performed with a length multiple of the block size. Also, it limits
  119:      * the window size to 64K, which is quite useful on MSDOS.
  120:      * To do: use the user input buffer as sliding window.
  121:      */
  122: 
  123:     ulg window_size;
  124:     /* Actual size of window: 2*wSize, except when the user input buffer
  125:      * is directly used as sliding window.
  126:      */
  127: 
  128:     Posf *prev;
  129:     /* Link to older string with same hash index. To limit the size of this
  130:      * array to 64K, this link is maintained only for the last 32K strings.
  131:      * An index in this array is thus a window index modulo 32K.
  132:      */
  133: 
  134:     Posf *head; /* Heads of the hash chains or NIL. */
  135: 
  136:     uInt  ins_h;          /* hash index of string to be inserted */
  137:     uInt  hash_size;      /* number of elements in hash table */
  138:     uInt  hash_bits;      /* log2(hash_size) */
  139:     uInt  hash_mask;      /* hash_size-1 */
  140: 
  141:     uInt  hash_shift;
  142:     /* Number of bits by which ins_h must be shifted at each input
  143:      * step. It must be such that after MIN_MATCH steps, the oldest
  144:      * byte no longer takes part in the hash key, that is:
  145:      *   hash_shift * MIN_MATCH >= hash_bits
  146:      */
  147: 
  148:     long block_start;
  149:     /* Window position at the beginning of the current output block. Gets
  150:      * negative when the window is moved backwards.
  151:      */
  152: 
  153:     uInt match_length;           /* length of best match */
  154:     IPos prev_match;             /* previous match */
  155:     int match_available;         /* set if previous match exists */
  156:     uInt strstart;               /* start of string to insert */
  157:     uInt match_start;            /* start of matching string */
  158:     uInt lookahead;              /* number of valid bytes ahead in window */
  159: 
  160:     uInt prev_length;
  161:     /* Length of the best match at previous step. Matches not greater than this
  162:      * are discarded. This is used in the lazy match evaluation.
  163:      */
  164: 
  165:     uInt max_chain_length;
  166:     /* To speed up deflation, hash chains are never searched beyond this
  167:      * length.  A higher limit improves compression ratio but degrades the
  168:      * speed.
  169:      */
  170: 
  171:     uInt max_lazy_match;
  172:     /* Attempt to find a better match only when the current match is strictly
  173:      * smaller than this value. This mechanism is used only for compression
  174:      * levels >= 4.
  175:      */
  176: #   define max_insert_length  max_lazy_match
  177:     /* Insert new strings in the hash table only if the match length is not
  178:      * greater than this length. This saves time but degrades compression.
  179:      * max_insert_length is used only for compression levels <= 3.
  180:      */
  181: 
  182:     int level;    /* compression level (1..9) */
  183:     int strategy; /* favor or force Huffman coding*/
  184: 
  185:     uInt good_match;
  186:     /* Use a faster search when the previous match is longer than this */
  187: 
  188:     int nice_match; /* Stop searching when current match exceeds this */
  189: 
  190:                 /* used by trees.c: */
  191:     /* Didn't use ct_data typedef below to supress compiler warning */
  192:     struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
  193:     struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
  194:     struct ct_data_s bl_tree[2*BL_CODES+1];  /* Huffman tree for bit lengths */
  195: 
  196:     struct tree_desc_s l_desc;               /* desc. for literal tree */
  197:     struct tree_desc_s d_desc;               /* desc. for distance tree */
  198:     struct tree_desc_s bl_desc;              /* desc. for bit length tree */
  199: 
  200:     ush bl_count[MAX_BITS+1];
  201:     /* number of codes at each bit length for an optimal tree */
  202: 
  203:     int heap[2*L_CODES+1];      /* heap used to build the Huffman trees */
  204:     int heap_len;               /* number of elements in the heap */
  205:     int heap_max;               /* element of largest frequency */
  206:     /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
  207:      * The same heap array is used to build all trees.
  208:      */
  209: 
  210:     uch depth[2*L_CODES+1];
  211:     /* Depth of each subtree used as tie breaker for trees of equal frequency
  212:      */
  213: 
  214:     uchf *l_buf;          /* buffer for literals or lengths */
  215: 
  216:     uInt  lit_bufsize;
  217:     /* Size of match buffer for literals/lengths.  There are 4 reasons for
  218:      * limiting lit_bufsize to 64K:
  219:      *   - frequencies can be kept in 16 bit counters
  220:      *   - if compression is not successful for the first block, all input
  221:      *     data is still in the window so we can still emit a stored block even
  222:      *     when input comes from standard input.  (This can also be done for
  223:      *     all blocks if lit_bufsize is not greater than 32K.)
  224:      *   - if compression is not successful for a file smaller than 64K, we can
  225:      *     even emit a stored file instead of a stored block (saving 5 bytes).
  226:      *     This is applicable only for zip (not gzip or zlib).
  227:      *   - creating new Huffman trees less frequently may not provide fast
  228:      *     adaptation to changes in the input data statistics. (Take for
  229:      *     example a binary file with poorly compressible code followed by
  230:      *     a highly compressible string table.) Smaller buffer sizes give
  231:      *     fast adaptation but have of course the overhead of transmitting
  232:      *     trees more frequently.
  233:      *   - I can't count above 4
  234:      */
  235: 
  236:     uInt last_lit;      /* running index in l_buf */
  237: 
  238:     ushf *d_buf;
  239:     /* Buffer for distances. To simplify the code, d_buf and l_buf have
  240:      * the same number of elements. To use different lengths, an extra flag
  241:      * array would be necessary.
  242:      */
  243: 
  244:     ulg opt_len;        /* bit length of current block with optimal trees */
  245:     ulg static_len;     /* bit length of current block with static trees */
  246:     uInt matches;       /* number of string matches in current block */
  247:     int last_eob_len;   /* bit length of EOB code for last block */
  248: 
  249: #ifdef DEBUG
  250:     ulg compressed_len; /* total bit length of compressed file mod 2^32 */
  251:     ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
  252: #endif
  253: 
  254:     ush bi_buf;
  255:     /* Output buffer. bits are inserted starting at the bottom (least
  256:      * significant bits).
  257:      */
  258:     int bi_valid;
  259:     /* Number of valid bits in bi_buf.  All bits above the last valid bit
  260:      * are always zero.
  261:      */
  262: 
  263: } FAR deflate_state;
  264: 
  265: /* Output a byte on the stream.
  266:  * IN assertion: there is enough room in pending_buf.
  267:  */
  268: #define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
  269: 
  270: 
  271: #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
  272: /* Minimum amount of lookahead, except at the end of the input file.
  273:  * See deflate.c for comments about the MIN_MATCH+1.
  274:  */
  275: 
  276: #define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)
  277: /* In order to simplify the code, particularly on 16 bit machines, match
  278:  * distances are limited to MAX_DIST instead of WSIZE.
  279:  */
  280: 
  281:         /* in trees.c */
  282: void _tr_init         OF((deflate_state *s));
  283: int  _tr_tally        OF((deflate_state *s, unsigned dist, unsigned lc));
  284: void _tr_flush_block  OF((deflate_state *s, charf *buf, ulg stored_len,
  285:                           int eof));
  286: void _tr_align        OF((deflate_state *s));
  287: void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
  288:                           int eof));
  289: 
  290: #define d_code(dist) \
  291:    ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
  292: /* Mapping from a distance to a distance code. dist is the distance - 1 and
  293:  * must not have side effects. _dist_code[256] and _dist_code[257] are never
  294:  * used.
  295:  */
  296: 
  297: #ifndef DEBUG
  298: /* Inline versions of _tr_tally for speed: */
  299: 
  300: #if defined(GEN_TREES_H) || !defined(STDC)
  301:   extern uch _length_code[];
  302:   extern uch _dist_code[];
  303: #else
  304:   extern const uch _length_code[];
  305:   extern const uch _dist_code[];
  306: #endif
  307: 
  308: # define _tr_tally_lit(s, c, flush) \
  309:   { uch cc = (c); \
  310:     s->d_buf[s->last_lit] = 0; \
  311:     s->l_buf[s->last_lit++] = cc; \
  312:     s->dyn_ltree[cc].Freq++; \
  313:     flush = (s->last_lit == s->lit_bufsize-1); \
  314:    }
  315: # define _tr_tally_dist(s, distance, length, flush) \
  316:   { uch len = (length); \
  317:     ush dist = (distance); \
  318:     s->d_buf[s->last_lit] = dist; \
  319:     s->l_buf[s->last_lit++] = len; \
  320:     dist--; \
  321:     s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
  322:     s->dyn_dtree[d_code(dist)].Freq++; \
  323:     flush = (s->last_lit == s->lit_bufsize-1); \
  324:   }
  325: #else
  326: # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
  327: # define _tr_tally_dist(s, distance, length, flush) \
  328:               flush = _tr_tally(s, distance, length)
  329: #endif
  330: 
  331: #endif /* DEFLATE_H */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>