// Stream status #define INIT_STATE 42 #define BUSY_STATE 113 #define FINISH_STATE 666 #define HASH_BITS 15 #define HASH_SIZE (1 << HASH_BITS) #define HASH_MASK (HASH_SIZE - 1) // Size of match buffer for literals/lengths. There are 4 reasons for // limiting lit_bufsize to 64K: // - frequencies can be kept in 16 bit counters // - if compression is not successful for the first block, all input // data is still in the window so we can still emit a stored block even // when input comes from standard input. (This can also be done for // all blocks if lit_bufsize is not greater than 32K.) // - if compression is not successful for a file smaller than 64K, we can // even emit a stored file instead of a stored block (saving 5 bytes). // This is applicable only for zip (not gzip or zlib). // - creating new Huffman trees less frequently may not provide fast // adaptation to changes in the input data statistics. (Take for // example a binary file with poorly compressible code followed by // a highly compressible string table.) Smaller buffer sizes give // fast adaptation but have of course the overhead of transmitting // trees more frequently. // - I can't count above 4 #define LIT_BUFSIZE (1 << 14) #define MAX_BLOCK_SIZE 0xffff // Number of bits by which ins_h must be shifted at each input // step. It must be such that after MIN_MATCH steps, the oldest // byte no longer takes part in the hash key. #define HASH_SHIFT ((HASH_BITS + MIN_MATCH - 1) / MIN_MATCH) // Matches of length 3 are discarded if their distance exceeds TOO_FAR #define TOO_FAR 32767 // Number of length codes, not counting the special END_BLOCK code #define LENGTH_CODES 29 // Number of codes used to transfer the bit lengths #define BL_CODES 19 // Number of literal bytes 0..255 #define LITERALS 256 // Number of Literal or Length codes, including the END_BLOCK code #define L_CODES (LITERALS + 1 + LENGTH_CODES) // See definition of array dist_code below #define DIST_CODE_LEN 512 // Maximum heap size #define HEAP_SIZE (2 * L_CODES + 1) // Index within the heap array of least frequent node in the Huffman tree #define SMALLEST 1 // Bit length codes must not exceed MAX_BL_BITS bits #define MAX_BL_BITS 7 // End of block literal code #define END_BLOCK 256 // Repeat previous bit length 3-6 times (2 bits of repeat count) #define REP_3_6 16 // Repeat a zero length 3-10 times (3 bits of repeat count) #define REPZ_3_10 17 // Repeat a zero length 11-138 times (7 bits of repeat count) #define REPZ_11_138 18 // Number of bits used within bi_buf. (bi_buf might be implemented on // more than 16 bits on some systems.) #define BUF_SIZE (8 * 2) // Minimum amount of lookahead, except at the end of the input file. // See deflate.c for comments about the MIN_MATCH+1. #define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) typedef enum { NEED_MORE, // block not completed, need more input or more output BLOCK_DONE, // block flush performed FINISH_STARTED, // finish started, need only more output at next deflate FINISH_DONE // finish done, accept no more input or output } block_state; // Data structure describing a single value and its code string. typedef struct ct_data_s { union { word freq; // frequency count word code; // bit string } fc; union { word dad; // father node in Huffman tree word len; // length of bit string } dl; } ct_data; typedef struct static_tree_desc_s { const ct_data *static_tree; // static tree or NULL const ulong *extra_bits; // extra bits for each code or NULL ulong extra_base; // base index for extra_bits ulong elems; // max number of elements in the tree ulong max_length; // max bit length for the codes } static_tree_desc; typedef struct tree_desc_s { ct_data *dyn_tree; // the dynamic tree ulong max_code; // largest code with non zero frequency static_tree_desc *stat_desc; // the corresponding static tree } tree_desc; // Main structure which the deflate algorithm works from typedef struct deflate_state_s { z_stream *z; // pointer back to this zlib stream ulong status; // as the name implies EFlush last_flush; // value of flush param for previous deflate call int noheader; // suppress zlib header and adler32 byte pending_buf[MAX_BLOCK_SIZE + 5];// output still pending byte *pending_out; // next pending byte to output to the stream ulong pending; // nb of bytes in the pending buffer // Sliding window. Input bytes are read into the second half of the window, // and move to the first half later to keep a dictionary of at least wSize // bytes. With this organization, matches are limited to a distance of // wSize-MAX_MATCH bytes, but this ensures that IO is always // performed with a length multiple of the block size. Also, it limits // the window size to 64K, which is quite useful on MSDOS. // To do: use the user input buffer as sliding window. byte window[WINDOW_SIZE * 2]; // Link to older string with same hash index. To limit the size of this // array to 64K, this link is maintained only for the last 32K strings. // An index in this array is thus a window index modulo 32K. word prev[WINDOW_SIZE]; word head[HASH_SIZE]; // Heads of the hash chains or NULL. ulong ins_h; // hash index of string to be inserted // Window position at the beginning of the current output block. Gets // negative when the window is moved backwards. int block_start; ulong match_length; // length of best match ulong prev_match; // previous match ulong match_available; // set if previous match exists ulong strstart; // start of string to insert ulong match_start; // start of matching string ulong lookahead; // number of valid bytes ahead in window // Length of the best match at previous step. Matches not greater than this // are discarded. This is used in the lazy match evaluation. ulong prev_length; // Attempt to find a better match only when the current match is strictly // smaller than this value. This mechanism is used only for compression levels >= 4. ulong max_lazy_match; ulong good_match; // Use a faster search when the previous match is longer than this ulong nice_match; // Stop searching when current match exceeds this // To speed up deflation, hash chains are never searched beyond this // length. A higher limit improves compression ratio but degrades the speed. ulong max_chain_length; ELevel level; // compression level (0..9) ct_data dyn_ltree[HEAP_SIZE]; // literal and length tree ct_data dyn_dtree[(2 * D_CODES) + 1]; // distance tree ct_data bl_tree[(2 * BL_CODES) + 1]; // Huffman tree for bit lengths tree_desc l_desc; // desc. for literal tree tree_desc d_desc; // desc. for distance tree tree_desc bl_desc; // desc. for bit length tree word bl_count[MAX_WBITS + 1]; // number of codes at each bit length for an optimal tree // The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. // The same heap array is used to build all trees. ulong heap[(2 * L_CODES) + 1]; // heap used to build the Huffman trees ulong heap_len; // number of elements in the heap ulong heap_max; // element of largest frequency byte depth[(2 * L_CODES) + 1]; // Depth of each subtree used as tie breaker for trees of equal frequency byte l_buf[LIT_BUFSIZE]; // buffer for literals or lengths ulong last_lit; // running index in l_buf // Buffer for distances. To simplify the code, d_buf and l_buf have // the same number of elements. To use different lengths, an extra flag // array would be necessary. word d_buf[LIT_BUFSIZE]; ulong opt_len; // bit length of current block with optimal trees ulong static_len; // bit length of current block with static trees ulong matches; // number of string matches in current block ulong last_eob_len; // bit length of EOB code for last block word bi_buf; // Output buffer. bits are inserted starting at the bottom (least significant bits). ulong bi_valid; // Number of valid bits in bi_buf. All bits above the last valid bit are always zero. ulong adler; } deflate_state; // Compression function. Returns the block state after the call. typedef block_state (*compress_func) (deflate_state *s, EFlush flush); typedef struct config_s { word good_length; // reduce lazy search above this match length word max_lazy; // do not perform lazy search above this match length word nice_length; // quit search above this match length word max_chain; compress_func func; } config; // end