jediacademy/code/zlib32/deflate.h

231 lines
9.3 KiB
C
Raw Normal View History

2013-04-04 22:35:38 +00:00
// Stream status
#define INIT_STATE 42
#define BUSY_STATE 113
#define FINISH_STATE 666
#define HASH_BITS 15
#define HASH_SIZE (1 << HASH_BITS)
#define HASH_MASK (HASH_SIZE - 1)
// Size of match buffer for literals/lengths. There are 4 reasons for
// limiting lit_bufsize to 64K:
// - frequencies can be kept in 16 bit counters
// - if compression is not successful for the first block, all input
// data is still in the window so we can still emit a stored block even
// when input comes from standard input. (This can also be done for
// all blocks if lit_bufsize is not greater than 32K.)
// - if compression is not successful for a file smaller than 64K, we can
// even emit a stored file instead of a stored block (saving 5 bytes).
// This is applicable only for zip (not gzip or zlib).
// - creating new Huffman trees less frequently may not provide fast
// adaptation to changes in the input data statistics. (Take for
// example a binary file with poorly compressible code followed by
// a highly compressible string table.) Smaller buffer sizes give
// fast adaptation but have of course the overhead of transmitting
// trees more frequently.
// - I can't count above 4
#define LIT_BUFSIZE (1 << 14)
#define MAX_BLOCK_SIZE 0xffff
// Number of bits by which ins_h must be shifted at each input
// step. It must be such that after MIN_MATCH steps, the oldest
// byte no longer takes part in the hash key.
#define HASH_SHIFT ((HASH_BITS + MIN_MATCH - 1) / MIN_MATCH)
// Matches of length 3 are discarded if their distance exceeds TOO_FAR
#define TOO_FAR 32767
// Number of length codes, not counting the special END_BLOCK code
#define LENGTH_CODES 29
// Number of codes used to transfer the bit lengths
#define BL_CODES 19
// Number of literal bytes 0..255
#define LITERALS 256
// Number of Literal or Length codes, including the END_BLOCK code
#define L_CODES (LITERALS + 1 + LENGTH_CODES)
// See definition of array dist_code below
#define DIST_CODE_LEN 512
// Maximum heap size
#define HEAP_SIZE (2 * L_CODES + 1)
// Index within the heap array of least frequent node in the Huffman tree
#define SMALLEST 1
// Bit length codes must not exceed MAX_BL_BITS bits
#define MAX_BL_BITS 7
// End of block literal code
#define END_BLOCK 256
// Repeat previous bit length 3-6 times (2 bits of repeat count)
#define REP_3_6 16
// Repeat a zero length 3-10 times (3 bits of repeat count)
#define REPZ_3_10 17
// Repeat a zero length 11-138 times (7 bits of repeat count)
#define REPZ_11_138 18
// Number of bits used within bi_buf. (bi_buf might be implemented on
// more than 16 bits on some systems.)
#define BUF_SIZE (8 * 2)
// Minimum amount of lookahead, except at the end of the input file.
// See deflate.c for comments about the MIN_MATCH+1.
#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1)
typedef enum
{
NEED_MORE, // block not completed, need more input or more output
BLOCK_DONE, // block flush performed
FINISH_STARTED, // finish started, need only more output at next deflate
FINISH_DONE // finish done, accept no more input or output
} block_state;
// Data structure describing a single value and its code string.
typedef struct ct_data_s
{
union
{
word freq; // frequency count
word code; // bit string
} fc;
union
{
word dad; // father node in Huffman tree
word len; // length of bit string
} dl;
} ct_data;
typedef struct static_tree_desc_s
{
const ct_data *static_tree; // static tree or NULL
const ulong *extra_bits; // extra bits for each code or NULL
ulong extra_base; // base index for extra_bits
ulong elems; // max number of elements in the tree
ulong max_length; // max bit length for the codes
} static_tree_desc;
typedef struct tree_desc_s
{
ct_data *dyn_tree; // the dynamic tree
ulong max_code; // largest code with non zero frequency
static_tree_desc *stat_desc; // the corresponding static tree
} tree_desc;
// Main structure which the deflate algorithm works from
typedef struct deflate_state_s
{
z_stream *z; // pointer back to this zlib stream
ulong status; // as the name implies
EFlush last_flush; // value of flush param for previous deflate call
int noheader; // suppress zlib header and adler32
byte pending_buf[MAX_BLOCK_SIZE + 5];// output still pending
byte *pending_out; // next pending byte to output to the stream
ulong pending; // nb of bytes in the pending buffer
// Sliding window. Input bytes are read into the second half of the window,
// and move to the first half later to keep a dictionary of at least wSize
// bytes. With this organization, matches are limited to a distance of
// wSize-MAX_MATCH bytes, but this ensures that IO is always
// performed with a length multiple of the block size. Also, it limits
// the window size to 64K, which is quite useful on MSDOS.
// To do: use the user input buffer as sliding window.
byte window[WINDOW_SIZE * 2];
// Link to older string with same hash index. To limit the size of this
// array to 64K, this link is maintained only for the last 32K strings.
// An index in this array is thus a window index modulo 32K.
word prev[WINDOW_SIZE];
word head[HASH_SIZE]; // Heads of the hash chains or NULL.
ulong ins_h; // hash index of string to be inserted
// Window position at the beginning of the current output block. Gets
// negative when the window is moved backwards.
int block_start;
ulong match_length; // length of best match
ulong prev_match; // previous match
ulong match_available; // set if previous match exists
ulong strstart; // start of string to insert
ulong match_start; // start of matching string
ulong lookahead; // number of valid bytes ahead in window
// Length of the best match at previous step. Matches not greater than this
// are discarded. This is used in the lazy match evaluation.
ulong prev_length;
// Attempt to find a better match only when the current match is strictly
// smaller than this value. This mechanism is used only for compression levels >= 4.
ulong max_lazy_match;
ulong good_match; // Use a faster search when the previous match is longer than this
ulong nice_match; // Stop searching when current match exceeds this
// To speed up deflation, hash chains are never searched beyond this
// length. A higher limit improves compression ratio but degrades the speed.
ulong max_chain_length;
ELevel level; // compression level (0..9)
ct_data dyn_ltree[HEAP_SIZE]; // literal and length tree
ct_data dyn_dtree[(2 * D_CODES) + 1]; // distance tree
ct_data bl_tree[(2 * BL_CODES) + 1]; // Huffman tree for bit lengths
tree_desc l_desc; // desc. for literal tree
tree_desc d_desc; // desc. for distance tree
tree_desc bl_desc; // desc. for bit length tree
word bl_count[MAX_WBITS + 1]; // number of codes at each bit length for an optimal tree
// The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
// The same heap array is used to build all trees.
ulong heap[(2 * L_CODES) + 1]; // heap used to build the Huffman trees
ulong heap_len; // number of elements in the heap
ulong heap_max; // element of largest frequency
byte depth[(2 * L_CODES) + 1]; // Depth of each subtree used as tie breaker for trees of equal frequency
byte l_buf[LIT_BUFSIZE]; // buffer for literals or lengths
ulong last_lit; // running index in l_buf
// Buffer for distances. To simplify the code, d_buf and l_buf have
// the same number of elements. To use different lengths, an extra flag
// array would be necessary.
word d_buf[LIT_BUFSIZE];
ulong opt_len; // bit length of current block with optimal trees
ulong static_len; // bit length of current block with static trees
ulong matches; // number of string matches in current block
ulong last_eob_len; // bit length of EOB code for last block
word bi_buf; // Output buffer. bits are inserted starting at the bottom (least significant bits).
ulong bi_valid; // Number of valid bits in bi_buf. All bits above the last valid bit are always zero.
ulong adler;
} deflate_state;
// Compression function. Returns the block state after the call.
typedef block_state (*compress_func) (deflate_state *s, EFlush flush);
typedef struct config_s
{
word good_length; // reduce lazy search above this match length
word max_lazy; // do not perform lazy search above this match length
word nice_length; // quit search above this match length
word max_chain;
compress_func func;
} config;
// end