array = xzalloc((size_t)(((size)+1L)/2) * 2*sizeof(type)); \
}
-#define FREE(array) { \
+#define FREE(array) \
+{ \
free(array); \
array = NULL; \
}
};
-/* ===========================================================================
- */
-static void fill_window(void);
-
-static int longest_match(IPos cur_match);
-
-#ifdef DEBUG
-static void check_match(IPos start, IPos match, int length);
-#endif
-
-
-/* from trees.c */
-static int ct_tally(int dist, int lc);
-static ulg flush_block(char *buf, ulg stored_len, int eof);
-
/* global buffers */
/* To save memory for 16 bit systems, some arrays are overlaid between
}
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead, and sets eofile if end of input file.
+ * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
+ * OUT assertions: at least one byte has been read, or eofile is set;
+ * file reads are performed for at least two bytes (required for the
+ * translate_eol option).
+ */
+static void fill_window(void)
+{
+ unsigned n, m;
+ unsigned more = WINDOW_SIZE - lookahead - strstart;
+ /* Amount of free space at the end of the window. */
+
+ /* If the window is almost full and there is insufficient lookahead,
+ * move the upper half to the lower one to make room in the upper half.
+ */
+ if (more == (unsigned) -1) {
+ /* Very unlikely, but possible on 16 bit machine if strstart == 0
+ * and lookahead == 1 (input done one byte at time)
+ */
+ more--;
+ } else if (strstart >= WSIZE + MAX_DIST) {
+ /* By the IN assertion, the window is not empty so we can't confuse
+ * more == 0 with more == 64K on a 16 bit machine.
+ */
+ Assert(WINDOW_SIZE == 2 * WSIZE, "no sliding with BIG_MEM");
+
+ memcpy(window, window + WSIZE, WSIZE);
+ match_start -= WSIZE;
+ strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
+
+ block_start -= WSIZE;
+
+ for (n = 0; n < HASH_SIZE; n++) {
+ m = head[n];
+ head[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0);
+ }
+ for (n = 0; n < WSIZE; n++) {
+ m = prev[n];
+ prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0);
+ /* If n is not on any hash chain, prev[n] is garbage but
+ * its value will never be used.
+ */
+ }
+ more += WSIZE;
+ }
+ /* At this point, more >= 2 */
+ if (!eofile) {
+ n = file_read(window + strstart + lookahead, more);
+ if (n == 0 || n == (unsigned) -1) {
+ eofile = 1;
+ } else {
+ lookahead += n;
+ }
+ }
+}
+
+
/* ===========================================================================
* Update a hash value with the given input byte
* IN assertion: all calls to to UPDATE_HASH are made with consecutive
#endif
-/* ===========================================================================
- * Fill the window when the lookahead becomes insufficient.
- * Updates strstart and lookahead, and sets eofile if end of input file.
- * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
- * OUT assertions: at least one byte has been read, or eofile is set;
- * file reads are performed for at least two bytes (required for the
- * translate_eol option).
+/* trees.c -- output deflated data using Huffman coding
+ * Copyright (C) 1992-1993 Jean-loup Gailly
+ * This is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License, see the file COPYING.
*/
-static void fill_window(void)
-{
- unsigned n, m;
- unsigned more = WINDOW_SIZE - lookahead - strstart;
- /* Amount of free space at the end of the window. */
- /* If the window is almost full and there is insufficient lookahead,
- * move the upper half to the lower one to make room in the upper half.
- */
- if (more == (unsigned) -1) {
- /* Very unlikely, but possible on 16 bit machine if strstart == 0
- * and lookahead == 1 (input done one byte at time)
- */
- more--;
- } else if (strstart >= WSIZE + MAX_DIST) {
- /* By the IN assertion, the window is not empty so we can't confuse
- * more == 0 with more == 64K on a 16 bit machine.
- */
- Assert(WINDOW_SIZE == 2 * WSIZE, "no sliding with BIG_MEM");
+/* PURPOSE
+ * Encode various sets of source values using variable-length
+ * binary code trees.
+ *
+ * DISCUSSION
+ * The PKZIP "deflation" process uses several Huffman trees. The more
+ * common source values are represented by shorter bit sequences.
+ *
+ * Each code tree is stored in the ZIP file in a compressed form
+ * which is itself a Huffman encoding of the lengths of
+ * all the code strings (in ascending order by source values).
+ * The actual code strings are reconstructed from the lengths in
+ * the UNZIP process, as described in the "application note"
+ * (APPNOTE.TXT) distributed as part of PKWARE's PKZIP program.
+ *
+ * REFERENCES
+ * Lynch, Thomas J.
+ * Data Compression: Techniques and Applications, pp. 53-55.
+ * Lifetime Learning Publications, 1985. ISBN 0-534-03418-7.
+ *
+ * Storer, James A.
+ * Data Compression: Methods and Theory, pp. 49-50.
+ * Computer Science Press, 1988. ISBN 0-7167-8156-5.
+ *
+ * Sedgewick, R.
+ * Algorithms, p290.
+ * Addison-Wesley, 1983. ISBN 0-201-06672-6.
+ *
+ * INTERFACE
+ * void ct_init(ush *attr, int *methodp)
+ * Allocate the match buffer, initialize the various tables and save
+ * the location of the internal file attribute (ascii/binary) and
+ * method (DEFLATE/STORE)
+ *
+ * void ct_tally(int dist, int lc);
+ * Save the match info and tally the frequency counts.
+ *
+ * long flush_block (char *buf, ulg stored_len, int eof)
+ * Determine the best encoding for the current block: dynamic trees,
+ * static trees or store, and output the encoded block to the zip
+ * file. Returns the total compressed length for the file so far.
+ */
- memcpy(window, window + WSIZE, WSIZE);
- match_start -= WSIZE;
- strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
+#define MAX_BITS 15
+/* All codes must not exceed MAX_BITS bits */
- block_start -= WSIZE;
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
- for (n = 0; n < HASH_SIZE; n++) {
- m = head[n];
- head[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0);
- }
- for (n = 0; n < WSIZE; n++) {
- m = prev[n];
- prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0);
- /* If n is not on any hash chain, prev[n] is garbage but
- * its value will never be used.
- */
- }
- more += WSIZE;
- }
- /* At this point, more >= 2 */
- if (!eofile) {
- n = file_read(window + strstart + lookahead, more);
- if (n == 0 || n == (unsigned) -1) {
- eofile = 1;
- } else {
- lookahead += n;
- }
- }
-}
+#define LENGTH_CODES 29
+/* number of length codes, not counting the special END_BLOCK code */
+#define LITERALS 256
+/* number of literal bytes 0..255 */
-/* ===========================================================================
- * Same as above, but achieves better compression. We use a lazy
- * evaluation for matches: a match is finally adopted only if there is
- * no better match at the next window position.
- *
- * Processes a new input file and return its compressed length. Sets
- * the compressed length, crc, deflate flags and internal file
- * attributes.
- */
+#define END_BLOCK 256
+/* end of block literal code */
-/* Flush the current block, with given end-of-file flag.
- * IN assertion: strstart is set to the end of the current match. */
-#define FLUSH_BLOCK(eof) \
- flush_block( \
- block_start >= 0L \
- ? (char*)&window[(unsigned)block_start] \
- : (char*)NULL, \
- (long)strstart - block_start, \
- (eof) \
- )
+#define L_CODES (LITERALS+1+LENGTH_CODES)
+/* number of Literal or Length codes, including the END_BLOCK code */
-/* Insert string s in the dictionary and set match_head to the previous head
- * of the hash chain (the most recent string with same hash key). Return
- * the previous length of the hash chain.
- * IN assertion: all calls to to INSERT_STRING are made with consecutive
- * input characters and the first MIN_MATCH bytes of s are valid
- * (except for the last MIN_MATCH-1 bytes of the input file). */
-#define INSERT_STRING(s, match_head) \
-{ \
- UPDATE_HASH(ins_h, window[(s) + MIN_MATCH-1]); \
- prev[(s) & WMASK] = match_head = head[ins_h]; \
- head[ins_h] = (s); \
-}
+#define D_CODES 30
+/* number of distance codes */
-static ulg deflate(void)
-{
- IPos hash_head; /* head of hash chain */
- IPos prev_match; /* previous match */
- int flush; /* set if current block must be flushed */
- int match_available = 0; /* set if previous match exists */
- unsigned match_length = MIN_MATCH - 1; /* length of best match */
+#define BL_CODES 19
+/* number of codes used to transfer the bit lengths */
- /* Process the input block. */
- while (lookahead != 0) {
- /* Insert the string window[strstart .. strstart+2] in the
- * dictionary, and set hash_head to the head of the hash chain:
- */
- INSERT_STRING(strstart, hash_head);
+typedef uch extra_bits_t;
- /* Find the longest match, discarding those <= prev_length.
- */
- prev_length = match_length, prev_match = match_start;
- match_length = MIN_MATCH - 1;
+/* extra bits for each length code */
+static const extra_bits_t extra_lbits[LENGTH_CODES]= {
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4,
+ 4, 4, 5, 5, 5, 5, 0
+};
- if (hash_head != 0 && prev_length < max_lazy_match
- && strstart - hash_head <= MAX_DIST
- ) {
- /* To simplify the code, we prevent matches with the string
- * of window index 0 (in particular we have to avoid a match
- * of the string with itself at the start of the input file).
- */
- match_length = longest_match(hash_head);
- /* longest_match() sets match_start */
- if (match_length > lookahead)
- match_length = lookahead;
-
- /* Ignore a length 3 match if it is too distant: */
- if (match_length == MIN_MATCH && strstart - match_start > TOO_FAR) {
- /* If prev_match is also MIN_MATCH, match_start is garbage
- * but we will ignore the current match anyway.
- */
- match_length--;
- }
- }
- /* If there was a match at the previous step and the current
- * match is not better, output the previous match:
- */
- if (prev_length >= MIN_MATCH && match_length <= prev_length) {
- check_match(strstart - 1, prev_match, prev_length);
- flush = ct_tally(strstart - 1 - prev_match, prev_length - MIN_MATCH);
-
- /* Insert in hash table all strings up to the end of the match.
- * strstart-1 and strstart are already inserted.
- */
- lookahead -= prev_length - 1;
- prev_length -= 2;
- do {
- strstart++;
- INSERT_STRING(strstart, hash_head);
- /* strstart never exceeds WSIZE-MAX_MATCH, so there are
- * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
- * these bytes are garbage, but it does not matter since the
- * next lookahead bytes will always be emitted as literals.
- */
- } while (--prev_length != 0);
- match_available = 0;
- match_length = MIN_MATCH - 1;
- strstart++;
- if (flush) {
- FLUSH_BLOCK(0);
- block_start = strstart;
- }
- } else if (match_available) {
- /* If there was no match at the previous position, output a
- * single literal. If there was a match but the current match
- * is longer, truncate the previous match to a single literal.
- */
- Tracevv((stderr, "%c", window[strstart - 1]));
- if (ct_tally(0, window[strstart - 1])) {
- FLUSH_BLOCK(0);
- block_start = strstart;
- }
- strstart++;
- lookahead--;
- } else {
- /* There is no previous match to compare with, wait for
- * the next step to decide.
- */
- match_available = 1;
- strstart++;
- lookahead--;
- }
- Assert(strstart <= isize && lookahead <= isize, "a bit too far");
-
- /* Make sure that we always have enough lookahead, except
- * at the end of the input file. We need MAX_MATCH bytes
- * for the next match, plus MIN_MATCH bytes to insert the
- * string following the next match.
- */
- while (lookahead < MIN_LOOKAHEAD && !eofile)
- fill_window();
- }
- if (match_available)
- ct_tally(0, window[strstart - 1]);
-
- return FLUSH_BLOCK(1); /* eof */
-}
-/* trees.c -- output deflated data using Huffman coding
- * Copyright (C) 1992-1993 Jean-loup Gailly
- * This is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License, see the file COPYING.
- */
-
-/* PURPOSE
- * Encode various sets of source values using variable-length
- * binary code trees.
- *
- * DISCUSSION
- * The PKZIP "deflation" process uses several Huffman trees. The more
- * common source values are represented by shorter bit sequences.
- *
- * Each code tree is stored in the ZIP file in a compressed form
- * which is itself a Huffman encoding of the lengths of
- * all the code strings (in ascending order by source values).
- * The actual code strings are reconstructed from the lengths in
- * the UNZIP process, as described in the "application note"
- * (APPNOTE.TXT) distributed as part of PKWARE's PKZIP program.
- *
- * REFERENCES
- * Lynch, Thomas J.
- * Data Compression: Techniques and Applications, pp. 53-55.
- * Lifetime Learning Publications, 1985. ISBN 0-534-03418-7.
- *
- * Storer, James A.
- * Data Compression: Methods and Theory, pp. 49-50.
- * Computer Science Press, 1988. ISBN 0-7167-8156-5.
- *
- * Sedgewick, R.
- * Algorithms, p290.
- * Addison-Wesley, 1983. ISBN 0-201-06672-6.
- *
- * INTERFACE
- * void ct_init(ush *attr, int *methodp)
- * Allocate the match buffer, initialize the various tables and save
- * the location of the internal file attribute (ascii/binary) and
- * method (DEFLATE/STORE)
- *
- * void ct_tally(int dist, int lc);
- * Save the match info and tally the frequency counts.
- *
- * long flush_block (char *buf, ulg stored_len, int eof)
- * Determine the best encoding for the current block: dynamic trees,
- * static trees or store, and output the encoded block to the zip
- * file. Returns the total compressed length for the file so far.
- */
-
-/* ===========================================================================
- * Constants
- */
-
-#define MAX_BITS 15
-/* All codes must not exceed MAX_BITS bits */
-
-#define MAX_BL_BITS 7
-/* Bit length codes must not exceed MAX_BL_BITS bits */
-
-#define LENGTH_CODES 29
-/* number of length codes, not counting the special END_BLOCK code */
-
-#define LITERALS 256
-/* number of literal bytes 0..255 */
-
-#define END_BLOCK 256
-/* end of block literal code */
-
-#define L_CODES (LITERALS+1+LENGTH_CODES)
-/* number of Literal or Length codes, including the END_BLOCK code */
-
-#define D_CODES 30
-/* number of distance codes */
-
-#define BL_CODES 19
-/* number of codes used to transfer the bit lengths */
-
-typedef uch extra_bits_t;
-
-/* extra bits for each length code */
-static const extra_bits_t extra_lbits[LENGTH_CODES]= {
- 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4,
- 4, 4, 5, 5, 5, 5, 0
-};
-
-/* extra bits for each distance code */
-static const extra_bits_t extra_dbits[D_CODES] = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,
- 10, 10, 11, 11, 12, 12, 13, 13
-};
+/* extra bits for each distance code */
+static const extra_bits_t extra_dbits[D_CODES] = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,
+ 10, 10, 11, 11, 12, 12, 13, 13
+};
/* extra bits for each bit length code */
static const extra_bits_t extra_blbits[BL_CODES] = {
/* repeat a zero length 11-138 times (7 bits of repeat count) */
/* ===========================================================================
- * Local data
- */
-
+*/
/* Data structure describing a single value and its code string. */
typedef struct ct_data {
union {
/* ===========================================================================
*/
-static void init_block(void);
-static void gen_bitlen(tree_desc * desc);
static void gen_codes(ct_data * tree, int max_code);
static void build_tree(tree_desc * desc);
static void scan_tree(ct_data * tree, int max_code);
static int build_bl_tree(void);
static void send_all_trees(int lcodes, int dcodes, int blcodes);
static void compress_block(ct_data * ltree, ct_data * dtree);
-static void set_file_type(void);
#ifndef DEBUG
/* Mapping from a distance to a distance code. dist is the distance - 1 and
* must not have side effects. dist_code[256] and dist_code[257] are never
* used.
+ * The arguments must not have side effects.
+ */
+
+
+/* ===========================================================================
+ * Initialize a new block.
*/
+static void init_block(void)
+{
+ int n; /* iterates over tree elements */
+
+ /* Initialize the trees. */
+ for (n = 0; n < L_CODES; n++)
+ dyn_ltree[n].Freq = 0;
+ for (n = 0; n < D_CODES; n++)
+ dyn_dtree[n].Freq = 0;
+ for (n = 0; n < BL_CODES; n++)
+ bl_tree[n].Freq = 0;
-/* the arguments must not have side effects */
+ dyn_ltree[END_BLOCK].Freq = 1;
+ opt_len = static_len = 0L;
+ last_lit = last_dist = last_flags = 0;
+ flags = 0;
+ flag_bit = 1;
+}
/* ===========================================================================
for (code = 0; code < LENGTH_CODES - 1; code++) {
base_length[code] = length;
for (n = 0; n < (1 << extra_lbits[code]); n++) {
- length_code[length++] = (uch) code;
+ length_code[length++] = code;
}
}
Assert(length == 256, "ct_init: length != 256");
* in two different ways: code 284 + 5 bits or code 285, so we
* overwrite length_code[255] to use the best encoding:
*/
- length_code[length - 1] = (uch) code;
+ length_code[length - 1] = code;
/* Initialize the mapping dist (0..32K) -> dist code (0..29) */
dist = 0;
}
-/* ===========================================================================
- * Initialize a new block.
- */
-static void init_block(void)
-{
- int n; /* iterates over tree elements */
-
- /* Initialize the trees. */
- for (n = 0; n < L_CODES; n++)
- dyn_ltree[n].Freq = 0;
- for (n = 0; n < D_CODES; n++)
- dyn_dtree[n].Freq = 0;
- for (n = 0; n < BL_CODES; n++)
- bl_tree[n].Freq = 0;
-
- dyn_ltree[END_BLOCK].Freq = 1;
- opt_len = static_len = 0L;
- last_lit = last_dist = last_flags = 0;
- flags = 0;
- flag_bit = 1;
-}
-
-
/* ===========================================================================
* Restore the heap property by moving down the tree starting at node k,
* exchanging a node with the smallest of its two sons if necessary, stopping
opt_len += (ulg) f *(bits + xbits);
if (stree)
- static_len += (ulg) f *(stree[n].Len + xbits);
+ static_len += (ulg) f * (stree[n].Len + xbits);
}
if (overflow == 0)
return;
}
}
+
/* ===========================================================================
* Generate the codes for a given tree and bit counts (which need not be
* optimal).
}
}
+
/* ===========================================================================
* Construct one Huffman tree and assigns the code bit strings and lengths.
* Update the total bit length for the current block.
gen_codes((ct_data *) tree, max_code);
}
+
/* ===========================================================================
* Scan a literal or distance tree to determine the frequencies of the codes
* in the bit length tree. Updates opt_len to take into account the repeat
}
}
+
/* ===========================================================================
* Send a literal or distance tree in compressed form, using the codes in
* bl_tree.
}
}
+
/* ===========================================================================
* Construct the Huffman tree for the bit lengths and return the index in
* bl_order of the last bit length code to send.
return max_blindex;
}
+
/* ===========================================================================
* Send the header for a block using dynamic Huffman trees: the counts, the
* lengths of the bit length codes, the literal tree and the distance tree.
Tracev((stderr, "\ndist tree: sent %ld", bits_sent));
}
+
/* ===========================================================================
- * Determine the best encoding for the current block: dynamic trees, static
- * trees or store, and output the encoded block to the zip file. This function
- * returns the total compressed length for the file so far.
+ * Set the file type to ASCII or BINARY, using a crude approximation:
+ * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
+ * IN assertion: the fields freq of dyn_ltree are set and the total of all
+ * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
*/
-static ulg flush_block(char *buf, ulg stored_len, int eof)
+static void set_file_type(void)
{
- ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
- int max_blindex; /* index of last bit length code of non zero freq */
-
- flag_buf[last_flags] = flags; /* Save the flags for the last 8 items */
-
- /* Check if the file is ascii or binary */
- if (*file_type == (ush) UNKNOWN)
- set_file_type();
+ int n = 0;
+ unsigned ascii_freq = 0;
+ unsigned bin_freq = 0;
- /* Construct the literal and distance trees */
- build_tree((tree_desc *) (&l_desc));
- Tracev((stderr, "\nlit data: dyn %ld, stat %ld", opt_len, static_len));
+ while (n < 7)
+ bin_freq += dyn_ltree[n++].Freq;
+ while (n < 128)
+ ascii_freq += dyn_ltree[n++].Freq;
+ while (n < LITERALS)
+ bin_freq += dyn_ltree[n++].Freq;
+ *file_type = (bin_freq > (ascii_freq >> 2)) ? BINARY : ASCII;
+ if (*file_type == BINARY && translate_eol) {
+ bb_error_msg("-l used on binary file");
+ }
+}
- build_tree((tree_desc *) (&d_desc));
- Tracev((stderr, "\ndist data: dyn %ld, stat %ld", opt_len, static_len));
- /* At this point, opt_len and static_len are the total bit lengths of
- * the compressed block data, excluding the tree representations.
- */
-
- /* Build the bit length tree for the above two trees, and get the index
- * in bl_order of the last bit length code to send.
- */
- max_blindex = build_bl_tree();
-
- /* Determine the best encoding. Compute first the block length in bytes */
- opt_lenb = (opt_len + 3 + 7) >> 3;
- static_lenb = (static_len + 3 + 7) >> 3;
-
- Trace((stderr,
- "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ",
- opt_lenb, opt_len, static_lenb, static_len, stored_len,
- last_lit, last_dist));
-
- if (static_lenb <= opt_lenb)
- opt_lenb = static_lenb;
-
- /* If compression failed and this is the first and last block,
- * and if the zip file can be seeked (to rewrite the local header),
- * the whole file is transformed into a stored file:
- */
- if (stored_len <= opt_lenb && eof && compressed_len == 0L && seekable()) {
- /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */
- if (buf == NULL)
- bb_error_msg("block vanished");
-
- copy_block(buf, (unsigned) stored_len, 0); /* without header */
- compressed_len = stored_len << 3;
- *file_method = STORED;
-
- } else if (stored_len + 4 <= opt_lenb && buf != (char *) 0) {
- /* 4: two words for the lengths */
- /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
- * Otherwise we can't have processed more than WSIZE input bytes since
- * the last block flush, because compression would have been
- * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
- * transform a block into a stored block.
- */
- send_bits((STORED_BLOCK << 1) + eof, 3); /* send block type */
- compressed_len = (compressed_len + 3 + 7) & ~7L;
- compressed_len += (stored_len + 4) << 3;
-
- copy_block(buf, (unsigned) stored_len, 1); /* with header */
-
- } else if (static_lenb == opt_lenb) {
- send_bits((STATIC_TREES << 1) + eof, 3);
- compress_block((ct_data *) static_ltree, (ct_data *) static_dtree);
- compressed_len += 3 + static_len;
- } else {
- send_bits((DYN_TREES << 1) + eof, 3);
- send_all_trees(l_desc.max_code + 1, d_desc.max_code + 1,
- max_blindex + 1);
- compress_block((ct_data *) dyn_ltree, (ct_data *) dyn_dtree);
- compressed_len += 3 + opt_len;
- }
- Assert(compressed_len == bits_sent, "bad compressed size");
- init_block();
-
- if (eof) {
- bi_windup();
- compressed_len += 7; /* align on byte boundary */
- }
- Tracev((stderr, "\ncomprlen %lu(%lu) ", compressed_len >> 3,
- compressed_len - 7 * eof));
-
- return compressed_len >> 3;
-}
/* ===========================================================================
* Save the match info and tally the frequency counts. Return true if
SEND_CODE(END_BLOCK, ltree);
}
+
/* ===========================================================================
- * Set the file type to ASCII or BINARY, using a crude approximation:
- * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
- * IN assertion: the fields freq of dyn_ltree are set and the total of all
- * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
+ * Determine the best encoding for the current block: dynamic trees, static
+ * trees or store, and output the encoded block to the zip file. This function
+ * returns the total compressed length for the file so far.
*/
-static void set_file_type(void)
+static ulg flush_block(char *buf, ulg stored_len, int eof)
{
- int n = 0;
- unsigned ascii_freq = 0;
- unsigned bin_freq = 0;
+ ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
+ int max_blindex; /* index of last bit length code of non zero freq */
- while (n < 7)
- bin_freq += dyn_ltree[n++].Freq;
- while (n < 128)
- ascii_freq += dyn_ltree[n++].Freq;
- while (n < LITERALS)
- bin_freq += dyn_ltree[n++].Freq;
- *file_type = (bin_freq > (ascii_freq >> 2)) ? BINARY : ASCII;
- if (*file_type == BINARY && translate_eol) {
- bb_error_msg("-l used on binary file");
+ flag_buf[last_flags] = flags; /* Save the flags for the last 8 items */
+
+ /* Check if the file is ascii or binary */
+ if (*file_type == (ush) UNKNOWN)
+ set_file_type();
+
+ /* Construct the literal and distance trees */
+ build_tree((tree_desc *) (&l_desc));
+ Tracev((stderr, "\nlit data: dyn %ld, stat %ld", opt_len, static_len));
+
+ build_tree((tree_desc *) (&d_desc));
+ Tracev((stderr, "\ndist data: dyn %ld, stat %ld", opt_len, static_len));
+ /* At this point, opt_len and static_len are the total bit lengths of
+ * the compressed block data, excluding the tree representations.
+ */
+
+ /* Build the bit length tree for the above two trees, and get the index
+ * in bl_order of the last bit length code to send.
+ */
+ max_blindex = build_bl_tree();
+
+ /* Determine the best encoding. Compute first the block length in bytes */
+ opt_lenb = (opt_len + 3 + 7) >> 3;
+ static_lenb = (static_len + 3 + 7) >> 3;
+
+ Trace((stderr,
+ "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ",
+ opt_lenb, opt_len, static_lenb, static_len, stored_len,
+ last_lit, last_dist));
+
+ if (static_lenb <= opt_lenb)
+ opt_lenb = static_lenb;
+
+ /* If compression failed and this is the first and last block,
+ * and if the zip file can be seeked (to rewrite the local header),
+ * the whole file is transformed into a stored file:
+ */
+ if (stored_len <= opt_lenb && eof && compressed_len == 0L && seekable()) {
+ /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */
+ if (buf == NULL)
+ bb_error_msg("block vanished");
+
+ copy_block(buf, (unsigned) stored_len, 0); /* without header */
+ compressed_len = stored_len << 3;
+ *file_method = STORED;
+
+ } else if (stored_len + 4 <= opt_lenb && buf != (char *) 0) {
+ /* 4: two words for the lengths */
+ /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
+ * Otherwise we can't have processed more than WSIZE input bytes since
+ * the last block flush, because compression would have been
+ * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
+ * transform a block into a stored block.
+ */
+ send_bits((STORED_BLOCK << 1) + eof, 3); /* send block type */
+ compressed_len = (compressed_len + 3 + 7) & ~7L;
+ compressed_len += (stored_len + 4) << 3;
+
+ copy_block(buf, (unsigned) stored_len, 1); /* with header */
+
+ } else if (static_lenb == opt_lenb) {
+ send_bits((STATIC_TREES << 1) + eof, 3);
+ compress_block((ct_data *) static_ltree, (ct_data *) static_dtree);
+ compressed_len += 3 + static_len;
+ } else {
+ send_bits((DYN_TREES << 1) + eof, 3);
+ send_all_trees(l_desc.max_code + 1, d_desc.max_code + 1,
+ max_blindex + 1);
+ compress_block((ct_data *) dyn_ltree, (ct_data *) dyn_dtree);
+ compressed_len += 3 + opt_len;
+ }
+ Assert(compressed_len == bits_sent, "bad compressed size");
+ init_block();
+
+ if (eof) {
+ bi_windup();
+ compressed_len += 7; /* align on byte boundary */
+ }
+ Tracev((stderr, "\ncomprlen %lu(%lu) ", compressed_len >> 3,
+ compressed_len - 7 * eof));
+
+ return compressed_len >> 3;
+}
+
+
+/* ===========================================================================
+ * Same as above, but achieves better compression. We use a lazy
+ * evaluation for matches: a match is finally adopted only if there is
+ * no better match at the next window position.
+ *
+ * Processes a new input file and return its compressed length. Sets
+ * the compressed length, crc, deflate flags and internal file
+ * attributes.
+ */
+
+/* Flush the current block, with given end-of-file flag.
+ * IN assertion: strstart is set to the end of the current match. */
+#define FLUSH_BLOCK(eof) \
+ flush_block( \
+ block_start >= 0L \
+ ? (char*)&window[(unsigned)block_start] \
+ : (char*)NULL, \
+ (long)strstart - block_start, \
+ (eof) \
+ )
+
+/* Insert string s in the dictionary and set match_head to the previous head
+ * of the hash chain (the most recent string with same hash key). Return
+ * the previous length of the hash chain.
+ * IN assertion: all calls to to INSERT_STRING are made with consecutive
+ * input characters and the first MIN_MATCH bytes of s are valid
+ * (except for the last MIN_MATCH-1 bytes of the input file). */
+#define INSERT_STRING(s, match_head) \
+{ \
+ UPDATE_HASH(ins_h, window[(s) + MIN_MATCH-1]); \
+ prev[(s) & WMASK] = match_head = head[ins_h]; \
+ head[ins_h] = (s); \
+}
+
+static ulg deflate(void)
+{
+ IPos hash_head; /* head of hash chain */
+ IPos prev_match; /* previous match */
+ int flush; /* set if current block must be flushed */
+ int match_available = 0; /* set if previous match exists */
+ unsigned match_length = MIN_MATCH - 1; /* length of best match */
+
+ /* Process the input block. */
+ while (lookahead != 0) {
+ /* Insert the string window[strstart .. strstart+2] in the
+ * dictionary, and set hash_head to the head of the hash chain:
+ */
+ INSERT_STRING(strstart, hash_head);
+
+ /* Find the longest match, discarding those <= prev_length.
+ */
+ prev_length = match_length, prev_match = match_start;
+ match_length = MIN_MATCH - 1;
+
+ if (hash_head != 0 && prev_length < max_lazy_match
+ && strstart - hash_head <= MAX_DIST
+ ) {
+ /* To simplify the code, we prevent matches with the string
+ * of window index 0 (in particular we have to avoid a match
+ * of the string with itself at the start of the input file).
+ */
+ match_length = longest_match(hash_head);
+ /* longest_match() sets match_start */
+ if (match_length > lookahead)
+ match_length = lookahead;
+
+ /* Ignore a length 3 match if it is too distant: */
+ if (match_length == MIN_MATCH && strstart - match_start > TOO_FAR) {
+ /* If prev_match is also MIN_MATCH, match_start is garbage
+ * but we will ignore the current match anyway.
+ */
+ match_length--;
+ }
+ }
+ /* If there was a match at the previous step and the current
+ * match is not better, output the previous match:
+ */
+ if (prev_length >= MIN_MATCH && match_length <= prev_length) {
+ check_match(strstart - 1, prev_match, prev_length);
+ flush = ct_tally(strstart - 1 - prev_match, prev_length - MIN_MATCH);
+
+ /* Insert in hash table all strings up to the end of the match.
+ * strstart-1 and strstart are already inserted.
+ */
+ lookahead -= prev_length - 1;
+ prev_length -= 2;
+ do {
+ strstart++;
+ INSERT_STRING(strstart, hash_head);
+ /* strstart never exceeds WSIZE-MAX_MATCH, so there are
+ * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
+ * these bytes are garbage, but it does not matter since the
+ * next lookahead bytes will always be emitted as literals.
+ */
+ } while (--prev_length != 0);
+ match_available = 0;
+ match_length = MIN_MATCH - 1;
+ strstart++;
+ if (flush) {
+ FLUSH_BLOCK(0);
+ block_start = strstart;
+ }
+ } else if (match_available) {
+ /* If there was no match at the previous position, output a
+ * single literal. If there was a match but the current match
+ * is longer, truncate the previous match to a single literal.
+ */
+ Tracevv((stderr, "%c", window[strstart - 1]));
+ if (ct_tally(0, window[strstart - 1])) {
+ FLUSH_BLOCK(0);
+ block_start = strstart;
+ }
+ strstart++;
+ lookahead--;
+ } else {
+ /* There is no previous match to compare with, wait for
+ * the next step to decide.
+ */
+ match_available = 1;
+ strstart++;
+ lookahead--;
+ }
+ Assert(strstart <= isize && lookahead <= isize, "a bit too far");
+
+ /* Make sure that we always have enough lookahead, except
+ * at the end of the input file. We need MAX_MATCH bytes
+ * for the next match, plus MIN_MATCH bytes to insert the
+ * string following the next match.
+ */
+ while (lookahead < MIN_LOOKAHEAD && !eofile)
+ fill_window();
}
+ if (match_available)
+ ct_tally(0, window[strstart - 1]);
+
+ return FLUSH_BLOCK(1); /* eof */
}
+
/* ===========================================================================
* Deflate in to out.
* IN assertions: the input and output buffers are cleared.