aa: 85.1% -- replaced with aa.gz
*/
//config:config GZIP
-//config: bool "gzip (19 kb)"
+//config: bool "gzip (17 kb)"
//config: default y
//config: help
//config: gzip is used to compress files.
#include "libbb.h"
#include "bb_archive.h"
-
/* ===========================================================================
*/
//#define DEBUG 1
# define Tracecv(c,x)
#endif
-
/* ===========================================================================
*/
#if CONFIG_GZIP_FAST == 0
# define MAX_SUFFIX 30
#endif
-
/* ===========================================================================
* Compile with MEDIUM_MEM to reduce the memory requirements or
* with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
* affects the compression ratio. The compressed output
* is still correct, and might even be smaller in some cases.
*/
-
#ifdef SMALL_MEM
-# define HASH_BITS 13 /* Number of bits used to hash strings */
+# define HASH_BITS 13 /* Number of bits used to hash strings */
#endif
#ifdef MEDIUM_MEM
-# define HASH_BITS 14
+# define HASH_BITS 14
#endif
#ifndef HASH_BITS
-# define HASH_BITS 15
+# define HASH_BITS 15
/* For portability to 16 bit machines, do not use values above 15. */
#endif
#endif
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
-
/* ===========================================================================
* These types are not really 'char', 'short' and 'long'
*/
#endif /* ENABLE_FEATURE_GZIP_LEVELS */
};
-
struct globals {
+/* =========================================================================== */
+/* global buffers, allocated once */
+
+#define DECLARE(type, array, size) \
+ type * array
+#define ALLOC(type, array, size) \
+ array = xzalloc((size_t)(((size)+1L)/2) * 2*sizeof(type))
+#define FREE(array) \
+ do { free(array); array = NULL; } while (0)
+
+ /* buffer for literals or lengths */
+ /* DECLARE(uch, l_buf, LIT_BUFSIZE); */
+ DECLARE(uch, l_buf, INBUFSIZ);
+
+ DECLARE(ush, d_buf, DIST_BUFSIZE);
+ DECLARE(uch, outbuf, OUTBUFSIZ);
+
+/* Sliding window. Input bytes are read into the second half of the window,
+ * and move to the first half later to keep a dictionary of at least WSIZE
+ * bytes. With this organization, matches are limited to a distance of
+ * WSIZE-MAX_MATCH bytes, but this ensures that IO is always
+ * performed with a length multiple of the block size. Also, it limits
+ * the window size to 64K, which is quite useful on MSDOS.
+ * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would
+ * be less efficient).
+ */
+ DECLARE(uch, window, 2L * WSIZE);
+
+/* Link to older string with same hash index. To limit the size of this
+ * array to 64K, this link is maintained only for the last 32K strings.
+ * An index in this array is thus a window index modulo 32K.
+ */
+ /* DECLARE(Pos, prev, WSIZE); */
+ DECLARE(ush, prev, 1L << BITS);
+
+/* Heads of the hash chains or 0. */
+ /* DECLARE(Pos, head, 1<<HASH_BITS); */
+#define head (G1.prev + WSIZE) /* hash head (see deflate.c) */
#if ENABLE_FEATURE_GZIP_LEVELS
unsigned max_chain_length;
#define nice_match (G1.nice_match)
#endif
+/* =========================================================================== */
+/* all members below are zeroed out in pack_gzip() for each next file */
+
+ uint32_t crc; /* shift register contents */
+ /*uint32_t *crc_32_tab;*/
+
/* window position at the beginning of the current output block. Gets
* negative when the window is moved backwards.
*/
#ifdef DEBUG
ulg bits_sent; /* bit length of the compressed data */
+# define DEBUG_bits_sent(v) (void)(G1.bits_sent v)
+#else
+# define DEBUG_bits_sent(v) ((void)0)
#endif
-
- /*uint32_t *crc_32_tab;*/
- uint32_t crc; /* shift register contents */
-
-/* ===========================================================================
- */
-#define DECLARE(type, array, size) \
- type * array
-#define ALLOC(type, array, size) \
- array = xzalloc((size_t)(((size)+1L)/2) * 2*sizeof(type))
-#define FREE(array) \
- do { free(array); array = NULL; } while (0)
-
- /* global buffers */
-
- /* buffer for literals or lengths */
- /* DECLARE(uch, l_buf, LIT_BUFSIZE); */
- DECLARE(uch, l_buf, INBUFSIZ);
-
- DECLARE(ush, d_buf, DIST_BUFSIZE);
- DECLARE(uch, outbuf, OUTBUFSIZ);
-
-/* Sliding window. Input bytes are read into the second half of the window,
- * and move to the first half later to keep a dictionary of at least WSIZE
- * bytes. With this organization, matches are limited to a distance of
- * WSIZE-MAX_MATCH bytes, but this ensures that IO is always
- * performed with a length multiple of the block size. Also, it limits
- * the window size to 64K, which is quite useful on MSDOS.
- * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would
- * be less efficient).
- */
- DECLARE(uch, window, 2L * WSIZE);
-
-/* Link to older string with same hash index. To limit the size of this
- * array to 64K, this link is maintained only for the last 32K strings.
- * An index in this array is thus a window index modulo 32K.
- */
- /* DECLARE(Pos, prev, WSIZE); */
- DECLARE(ush, prev, 1L << BITS);
-
-/* Heads of the hash chains or 0. */
- /* DECLARE(Pos, head, 1<<HASH_BITS); */
-#define head (G1.prev + WSIZE) /* hash head (see deflate.c) */
};
#define G1 (*(ptr_to_globals - 1))
-
/* ===========================================================================
* Write the output buffer outbuf[0..outcnt-1] and update bytes_out.
* (used for the compressed data only)
G1.outcnt = 0;
}
-
/* ===========================================================================
*/
/* put_8bit is used for the compressed output */
}
*dst = (uch)w;
w >>= 8;
+ G1.outcnt = ++outcnt;
#else
*dst = (uch)w;
w >>= 8;
G1.outcnt = outcnt + 2;
return;
}
+ G1.outcnt = ++outcnt;
#endif
/* Slowpath: we will need to do flush_outbuf() */
- G1.outcnt = ++outcnt;
if (outcnt == OUTBUFSIZ)
- flush_outbuf();
- put_8bit(w);
+ flush_outbuf(); /* here */
+ put_8bit(w); /* or here */
}
#define OPTIMIZED_PUT_32BIT (CONFIG_GZIP_FAST > 0 && BB_UNALIGNED_MEMACCESS_OK && BB_LITTLE_ENDIAN)
static void put_32bit(ulg n)
{
-#if OPTIMIZED_PUT_32BIT
- unsigned outcnt = G1.outcnt;
- if (outcnt < OUTBUFSIZ-4) {
- /* Common case */
- ulg *dst32 = (void*) &G1.outbuf[outcnt];
- *dst32 = n; /* unaligned LSB 32-bit store */
- G1.outcnt = outcnt + 4;
- return;
+ if (OPTIMIZED_PUT_32BIT) {
+ unsigned outcnt = G1.outcnt;
+ if (outcnt < OUTBUFSIZ-4) {
+ /* Common case */
+ ulg *dst32 = (void*) &G1.outbuf[outcnt];
+ *dst32 = n; /* unaligned LSB 32-bit store */
+ //bb_error_msg("%p", dst32); // store alignment debugging
+ G1.outcnt = outcnt + 4;
+ return;
+ }
}
-#endif
put_16bit(n);
put_16bit(n >> 16);
}
+static ALWAYS_INLINE void flush_outbuf_if_32bit_optimized(void)
+{
+ /* If put_32bit() performs 32bit stores && it is used in send_bits() */
+ if (OPTIMIZED_PUT_32BIT && BUF_SIZE > 16)
+ flush_outbuf();
+}
/* ===========================================================================
* Run a set of bytes through the crc shift register. If s is a NULL
G1.crc = crc32_block_endian0(G1.crc, s, n, global_crc32_table /*G1.crc_32_tab*/);
}
-
/* ===========================================================================
* Read a new buffer from the current input file, perform end-of-line
* translation, and update the crc and input file size.
return len;
}
-
/* ===========================================================================
* Send a value on a given number of bits.
* IN assertion: length <= 16 and value fits in length bits.
#ifdef DEBUG
Tracev((stderr, " l %2d v %4x ", length, value));
Assert(length > 0 && length <= 15, "invalid length");
- G1.bits_sent += length;
+ DEBUG_bits_sent(+= length);
#endif
BUILD_BUG_ON(BUF_SIZE != 32 && BUF_SIZE != 16);
G1.bi_valid = length;
}
-
/* ===========================================================================
* Reverse the first len bits of a code, using straightforward code (a faster
* method would use a table)
}
}
-
/* ===========================================================================
* Write out any remaining bits in an incomplete byte.
*/
}
G1.bi_buf = 0;
G1.bi_valid = 0;
-#ifdef DEBUG
- G1.bits_sent = (G1.bits_sent + 7) & ~7;
-#endif
+ DEBUG_bits_sent(= (G1.bits_sent + 7) & ~7);
}
-
/* ===========================================================================
* Copy a stored block to the zip file, storing first the length and its
* one's complement if requested.
bi_windup(); /* align on byte boundary */
if (header) {
- put_16bit(len);
- put_16bit(~len);
-#ifdef DEBUG
- G1.bits_sent += 2 * 16;
-#endif
+ unsigned v = ((uint16_t)len) | ((~len) << 16);
+ put_32bit(v);
+ DEBUG_bits_sent(+= 2 * 16);
}
-#ifdef DEBUG
- G1.bits_sent += (ulg) len << 3;
-#endif
+ DEBUG_bits_sent(+= (ulg) len << 3);
while (len--) {
put_8bit(*buf++);
}
+ /* The above can 32-bit misalign outbuf */
+ if (G1.outcnt & 3) /* syscalls are expensive, is it really misaligned? */
+ flush_outbuf_if_32bit_optimized();
}
-
/* ===========================================================================
* Fill the window when the lookahead becomes insufficient.
* Updates strstart and lookahead, and sets eofile if end of input file.
fill_window();
}
-
/* ===========================================================================
* Set match_start to the longest match starting at the given string and
* return its length. Matches shorter or equal to prev_length are discarded,
return best_len;
}
-
#ifdef DEBUG
/* ===========================================================================
* Check that the match at match_start is indeed a match.
ulg opt_len; /* bit length of current block with optimal trees */
ulg static_len; /* bit length of current block with static trees */
- ulg compressed_len; /* total bit length of compressed file */
+// ulg compressed_len; /* total bit length of compressed file */
};
#define G2ptr ((struct globals2*)(ptr_to_globals))
#define G2 (*G2ptr)
-
/* ===========================================================================
*/
-static void gen_codes(ct_data * tree, int max_code);
-static void build_tree(tree_desc * desc);
-static void scan_tree(ct_data * tree, int max_code);
-static void send_tree(ct_data * tree, int max_code);
-static int build_bl_tree(void);
-static void send_all_trees(int lcodes, int dcodes, int blcodes);
-static void compress_block(ct_data * ltree, ct_data * dtree);
-
-
#ifndef DEBUG
/* Send a code of the given tree. c and tree must not have side effects */
# define SEND_CODE(c, tree) send_bits(tree[c].Code, tree[c].Len)
* The arguments must not have side effects.
*/
-
/* ===========================================================================
* Initialize a new block.
*/
G2.flag_bit = 1;
}
-
/* ===========================================================================
* Restore the heap property by moving down the tree starting at node k,
* exchanging a node with the smallest of its two sons if necessary, stopping
G2.heap[k] = v;
}
-
/* ===========================================================================
* Compute the optimal bit lengths for a tree and update the total bit length
* for the current block.
}
}
-
/* ===========================================================================
* Generate the codes for a given tree and bit counts (which need not be
* optimal).
}
}
-
/* ===========================================================================
* Construct one Huffman tree and assigns the code bit strings and lengths.
* Update the total bit length for the current block.
gen_codes((ct_data *) tree, max_code);
}
-
/* ===========================================================================
* Scan a literal or distance tree to determine the frequencies of the codes
* in the bit length tree. Updates opt_len to take into account the repeat
}
}
-
/* ===========================================================================
* Send a literal or distance tree in compressed form, using the codes in
* bl_tree.
}
}
-
/* ===========================================================================
* Construct the Huffman tree for the bit lengths and return the index in
* bl_order of the last bit length code to send.
return max_blindex;
}
-
/* ===========================================================================
* Send the header for a block using dynamic Huffman trees: the counts, the
* lengths of the bit length codes, the literal tree and the distance tree.
Tracev((stderr, "\ndist tree: sent %ld", (long)G1.bits_sent));
}
-
/* ===========================================================================
* Save the match info and tally the frequency counts. Return true if
* the current block must be flushed.
SEND_CODE(END_BLOCK, ltree);
}
-
/* ===========================================================================
* Determine the best encoding for the current block: dynamic trees, static
* trees or store, and output the encoded block to the zip file. This function
* returns the total compressed length for the file so far.
*/
-static ulg flush_block(char *buf, ulg stored_len, int eof)
+static void flush_block(char *buf, ulg stored_len, int eof)
{
ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
int max_blindex; /* index of last bit length code of non zero freq */
* and if the zip file can be seeked (to rewrite the local header),
* the whole file is transformed into a stored file:
*/
- if (stored_len <= opt_lenb && eof && G2.compressed_len == 0L && seekable()) {
- /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */
- if (buf == NULL)
- bb_error_msg("block vanished");
-
- copy_block(buf, (unsigned) stored_len, 0); /* without header */
- G2.compressed_len = stored_len << 3;
- } else if (stored_len + 4 <= opt_lenb && buf != NULL) {
+// seekable() is constant FALSE in busybox, and G2.compressed_len is disabled
+// (this was the only user)
+// if (stored_len <= opt_lenb && eof && G2.compressed_len == 0L && seekable()) {
+// /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */
+// if (buf == NULL)
+// bb_error_msg("block vanished");
+//
+// G2.compressed_len = stored_len << 3;
+// copy_block(buf, (unsigned) stored_len, 0); /* without header */
+// } else
+ if (stored_len + 4 <= opt_lenb && buf != NULL) {
/* 4: two words for the lengths */
/* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
* Otherwise we can't have processed more than WSIZE input bytes since
* transform a block into a stored block.
*/
send_bits((STORED_BLOCK << 1) + eof, 3); /* send block type */
- G2.compressed_len = (G2.compressed_len + 3 + 7) & ~7L;
- G2.compressed_len += (stored_len + 4) << 3;
-
+// G2.compressed_len = ((G2.compressed_len + 3 + 7) & ~7L)
+// + ((stored_len + 4) << 3);
copy_block(buf, (unsigned) stored_len, 1); /* with header */
- } else if (static_lenb == opt_lenb) {
+ } else
+ if (static_lenb == opt_lenb) {
send_bits((STATIC_TREES << 1) + eof, 3);
compress_block((ct_data *) G2.static_ltree, (ct_data *) G2.static_dtree);
- G2.compressed_len += 3 + G2.static_len;
+// G2.compressed_len += 3 + G2.static_len;
} else {
send_bits((DYN_TREES << 1) + eof, 3);
send_all_trees(G2.l_desc.max_code + 1, G2.d_desc.max_code + 1,
max_blindex + 1);
compress_block((ct_data *) G2.dyn_ltree, (ct_data *) G2.dyn_dtree);
- G2.compressed_len += 3 + G2.opt_len;
+// G2.compressed_len += 3 + G2.opt_len;
}
- Assert(G2.compressed_len == G1.bits_sent, "bad compressed size");
+// Assert(G2.compressed_len == G1.bits_sent, "bad compressed size");
init_block();
if (eof) {
bi_windup();
- G2.compressed_len += 7; /* align on byte boundary */
+// G2.compressed_len += 7; /* align on byte boundary */
}
- Tracev((stderr, "\ncomprlen %lu(%lu) ",
- (unsigned long)G2.compressed_len >> 3,
- (unsigned long)G2.compressed_len - 7 * eof));
+// Tracev((stderr, "\ncomprlen %lu(%lu) ",
+// (unsigned long)G2.compressed_len >> 3,
+// (unsigned long)G2.compressed_len - 7 * eof));
- return G2.compressed_len >> 3;
+ return; /* was "return G2.compressed_len >> 3;" */
}
-
/* ===========================================================================
* Update a hash value with the given input byte
* IN assertion: all calls to UPDATE_HASH are made with consecutive
*/
#define UPDATE_HASH(h, c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)
-
/* ===========================================================================
* Same as above, but achieves better compression. We use a lazy
* evaluation for matches: a match is finally adopted only if there is
head[G1.ins_h] = (s); \
} while (0)
-static NOINLINE ulg deflate(void)
+static NOINLINE void deflate(void)
{
IPos hash_head; /* head of hash chain */
IPos prev_match; /* previous match */
if (match_available)
ct_tally(0, G1.window[G1.strstart - 1]);
- return FLUSH_BLOCK(1); /* eof */
+ FLUSH_BLOCK(1); /* eof */
}
-
/* ===========================================================================
* Initialize the bit string routines.
*/
{
//G1.bi_buf = 0; // globals are zeroed in pack_gzip()
//G1.bi_valid = 0; // globals are zeroed in pack_gzip()
-#ifdef DEBUG
- //G1.bits_sent = 0L; // globals are zeroed in pack_gzip()
-#endif
+ //DEBUG_bits_sent(= 0L); // globals are zeroed in pack_gzip()
}
-
/* ===========================================================================
* Initialize the "longest match" routines for a new file
*/
*/
}
-
/* ===========================================================================
* Allocate the match buffer, initialize the various tables and save the
* location of the internal file attribute (ascii/binary) and method
int code; /* code value */
int dist; /* distance index */
- //G2.compressed_len = 0L; // globals are zeroed in pack_gzip()
+// //G2.compressed_len = 0L; // globals are zeroed in pack_gzip()
#ifdef NOT_NEEDED
if (G2.static_dtree[0].Len != 0)
n = 0;
while (n <= 143) {
G2.static_ltree[n++].Len = 8;
- G2.bl_count[8]++;
+ //G2.bl_count[8]++;
}
+ //G2.bl_count[8] = 143 + 1;
while (n <= 255) {
G2.static_ltree[n++].Len = 9;
- G2.bl_count[9]++;
+ //G2.bl_count[9]++;
}
+ //G2.bl_count[9] = 255 - 143;
while (n <= 279) {
G2.static_ltree[n++].Len = 7;
- G2.bl_count[7]++;
+ //G2.bl_count[7]++;
}
+ //G2.bl_count[7] = 279 - 255;
while (n <= 287) {
G2.static_ltree[n++].Len = 8;
- G2.bl_count[8]++;
+ //G2.bl_count[8]++;
}
+ //G2.bl_count[8] += 287 - 279;
+ G2.bl_count[7] = 279 - 255;
+ G2.bl_count[8] = (143 + 1) + (287 - 279);
+ G2.bl_count[9] = 255 - 143;
/* Codes 286 and 287 do not exist, but we must include them in the
* tree construction to get a canonical Huffman tree (longest code
* all ones)
init_block();
}
-
/* ===========================================================================
* Deflate in to out.
* IN assertions: the input and output buffers are cleared.
put_16bit(deflate_flags | 0x300); /* extra flags. OS id = 3 (Unix) */
-#if OPTIMIZED_PUT_32BIT
- /* put_32bit() performs 32bit stores. If we use it in send_bits()... */
- if (BUF_SIZE > 16)
- /* then all stores are misaligned, unless we flush the buffer now */
- flush_outbuf();
-#endif
+ /* The above 32-bit misaligns outbuf (10 bytes are stored), flush it */
+ flush_outbuf_if_32bit_optimized();
deflate();
flush_outbuf();
}
-
/* ======================================================================== */
static
IF_DESKTOP(long long) int FAST_FUNC pack_gzip(transformer_state_t *xstate UNUSED_PARAM)
{
- /* Reinit G1.xxx except pointers to allocated buffers */
- memset(&G1, 0, offsetof(struct globals, l_buf));
+ /* Reinit G1.xxx except pointers to allocated buffers, and entire G2 */
+ memset(&G1.crc, 0, (sizeof(G1) - offsetof(struct globals, crc)) + sizeof(G2));
/* Clear input and output buffers */
//G1.outcnt = 0;
//G1.isize = 0;
/* Reinit G2.xxx */
- memset(&G2, 0, sizeof(G2));
G2.l_desc.dyn_tree = G2.dyn_ltree;
G2.l_desc.static_tree = G2.static_ltree;
G2.l_desc.extra_bits = extra_lbits;
/* Must match bbunzip's constants OPT_STDOUT, OPT_FORCE! */
#if ENABLE_FEATURE_GZIP_LONG_OPTIONS
- opt = getopt32long(argv, "cfkv" IF_FEATURE_GZIP_DECOMPRESS("dt") "qn123456789", gzip_longopts);
+ opt = getopt32long(argv, BBUNPK_OPTSTR IF_FEATURE_GZIP_DECOMPRESS("dt") "n123456789", gzip_longopts);
#else
- opt = getopt32(argv, "cfkv" IF_FEATURE_GZIP_DECOMPRESS("dt") "qn123456789");
+ opt = getopt32(argv, BBUNPK_OPTSTR IF_FEATURE_GZIP_DECOMPRESS("dt") "n123456789");
#endif
#if ENABLE_FEATURE_GZIP_DECOMPRESS /* gunzip_main may not be visible... */
- if (opt & 0x30) // -d and/or -t
+ if (opt & (BBUNPK_OPT_DECOMPRESS|BBUNPK_OPT_TEST)) /* -d and/or -t */
return gunzip_main(argc, argv);
#endif
#if ENABLE_FEATURE_GZIP_LEVELS
- opt >>= ENABLE_FEATURE_GZIP_DECOMPRESS ? 8 : 6; /* drop cfkv[dt]qn bits */
+ opt >>= (BBUNPK_OPTSTRLEN IF_FEATURE_GZIP_DECOMPRESS(+ 2) + 1); /* drop cfkvq[dt]n bits */
if (opt == 0)
opt = 1 << 6; /* default: 6 */
opt = ffs(opt >> 4); /* Maps -1..-4 to [0], -5 to [1] ... -9 to [5] */
max_lazy_match = gzip_level_config[opt].lazy2 * 2;
nice_match = gzip_level_config[opt].nice2 * 2;
#endif
- option_mask32 &= 0xf; /* retain only -cfkv */
+ option_mask32 &= BBUNPK_OPTSTRMASK; /* retain only -cfkvq */
/* Allocate all global buffers (for DYN_ALLOC option) */
ALLOC(uch, G1.l_buf, INBUFSIZ);
ALLOC(ush, G1.prev, 1L << BITS);
/* Initialize the CRC32 table */
- global_crc32_table = crc32_filltable(NULL, 0);
+ global_crc32_new_table_le();
argv += optind;
return bbunpack(argv, pack_gzip, append_ext, "gz");