ba73f1132f72175a5d994cbac2ca7ecc72262b8b
[oweals/busybox.git] / archival / libunarchive / decompress_uncompress.c
1 /* vi: set sw=4 ts=4: */
2 #include "libbb.h"
3
4 /* uncompress for busybox -- (c) 2002 Robert Griebl
5  *
6  * based on the original compress42.c source
7  * (see disclaimer below)
8  */
9
10
11 /* (N)compress42.c - File compression ala IEEE Computer, Mar 1992.
12  *
13  * Authors:
14  *   Spencer W. Thomas   (decvax!harpo!utah-cs!utah-gr!thomas)
15  *   Jim McKie           (decvax!mcvax!jim)
16  *   Steve Davies        (decvax!vax135!petsd!peora!srd)
17  *   Ken Turkowski       (decvax!decwrl!turtlevax!ken)
18  *   James A. Woods      (decvax!ihnp4!ames!jaw)
19  *   Joe Orost           (decvax!vax135!petsd!joe)
20  *   Dave Mack           (csu@alembic.acs.com)
21  *   Peter Jannesen, Network Communication Systems
22  *                       (peter@ncs.nl)
23  *
24  * marc@suse.de : a small security fix for a buffer overflow
25  *
26  * [... History snipped ...]
27  *
28  */
29
/* Default input buffer size */
#define IBUFSIZ 2048

/* Default output buffer size */
#define OBUFSIZ 2048

/* Magic bytes that start a compressed file */
#define MAGIC_1         ((char_type)'\037')     /* First byte of compressed file  */
#define MAGIC_2         ((char_type)'\235')     /* Second byte of compressed file */

/* Defines for third byte of header */
#define BIT_MASK        0x1f    /* Mask for 'number of compression bits'.     */
                                /* Masks 0x20 and 0x40 are free.               */
                                /* I think 0x20 should mean that there is      */
                                /* a fourth header byte (for expansion).       */
#define BLOCK_MODE      0x80    /* Block compression: if the table is full and */
                                /* the compression rate is dropping, the       */
                                /* tables are flushed.                         */

/* The next two codes should not be changed lightly, as they must not
 * lie within the contiguous general code space. */
#define FIRST   257             /* first free entry        */
#define CLEAR   256             /* table clear output code */

#define INIT_BITS 9             /* initial number of bits/code */


/* machine variants which require cc -Dmachine:  pdp11, z8000, DOS */
#define FAST

#define HBITS           17      /* 50% occupancy */
#define HSIZE      (1<<HBITS)
#define HMASK      (HSIZE-1)
#define HPRIME           9941
#define BITS               16
#undef  MAXSEG_64K
#define MAXCODE(n)      (1L << (n))

/*
 * Table accessors.  They expect arrays named htab and codetab to be in
 * scope at the point of use.  The suffix table and the decode stack
 * both live in htab: suffixes are indexed from the start, the stack
 * grows downwards from the last element.
 */
#define htabof(i)               htab[(i)]
#define codetabof(i)            codetab[(i)]
#define tab_prefixof(i)         codetabof(i)
#define tab_suffixof(i)         ((unsigned char *)(htab))[(i)]
#define de_stack                ((unsigned char *)&(htab[HSIZE-1]))
#define clear_htab()            memset(htab, -1, HSIZE)
/* No trailing semicolon here: the caller supplies it, so the macro
 * behaves like a single statement (e.g. in an if/else). */
#define clear_tab_prefixof()    memset(codetab, 0, 256)
71
72
73 /*
74  * Decompress stdin to stdout.  This routine adapts to the codes in the
75  * file building the "string" table on-the-fly; requiring no table to
76  * be stored in the compressed file.  The tables used herein are shared
77  * with those of the compress() routine.  See the definitions above.
78  */
79
80 USE_DESKTOP(long long) int
81 uncompress(int fd_in, int fd_out)
82 {
83         USE_DESKTOP(long long total_written = 0;)
84         unsigned char *stackp;
85         long code;
86         int finchar;
87         long oldcode;
88         long incode;
89         int inbits;
90         int posbits;
91         int outpos;
92         int insize;
93         int bitmask;
94         long free_ent;
95         long maxcode;
96         long maxmaxcode;
97         int n_bits;
98         int rsize = 0;
99         RESERVE_CONFIG_UBUFFER(inbuf, IBUFSIZ + 64);
100         RESERVE_CONFIG_UBUFFER(outbuf, OBUFSIZ + 2048);
101         unsigned char htab[HSIZE];
102         unsigned short codetab[HSIZE];
103
104         /* Hmm, these were statics - why?! */
105         /* user settable max # bits/code */
106         int maxbits; /* = BITS; */
107         /* block compress mode -C compatible with 2.0 */
108         int block_mode; /* = BLOCK_MODE; */
109
110         memset(inbuf, 0, IBUFSIZ + 64);
111         memset(outbuf, 0, OBUFSIZ + 2048);
112
113         insize = 0;
114
115         /* xread isn't good here, we have to return - caller may want
116          * to do some cleanup (e.g. delete incomplete unpacked file etc) */
117         if (full_read(fd_in, inbuf, 1) != 1) {
118                 bb_error_msg("short read");
119                 return -1;
120         }
121
122         maxbits = inbuf[0] & BIT_MASK;
123         block_mode = inbuf[0] & BLOCK_MODE;
124         maxmaxcode = MAXCODE(maxbits);
125
126         if (maxbits > BITS) {
127                 bb_error_msg("compressed with %d bits, can only handle "
128                                 "%d bits", maxbits, BITS);
129                 return -1;
130         }
131
132         n_bits = INIT_BITS;
133         maxcode = MAXCODE(INIT_BITS) - 1;
134         bitmask = (1 << INIT_BITS) - 1;
135         oldcode = -1;
136         finchar = 0;
137         outpos = 0;
138         posbits = 0 << 3;
139
140         free_ent = ((block_mode) ? FIRST : 256);
141
142         /* As above, initialize the first 256 entries in the table. */
143         clear_tab_prefixof();
144
145         for (code = 255; code >= 0; --code) {
146                 tab_suffixof(code) = (unsigned char) code;
147         }
148
149         do {
150  resetbuf:
151                 {
152                         int i;
153                         int e;
154                         int o;
155
156                         o = posbits >> 3;
157                         e = insize - o;
158
159                         for (i = 0; i < e; ++i)
160                                 inbuf[i] = inbuf[i + o];
161
162                         insize = e;
163                         posbits = 0;
164                 }
165
166                 if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) {
167                         rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ);
168 //error check??
169                         insize += rsize;
170                 }
171
172                 inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 :
173                                   (insize << 3) - (n_bits - 1));
174
175                 while (inbits > posbits) {
176                         if (free_ent > maxcode) {
177                                 posbits =
178                                         ((posbits - 1) +
179                                          ((n_bits << 3) -
180                                           (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
181                                 ++n_bits;
182                                 if (n_bits == maxbits) {
183                                         maxcode = maxmaxcode;
184                                 } else {
185                                         maxcode = MAXCODE(n_bits) - 1;
186                                 }
187                                 bitmask = (1 << n_bits) - 1;
188                                 goto resetbuf;
189                         }
190                         {
191                                 unsigned char *p = &inbuf[posbits >> 3];
192
193                                 code = ((((long) (p[0])) | ((long) (p[1]) << 8) |
194                                          ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask;
195                         }
196                         posbits += n_bits;
197
198
199                         if (oldcode == -1) {
200                                 oldcode = code;
201                                 finchar = (int) oldcode;
202                                 outbuf[outpos++] = (unsigned char) finchar;
203                                 continue;
204                         }
205
206                         if (code == CLEAR && block_mode) {
207                                 clear_tab_prefixof();
208                                 free_ent = FIRST - 1;
209                                 posbits =
210                                         ((posbits - 1) +
211                                          ((n_bits << 3) -
212                                           (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
213                                 n_bits = INIT_BITS;
214                                 maxcode = MAXCODE(INIT_BITS) - 1;
215                                 bitmask = (1 << INIT_BITS) - 1;
216                                 goto resetbuf;
217                         }
218
219                         incode = code;
220                         stackp = de_stack;
221
222                         /* Special case for KwKwK string. */
223                         if (code >= free_ent) {
224                                 if (code > free_ent) {
225                                         unsigned char *p;
226
227                                         posbits -= n_bits;
228                                         p = &inbuf[posbits >> 3];
229
230                                         bb_error_msg
231                                                 ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)",
232                                                  insize, posbits, p[-1], p[0], p[1], p[2], p[3],
233                                                  (posbits & 07));
234                                         bb_error_msg("uncompress: corrupt input");
235                                         return -1;
236                                 }
237
238                                 *--stackp = (unsigned char) finchar;
239                                 code = oldcode;
240                         }
241
242                         /* Generate output characters in reverse order */
243                         while ((long) code >= (long) 256) {
244                                 *--stackp = tab_suffixof(code);
245                                 code = tab_prefixof(code);
246                         }
247
248                         finchar = tab_suffixof(code);
249                         *--stackp = (unsigned char) finchar;
250
251                         /* And put them out in forward order */
252                         {
253                                 int i;
254
255                                 i = de_stack - stackp;
256                                 if (outpos + i >= OBUFSIZ) {
257                                         do {
258                                                 if (i > OBUFSIZ - outpos) {
259                                                         i = OBUFSIZ - outpos;
260                                                 }
261
262                                                 if (i > 0) {
263                                                         memcpy(outbuf + outpos, stackp, i);
264                                                         outpos += i;
265                                                 }
266
267                                                 if (outpos >= OBUFSIZ) {
268                                                         full_write(fd_out, outbuf, outpos);
269 //error check??
270                                                         USE_DESKTOP(total_written += outpos;)
271                                                         outpos = 0;
272                                                 }
273                                                 stackp += i;
274                                                 i = de_stack - stackp;
275                                         } while (i > 0);
276                                 } else {
277                                         memcpy(outbuf + outpos, stackp, i);
278                                         outpos += i;
279                                 }
280                         }
281
282                         /* Generate the new entry. */
283                         code = free_ent;
284                         if (code < maxmaxcode) {
285                                 tab_prefixof(code) = (unsigned short) oldcode;
286                                 tab_suffixof(code) = (unsigned char) finchar;
287                                 free_ent = code + 1;
288                         }
289
290                         /* Remember previous code.  */
291                         oldcode = incode;
292                 }
293
294         } while (rsize > 0);
295
296         if (outpos > 0) {
297                 full_write(fd_out, outbuf, outpos);
298 //error check??
299                 USE_DESKTOP(total_written += outpos;)
300         }
301
302         RELEASE_CONFIG_BUFFER(inbuf);
303         RELEASE_CONFIG_BUFFER(outbuf);
304         return USE_DESKTOP(total_written) + 0;
305 }