Patch from Robert P. Day, moving byte order checks to use platform.h macros.
[oweals/busybox.git] / archival / libunarchive / decompress_unlzma.c
1 /*
2  * Small lzma deflate implementation.
3  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
4  *
5  * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
6  * Copyright (C) 1999-2005  Igor Pavlov
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21  */
22
23 #include <stdint.h>
24 #include <unistd.h>
25 #include <stdio.h>
26 #include <byteswap.h>
27
28 #include "libbb.h"
29
30 #include "rangecoder.h"
31
32
33 typedef struct {
34         uint8_t pos;
35         uint32_t dict_size;
36         uint64_t dst_size;
37 } __attribute__ ((packed)) lzma_header_t;
38
39
40 #define LZMA_BASE_SIZE 1846
41 #define LZMA_LIT_SIZE 768
42
43 #define LZMA_NUM_POS_BITS_MAX 4
44
45 #define LZMA_LEN_NUM_LOW_BITS 3
46 #define LZMA_LEN_NUM_MID_BITS 3
47 #define LZMA_LEN_NUM_HIGH_BITS 8
48
49 #define LZMA_LEN_CHOICE 0
50 #define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1)
51 #define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1)
52 #define LZMA_LEN_MID (LZMA_LEN_LOW \
53                       + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS)))
54 #define LZMA_LEN_HIGH (LZMA_LEN_MID \
55                        +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS)))
56 #define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS))
57
58 #define LZMA_NUM_STATES 12
59 #define LZMA_NUM_LIT_STATES 7
60
61 #define LZMA_START_POS_MODEL_INDEX 4
62 #define LZMA_END_POS_MODEL_INDEX 14
63 #define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1))
64
65 #define LZMA_NUM_POS_SLOT_BITS 6
66 #define LZMA_NUM_LEN_TO_POS_STATES 4
67
68 #define LZMA_NUM_ALIGN_BITS 4
69
70 #define LZMA_MATCH_MIN_LEN 2
71
72 #define LZMA_IS_MATCH 0
73 #define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES <<LZMA_NUM_POS_BITS_MAX))
74 #define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES)
75 #define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES)
76 #define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES)
77 #define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES)
78 #define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \
79                        + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
80 #define LZMA_SPEC_POS (LZMA_POS_SLOT \
81                        +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS))
82 #define LZMA_ALIGN (LZMA_SPEC_POS \
83                     + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX)
84 #define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS))
85 #define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS)
86 #define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS)
87
88
89 int unlzma(int src_fd, int dst_fd)
90 {
91         lzma_header_t header;
92         int lc, pb, lp;
93         uint32_t pos_state_mask;
94         uint32_t literal_pos_mask;
95         uint32_t pos;
96         uint16_t *p;
97         uint16_t *prob;
98         uint16_t *prob_lit;
99         int num_bits;
100         int num_probs;
101         rc_t rc;
102         int i, mi;
103         uint8_t *buffer;
104         uint8_t previous_byte = 0;
105         size_t buffer_pos = 0, global_pos = 0;
106         int len = 0;
107         int state = 0;
108         uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
109
110         if (read(src_fd, &header, sizeof(header)) != sizeof(header))
111                 bb_error_msg_and_die("can't read header");
112
113         if (header.pos >= (9 * 5 * 5))
114                 bb_error_msg_and_die("bad header");
115         mi = header.pos / 9;
116         lc = header.pos % 9;
117         pb = mi / 5;
118         lp = mi % 5;
119         pos_state_mask = (1 << pb) - 1;
120         literal_pos_mask = (1 << lp) - 1;
121
122 #if BB_BIG_ENDIAN
123         header.dict_size = bswap_32(header.dict_size);
124         header.dst_size = bswap_64(header.dst_size);
125 #endif
126
127         if (header.dict_size == 0)
128                 header.dict_size = 1;
129
130         buffer = xmalloc(MIN(header.dst_size, header.dict_size));
131
132         num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
133         p = xmalloc(num_probs * sizeof(*p));
134         num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
135         for (i = 0; i < num_probs; i++)
136                 p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;
137
138         rc_init(&rc, src_fd, 0x10000);
139
140         while (global_pos + buffer_pos < header.dst_size) {
141                 int pos_state = (buffer_pos + global_pos) & pos_state_mask;
142
143                 prob =
144                         p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state;
145                 if (rc_is_bit_0(&rc, prob)) {
146                         mi = 1;
147                         rc_update_bit_0(&rc, prob);
148                         prob = (p + LZMA_LITERAL + (LZMA_LIT_SIZE
149                                         * ((((buffer_pos + global_pos) & literal_pos_mask) << lc)
150                                         + (previous_byte >> (8 - lc)))));
151
152                         if (state >= LZMA_NUM_LIT_STATES) {
153                                 int match_byte;
154
155                                 pos = buffer_pos - rep0;
156                                 while (pos >= header.dict_size)
157                                         pos += header.dict_size;
158                                 match_byte = buffer[pos];
159                                 do {
160                                         int bit;
161
162                                         match_byte <<= 1;
163                                         bit = match_byte & 0x100;
164                                         prob_lit = prob + 0x100 + bit + mi;
165                                         if (rc_get_bit(&rc, prob_lit, &mi)) {
166                                                 if (!bit)
167                                                         break;
168                                         } else {
169                                                 if (bit)
170                                                         break;
171                                         }
172                                 } while (mi < 0x100);
173                         }
174                         while (mi < 0x100) {
175                                 prob_lit = prob + mi;
176                                 rc_get_bit(&rc, prob_lit, &mi);
177                         }
178                         previous_byte = (uint8_t) mi;
179
180                         buffer[buffer_pos++] = previous_byte;
181                         if (buffer_pos == header.dict_size) {
182                                 buffer_pos = 0;
183                                 global_pos += header.dict_size;
184                                 write(dst_fd, buffer, header.dict_size);
185                         }
186                         if (state < 4)
187                                 state = 0;
188                         else if (state < 10)
189                                 state -= 3;
190                         else
191                                 state -= 6;
192                 } else {
193                         int offset;
194                         uint16_t *prob_len;
195
196                         rc_update_bit_1(&rc, prob);
197                         prob = p + LZMA_IS_REP + state;
198                         if (rc_is_bit_0(&rc, prob)) {
199                                 rc_update_bit_0(&rc, prob);
200                                 rep3 = rep2;
201                                 rep2 = rep1;
202                                 rep1 = rep0;
203                                 state = state < LZMA_NUM_LIT_STATES ? 0 : 3;
204                                 prob = p + LZMA_LEN_CODER;
205                         } else {
206                                 rc_update_bit_1(&rc, prob);
207                                 prob = p + LZMA_IS_REP_G0 + state;
208                                 if (rc_is_bit_0(&rc, prob)) {
209                                         rc_update_bit_0(&rc, prob);
210                                         prob = (p + LZMA_IS_REP_0_LONG
211                                                         + (state << LZMA_NUM_POS_BITS_MAX) + pos_state);
212                                         if (rc_is_bit_0(&rc, prob)) {
213                                                 rc_update_bit_0(&rc, prob);
214
215                                                 state = state < LZMA_NUM_LIT_STATES ? 9 : 11;
216                                                 pos = buffer_pos - rep0;
217                                                 while (pos >= header.dict_size)
218                                                         pos += header.dict_size;
219                                                 previous_byte = buffer[pos];
220                                                 buffer[buffer_pos++] = previous_byte;
221                                                 if (buffer_pos == header.dict_size) {
222                                                         buffer_pos = 0;
223                                                         global_pos += header.dict_size;
224                                                         write(dst_fd, buffer, header.dict_size);
225                                                 }
226                                                 continue;
227                                         } else {
228                                                 rc_update_bit_1(&rc, prob);
229                                         }
230                                 } else {
231                                         uint32_t distance;
232
233                                         rc_update_bit_1(&rc, prob);
234                                         prob = p + LZMA_IS_REP_G1 + state;
235                                         if (rc_is_bit_0(&rc, prob)) {
236                                                 rc_update_bit_0(&rc, prob);
237                                                 distance = rep1;
238                                         } else {
239                                                 rc_update_bit_1(&rc, prob);
240                                                 prob = p + LZMA_IS_REP_G2 + state;
241                                                 if (rc_is_bit_0(&rc, prob)) {
242                                                         rc_update_bit_0(&rc, prob);
243                                                         distance = rep2;
244                                                 } else {
245                                                         rc_update_bit_1(&rc, prob);
246                                                         distance = rep3;
247                                                         rep3 = rep2;
248                                                 }
249                                                 rep2 = rep1;
250                                         }
251                                         rep1 = rep0;
252                                         rep0 = distance;
253                                 }
254                                 state = state < LZMA_NUM_LIT_STATES ? 8 : 11;
255                                 prob = p + LZMA_REP_LEN_CODER;
256                         }
257
258                         prob_len = prob + LZMA_LEN_CHOICE;
259                         if (rc_is_bit_0(&rc, prob_len)) {
260                                 rc_update_bit_0(&rc, prob_len);
261                                 prob_len = (prob + LZMA_LEN_LOW
262                                                         + (pos_state << LZMA_LEN_NUM_LOW_BITS));
263                                 offset = 0;
264                                 num_bits = LZMA_LEN_NUM_LOW_BITS;
265                         } else {
266                                 rc_update_bit_1(&rc, prob_len);
267                                 prob_len = prob + LZMA_LEN_CHOICE_2;
268                                 if (rc_is_bit_0(&rc, prob_len)) {
269                                         rc_update_bit_0(&rc, prob_len);
270                                         prob_len = (prob + LZMA_LEN_MID
271                                                                 + (pos_state << LZMA_LEN_NUM_MID_BITS));
272                                         offset = 1 << LZMA_LEN_NUM_LOW_BITS;
273                                         num_bits = LZMA_LEN_NUM_MID_BITS;
274                                 } else {
275                                         rc_update_bit_1(&rc, prob_len);
276                                         prob_len = prob + LZMA_LEN_HIGH;
277                                         offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
278                                                           + (1 << LZMA_LEN_NUM_MID_BITS));
279                                         num_bits = LZMA_LEN_NUM_HIGH_BITS;
280                                 }
281                         }
282                         rc_bit_tree_decode(&rc, prob_len, num_bits, &len);
283                         len += offset;
284
285                         if (state < 4) {
286                                 int pos_slot;
287
288                                 state += LZMA_NUM_LIT_STATES;
289                                 prob =
290                                         p + LZMA_POS_SLOT +
291                                         ((len <
292                                           LZMA_NUM_LEN_TO_POS_STATES ? len :
293                                           LZMA_NUM_LEN_TO_POS_STATES - 1)
294                                          << LZMA_NUM_POS_SLOT_BITS);
295                                 rc_bit_tree_decode(&rc, prob, LZMA_NUM_POS_SLOT_BITS,
296                                                                    &pos_slot);
297                                 if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
298                                         num_bits = (pos_slot >> 1) - 1;
299                                         rep0 = 2 | (pos_slot & 1);
300                                         if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
301                                                 rep0 <<= num_bits;
302                                                 prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1;
303                                         } else {
304                                                 num_bits -= LZMA_NUM_ALIGN_BITS;
305                                                 while (num_bits--)
306                                                         rep0 = (rep0 << 1) | rc_direct_bit(&rc);
307                                                 prob = p + LZMA_ALIGN;
308                                                 rep0 <<= LZMA_NUM_ALIGN_BITS;
309                                                 num_bits = LZMA_NUM_ALIGN_BITS;
310                                         }
311                                         i = 1;
312                                         mi = 1;
313                                         while (num_bits--) {
314                                                 if (rc_get_bit(&rc, prob + mi, &mi))
315                                                         rep0 |= i;
316                                                 i <<= 1;
317                                         }
318                                 } else
319                                         rep0 = pos_slot;
320                                 if (++rep0 == 0)
321                                         break;
322                         }
323
324                         len += LZMA_MATCH_MIN_LEN;
325
326                         do {
327                                 pos = buffer_pos - rep0;
328                                 while (pos >= header.dict_size)
329                                         pos += header.dict_size;
330                                 previous_byte = buffer[pos];
331                                 buffer[buffer_pos++] = previous_byte;
332                                 if (buffer_pos == header.dict_size) {
333                                         buffer_pos = 0;
334                                         global_pos += header.dict_size;
335                                         write(dst_fd, buffer, header.dict_size);
336                                 }
337                                 len--;
338                         } while (len != 0 && buffer_pos < header.dst_size);
339                 }
340         }
341
342         write(dst_fd, buffer, buffer_pos);
343         rc_free(&rc);
344         return 0;
345 }
346
347 /* vi:set ts=4: */