just include fcntl.h not sys/fcntl.h
[oweals/busybox.git] / archival / libunarchive / decompress_unlzma.c
1 /*
2  * Small lzma deflate implementation.
3  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
4  *
5  * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
6  * Copyright (C) 1999-2005  Igor Pavlov
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21  */
22
23 #include <stdint.h>
24 #include <unistd.h>
25 #include <stdio.h>
26 #include <byteswap.h>
27
28 #include "libbb.h"
29
30 #include "unarchive.h"
31
32 #include "rangecoder.h"
33
34
35 typedef struct {
36         uint8_t pos;
37         uint32_t dict_size;
38         uint64_t dst_size;
39 } __attribute__ ((packed)) lzma_header_t;
40
41
42 #define LZMA_BASE_SIZE 1846
43 #define LZMA_LIT_SIZE 768
44
45 #define LZMA_NUM_POS_BITS_MAX 4
46
47 #define LZMA_LEN_NUM_LOW_BITS 3
48 #define LZMA_LEN_NUM_MID_BITS 3
49 #define LZMA_LEN_NUM_HIGH_BITS 8
50
51 #define LZMA_LEN_CHOICE 0
52 #define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1)
53 #define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1)
54 #define LZMA_LEN_MID (LZMA_LEN_LOW \
55                       + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS)))
56 #define LZMA_LEN_HIGH (LZMA_LEN_MID \
57                        +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS)))
58 #define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS))
59
60 #define LZMA_NUM_STATES 12
61 #define LZMA_NUM_LIT_STATES 7
62
63 #define LZMA_START_POS_MODEL_INDEX 4
64 #define LZMA_END_POS_MODEL_INDEX 14
65 #define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1))
66
67 #define LZMA_NUM_POS_SLOT_BITS 6
68 #define LZMA_NUM_LEN_TO_POS_STATES 4
69
70 #define LZMA_NUM_ALIGN_BITS 4
71
72 #define LZMA_MATCH_MIN_LEN 2
73
74 #define LZMA_IS_MATCH 0
75 #define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES <<LZMA_NUM_POS_BITS_MAX))
76 #define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES)
77 #define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES)
78 #define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES)
79 #define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES)
80 #define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \
81                        + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
82 #define LZMA_SPEC_POS (LZMA_POS_SLOT \
83                        +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS))
84 #define LZMA_ALIGN (LZMA_SPEC_POS \
85                     + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX)
86 #define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS))
87 #define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS)
88 #define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS)
89
90
91 int unlzma(int src_fd, int dst_fd)
92 {
93         lzma_header_t header;
94         int lc, pb, lp;
95         uint32_t pos_state_mask;
96         uint32_t literal_pos_mask;
97         uint32_t pos;
98         uint16_t *p;
99         uint16_t *prob;
100         uint16_t *prob_lit;
101         int num_bits;
102         int num_probs;
103         rc_t rc;
104         int i, mi;
105         uint8_t *buffer;
106         uint8_t previous_byte = 0;
107         size_t buffer_pos = 0, global_pos = 0;
108         int len = 0;
109         int state = 0;
110         uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
111
112         if (read(src_fd, &header, sizeof(header)) != sizeof(header))
113                 bb_error_msg_and_die("can't read header");
114
115         if (header.pos >= (9 * 5 * 5))
116                 bb_error_msg_and_die("bad header");
117         mi = header.pos / 9;
118         lc = header.pos % 9;
119         pb = mi / 5;
120         lp = mi % 5;
121         pos_state_mask = (1 << pb) - 1;
122         literal_pos_mask = (1 << lp) - 1;
123
124 #if BB_BIG_ENDIAN
125         header.dict_size = bswap_32(header.dict_size);
126         header.dst_size = bswap_64(header.dst_size);
127 #endif
128
129         if (header.dict_size == 0)
130                 header.dict_size = 1;
131
132         buffer = xmalloc(MIN(header.dst_size, header.dict_size));
133
134         num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
135         p = xmalloc(num_probs * sizeof(*p));
136         num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
137         for (i = 0; i < num_probs; i++)
138                 p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;
139
140         rc_init(&rc, src_fd, 0x10000);
141
142         while (global_pos + buffer_pos < header.dst_size) {
143                 int pos_state = (buffer_pos + global_pos) & pos_state_mask;
144
145                 prob =
146                         p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state;
147                 if (rc_is_bit_0(&rc, prob)) {
148                         mi = 1;
149                         rc_update_bit_0(&rc, prob);
150                         prob = (p + LZMA_LITERAL + (LZMA_LIT_SIZE
151                                         * ((((buffer_pos + global_pos) & literal_pos_mask) << lc)
152                                         + (previous_byte >> (8 - lc)))));
153
154                         if (state >= LZMA_NUM_LIT_STATES) {
155                                 int match_byte;
156
157                                 pos = buffer_pos - rep0;
158                                 while (pos >= header.dict_size)
159                                         pos += header.dict_size;
160                                 match_byte = buffer[pos];
161                                 do {
162                                         int bit;
163
164                                         match_byte <<= 1;
165                                         bit = match_byte & 0x100;
166                                         prob_lit = prob + 0x100 + bit + mi;
167                                         if (rc_get_bit(&rc, prob_lit, &mi)) {
168                                                 if (!bit)
169                                                         break;
170                                         } else {
171                                                 if (bit)
172                                                         break;
173                                         }
174                                 } while (mi < 0x100);
175                         }
176                         while (mi < 0x100) {
177                                 prob_lit = prob + mi;
178                                 rc_get_bit(&rc, prob_lit, &mi);
179                         }
180                         previous_byte = (uint8_t) mi;
181
182                         buffer[buffer_pos++] = previous_byte;
183                         if (buffer_pos == header.dict_size) {
184                                 buffer_pos = 0;
185                                 global_pos += header.dict_size;
186                                 write(dst_fd, buffer, header.dict_size);
187                         }
188                         if (state < 4)
189                                 state = 0;
190                         else if (state < 10)
191                                 state -= 3;
192                         else
193                                 state -= 6;
194                 } else {
195                         int offset;
196                         uint16_t *prob_len;
197
198                         rc_update_bit_1(&rc, prob);
199                         prob = p + LZMA_IS_REP + state;
200                         if (rc_is_bit_0(&rc, prob)) {
201                                 rc_update_bit_0(&rc, prob);
202                                 rep3 = rep2;
203                                 rep2 = rep1;
204                                 rep1 = rep0;
205                                 state = state < LZMA_NUM_LIT_STATES ? 0 : 3;
206                                 prob = p + LZMA_LEN_CODER;
207                         } else {
208                                 rc_update_bit_1(&rc, prob);
209                                 prob = p + LZMA_IS_REP_G0 + state;
210                                 if (rc_is_bit_0(&rc, prob)) {
211                                         rc_update_bit_0(&rc, prob);
212                                         prob = (p + LZMA_IS_REP_0_LONG
213                                                         + (state << LZMA_NUM_POS_BITS_MAX) + pos_state);
214                                         if (rc_is_bit_0(&rc, prob)) {
215                                                 rc_update_bit_0(&rc, prob);
216
217                                                 state = state < LZMA_NUM_LIT_STATES ? 9 : 11;
218                                                 pos = buffer_pos - rep0;
219                                                 while (pos >= header.dict_size)
220                                                         pos += header.dict_size;
221                                                 previous_byte = buffer[pos];
222                                                 buffer[buffer_pos++] = previous_byte;
223                                                 if (buffer_pos == header.dict_size) {
224                                                         buffer_pos = 0;
225                                                         global_pos += header.dict_size;
226                                                         write(dst_fd, buffer, header.dict_size);
227                                                 }
228                                                 continue;
229                                         } else {
230                                                 rc_update_bit_1(&rc, prob);
231                                         }
232                                 } else {
233                                         uint32_t distance;
234
235                                         rc_update_bit_1(&rc, prob);
236                                         prob = p + LZMA_IS_REP_G1 + state;
237                                         if (rc_is_bit_0(&rc, prob)) {
238                                                 rc_update_bit_0(&rc, prob);
239                                                 distance = rep1;
240                                         } else {
241                                                 rc_update_bit_1(&rc, prob);
242                                                 prob = p + LZMA_IS_REP_G2 + state;
243                                                 if (rc_is_bit_0(&rc, prob)) {
244                                                         rc_update_bit_0(&rc, prob);
245                                                         distance = rep2;
246                                                 } else {
247                                                         rc_update_bit_1(&rc, prob);
248                                                         distance = rep3;
249                                                         rep3 = rep2;
250                                                 }
251                                                 rep2 = rep1;
252                                         }
253                                         rep1 = rep0;
254                                         rep0 = distance;
255                                 }
256                                 state = state < LZMA_NUM_LIT_STATES ? 8 : 11;
257                                 prob = p + LZMA_REP_LEN_CODER;
258                         }
259
260                         prob_len = prob + LZMA_LEN_CHOICE;
261                         if (rc_is_bit_0(&rc, prob_len)) {
262                                 rc_update_bit_0(&rc, prob_len);
263                                 prob_len = (prob + LZMA_LEN_LOW
264                                                         + (pos_state << LZMA_LEN_NUM_LOW_BITS));
265                                 offset = 0;
266                                 num_bits = LZMA_LEN_NUM_LOW_BITS;
267                         } else {
268                                 rc_update_bit_1(&rc, prob_len);
269                                 prob_len = prob + LZMA_LEN_CHOICE_2;
270                                 if (rc_is_bit_0(&rc, prob_len)) {
271                                         rc_update_bit_0(&rc, prob_len);
272                                         prob_len = (prob + LZMA_LEN_MID
273                                                                 + (pos_state << LZMA_LEN_NUM_MID_BITS));
274                                         offset = 1 << LZMA_LEN_NUM_LOW_BITS;
275                                         num_bits = LZMA_LEN_NUM_MID_BITS;
276                                 } else {
277                                         rc_update_bit_1(&rc, prob_len);
278                                         prob_len = prob + LZMA_LEN_HIGH;
279                                         offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
280                                                           + (1 << LZMA_LEN_NUM_MID_BITS));
281                                         num_bits = LZMA_LEN_NUM_HIGH_BITS;
282                                 }
283                         }
284                         rc_bit_tree_decode(&rc, prob_len, num_bits, &len);
285                         len += offset;
286
287                         if (state < 4) {
288                                 int pos_slot;
289
290                                 state += LZMA_NUM_LIT_STATES;
291                                 prob =
292                                         p + LZMA_POS_SLOT +
293                                         ((len <
294                                           LZMA_NUM_LEN_TO_POS_STATES ? len :
295                                           LZMA_NUM_LEN_TO_POS_STATES - 1)
296                                          << LZMA_NUM_POS_SLOT_BITS);
297                                 rc_bit_tree_decode(&rc, prob, LZMA_NUM_POS_SLOT_BITS,
298                                                                    &pos_slot);
299                                 if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
300                                         num_bits = (pos_slot >> 1) - 1;
301                                         rep0 = 2 | (pos_slot & 1);
302                                         if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
303                                                 rep0 <<= num_bits;
304                                                 prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1;
305                                         } else {
306                                                 num_bits -= LZMA_NUM_ALIGN_BITS;
307                                                 while (num_bits--)
308                                                         rep0 = (rep0 << 1) | rc_direct_bit(&rc);
309                                                 prob = p + LZMA_ALIGN;
310                                                 rep0 <<= LZMA_NUM_ALIGN_BITS;
311                                                 num_bits = LZMA_NUM_ALIGN_BITS;
312                                         }
313                                         i = 1;
314                                         mi = 1;
315                                         while (num_bits--) {
316                                                 if (rc_get_bit(&rc, prob + mi, &mi))
317                                                         rep0 |= i;
318                                                 i <<= 1;
319                                         }
320                                 } else
321                                         rep0 = pos_slot;
322                                 if (++rep0 == 0)
323                                         break;
324                         }
325
326                         len += LZMA_MATCH_MIN_LEN;
327
328                         do {
329                                 pos = buffer_pos - rep0;
330                                 while (pos >= header.dict_size)
331                                         pos += header.dict_size;
332                                 previous_byte = buffer[pos];
333                                 buffer[buffer_pos++] = previous_byte;
334                                 if (buffer_pos == header.dict_size) {
335                                         buffer_pos = 0;
336                                         global_pos += header.dict_size;
337                                         write(dst_fd, buffer, header.dict_size);
338                                 }
339                                 len--;
340                         } while (len != 0 && buffer_pos < header.dst_size);
341                 }
342         }
343
344         write(dst_fd, buffer, buffer_pos);
345         rc_free(&rc);
346         return 0;
347 }
348
349 /* vi:set ts=4: */