/* Start 1.33.0 development cycle */
/* [oweals/busybox.git] / archival / libarchive / bz / compress.c */
1 /*
2  * bzip2 is written by Julian Seward <jseward@bzip.org>.
3  * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4  * See README and LICENSE files in this directory for more information.
5  */
6
7 /*-------------------------------------------------------------*/
8 /*--- Compression machinery (not incl block sorting)        ---*/
9 /*---                                            compress.c ---*/
10 /*-------------------------------------------------------------*/
11
12 /* ------------------------------------------------------------------
13 This file is part of bzip2/libbzip2, a program and library for
14 lossless, block-sorting data compression.
15
16 bzip2/libbzip2 version 1.0.4 of 20 December 2006
17 Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19 Please read the WARNING, DISCLAIMER and PATENTS sections in the
20 README file.
21
22 This program is released under the terms of the license contained
23 in the file LICENSE.
24 ------------------------------------------------------------------ */
25
26 /* CHANGES
27  * 0.9.0    -- original version.
28  * 0.9.0a/b -- no changes in this file.
29  * 0.9.0c   -- changed setting of nGroups in sendMTFValues()
30  *             so as to do a bit better on small files
31 */
32
33 /* #include "bzlib_private.h" */
34
35 #if BZIP2_SPEED >= 5
36 # define ALWAYS_INLINE_5 ALWAYS_INLINE
37 #else
38 # define ALWAYS_INLINE_5 /*nothing*/
39 #endif
40
41 /*---------------------------------------------------*/
42 /*--- Bit stream I/O                              ---*/
43 /*---------------------------------------------------*/
44
45 /*---------------------------------------------------*/
46 static
47 void BZ2_bsInitWrite(EState* s)
48 {
49         s->bsLive = 0;
50         s->bsBuff = 0;
51 }
52
53
54 /*---------------------------------------------------*/
55 static NOINLINE
56 void bsFinishWrite(EState* s)
57 {
58         while (s->bsLive > 0) {
59                 *s->posZ++ = (uint8_t)(s->bsBuff >> 24);
60                 s->bsBuff <<= 8;
61                 s->bsLive -= 8;
62         }
63 }
64
65
66 /*---------------------------------------------------*/
static
/* Helps only on level 5, on other levels hurts. ? */
ALWAYS_INLINE_5
/* Append the low n bits of v to the output bit stream, MSB-first.
 * Bits accumulate at the high end of the 32-bit bsBuff; bsLive counts
 * how many buffered bits are valid.  Callers must keep n small enough
 * that bsLive + n <= 32 after the flush loop (n == 32 would shift by a
 * negative amount -- see the comment in bsPutU32). */
void bsW(EState* s, int32_t n, uint32_t v)
{
	/* Drain whole bytes until fewer than 8 bits remain buffered. */
	while (s->bsLive >= 8) {
		*s->posZ++ = (uint8_t)(s->bsBuff >> 24);
		s->bsBuff <<= 8;
		s->bsLive -= 8;
	}
	/* Place the n new bits immediately below the bsLive buffered ones. */
	s->bsBuff |= (v << (32 - s->bsLive - n));
	s->bsLive += n;
}
80 /* Same with n == 16: */
static
ALWAYS_INLINE_5
/* Specialization of bsW() for n == 16: write the low 16 bits of v. */
void bsW16(EState* s, uint32_t v)
{
	/* Drain whole bytes so bsLive < 8, guaranteeing room for 16 bits. */
	while (s->bsLive >= 8) {
		*s->posZ++ = (uint8_t)(s->bsBuff >> 24);
		s->bsBuff <<= 8;
		s->bsLive -= 8;
	}
	/* (32 - bsLive - 16) simplifies to (16 - bsLive). */
	s->bsBuff |= (v << (16 - s->bsLive));
	s->bsLive += 16;
}
93 /* Same with n == 1: */
static
ALWAYS_INLINE /* one callsite */
/* Write a single '1' bit (bsW(s, 1, 1) specialized). */
void bsW1_1(EState* s)
{
	/* need space for only 1 bit, no need for loop freeing > 8 bits */
	if (s->bsLive >= 8) {
		*s->posZ++ = (uint8_t)(s->bsBuff >> 24);
		s->bsBuff <<= 8;
		s->bsLive -= 8;
	}
	/* (32 - bsLive - 1) == (31 - bsLive). */
	s->bsBuff |= (1 << (31 - s->bsLive));
	s->bsLive += 1;
}
static
ALWAYS_INLINE_5
/* Write a single '0' bit (bsW(s, 1, 0) specialized): only the bit
 * count advances, no bit needs to be OR'ed in. */
void bsW1_0(EState* s)
{
	/* need space for only 1 bit, no need for loop freeing > 8 bits */
	if (s->bsLive >= 8) {
		*s->posZ++ = (uint8_t)(s->bsBuff >> 24);
		s->bsBuff <<= 8;
		s->bsLive -= 8;
	}
	//s->bsBuff |= (0 << (31 - s->bsLive));
	s->bsLive += 1;
}
120
121
122 /*---------------------------------------------------*/
static ALWAYS_INLINE
/* Write the low 16 bits of u to the bit stream (big-endian bit order). */
void bsPutU16(EState* s, unsigned u)
{
	bsW16(s, u);
}
128
129
130 /*---------------------------------------------------*/
static
/* Write a 32-bit value to the bit stream as two 16-bit halves,
 * high half first (big-endian bit order). */
void bsPutU32(EState* s, unsigned u)
{
	//bsW(s, 32, u); // can't use: may try "uint32 << -n"
	bsW16(s, (u >> 16) & 0xffff);
	bsW16(s, u         & 0xffff);
}
138
139
140 /*---------------------------------------------------*/
141 /*--- The back end proper                         ---*/
142 /*---------------------------------------------------*/
143
144 /*---------------------------------------------------*/
145 static
146 void makeMaps_e(EState* s)
147 {
148         int i;
149         unsigned cnt = 0;
150         for (i = 0; i < 256; i++) {
151                 if (s->inUse[i]) {
152                         s->unseqToSeq[i] = cnt;
153                         cnt++;
154                 }
155         }
156         s->nInUse = cnt;
157 }
158
159
160 /*---------------------------------------------------*/
161 /*
162  * This bit of code is performance-critical.
163  * On 32bit x86, gcc-6.3.0 was observed to spill ryy_j to stack,
164  * resulting in abysmal performance (x3 slowdown).
165  * Forcing it into a separate function alleviates register pressure,
166  * and spillage no longer happens.
167  * Other versions of gcc do not exhibit this problem, but out-of-line code
168  * seems to be helping them too (code is both smaller and faster).
169  * Therefore NOINLINE is enabled for the entire 32bit x86 arch for now,
170  * without a check for gcc version.
171  */
static
#if defined __i386__
NOINLINE
#endif
/* Move-to-front step: locate ll_i in yy[] and shift every element in
 * front of it one slot toward the back, then put ll_i at yy[0].
 * Returns the index where ll_i was found (>= 1).
 * Precondition (guaranteed by the caller): yy[0] != ll_i and ll_i is
 * present somewhere in yy[1..]. */
int inner_loop(uint8_t *yy, uint8_t ll_i)
{
	uint8_t prev;
	uint8_t *p;

	/* Slide yy[0] into slot 1, remembering the value it displaces. */
	prev = yy[1];
	yy[1] = yy[0];
	p = &yy[1];
	/* Keep displacing until the displaced value is ll_i itself. */
	while (prev != ll_i) {
		uint8_t cur;
		p++;
		cur = *p;
		*p = prev;
		prev = cur;
	}
	yy[0] = prev;
	return p - yy;
}
static NOINLINE
/* Apply the move-to-front transform to the sorted block.
 * Input: s->ptr[] (sorted rotation order from block sorting) and
 * s->block[] (original data); output: MTF symbols in s->mtfv[] and
 * their frequencies in s->mtfFreq[].  Runs of MTF-position-0 symbols
 * are run-length encoded with the two symbols BZ_RUNA/BZ_RUNB
 * (values 0/1); a symbol found at MTF position j > 0 is emitted as
 * j+1; the EOB symbol nInUse+1 terminates the stream.
 * Sets s->nMTF to the number of symbols written. */
void generateMTFValues(EState* s)
{
	uint8_t yy[256];
	int i;
	int zPend;
	int32_t wr;

	/*
	 * After sorting (eg, here),
	 * s->arr1[0 .. s->nblock-1] holds sorted order,
	 * and
	 * ((uint8_t*)s->arr2)[0 .. s->nblock-1]
	 * holds the original block data.
	 *
	 * The first thing to do is generate the MTF values,
	 * and put them in ((uint16_t*)s->arr1)[0 .. s->nblock-1].
	 *
	 * Because there are strictly fewer or equal MTF values
	 * than block values, ptr values in this area are overwritten
	 * with MTF values only when they are no longer needed.
	 *
	 * The final compressed bitstream is generated into the
	 * area starting at &((uint8_t*)s->arr2)[s->nblock]
	 *
	 * These storage aliases are set up in bzCompressInit(),
	 * except for the last one, which is arranged in
	 * compressBlock().
	 */
	uint32_t* ptr   = s->ptr;

	makeMaps_e(s);

	wr = 0;
	zPend = 0;
	/* Alphabet is nInUse symbols + RUNA/RUNB slot 0/1 remap + EOB. */
	for (i = 0; i <= s->nInUse+1; i++)
		s->mtfFreq[i] = 0;

	/* Initial MTF list: identity order of the remapped alphabet. */
	for (i = 0; i < s->nInUse; i++)
		yy[i] = (uint8_t) i;

	for (i = 0; i < s->nblock; i++) {
		uint8_t ll_i = ll_i; /* gcc 4.3.1 thinks it may be used w/o init */
		int32_t j;

		AssertD(wr <= i, "generateMTFValues(1)");
		/* The byte preceding rotation ptr[i] is the BWT output byte. */
		j = ptr[i] - 1;
		if (j < 0)
			j += s->nblock;
		ll_i = s->unseqToSeq[s->block[j]];
		AssertD(ll_i < s->nInUse, "generateMTFValues(2a)");

		/* MTF position 0: just extend the pending zero-run. */
		if (yy[0] == ll_i) {
			zPend++;
			continue;
		}

		if (zPend > 0) {
			/* Emit the pending run length in bzip2's bijective
			 * base-2 code using RUNA (0) / RUNB (1) symbols.
			 * Also entered via goto below with i == -1 to flush
			 * the final run after the main loop. */
 process_zPend:
			zPend--;
			while (1) {
#if 0
				if (zPend & 1) {
					s->mtfv[wr] = BZ_RUNB; wr++;
					s->mtfFreq[BZ_RUNB]++;
				} else {
					s->mtfv[wr] = BZ_RUNA; wr++;
					s->mtfFreq[BZ_RUNA]++;
				}
#else /* same as above, since BZ_RUNA is 0 and BZ_RUNB is 1 */
				unsigned run = zPend & 1;
				s->mtfv[wr] = run;
				wr++;
				s->mtfFreq[run]++;
#endif
				zPend -= 2;
				if (zPend < 0)
					break;
				zPend = (unsigned)zPend / 2;
				/* bbox: unsigned div is easier */
			}
			if (i < 0) /* came via "goto process_zPend"? exit */
				goto end;
			zPend = 0;
		}
		/* Move ll_i to the front of yy[]; j is its old position. */
		j = inner_loop(yy, ll_i);
		s->mtfv[wr] = j+1;
		wr++;
		s->mtfFreq[j+1]++;
	}

	/* Sentinel so the run-flush code above knows to jump to "end". */
	i = -1;
	if (zPend > 0)
		goto process_zPend; /* "process it and come back here" */
 end:
	/* Terminate with the EOB symbol. */
	s->mtfv[wr] = s->nInUse+1;
	wr++;
	s->mtfFreq[s->nInUse+1]++;

	s->nMTF = wr;
}
294
295
296 /*---------------------------------------------------*/
297 #define BZ_LESSER_ICOST  0
298 #define BZ_GREATER_ICOST 15
299
static NOINLINE
/* Huffman-code the MTF symbol stream and emit it to the bit stream:
 * choose 2..6 coding tables, iteratively refine them over the
 * 50-symbol groups (BZ_G_SIZE), then transmit the symbol map, the
 * MTF-coded selectors, the delta-coded table lengths, and finally the
 * Huffman-coded data itself.  Reads s->mtfv/s->nMTF produced by
 * generateMTFValues(); writes via bsW*() into the s->posZ buffer. */
void sendMTFValues(EState* s)
{
	int32_t t, i;
	unsigned iter;
	unsigned gs;
	int32_t alphaSize;
	unsigned nSelectors, selCtr;
	int32_t nGroups;

	/*
	 * uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
	 * is a global since the decoder also needs it.
	 *
	 * int32_t  code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
	 * int32_t  rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
	 * are also globals only used in this proc.
	 * Made global to keep stack frame size small.
	 */
#define code     sendMTFValues__code
#define rfreq    sendMTFValues__rfreq
#define len_pack sendMTFValues__len_pack

	unsigned /*uint16_t*/ cost[BZ_N_GROUPS];

	uint16_t* mtfv = s->mtfv;

	/* nInUse symbols + RUNA/RUNB slot + EOB. */
	alphaSize = s->nInUse + 2;
	for (t = 0; t < BZ_N_GROUPS; t++) {
		unsigned v;
		for (v = 0; v < alphaSize; v++)
			s->len[t][v] = BZ_GREATER_ICOST;
	}

	/*--- Decide how many coding tables to use ---*/
	AssertH(s->nMTF > 0, 3001);
	// 1..199 = 2
	// 200..599 = 3
	// 600..1199 = 4
	// 1200..2399 = 5
	// 2400..99999 = 6
	nGroups = 2;
	nGroups += (s->nMTF >= 200);
	nGroups += (s->nMTF >= 600);
	nGroups += (s->nMTF >= 1200);
	nGroups += (s->nMTF >= 2400);

	/*--- Generate an initial set of coding tables ---*/
	{
		/* Partition the alphabet into nGroups contiguous ranges of
		 * roughly equal total frequency; table nPart-1 gets cheap
		 * (BZ_LESSER_ICOST) lengths inside its range and expensive
		 * (BZ_GREATER_ICOST) lengths outside it. */
		unsigned nPart, remF;

		nPart = nGroups;
		remF  = s->nMTF;
		gs = 0;
		while (nPart > 0) {
			unsigned v;
			unsigned ge;
			unsigned tFreq, aFreq;

			/* Target frequency share for this partition. */
			tFreq = remF / nPart;
			ge = gs;
			aFreq = 0;
			while (aFreq < tFreq && ge < alphaSize) {
				aFreq += s->mtfFreq[ge++];
			}
			ge--;

			/* Give back the last symbol on alternating interior
			 * partitions to balance the split a bit better. */
			if (ge > gs
			 && nPart != nGroups && nPart != 1
			 && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */
			) {
				aFreq -= s->mtfFreq[ge];
				ge--;
			}

			for (v = 0; v < alphaSize; v++)
				if (v >= gs && v <= ge)
					s->len[nPart-1][v] = BZ_LESSER_ICOST;
				else
					s->len[nPart-1][v] = BZ_GREATER_ICOST;

			nPart--;
			gs = ge + 1;
			remF -= aFreq;
		}
	}

	/*
	 * Iterate up to BZ_N_ITERS times to improve the tables.
	 */
	for (iter = 0; iter < BZ_N_ITERS; iter++) {
		for (t = 0; t < nGroups; t++) {
			unsigned v;
			for (v = 0; v < alphaSize; v++)
				s->rfreq[t][v] = 0;
		}

#if BZIP2_SPEED >= 5
		/*
		 * Set up an auxiliary length table which is used to fast-track
		 * the common case (nGroups == 6).
		 */
		if (nGroups == 6) {
			unsigned v;
			/* Pack two 8-bit table lengths per 32-bit word so the
			 * fast path below sums three words per symbol. */
			for (v = 0; v < alphaSize; v++) {
				s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
				s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
				s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
			}
		}
#endif
		nSelectors = 0;
		gs = 0;
		while (1) {
			unsigned ge;
			unsigned bt, bc;

			/*--- Set group start & end marks. --*/
			if (gs >= s->nMTF)
				break;
			ge = gs + BZ_G_SIZE - 1;
			if (ge >= s->nMTF)
				ge = s->nMTF-1;

			/*
			 * Calculate the cost of this group as coded
			 * by each of the coding tables.
			 */
			for (t = 0; t < nGroups; t++)
				cost[t] = 0;
#if BZIP2_SPEED >= 5
			if (nGroups == 6 && 50 == ge-gs+1) {
				/*--- fast track the common case ---*/
				register uint32_t cost01, cost23, cost45;
				register uint16_t icv;
				cost01 = cost23 = cost45 = 0;
#define BZ_ITER(nn) \
	icv = mtfv[gs+(nn)]; \
	cost01 += s->len_pack[icv][0]; \
	cost23 += s->len_pack[icv][1]; \
	cost45 += s->len_pack[icv][2];
				BZ_ITER(0);  BZ_ITER(1);  BZ_ITER(2);  BZ_ITER(3);  BZ_ITER(4);
				BZ_ITER(5);  BZ_ITER(6);  BZ_ITER(7);  BZ_ITER(8);  BZ_ITER(9);
				BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
				BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
				BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
				BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
				BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
				BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
				BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
				BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
#undef BZ_ITER
				/* Unpack the two 16-bit cost sums from each word.
				 * Safe: 50 symbols * max length 17 cannot carry
				 * into the upper half. */
				cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
				cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
				cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
			} else
#endif
			{
				/*--- slow version which correctly handles all situations ---*/
				for (i = gs; i <= ge; i++) {
					unsigned /*uint16_t*/ icv = mtfv[i];
					for (t = 0; t < nGroups; t++)
						cost[t] += s->len[t][icv];
				}
			}
			/*
			 * Find the coding table which is best for this group,
			 * and record its identity in the selector table.
			 */
			/*bc = 999999999;*/
			/*bt = -1;*/
			bc = cost[0];
			bt = 0;
			for (t = 1 /*0*/; t < nGroups; t++) {
				if (cost[t] < bc) {
					bc = cost[t];
					bt = t;
				}
			}
			s->selector[nSelectors] = bt;
			nSelectors++;

			/*
			 * Increment the symbol frequencies for the selected table.
			 */
/* 1% faster compress. +800 bytes */
#if BZIP2_SPEED >= 4
			if (nGroups == 6 && 50 == ge-gs+1) {
				/*--- fast track the common case ---*/
#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++
				BZ_ITUR(0);  BZ_ITUR(1);  BZ_ITUR(2);  BZ_ITUR(3);  BZ_ITUR(4);
				BZ_ITUR(5);  BZ_ITUR(6);  BZ_ITUR(7);  BZ_ITUR(8);  BZ_ITUR(9);
				BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
				BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
				BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
				BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
				BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
				BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
				BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
				BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
#undef BZ_ITUR
				gs = ge + 1;
			} else
#endif
			{
				/*--- slow version which correctly handles all situations ---*/
				while (gs <= ge) {
					s->rfreq[bt][mtfv[gs]]++;
					gs++;
				}
				/* already is: gs = ge + 1; */
			}
		}

		/*
		 * Recompute the tables based on the accumulated frequencies.
		 */
		/* maxLen was changed from 20 to 17 in bzip2-1.0.3.  See
		 * comment in huffman.c for details. */
		for (t = 0; t < nGroups; t++)
			BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
	}

	AssertH(nGroups < 8, 3002);
	AssertH(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3003);

	/*--- Compute MTF values for the selectors. ---*/
	{
		uint8_t pos[BZ_N_GROUPS], ll_i, tmp2, tmp;

		for (i = 0; i < nGroups; i++)
			pos[i] = i;
		for (i = 0; i < nSelectors; i++) {
			unsigned j;
			ll_i = s->selector[i];
			/* Inline move-to-front over the tiny pos[] list;
			 * j ends up as the selector's MTF position. */
			j = 0;
			tmp = pos[j];
			while (ll_i != tmp) {
				j++;
				tmp2 = tmp;
				tmp = pos[j];
				pos[j] = tmp2;
			}
			pos[0] = tmp;
			s->selectorMtf[i] = j;
		}
	}

	/*--- Assign actual codes for the tables. --*/
	for (t = 0; t < nGroups; t++) {
		unsigned minLen = 32; //todo: s->len[t][0];
		unsigned maxLen = 0;  //todo: s->len[t][0];
		for (i = 0; i < alphaSize; i++) {
			if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
			if (s->len[t][i] < minLen) minLen = s->len[t][i];
		}
		AssertH(!(maxLen > 17 /*20*/), 3004);
		AssertH(!(minLen < 1), 3005);
		BZ2_hbAssignCodes(&(s->code[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize);
	}

	/*--- Transmit the mapping table. ---*/
	{
		/* bbox: optimized a bit more than in bzip2 */
		/* inUse16 bit i = "some byte in 16-value bucket i is used".
		 * The bucket test ORs the inUse[] bytes word-at-a-time;
		 * bb__aliased_* types presumably carry a may_alias
		 * attribute to make these casts legal -- defined elsewhere. */
		int inUse16 = 0;
		for (i = 0; i < 16; i++) {
			if (sizeof(long) <= 4) {
				inUse16 = inUse16*2 +
					((*(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 0])
					| *(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 4])
					| *(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 8])
					| *(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 12])) != 0);
			} else { /* Our CPU can do better */
				inUse16 = inUse16*2 +
					((*(bb__aliased_uint64_t*)&(s->inUse[i * 16 + 0])
					| *(bb__aliased_uint64_t*)&(s->inUse[i * 16 + 8])) != 0);
			}
		}

		bsW16(s, inUse16);

		/* For each used bucket, emit its 16-bit usage bitmap.
		 * Walk the buckets by shifting bit 15 into the sign bit. */
		inUse16 <<= (sizeof(int)*8 - 16); /* move 15th bit into sign bit */
		for (i = 0; i < 16; i++) {
			if (inUse16 < 0) {
				unsigned v16 = 0;
				unsigned j;
				for (j = 0; j < 16; j++)
					v16 = v16*2 + s->inUse[i * 16 + j];
				bsW16(s, v16);
			}
			inUse16 <<= 1;
		}
	}

	/*--- Now the selectors. ---*/
	bsW(s, 3, nGroups);
	bsW(s, 15, nSelectors);
	for (i = 0; i < nSelectors; i++) {
		/* Each selector MTF value is unary coded: j ones then a zero. */
		unsigned j;
		for (j = 0; j < s->selectorMtf[i]; j++)
			bsW1_1(s);
		bsW1_0(s);
	}

	/*--- Now the coding tables. ---*/
	for (t = 0; t < nGroups; t++) {
		/* Lengths are delta coded: 5-bit start value, then for each
		 * symbol a sequence of "10" (+1) / "11" (-1) steps ended by
		 * a single 0 bit. */
		unsigned curr = s->len[t][0];
		bsW(s, 5, curr);
		for (i = 0; i < alphaSize; i++) {
			while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ }
			while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ }
			bsW1_0(s);
		}
	}

	/*--- And finally, the block data proper ---*/
	selCtr = 0;
	gs = 0;
	while (1) {
		unsigned ge;

		if (gs >= s->nMTF)
			break;
		ge = gs + BZ_G_SIZE - 1;
		if (ge >= s->nMTF)
			ge = s->nMTF-1;
		AssertH(s->selector[selCtr] < nGroups, 3006);

/* Costs 1300 bytes and is _slower_ (on Intel Core 2) */
#if 0
		if (nGroups == 6 && 50 == ge-gs+1) {
			/*--- fast track the common case ---*/
			uint16_t mtfv_i;
			uint8_t* s_len_sel_selCtr  = &(s->len[s->selector[selCtr]][0]);
			int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
#define BZ_ITAH(nn) \
	mtfv_i = mtfv[gs+(nn)]; \
	bsW(s, s_len_sel_selCtr[mtfv_i], s_code_sel_selCtr[mtfv_i])
			BZ_ITAH(0);  BZ_ITAH(1);  BZ_ITAH(2);  BZ_ITAH(3);  BZ_ITAH(4);
			BZ_ITAH(5);  BZ_ITAH(6);  BZ_ITAH(7);  BZ_ITAH(8);  BZ_ITAH(9);
			BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
			BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
			BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
			BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
			BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
			BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
			BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
			BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
#undef BZ_ITAH
			gs = ge+1;
		} else
#endif
		{
			/*--- slow version which correctly handles all situations ---*/
			/* code is bit bigger, but moves multiply out of the loop */
			uint8_t* s_len_sel_selCtr  = &(s->len [s->selector[selCtr]][0]);
			int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
			while (gs <= ge) {
				bsW(s,
					s_len_sel_selCtr[mtfv[gs]],
					s_code_sel_selCtr[mtfv[gs]]
				);
				gs++;
			}
			/* already is: gs = ge+1; */
		}
		selCtr++;
	}
	AssertH(selCtr == nSelectors, 3007);
#undef code
#undef rfreq
#undef len_pack
}
673
674
675 /*---------------------------------------------------*/
static
/* Compress one block: fold the block CRC into the combined CRC, sort
 * the block, write the stream header (first block only) and the block
 * header, then the MTF + Huffman coded data; on the last block, append
 * the stream trailer and flush the bit buffer. */
void BZ2_compressBlock(EState* s, int is_last_block)
{
	/* Self-init silences "may be used uninitialized": origPtr is set
	 * and used only under the same (s->nblock > 0) condition. */
	int32_t origPtr = origPtr;

	if (s->nblock > 0) {
		BZ_FINALISE_CRC(s->blockCRC);
		/* combinedCRC = rotate-left-1, then XOR in this block's CRC. */
		s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
		s->combinedCRC ^= s->blockCRC;
		if (s->blockNo > 1)
			s->posZ = s->zbits; // was: s->numZ = 0;

		origPtr = BZ2_blockSort(s);
	}

	/* Output area starts right after the block data in arr2. */
	s->zbits = &((uint8_t*)s->arr2)[s->nblock];
	s->posZ = s->zbits;
	s->state_out_pos = s->zbits;

	/*-- If this is the first block, create the stream header. --*/
	if (s->blockNo == 1) {
		BZ2_bsInitWrite(s);
		/*bsPutU8(s, BZ_HDR_B);*/
		/*bsPutU8(s, BZ_HDR_Z);*/
		/*bsPutU8(s, BZ_HDR_h);*/
		/*bsPutU8(s, BZ_HDR_0 + s->blockSize100k);*/
		bsPutU32(s, BZ_HDR_BZh0 + s->blockSize100k);
	}

	if (s->nblock > 0) {
		/* 48-bit block magic 0x314159265359 (pi). */
		/*bsPutU8(s, 0x31);*/
		/*bsPutU8(s, 0x41);*/
		/*bsPutU8(s, 0x59);*/
		/*bsPutU8(s, 0x26);*/
		bsPutU32(s, 0x31415926);
		/*bsPutU8(s, 0x53);*/
		/*bsPutU8(s, 0x59);*/
		bsPutU16(s, 0x5359);

		/*-- Now the block's CRC, so it is in a known place. --*/
		bsPutU32(s, s->blockCRC);

		/*
		 * Now a single bit indicating (non-)randomisation.
		 * As of version 0.9.5, we use a better sorting algorithm
		 * which makes randomisation unnecessary.  So always set
		 * the randomised bit to 'no'.  Of course, the decoder
		 * still needs to be able to handle randomised blocks
		 * so as to maintain backwards compatibility with
		 * older versions of bzip2.
		 */
		bsW1_0(s);

		/* Index of the original rotation within the sorted order. */
		bsW(s, 24, origPtr);
		generateMTFValues(s);
		sendMTFValues(s);
	}

	/*-- If this is the last block, add the stream trailer. --*/
	if (is_last_block) {
		/* 48-bit end-of-stream magic 0x177245385090 (sqrt(pi)). */
		/*bsPutU8(s, 0x17);*/
		/*bsPutU8(s, 0x72);*/
		/*bsPutU8(s, 0x45);*/
		/*bsPutU8(s, 0x38);*/
		bsPutU32(s, 0x17724538);
		/*bsPutU8(s, 0x50);*/
		/*bsPutU8(s, 0x90);*/
		bsPutU16(s, 0x5090);
		bsPutU32(s, s->combinedCRC);
		bsFinishWrite(s);
	}
}
748
749
750 /*-------------------------------------------------------------*/
751 /*--- end                                        compress.c ---*/
752 /*-------------------------------------------------------------*/