void bsFinishWrite(EState* s)
{
while (s->bsLive > 0) {
- s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
+ s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24);
s->numZ++;
s->bsBuff <<= 8;
s->bsLive -= 8;
/*---------------------------------------------------*/
static
-/* Forced inlining results in +600 bytes code,
- * 2% faster compression. Not worth it. */
-/*ALWAYS_INLINE*/
+/* Forced inlining of bsW helps only at speed level 5; at other levels it hurts (unclear why) */
+#if CONFIG_BZIP2_FEATURE_SPEED >= 5
+ALWAYS_INLINE
+#endif
void bsW(EState* s, int32_t n, uint32_t v)
{
while (s->bsLive >= 8) {
- s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
+ s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24);
s->numZ++;
s->bsBuff <<= 8;
s->bsLive -= 8;
/*---------------------------------------------------*/
static
-void bsPutU32(EState* s, uint32_t u)
+void bsPutU32(EState* s, unsigned u)
{
bsW(s, 8, (u >> 24) & 0xff);
bsW(s, 8, (u >> 16) & 0xff);
/*---------------------------------------------------*/
static
-void bsPutUChar(EState* s, UChar c)
+void bsPutU16(EState* s, unsigned u)
{
- bsW(s, 8, (uint32_t)c);
+ bsW(s, 8, (u >> 8) & 0xff);
+ bsW(s, 8, u & 0xff);
}
static
void makeMaps_e(EState* s)
{
- int32_t i;
+ int i;
s->nInUse = 0;
for (i = 0; i < 256; i++) {
if (s->inUse[i]) {
static NOINLINE
void generateMTFValues(EState* s)
{
- UChar yy[256];
+ uint8_t yy[256];
int32_t i, j;
int32_t zPend;
int32_t wr;
* After sorting (eg, here),
* s->arr1[0 .. s->nblock-1] holds sorted order,
* and
- * ((UChar*)s->arr2)[0 .. s->nblock-1]
+ * ((uint8_t*)s->arr2)[0 .. s->nblock-1]
* holds the original block data.
*
* The first thing to do is generate the MTF values,
*
* The final compressed bitstream is generated into the
* area starting at
- * (UChar*) (&((UChar*)s->arr2)[s->nblock])
+ * &((uint8_t*)s->arr2)[s->nblock]
*
* These storage aliases are set up in bzCompressInit(),
* except for the last one, which is arranged in
* compressBlock().
*/
uint32_t* ptr = s->ptr;
- UChar* block = s->block;
+ uint8_t* block = s->block;
uint16_t* mtfv = s->mtfv;
makeMaps_e(s);
wr = 0;
zPend = 0;
for (i = 0; i < s->nInUse; i++)
- yy[i] = (UChar) i;
+ yy[i] = (uint8_t) i;
for (i = 0; i < s->nblock; i++) {
- UChar ll_i;
+ uint8_t ll_i;
AssertD(wr <= i, "generateMTFValues(1)");
- j = ptr[i]-1;
+ j = ptr[i] - 1;
if (j < 0)
j += s->nblock;
ll_i = s->unseqToSeq[block[j]];
s->mtfFreq[BZ_RUNA]++;
}
if (zPend < 2) break;
- zPend = (zPend - 2) / 2;
+ zPend = (uint32_t)(zPend - 2) / 2;
+ /* bbox: zPend >= 2 here, so (zPend - 2) is non-negative; unsigned div is easier */
};
zPend = 0;
}
{
- register UChar rtmp;
- register UChar* ryy_j;
- register UChar rll_i;
+ register uint8_t rtmp;
+ register uint8_t* ryy_j;
+ register uint8_t rll_i;
rtmp = yy[1];
yy[1] = yy[0];
ryy_j = &(yy[1]);
rll_i = ll_i;
while (rll_i != rtmp) {
- register UChar rtmp2;
+ register uint8_t rtmp2;
ryy_j++;
rtmp2 = rtmp;
rtmp = *ryy_j;
zPend--;
while (1) {
if (zPend & 1) {
- mtfv[wr] = BZ_RUNB; wr++;
+ mtfv[wr] = BZ_RUNB;
+ wr++;
s->mtfFreq[BZ_RUNB]++;
} else {
- mtfv[wr] = BZ_RUNA; wr++;
+ mtfv[wr] = BZ_RUNA;
+ wr++;
s->mtfFreq[BZ_RUNA]++;
}
if (zPend < 2)
break;
- zPend = (zPend - 2) / 2;
+ zPend = (uint32_t)(zPend - 2) / 2;
+ /* bbox: zPend >= 2 here, so (zPend - 2) is non-negative; unsigned div is easier */
};
zPend = 0;
}
int32_t nGroups, nBytes;
/*
- * UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ * uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
* is a global since the decoder also needs it.
*
* int32_t code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
* are also globals only used in this proc.
* Made global to keep stack frame size small.
*/
+#define code sendMTFValues__code
+#define rfreq sendMTFValues__rfreq
+#define len_pack sendMTFValues__len_pack
uint16_t cost[BZ_N_GROUPS];
int32_t fave[BZ_N_GROUPS];
uint16_t* mtfv = s->mtfv;
- alphaSize = s->nInUse+2;
+ alphaSize = s->nInUse + 2;
for (t = 0; t < BZ_N_GROUPS; t++)
for (v = 0; v < alphaSize; v++)
s->len[t][v] = BZ_GREATER_ICOST;
gs = 0;
while (nPart > 0) {
tFreq = remF / nPart;
- ge = gs-1;
+ ge = gs - 1;
aFreq = 0;
while (aFreq < tFreq && ge < alphaSize-1) {
ge++;
if (ge > gs
&& nPart != nGroups && nPart != 1
- && ((nGroups-nPart) % 2 == 1)
+ && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */
) {
aFreq -= s->mtfFreq[ge];
ge--;
s->len[nPart-1][v] = BZ_GREATER_ICOST;
nPart--;
- gs = ge+1;
+ gs = ge + 1;
remF -= aFreq;
}
}
for (v = 0; v < alphaSize; v++)
s->rfreq[t][v] = 0;
-#ifdef FAST_GROUP6
+#if CONFIG_BZIP2_FEATURE_SPEED >= 5
/*
* Set up an auxiliary length table which is used to fast-track
* the common case (nGroups == 6).
}
}
#endif
-
nSelectors = 0;
totc = 0;
gs = 0;
*/
for (t = 0; t < nGroups; t++)
cost[t] = 0;
-#ifdef FAST_GROUP6
+#if CONFIG_BZIP2_FEATURE_SPEED >= 5
if (nGroups == 6 && 50 == ge-gs+1) {
/*--- fast track the common case ---*/
register uint32_t cost01, cost23, cost45;
* Find the coding table which is best for this group,
* and record its identity in the selector table.
*/
- bc = 999999999;
- bt = -1;
- //bc = cost[0];
- //bt = 0;
- for (t = 0; t < nGroups; t++) {
+ /*bc = 999999999;*/
+ /*bt = -1;*/
+ bc = cost[0];
+ bt = 0;
+ for (t = 1 /*0*/; t < nGroups; t++) {
if (cost[t] < bc) {
bc = cost[t];
bt = t;
/*
* Increment the symbol frequencies for the selected table.
*/
-/* ~0.5% faster compress. +800 bytes */
-#if 0
+/* Unrolled fast path below: 1% faster compression, +800 bytes of code */
+#if CONFIG_BZIP2_FEATURE_SPEED >= 4
if (nGroups == 6 && 50 == ge-gs+1) {
/*--- fast track the common case ---*/
#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++
BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
#undef BZ_ITUR
- gs = ge+1;
+ gs = ge + 1;
} else
#endif
{
s->rfreq[bt][mtfv[gs]]++;
gs++;
}
- /* already is: gs = ge+1; */
+ /* already is: gs = ge + 1; */
}
}
/* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
* comment in huffman.c for details. */
for (t = 0; t < nGroups; t++)
- BZ2_hbMakeCodeLengths(&(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
+ BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
}
AssertH(nGroups < 8, 3002);
/*--- Compute MTF values for the selectors. ---*/
{
- UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
+ uint8_t pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
for (i = 0; i < nGroups; i++)
pos[i] = i;
/*--- Transmit the mapping table. ---*/
{
- Bool inUse16[16];
+ /* bbox: optimized a bit more than in bzip2 */
+ int inUse16 = 0;
for (i = 0; i < 16; i++) {
- inUse16[i] = False;
- for (j = 0; j < 16; j++)
- if (s->inUse[i * 16 + j])
- inUse16[i] = True;
+ if (sizeof(long) <= 4) {
+ inUse16 = inUse16*2 +
+ ((*(uint32_t*)&(s->inUse[i * 16 + 0])
+ | *(uint32_t*)&(s->inUse[i * 16 + 4])
+ | *(uint32_t*)&(s->inUse[i * 16 + 8])
+ | *(uint32_t*)&(s->inUse[i * 16 + 12])) != 0);
+ } else { /* Our CPU can do better */
+ inUse16 = inUse16*2 +
+ ((*(uint64_t*)&(s->inUse[i * 16 + 0])
+ | *(uint64_t*)&(s->inUse[i * 16 + 8])) != 0);
+ }
}
-
+
nBytes = s->numZ;
- for (i = 0; i < 16; i++) {
- if (inUse16[i])
- bsW(s, 1, 1);
- else
- bsW(s, 1, 0);
- }
+ bsW(s, 16, inUse16);
+ inUse16 <<= (sizeof(int)*8 - 16); /* move 15th bit into sign bit */
for (i = 0; i < 16; i++) {
- if (inUse16[i]) {
- for (j = 0; j < 16; j++) {
- if (s->inUse[i * 16 + j])
- bsW(s, 1, 1);
- else
- bsW(s, 1, 0);
- }
+ if (inUse16 < 0) {
+ unsigned v16 = 0;
+ for (j = 0; j < 16; j++)
+ v16 = v16*2 + s->inUse[i * 16 + j];
+ bsW(s, 16, v16);
}
+ inUse16 <<= 1;
}
}
if (nGroups == 6 && 50 == ge-gs+1) {
/*--- fast track the common case ---*/
uint16_t mtfv_i;
- UChar* s_len_sel_selCtr = &(s->len[s->selector[selCtr]][0]);
+ uint8_t* s_len_sel_selCtr = &(s->len[s->selector[selCtr]][0]);
int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
#define BZ_ITAH(nn) \
mtfv_i = mtfv[gs+(nn)]; \
{
/*--- slow version which correctly handles all situations ---*/
/* code is bit bigger, but moves multiply out of the loop */
- UChar* s_len_sel_selCtr = &(s->len [s->selector[selCtr]][0]);
+ uint8_t* s_len_sel_selCtr = &(s->len [s->selector[selCtr]][0]);
int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
while (gs <= ge) {
bsW(s,
selCtr++;
}
AssertH(selCtr == nSelectors, 3007);
+#undef code
+#undef rfreq
+#undef len_pack
}
/*---------------------------------------------------*/
static
-void BZ2_compressBlock(EState* s, Bool is_last_block)
+void BZ2_compressBlock(EState* s, int is_last_block)
{
if (s->nblock > 0) {
BZ_FINALISE_CRC(s->blockCRC);
BZ2_blockSort(s);
}
- s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
+ s->zbits = &((uint8_t*)s->arr2)[s->nblock];
/*-- If this is the first block, create the stream header. --*/
if (s->blockNo == 1) {
BZ2_bsInitWrite(s);
- /*bsPutUChar(s, BZ_HDR_B);*/
- /*bsPutUChar(s, BZ_HDR_Z);*/
- /*bsPutUChar(s, BZ_HDR_h);*/
- /*bsPutUChar(s, (UChar)(BZ_HDR_0 + s->blockSize100k));*/
+ /*bsPutU8(s, BZ_HDR_B);*/
+ /*bsPutU8(s, BZ_HDR_Z);*/
+ /*bsPutU8(s, BZ_HDR_h);*/
+ /*bsPutU8(s, BZ_HDR_0 + s->blockSize100k);*/
bsPutU32(s, BZ_HDR_BZh0 + s->blockSize100k);
}
if (s->nblock > 0) {
- /*bsPutUChar(s, 0x31);*/
- /*bsPutUChar(s, 0x41);*/
- /*bsPutUChar(s, 0x59);*/
- /*bsPutUChar(s, 0x26);*/
+ /*bsPutU8(s, 0x31);*/
+ /*bsPutU8(s, 0x41);*/
+ /*bsPutU8(s, 0x59);*/
+ /*bsPutU8(s, 0x26);*/
bsPutU32(s, 0x31415926);
- bsPutUChar(s, 0x53);
- bsPutUChar(s, 0x59);
+ /*bsPutU8(s, 0x53);*/
+ /*bsPutU8(s, 0x59);*/
+ bsPutU16(s, 0x5359);
/*-- Now the block's CRC, so it is in a known place. --*/
bsPutU32(s, s->blockCRC);
/*-- If this is the last block, add the stream trailer. --*/
if (is_last_block) {
- /*bsPutUChar(s, 0x17);*/
- /*bsPutUChar(s, 0x72);*/
- /*bsPutUChar(s, 0x45);*/
- /*bsPutUChar(s, 0x38);*/
+ /*bsPutU8(s, 0x17);*/
+ /*bsPutU8(s, 0x72);*/
+ /*bsPutU8(s, 0x45);*/
+ /*bsPutU8(s, 0x38);*/
bsPutU32(s, 0x17724538);
- bsPutUChar(s, 0x50);
- bsPutUChar(s, 0x90);
+ /*bsPutU8(s, 0x50);*/
+ /*bsPutU8(s, 0x90);*/
+ bsPutU16(s, 0x5090);
bsPutU32(s, s->combinedCRC);
bsFinishWrite(s);
}