bzip2: eliminate some divisions
authorDenis Vlasenko <vda.linux@googlemail.com>
Sun, 14 Oct 2007 07:49:48 +0000 (07:49 -0000)
committerDenis Vlasenko <vda.linux@googlemail.com>
Sun, 14 Oct 2007 07:49:48 +0000 (07:49 -0000)
archival/bz/blocksort.c
archival/bz/compress.c
archival/bz/huffman.c

index ec8a2a56b9c1ab7159e33656949f70f657b86647..aaed883deca418c37f13a52b3154ec13e7515a0f 100644 (file)
@@ -246,7 +246,12 @@ void fallbackSort(uint32_t* fmap,
        for (i = 0; i < 257;    i++) ftab[i] = 0;
        for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
        for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
-       for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
+
+       j = ftab[0];  /* bbox: optimized */
+       for (i = 1; i < 257;    i++) {
+               j += ftab[i];
+               ftab[i] = j;
+       }
 
        for (i = 0; i < nblock; i++) {
                j = eclass8[i];
@@ -255,7 +260,7 @@ void fallbackSort(uint32_t* fmap,
                fmap[k] = i;
        }
 
-       nBhtab = 2 + (nblock / 32);
+       nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
        for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
        for (i = 0; i < 256; i++) SET_BH(ftab[i]);
 
@@ -737,27 +742,27 @@ void mainSort(uint32_t*   ptr,
        memset(ftab, 0, 65537 * sizeof(ftab[0]));
 
        j = block[0] << 8;
-       i = nblock-1;
+       i = nblock - 1;
 /* 3%, +300 bytes */
 #if CONFIG_BZIP2_FEATURE_SPEED >= 2
        for (; i >= 3; i -= 4) {
                quadrant[i] = 0;
-               j = (j >> 8) |(((uint16_t)block[i]) << 8);
+               j = (j >> 8) | (((uint16_t)block[i]) << 8);
                ftab[j]++;
                quadrant[i-1] = 0;
-               j = (j >> 8) |(((uint16_t)block[i-1]) << 8);
+               j = (j >> 8) | (((uint16_t)block[i-1]) << 8);
                ftab[j]++;
                quadrant[i-2] = 0;
-               j = (j >> 8) |(((uint16_t)block[i-2]) << 8);
+               j = (j >> 8) | (((uint16_t)block[i-2]) << 8);
                ftab[j]++;
                quadrant[i-3] = 0;
-               j = (j >> 8) |(((uint16_t)block[i-3]) << 8);
+               j = (j >> 8) | (((uint16_t)block[i-3]) << 8);
                ftab[j]++;
        }
 #endif
        for (; i >= 0; i--) {
                quadrant[i] = 0;
-               j = (j >> 8) |(((uint16_t)block[i]) << 8);
+               j = (j >> 8) | (((uint16_t)block[i]) << 8);
                ftab[j]++;
        }
 
@@ -768,34 +773,37 @@ void mainSort(uint32_t*   ptr,
        }
 
        /*-- Complete the initial radix sort --*/
-       for (i = 1; i <= 65536; i++)
-               ftab[i] += ftab[i-1];
+       j = ftab[0]; /* bbox: optimized */
+       for (i = 1; i <= 65536; i++) {
+               j += ftab[i];
+               ftab[i] = j;
+       }
 
        s = block[0] << 8;
-       i = nblock-1;
+       i = nblock - 1;
 #if CONFIG_BZIP2_FEATURE_SPEED >= 2
        for (; i >= 3; i -= 4) {
                s = (s >> 8) | (block[i] << 8);
-               j = ftab[s] -1;
+               j = ftab[s] - 1;
                ftab[s] = j;
                ptr[j] = i;
                s = (s >> 8) | (block[i-1] << 8);
-               j = ftab[s] -1;
+               j = ftab[s] - 1;
                ftab[s] = j;
                ptr[j] = i-1;
                s = (s >> 8) | (block[i-2] << 8);
-               j = ftab[s] -1;
+               j = ftab[s] - 1;
                ftab[s] = j;
                ptr[j] = i-2;
                s = (s >> 8) | (block[i-3] << 8);
-               j = ftab[s] -1;
+               j = ftab[s] - 1;
                ftab[s] = j;
                ptr[j] = i-3;
        }
 #endif
        for (; i >= 0; i--) {
                s = (s >> 8) | (block[i] << 8);
-               j = ftab[s] -1;
+               j = ftab[s] - 1;
                ftab[s] = j;
                ptr[j] = i;
        }
@@ -812,21 +820,23 @@ void mainSort(uint32_t*   ptr,
 
        {
                int32_t vv;
-               /* was: int32_t h = 1; */
+               /* bbox: was: int32_t h = 1; */
                /* do h = 3 * h + 1; while (h <= 256); */
-               int32_t h = 364;
+               uint32_t h = 364;
 
                do {
-                       h = h / 3;
+                       /*h = h / 3;*/
+                       h = (h * 171) >> 9; /* bbox: fast h/3 */
                        for (i = h; i <= 255; i++) {
                                vv = runningOrder[i];
                                j = i;
                                while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
                                        runningOrder[j] = runningOrder[j-h];
                                        j = j - h;
-                                       if (j <= (h - 1)) goto zero;
+                                       if (j <= (h - 1))
+                                               goto zero;
                                }
                              zero:
+ zero:
                                runningOrder[j] = vv;
                        }
                } while (h != 1);
@@ -860,10 +870,10 @@ void mainSort(uint32_t*   ptr,
                        if (j != ss) {
                                sb = (ss << 8) + j;
                                if (!(ftab[sb] & SETMASK)) {
-                                       int32_t lo = ftab[sb]   & CLEARMASK;
+                                       int32_t lo =  ftab[sb]   & CLEARMASK;
                                        int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
                                        if (hi > lo) {
-                                               mainQSort3 (
+                                               mainQSort3(
                                                        ptr, block, quadrant, nblock,
                                                        lo, hi, BZ_N_RADIX, budget
                                                );
@@ -966,15 +976,14 @@ void mainSort(uint32_t*   ptr,
                        while ((bbSize >> shifts) > 65534) shifts++;
 
                        for (j = bbSize-1; j >= 0; j--) {
-                               int32_t a2update     = ptr[bbStart + j];
-                               uint16_t qVal        = (uint16_t)(j >> shifts);
+                               int32_t a2update   = ptr[bbStart + j];
+                               uint16_t qVal      = (uint16_t)(j >> shifts);
                                quadrant[a2update] = qVal;
                                if (a2update < BZ_N_OVERSHOOT)
                                        quadrant[a2update + nblock] = qVal;
                        }
                        AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
                }
-
        }
 }
 
@@ -1041,7 +1050,8 @@ void BZ2_blockSort(EState* s)
        s->origPtr = -1;
        for (i = 0; i < s->nblock; i++)
                if (ptr[i] == 0) {
-                       s->origPtr = i; break;
+                       s->origPtr = i;
+                       break;
                };
 
        AssertH(s->origPtr != -1, 1003);
index 3e2fbd867415758deb70dbb17d87e39aaa19e329..724474e2d99aaeb5ca37bcb5416ba3c8466e690c 100644 (file)
@@ -186,7 +186,8 @@ void generateMTFValues(EState* s)
                                                s->mtfFreq[BZ_RUNA]++;
                                        }
                                        if (zPend < 2) break;
-                                       zPend = (zPend - 2) / 2;
+                                       zPend = (uint32_t)(zPend - 2) / 2;
+                                       /* bbox: unsigned div is easier */
                                };
                                zPend = 0;
                        }
@@ -219,15 +220,18 @@ void generateMTFValues(EState* s)
                zPend--;
                while (1) {
                        if (zPend & 1) {
-                               mtfv[wr] = BZ_RUNB; wr++;
+                               mtfv[wr] = BZ_RUNB;
+                               wr++;
                                s->mtfFreq[BZ_RUNB]++;
                        } else {
-                               mtfv[wr] = BZ_RUNA; wr++;
+                               mtfv[wr] = BZ_RUNA;
+                               wr++;
                                s->mtfFreq[BZ_RUNA]++;
                        }
                        if (zPend < 2)
                                break;
-                       zPend = (zPend - 2) / 2;
+                       zPend = (uint32_t)(zPend - 2) / 2;
+                       /* bbox: unsigned div is easier */
                };
                zPend = 0;
        }
@@ -288,7 +292,7 @@ void sendMTFValues(EState* s)
                gs = 0;
                while (nPart > 0) {
                        tFreq = remF / nPart;
-                       ge = gs-1;
+                       ge = gs - 1;
                        aFreq = 0;
                        while (aFreq < tFreq && ge < alphaSize-1) {
                                ge++;
@@ -297,7 +301,7 @@ void sendMTFValues(EState* s)
 
                        if (ge > gs
                         && nPart != nGroups && nPart != 1
-                        && ((nGroups - nPart) % 2 == 1)
+                        && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */
                        ) {
                                aFreq -= s->mtfFreq[ge];
                                ge--;
@@ -310,7 +314,7 @@ void sendMTFValues(EState* s)
                                        s->len[nPart-1][v] = BZ_GREATER_ICOST;
 
                        nPart--;
-                       gs = ge+1;
+                       gs = ge + 1;
                        remF -= aFreq;
                }
        }
@@ -414,7 +418,7 @@ void sendMTFValues(EState* s)
                        /*
                         * Increment the symbol frequencies for the selected table.
                         */
-/* 1% faster compress. +800 bytes */
+/* 1% faster compress. +800 bytes */ 
 #if CONFIG_BZIP2_FEATURE_SPEED >= 4
                        if (nGroups == 6 && 50 == ge-gs+1) {
                                /*--- fast track the common case ---*/
index 3f80c9976ad8d42995e44c12c4a72938e1990f4f..02838c4960a707bad3ef4fab84527874d0eba9b0 100644 (file)
@@ -183,6 +183,8 @@ void BZ2_hbMakeCodeLengths(uint8_t *len,
 
                for (i = 1; i <= alphaSize; i++) {
                        j = weight[i] >> 8;
+                       /* bbox: yes, it is a signed division.
+                        * don't replace with shift! */
                        j = 1 + (j / 2);
                        weight[i] = j << 8;
                }