From: Denis Vlasenko <vda.linux@googlemail.com>
Date: Sun, 14 Oct 2007 07:49:48 +0000 (-0000)
Subject: bzip2: eliminate some divisions
X-Git-Tag: 1_8_0~50
X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=6a9154b6f649341870bc06e896d2fe7235a4aef9;p=oweals%2Fbusybox.git

bzip2: eliminate some divisions
---

diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c
index ec8a2a56b..aaed883de 100644
--- a/archival/bz/blocksort.c
+++ b/archival/bz/blocksort.c
@@ -246,7 +246,12 @@ void fallbackSort(uint32_t* fmap,
 	for (i = 0; i < 257;    i++) ftab[i] = 0;
 	for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
 	for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
-	for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
+
+	j = ftab[0];  /* bbox: optimized */
+	for (i = 1; i < 257;    i++) {
+		j += ftab[i];
+		ftab[i] = j;
+	}
 
 	for (i = 0; i < nblock; i++) {
 		j = eclass8[i];
@@ -255,7 +260,7 @@ void fallbackSort(uint32_t* fmap,
 		fmap[k] = i;
 	}
 
-	nBhtab = 2 + (nblock / 32);
+	nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned /32 is a plain shift, no sign fixup */
 	for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
 	for (i = 0; i < 256; i++) SET_BH(ftab[i]);
 
@@ -737,27 +742,27 @@ void mainSort(uint32_t*   ptr,
 	memset(ftab, 0, 65537 * sizeof(ftab[0]));
 
 	j = block[0] << 8;
-	i = nblock-1;
+	i = nblock - 1;
 /* 3%, +300 bytes */
 #if CONFIG_BZIP2_FEATURE_SPEED >= 2
 	for (; i >= 3; i -= 4) {
 		quadrant[i] = 0;
-		j = (j >> 8) |(((uint16_t)block[i]) << 8);
+		j = (j >> 8) | (((uint16_t)block[i]) << 8);
 		ftab[j]++;
 		quadrant[i-1] = 0;
-		j = (j >> 8) |(((uint16_t)block[i-1]) << 8);
+		j = (j >> 8) | (((uint16_t)block[i-1]) << 8);
 		ftab[j]++;
 		quadrant[i-2] = 0;
-		j = (j >> 8) |(((uint16_t)block[i-2]) << 8);
+		j = (j >> 8) | (((uint16_t)block[i-2]) << 8);
 		ftab[j]++;
 		quadrant[i-3] = 0;
-		j = (j >> 8) |(((uint16_t)block[i-3]) << 8);
+		j = (j >> 8) | (((uint16_t)block[i-3]) << 8);
 		ftab[j]++;
 	}
 #endif
 	for (; i >= 0; i--) {
 		quadrant[i] = 0;
-		j = (j >> 8) |(((uint16_t)block[i]) << 8);
+		j = (j >> 8) | (((uint16_t)block[i]) << 8);
 		ftab[j]++;
 	}
 
@@ -768,34 +773,37 @@ void mainSort(uint32_t*   ptr,
 	}
 
 	/*-- Complete the initial radix sort --*/
-	for (i = 1; i <= 65536; i++)
-		ftab[i] += ftab[i-1];
+	j = ftab[0]; /* bbox: optimized */
+	for (i = 1; i <= 65536; i++) {
+		j += ftab[i];
+		ftab[i] = j;
+	}
 
 	s = block[0] << 8;
-	i = nblock-1;
+	i = nblock - 1;
 #if CONFIG_BZIP2_FEATURE_SPEED >= 2
 	for (; i >= 3; i -= 4) {
 		s = (s >> 8) | (block[i] << 8);
-		j = ftab[s] -1;
+		j = ftab[s] - 1;
 		ftab[s] = j;
 		ptr[j] = i;
 		s = (s >> 8) | (block[i-1] << 8);
-		j = ftab[s] -1;
+		j = ftab[s] - 1;
 		ftab[s] = j;
 		ptr[j] = i-1;
 		s = (s >> 8) | (block[i-2] << 8);
-		j = ftab[s] -1;
+		j = ftab[s] - 1;
 		ftab[s] = j;
 		ptr[j] = i-2;
 		s = (s >> 8) | (block[i-3] << 8);
-		j = ftab[s] -1;
+		j = ftab[s] - 1;
 		ftab[s] = j;
 		ptr[j] = i-3;
 	}
 #endif
 	for (; i >= 0; i--) {
 		s = (s >> 8) | (block[i] << 8);
-		j = ftab[s] -1;
+		j = ftab[s] - 1;
 		ftab[s] = j;
 		ptr[j] = i;
 	}
@@ -812,21 +820,23 @@ void mainSort(uint32_t*   ptr,
 
 	{
 		int32_t vv;
-		/* was: int32_t h = 1; */
+		/* bbox: was: int32_t h = 1; */
 		/* do h = 3 * h + 1; while (h <= 256); */
-		int32_t h = 364;
+		uint32_t h = 364;
 
 		do {
-			h = h / 3;
+			/*h = h / 3;*/
+			h = (h * 171) >> 9; /* bbox: fast h/3, exact for h = 364, 121, 40, 13, 4 */
 			for (i = h; i <= 255; i++) {
 				vv = runningOrder[i];
 				j = i;
 				while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
 					runningOrder[j] = runningOrder[j-h];
 					j = j - h;
-					if (j <= (h - 1)) goto zero;
+					if (j <= (h - 1))
+						goto zero;
 				}
-				zero:
+ zero:
 				runningOrder[j] = vv;
 			}
 		} while (h != 1);
@@ -860,10 +870,10 @@ void mainSort(uint32_t*   ptr,
 			if (j != ss) {
 				sb = (ss << 8) + j;
 				if (!(ftab[sb] & SETMASK)) {
-					int32_t lo = ftab[sb]   & CLEARMASK;
+					int32_t lo =  ftab[sb]   & CLEARMASK;
 					int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
 					if (hi > lo) {
-						mainQSort3 (
+						mainQSort3(
 							ptr, block, quadrant, nblock,
 							lo, hi, BZ_N_RADIX, budget
 						);
@@ -966,15 +976,14 @@ void mainSort(uint32_t*   ptr,
 			while ((bbSize >> shifts) > 65534) shifts++;
 
 			for (j = bbSize-1; j >= 0; j--) {
-				int32_t a2update     = ptr[bbStart + j];
-				uint16_t qVal        = (uint16_t)(j >> shifts);
+				int32_t a2update   = ptr[bbStart + j];
+				uint16_t qVal      = (uint16_t)(j >> shifts);
 				quadrant[a2update] = qVal;
 				if (a2update < BZ_N_OVERSHOOT)
 					quadrant[a2update + nblock] = qVal;
 			}
 			AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
 		}
-
 	}
 }
 
@@ -1041,7 +1050,8 @@ void BZ2_blockSort(EState* s)
 	s->origPtr = -1;
 	for (i = 0; i < s->nblock; i++)
 		if (ptr[i] == 0) {
-			s->origPtr = i; break;
+			s->origPtr = i;
+			break;
 		};
 
 	AssertH(s->origPtr != -1, 1003);
diff --git a/archival/bz/compress.c b/archival/bz/compress.c
index 3e2fbd867..724474e2d 100644
--- a/archival/bz/compress.c
+++ b/archival/bz/compress.c
@@ -186,7 +186,8 @@ void generateMTFValues(EState* s)
 						s->mtfFreq[BZ_RUNA]++;
 					}
 					if (zPend < 2) break;
-					zPend = (zPend - 2) / 2;
+					zPend = (uint32_t)(zPend - 2) / 2;
+					/* bbox: unsigned /2 is a plain shift, no sign fixup */
 				};
 				zPend = 0;
 			}
@@ -219,15 +220,18 @@ void generateMTFValues(EState* s)
 		zPend--;
 		while (1) {
 			if (zPend & 1) {
-				mtfv[wr] = BZ_RUNB; wr++;
+				mtfv[wr] = BZ_RUNB;
+				wr++;
 				s->mtfFreq[BZ_RUNB]++;
 			} else {
-				mtfv[wr] = BZ_RUNA; wr++;
+				mtfv[wr] = BZ_RUNA;
+				wr++;
 				s->mtfFreq[BZ_RUNA]++;
 			}
 			if (zPend < 2)
 				break;
-			zPend = (zPend - 2) / 2;
+			zPend = (uint32_t)(zPend - 2) / 2;
+			/* bbox: unsigned /2 is a plain shift, no sign fixup */
 		};
 		zPend = 0;
 	}
@@ -288,7 +292,7 @@ void sendMTFValues(EState* s)
 		gs = 0;
 		while (nPart > 0) {
 			tFreq = remF / nPart;
-			ge = gs-1;
+			ge = gs - 1;
 			aFreq = 0;
 			while (aFreq < tFreq && ge < alphaSize-1) {
 				ge++;
@@ -297,7 +301,7 @@ void sendMTFValues(EState* s)
 
 			if (ge > gs
 			 && nPart != nGroups && nPart != 1
-			 && ((nGroups - nPart) % 2 == 1)
+			 && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */
 			) {
 				aFreq -= s->mtfFreq[ge];
 				ge--;
@@ -310,7 +314,7 @@ void sendMTFValues(EState* s)
 					s->len[nPart-1][v] = BZ_GREATER_ICOST;
 
 			nPart--;
-			gs = ge+1;
+			gs = ge + 1;
 			remF -= aFreq;
 		}
 	}
@@ -414,7 +418,7 @@ void sendMTFValues(EState* s)
 			/*
 			 * Increment the symbol frequencies for the selected table.
 			 */
-/* 1% faster compress. +800 bytes */
+/* 1% faster compress. +800 bytes */ 
 #if CONFIG_BZIP2_FEATURE_SPEED >= 4
 			if (nGroups == 6 && 50 == ge-gs+1) {
 				/*--- fast track the common case ---*/
diff --git a/archival/bz/huffman.c b/archival/bz/huffman.c
index 3f80c9976..02838c496 100644
--- a/archival/bz/huffman.c
+++ b/archival/bz/huffman.c
@@ -183,6 +183,8 @@ void BZ2_hbMakeCodeLengths(uint8_t *len,
 
 		for (i = 1; i <= alphaSize; i++) {
 			j = weight[i] >> 8;
+			/* bbox: yes, it is a signed division.
+			 * don't replace with shift! */
 			j = 1 + (j / 2);
 			weight[i] = j << 8;
 		}