aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenis Vlasenko <vda.linux@googlemail.com>2007-10-14 07:49:48 +0000
committerDenis Vlasenko <vda.linux@googlemail.com>2007-10-14 07:49:48 +0000
commit6a9154b6f649341870bc06e896d2fe7235a4aef9 (patch)
treeb80d1603d370838166ada58b42cd6e0b8c7c6970
parent3f5fdc7572d932f33f81029956b87230c9b05182 (diff)
downloadbusybox-w32-6a9154b6f649341870bc06e896d2fe7235a4aef9.tar.gz
busybox-w32-6a9154b6f649341870bc06e896d2fe7235a4aef9.tar.bz2
busybox-w32-6a9154b6f649341870bc06e896d2fe7235a4aef9.zip
bzip2: eliminate some divisions
-rw-r--r--archival/bz/blocksort.c64
-rw-r--r--archival/bz/compress.c20
-rw-r--r--archival/bz/huffman.c2
3 files changed, 51 insertions, 35 deletions
diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c
index ec8a2a56b..aaed883de 100644
--- a/archival/bz/blocksort.c
+++ b/archival/bz/blocksort.c
@@ -246,7 +246,12 @@ void fallbackSort(uint32_t* fmap,
246 for (i = 0; i < 257; i++) ftab[i] = 0; 246 for (i = 0; i < 257; i++) ftab[i] = 0;
247 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 247 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
248 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 248 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
249 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 249
250 j = ftab[0]; /* bbox: optimized */
251 for (i = 1; i < 257; i++) {
252 j += ftab[i];
253 ftab[i] = j;
254 }
250 255
251 for (i = 0; i < nblock; i++) { 256 for (i = 0; i < nblock; i++) {
252 j = eclass8[i]; 257 j = eclass8[i];
@@ -255,7 +260,7 @@ void fallbackSort(uint32_t* fmap,
255 fmap[k] = i; 260 fmap[k] = i;
256 } 261 }
257 262
258 nBhtab = 2 + (nblock / 32); 263 nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
259 for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 264 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
260 for (i = 0; i < 256; i++) SET_BH(ftab[i]); 265 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
261 266
@@ -737,27 +742,27 @@ void mainSort(uint32_t* ptr,
737 memset(ftab, 0, 65537 * sizeof(ftab[0])); 742 memset(ftab, 0, 65537 * sizeof(ftab[0]));
738 743
739 j = block[0] << 8; 744 j = block[0] << 8;
740 i = nblock-1; 745 i = nblock - 1;
741/* 3%, +300 bytes */ 746/* 3%, +300 bytes */
742#if CONFIG_BZIP2_FEATURE_SPEED >= 2 747#if CONFIG_BZIP2_FEATURE_SPEED >= 2
743 for (; i >= 3; i -= 4) { 748 for (; i >= 3; i -= 4) {
744 quadrant[i] = 0; 749 quadrant[i] = 0;
745 j = (j >> 8) |(((uint16_t)block[i]) << 8); 750 j = (j >> 8) | (((uint16_t)block[i]) << 8);
746 ftab[j]++; 751 ftab[j]++;
747 quadrant[i-1] = 0; 752 quadrant[i-1] = 0;
748 j = (j >> 8) |(((uint16_t)block[i-1]) << 8); 753 j = (j >> 8) | (((uint16_t)block[i-1]) << 8);
749 ftab[j]++; 754 ftab[j]++;
750 quadrant[i-2] = 0; 755 quadrant[i-2] = 0;
751 j = (j >> 8) |(((uint16_t)block[i-2]) << 8); 756 j = (j >> 8) | (((uint16_t)block[i-2]) << 8);
752 ftab[j]++; 757 ftab[j]++;
753 quadrant[i-3] = 0; 758 quadrant[i-3] = 0;
754 j = (j >> 8) |(((uint16_t)block[i-3]) << 8); 759 j = (j >> 8) | (((uint16_t)block[i-3]) << 8);
755 ftab[j]++; 760 ftab[j]++;
756 } 761 }
757#endif 762#endif
758 for (; i >= 0; i--) { 763 for (; i >= 0; i--) {
759 quadrant[i] = 0; 764 quadrant[i] = 0;
760 j = (j >> 8) |(((uint16_t)block[i]) << 8); 765 j = (j >> 8) | (((uint16_t)block[i]) << 8);
761 ftab[j]++; 766 ftab[j]++;
762 } 767 }
763 768
@@ -768,34 +773,37 @@ void mainSort(uint32_t* ptr,
768 } 773 }
769 774
770 /*-- Complete the initial radix sort --*/ 775 /*-- Complete the initial radix sort --*/
771 for (i = 1; i <= 65536; i++) 776 j = ftab[0]; /* bbox: optimized */
772 ftab[i] += ftab[i-1]; 777 for (i = 1; i <= 65536; i++) {
778 j += ftab[i];
779 ftab[i] = j;
780 }
773 781
774 s = block[0] << 8; 782 s = block[0] << 8;
775 i = nblock-1; 783 i = nblock - 1;
776#if CONFIG_BZIP2_FEATURE_SPEED >= 2 784#if CONFIG_BZIP2_FEATURE_SPEED >= 2
777 for (; i >= 3; i -= 4) { 785 for (; i >= 3; i -= 4) {
778 s = (s >> 8) | (block[i] << 8); 786 s = (s >> 8) | (block[i] << 8);
779 j = ftab[s] -1; 787 j = ftab[s] - 1;
780 ftab[s] = j; 788 ftab[s] = j;
781 ptr[j] = i; 789 ptr[j] = i;
782 s = (s >> 8) | (block[i-1] << 8); 790 s = (s >> 8) | (block[i-1] << 8);
783 j = ftab[s] -1; 791 j = ftab[s] - 1;
784 ftab[s] = j; 792 ftab[s] = j;
785 ptr[j] = i-1; 793 ptr[j] = i-1;
786 s = (s >> 8) | (block[i-2] << 8); 794 s = (s >> 8) | (block[i-2] << 8);
787 j = ftab[s] -1; 795 j = ftab[s] - 1;
788 ftab[s] = j; 796 ftab[s] = j;
789 ptr[j] = i-2; 797 ptr[j] = i-2;
790 s = (s >> 8) | (block[i-3] << 8); 798 s = (s >> 8) | (block[i-3] << 8);
791 j = ftab[s] -1; 799 j = ftab[s] - 1;
792 ftab[s] = j; 800 ftab[s] = j;
793 ptr[j] = i-3; 801 ptr[j] = i-3;
794 } 802 }
795#endif 803#endif
796 for (; i >= 0; i--) { 804 for (; i >= 0; i--) {
797 s = (s >> 8) | (block[i] << 8); 805 s = (s >> 8) | (block[i] << 8);
798 j = ftab[s] -1; 806 j = ftab[s] - 1;
799 ftab[s] = j; 807 ftab[s] = j;
800 ptr[j] = i; 808 ptr[j] = i;
801 } 809 }
@@ -812,21 +820,23 @@ void mainSort(uint32_t* ptr,
812 820
813 { 821 {
814 int32_t vv; 822 int32_t vv;
815 /* was: int32_t h = 1; */ 823 /* bbox: was: int32_t h = 1; */
816 /* do h = 3 * h + 1; while (h <= 256); */ 824 /* do h = 3 * h + 1; while (h <= 256); */
817 int32_t h = 364; 825 uint32_t h = 364;
818 826
819 do { 827 do {
820 h = h / 3; 828 /*h = h / 3;*/
829 h = (h * 171) >> 9; /* bbox: fast h/3 */
821 for (i = h; i <= 255; i++) { 830 for (i = h; i <= 255; i++) {
822 vv = runningOrder[i]; 831 vv = runningOrder[i];
823 j = i; 832 j = i;
824 while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) { 833 while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
825 runningOrder[j] = runningOrder[j-h]; 834 runningOrder[j] = runningOrder[j-h];
826 j = j - h; 835 j = j - h;
827 if (j <= (h - 1)) goto zero; 836 if (j <= (h - 1))
837 goto zero;
828 } 838 }
829 zero: 839 zero:
830 runningOrder[j] = vv; 840 runningOrder[j] = vv;
831 } 841 }
832 } while (h != 1); 842 } while (h != 1);
@@ -860,10 +870,10 @@ void mainSort(uint32_t* ptr,
860 if (j != ss) { 870 if (j != ss) {
861 sb = (ss << 8) + j; 871 sb = (ss << 8) + j;
862 if (!(ftab[sb] & SETMASK)) { 872 if (!(ftab[sb] & SETMASK)) {
863 int32_t lo = ftab[sb] & CLEARMASK; 873 int32_t lo = ftab[sb] & CLEARMASK;
864 int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; 874 int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
865 if (hi > lo) { 875 if (hi > lo) {
866 mainQSort3 ( 876 mainQSort3(
867 ptr, block, quadrant, nblock, 877 ptr, block, quadrant, nblock,
868 lo, hi, BZ_N_RADIX, budget 878 lo, hi, BZ_N_RADIX, budget
869 ); 879 );
@@ -966,15 +976,14 @@ void mainSort(uint32_t* ptr,
966 while ((bbSize >> shifts) > 65534) shifts++; 976 while ((bbSize >> shifts) > 65534) shifts++;
967 977
968 for (j = bbSize-1; j >= 0; j--) { 978 for (j = bbSize-1; j >= 0; j--) {
969 int32_t a2update = ptr[bbStart + j]; 979 int32_t a2update = ptr[bbStart + j];
970 uint16_t qVal = (uint16_t)(j >> shifts); 980 uint16_t qVal = (uint16_t)(j >> shifts);
971 quadrant[a2update] = qVal; 981 quadrant[a2update] = qVal;
972 if (a2update < BZ_N_OVERSHOOT) 982 if (a2update < BZ_N_OVERSHOOT)
973 quadrant[a2update + nblock] = qVal; 983 quadrant[a2update + nblock] = qVal;
974 } 984 }
975 AssertH(((bbSize-1) >> shifts) <= 65535, 1002); 985 AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
976 } 986 }
977
978 } 987 }
979} 988}
980 989
@@ -1041,7 +1050,8 @@ void BZ2_blockSort(EState* s)
1041 s->origPtr = -1; 1050 s->origPtr = -1;
1042 for (i = 0; i < s->nblock; i++) 1051 for (i = 0; i < s->nblock; i++)
1043 if (ptr[i] == 0) { 1052 if (ptr[i] == 0) {
1044 s->origPtr = i; break; 1053 s->origPtr = i;
1054 break;
1045 }; 1055 };
1046 1056
1047 AssertH(s->origPtr != -1, 1003); 1057 AssertH(s->origPtr != -1, 1003);
diff --git a/archival/bz/compress.c b/archival/bz/compress.c
index 3e2fbd867..724474e2d 100644
--- a/archival/bz/compress.c
+++ b/archival/bz/compress.c
@@ -186,7 +186,8 @@ void generateMTFValues(EState* s)
186 s->mtfFreq[BZ_RUNA]++; 186 s->mtfFreq[BZ_RUNA]++;
187 } 187 }
188 if (zPend < 2) break; 188 if (zPend < 2) break;
189 zPend = (zPend - 2) / 2; 189 zPend = (uint32_t)(zPend - 2) / 2;
190 /* bbox: unsigned div is easier */
190 }; 191 };
191 zPend = 0; 192 zPend = 0;
192 } 193 }
@@ -219,15 +220,18 @@ void generateMTFValues(EState* s)
219 zPend--; 220 zPend--;
220 while (1) { 221 while (1) {
221 if (zPend & 1) { 222 if (zPend & 1) {
222 mtfv[wr] = BZ_RUNB; wr++; 223 mtfv[wr] = BZ_RUNB;
224 wr++;
223 s->mtfFreq[BZ_RUNB]++; 225 s->mtfFreq[BZ_RUNB]++;
224 } else { 226 } else {
225 mtfv[wr] = BZ_RUNA; wr++; 227 mtfv[wr] = BZ_RUNA;
228 wr++;
226 s->mtfFreq[BZ_RUNA]++; 229 s->mtfFreq[BZ_RUNA]++;
227 } 230 }
228 if (zPend < 2) 231 if (zPend < 2)
229 break; 232 break;
230 zPend = (zPend - 2) / 2; 233 zPend = (uint32_t)(zPend - 2) / 2;
234 /* bbox: unsigned div is easier */
231 }; 235 };
232 zPend = 0; 236 zPend = 0;
233 } 237 }
@@ -288,7 +292,7 @@ void sendMTFValues(EState* s)
288 gs = 0; 292 gs = 0;
289 while (nPart > 0) { 293 while (nPart > 0) {
290 tFreq = remF / nPart; 294 tFreq = remF / nPart;
291 ge = gs-1; 295 ge = gs - 1;
292 aFreq = 0; 296 aFreq = 0;
293 while (aFreq < tFreq && ge < alphaSize-1) { 297 while (aFreq < tFreq && ge < alphaSize-1) {
294 ge++; 298 ge++;
@@ -297,7 +301,7 @@ void sendMTFValues(EState* s)
297 301
298 if (ge > gs 302 if (ge > gs
299 && nPart != nGroups && nPart != 1 303 && nPart != nGroups && nPart != 1
300 && ((nGroups - nPart) % 2 == 1) 304 && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */
301 ) { 305 ) {
302 aFreq -= s->mtfFreq[ge]; 306 aFreq -= s->mtfFreq[ge];
303 ge--; 307 ge--;
@@ -310,7 +314,7 @@ void sendMTFValues(EState* s)
310 s->len[nPart-1][v] = BZ_GREATER_ICOST; 314 s->len[nPart-1][v] = BZ_GREATER_ICOST;
311 315
312 nPart--; 316 nPart--;
313 gs = ge+1; 317 gs = ge + 1;
314 remF -= aFreq; 318 remF -= aFreq;
315 } 319 }
316 } 320 }
@@ -414,7 +418,7 @@ void sendMTFValues(EState* s)
414 /* 418 /*
415 * Increment the symbol frequencies for the selected table. 419 * Increment the symbol frequencies for the selected table.
416 */ 420 */
417/* 1% faster compress. +800 bytes */ 421/* 1% faster compress. +800 bytes */
418#if CONFIG_BZIP2_FEATURE_SPEED >= 4 422#if CONFIG_BZIP2_FEATURE_SPEED >= 4
419 if (nGroups == 6 && 50 == ge-gs+1) { 423 if (nGroups == 6 && 50 == ge-gs+1) {
420 /*--- fast track the common case ---*/ 424 /*--- fast track the common case ---*/
diff --git a/archival/bz/huffman.c b/archival/bz/huffman.c
index 3f80c9976..02838c496 100644
--- a/archival/bz/huffman.c
+++ b/archival/bz/huffman.c
@@ -183,6 +183,8 @@ void BZ2_hbMakeCodeLengths(uint8_t *len,
183 183
184 for (i = 1; i <= alphaSize; i++) { 184 for (i = 1; i <= alphaSize; i++) {
185 j = weight[i] >> 8; 185 j = weight[i] >> 8;
186 /* bbox: yes, it is a signed division.
187 * don't replace with shift! */
186 j = 1 + (j / 2); 188 j = 1 + (j / 2);
187 weight[i] = j << 8; 189 weight[i] = j << 8;
188 } 190 }