diff options
author | Denis Vlasenko <vda.linux@googlemail.com> | 2007-10-14 07:49:48 +0000 |
---|---|---|
committer | Denis Vlasenko <vda.linux@googlemail.com> | 2007-10-14 07:49:48 +0000 |
commit | 6a9154b6f649341870bc06e896d2fe7235a4aef9 (patch) | |
tree | b80d1603d370838166ada58b42cd6e0b8c7c6970 | |
parent | 3f5fdc7572d932f33f81029956b87230c9b05182 (diff) | |
download | busybox-w32-6a9154b6f649341870bc06e896d2fe7235a4aef9.tar.gz busybox-w32-6a9154b6f649341870bc06e896d2fe7235a4aef9.tar.bz2 busybox-w32-6a9154b6f649341870bc06e896d2fe7235a4aef9.zip |
bzip2: eliminate some divisions
-rw-r--r-- | archival/bz/blocksort.c | 64 | ||||
-rw-r--r-- | archival/bz/compress.c | 20 | ||||
-rw-r--r-- | archival/bz/huffman.c | 2 |
3 files changed, 51 insertions, 35 deletions
diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c index ec8a2a56b..aaed883de 100644 --- a/archival/bz/blocksort.c +++ b/archival/bz/blocksort.c | |||
@@ -246,7 +246,12 @@ void fallbackSort(uint32_t* fmap, | |||
246 | for (i = 0; i < 257; i++) ftab[i] = 0; | 246 | for (i = 0; i < 257; i++) ftab[i] = 0; |
247 | for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; | 247 | for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; |
248 | for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; | 248 | for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; |
249 | for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; | 249 | |
250 | j = ftab[0]; /* bbox: optimized */ | ||
251 | for (i = 1; i < 257; i++) { | ||
252 | j += ftab[i]; | ||
253 | ftab[i] = j; | ||
254 | } | ||
250 | 255 | ||
251 | for (i = 0; i < nblock; i++) { | 256 | for (i = 0; i < nblock; i++) { |
252 | j = eclass8[i]; | 257 | j = eclass8[i]; |
@@ -255,7 +260,7 @@ void fallbackSort(uint32_t* fmap, | |||
255 | fmap[k] = i; | 260 | fmap[k] = i; |
256 | } | 261 | } |
257 | 262 | ||
258 | nBhtab = 2 + (nblock / 32); | 263 | nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */ |
259 | for (i = 0; i < nBhtab; i++) bhtab[i] = 0; | 264 | for (i = 0; i < nBhtab; i++) bhtab[i] = 0; |
260 | for (i = 0; i < 256; i++) SET_BH(ftab[i]); | 265 | for (i = 0; i < 256; i++) SET_BH(ftab[i]); |
261 | 266 | ||
@@ -737,27 +742,27 @@ void mainSort(uint32_t* ptr, | |||
737 | memset(ftab, 0, 65537 * sizeof(ftab[0])); | 742 | memset(ftab, 0, 65537 * sizeof(ftab[0])); |
738 | 743 | ||
739 | j = block[0] << 8; | 744 | j = block[0] << 8; |
740 | i = nblock-1; | 745 | i = nblock - 1; |
741 | /* 3%, +300 bytes */ | 746 | /* 3%, +300 bytes */ |
742 | #if CONFIG_BZIP2_FEATURE_SPEED >= 2 | 747 | #if CONFIG_BZIP2_FEATURE_SPEED >= 2 |
743 | for (; i >= 3; i -= 4) { | 748 | for (; i >= 3; i -= 4) { |
744 | quadrant[i] = 0; | 749 | quadrant[i] = 0; |
745 | j = (j >> 8) |(((uint16_t)block[i]) << 8); | 750 | j = (j >> 8) | (((uint16_t)block[i]) << 8); |
746 | ftab[j]++; | 751 | ftab[j]++; |
747 | quadrant[i-1] = 0; | 752 | quadrant[i-1] = 0; |
748 | j = (j >> 8) |(((uint16_t)block[i-1]) << 8); | 753 | j = (j >> 8) | (((uint16_t)block[i-1]) << 8); |
749 | ftab[j]++; | 754 | ftab[j]++; |
750 | quadrant[i-2] = 0; | 755 | quadrant[i-2] = 0; |
751 | j = (j >> 8) |(((uint16_t)block[i-2]) << 8); | 756 | j = (j >> 8) | (((uint16_t)block[i-2]) << 8); |
752 | ftab[j]++; | 757 | ftab[j]++; |
753 | quadrant[i-3] = 0; | 758 | quadrant[i-3] = 0; |
754 | j = (j >> 8) |(((uint16_t)block[i-3]) << 8); | 759 | j = (j >> 8) | (((uint16_t)block[i-3]) << 8); |
755 | ftab[j]++; | 760 | ftab[j]++; |
756 | } | 761 | } |
757 | #endif | 762 | #endif |
758 | for (; i >= 0; i--) { | 763 | for (; i >= 0; i--) { |
759 | quadrant[i] = 0; | 764 | quadrant[i] = 0; |
760 | j = (j >> 8) |(((uint16_t)block[i]) << 8); | 765 | j = (j >> 8) | (((uint16_t)block[i]) << 8); |
761 | ftab[j]++; | 766 | ftab[j]++; |
762 | } | 767 | } |
763 | 768 | ||
@@ -768,34 +773,37 @@ void mainSort(uint32_t* ptr, | |||
768 | } | 773 | } |
769 | 774 | ||
770 | /*-- Complete the initial radix sort --*/ | 775 | /*-- Complete the initial radix sort --*/ |
771 | for (i = 1; i <= 65536; i++) | 776 | j = ftab[0]; /* bbox: optimized */ |
772 | ftab[i] += ftab[i-1]; | 777 | for (i = 1; i <= 65536; i++) { |
778 | j += ftab[i]; | ||
779 | ftab[i] = j; | ||
780 | } | ||
773 | 781 | ||
774 | s = block[0] << 8; | 782 | s = block[0] << 8; |
775 | i = nblock-1; | 783 | i = nblock - 1; |
776 | #if CONFIG_BZIP2_FEATURE_SPEED >= 2 | 784 | #if CONFIG_BZIP2_FEATURE_SPEED >= 2 |
777 | for (; i >= 3; i -= 4) { | 785 | for (; i >= 3; i -= 4) { |
778 | s = (s >> 8) | (block[i] << 8); | 786 | s = (s >> 8) | (block[i] << 8); |
779 | j = ftab[s] -1; | 787 | j = ftab[s] - 1; |
780 | ftab[s] = j; | 788 | ftab[s] = j; |
781 | ptr[j] = i; | 789 | ptr[j] = i; |
782 | s = (s >> 8) | (block[i-1] << 8); | 790 | s = (s >> 8) | (block[i-1] << 8); |
783 | j = ftab[s] -1; | 791 | j = ftab[s] - 1; |
784 | ftab[s] = j; | 792 | ftab[s] = j; |
785 | ptr[j] = i-1; | 793 | ptr[j] = i-1; |
786 | s = (s >> 8) | (block[i-2] << 8); | 794 | s = (s >> 8) | (block[i-2] << 8); |
787 | j = ftab[s] -1; | 795 | j = ftab[s] - 1; |
788 | ftab[s] = j; | 796 | ftab[s] = j; |
789 | ptr[j] = i-2; | 797 | ptr[j] = i-2; |
790 | s = (s >> 8) | (block[i-3] << 8); | 798 | s = (s >> 8) | (block[i-3] << 8); |
791 | j = ftab[s] -1; | 799 | j = ftab[s] - 1; |
792 | ftab[s] = j; | 800 | ftab[s] = j; |
793 | ptr[j] = i-3; | 801 | ptr[j] = i-3; |
794 | } | 802 | } |
795 | #endif | 803 | #endif |
796 | for (; i >= 0; i--) { | 804 | for (; i >= 0; i--) { |
797 | s = (s >> 8) | (block[i] << 8); | 805 | s = (s >> 8) | (block[i] << 8); |
798 | j = ftab[s] -1; | 806 | j = ftab[s] - 1; |
799 | ftab[s] = j; | 807 | ftab[s] = j; |
800 | ptr[j] = i; | 808 | ptr[j] = i; |
801 | } | 809 | } |
@@ -812,21 +820,23 @@ void mainSort(uint32_t* ptr, | |||
812 | 820 | ||
813 | { | 821 | { |
814 | int32_t vv; | 822 | int32_t vv; |
815 | /* was: int32_t h = 1; */ | 823 | /* bbox: was: int32_t h = 1; */ |
816 | /* do h = 3 * h + 1; while (h <= 256); */ | 824 | /* do h = 3 * h + 1; while (h <= 256); */ |
817 | int32_t h = 364; | 825 | uint32_t h = 364; |
818 | 826 | ||
819 | do { | 827 | do { |
820 | h = h / 3; | 828 | /*h = h / 3;*/ |
829 | h = (h * 171) >> 9; /* bbox: fast h/3 */ | ||
821 | for (i = h; i <= 255; i++) { | 830 | for (i = h; i <= 255; i++) { |
822 | vv = runningOrder[i]; | 831 | vv = runningOrder[i]; |
823 | j = i; | 832 | j = i; |
824 | while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) { | 833 | while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) { |
825 | runningOrder[j] = runningOrder[j-h]; | 834 | runningOrder[j] = runningOrder[j-h]; |
826 | j = j - h; | 835 | j = j - h; |
827 | if (j <= (h - 1)) goto zero; | 836 | if (j <= (h - 1)) |
837 | goto zero; | ||
828 | } | 838 | } |
829 | zero: | 839 | zero: |
830 | runningOrder[j] = vv; | 840 | runningOrder[j] = vv; |
831 | } | 841 | } |
832 | } while (h != 1); | 842 | } while (h != 1); |
@@ -860,10 +870,10 @@ void mainSort(uint32_t* ptr, | |||
860 | if (j != ss) { | 870 | if (j != ss) { |
861 | sb = (ss << 8) + j; | 871 | sb = (ss << 8) + j; |
862 | if (!(ftab[sb] & SETMASK)) { | 872 | if (!(ftab[sb] & SETMASK)) { |
863 | int32_t lo = ftab[sb] & CLEARMASK; | 873 | int32_t lo = ftab[sb] & CLEARMASK; |
864 | int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; | 874 | int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; |
865 | if (hi > lo) { | 875 | if (hi > lo) { |
866 | mainQSort3 ( | 876 | mainQSort3( |
867 | ptr, block, quadrant, nblock, | 877 | ptr, block, quadrant, nblock, |
868 | lo, hi, BZ_N_RADIX, budget | 878 | lo, hi, BZ_N_RADIX, budget |
869 | ); | 879 | ); |
@@ -966,15 +976,14 @@ void mainSort(uint32_t* ptr, | |||
966 | while ((bbSize >> shifts) > 65534) shifts++; | 976 | while ((bbSize >> shifts) > 65534) shifts++; |
967 | 977 | ||
968 | for (j = bbSize-1; j >= 0; j--) { | 978 | for (j = bbSize-1; j >= 0; j--) { |
969 | int32_t a2update = ptr[bbStart + j]; | 979 | int32_t a2update = ptr[bbStart + j]; |
970 | uint16_t qVal = (uint16_t)(j >> shifts); | 980 | uint16_t qVal = (uint16_t)(j >> shifts); |
971 | quadrant[a2update] = qVal; | 981 | quadrant[a2update] = qVal; |
972 | if (a2update < BZ_N_OVERSHOOT) | 982 | if (a2update < BZ_N_OVERSHOOT) |
973 | quadrant[a2update + nblock] = qVal; | 983 | quadrant[a2update + nblock] = qVal; |
974 | } | 984 | } |
975 | AssertH(((bbSize-1) >> shifts) <= 65535, 1002); | 985 | AssertH(((bbSize-1) >> shifts) <= 65535, 1002); |
976 | } | 986 | } |
977 | |||
978 | } | 987 | } |
979 | } | 988 | } |
980 | 989 | ||
@@ -1041,7 +1050,8 @@ void BZ2_blockSort(EState* s) | |||
1041 | s->origPtr = -1; | 1050 | s->origPtr = -1; |
1042 | for (i = 0; i < s->nblock; i++) | 1051 | for (i = 0; i < s->nblock; i++) |
1043 | if (ptr[i] == 0) { | 1052 | if (ptr[i] == 0) { |
1044 | s->origPtr = i; break; | 1053 | s->origPtr = i; |
1054 | break; | ||
1045 | }; | 1055 | }; |
1046 | 1056 | ||
1047 | AssertH(s->origPtr != -1, 1003); | 1057 | AssertH(s->origPtr != -1, 1003); |
diff --git a/archival/bz/compress.c b/archival/bz/compress.c index 3e2fbd867..724474e2d 100644 --- a/archival/bz/compress.c +++ b/archival/bz/compress.c | |||
@@ -186,7 +186,8 @@ void generateMTFValues(EState* s) | |||
186 | s->mtfFreq[BZ_RUNA]++; | 186 | s->mtfFreq[BZ_RUNA]++; |
187 | } | 187 | } |
188 | if (zPend < 2) break; | 188 | if (zPend < 2) break; |
189 | zPend = (zPend - 2) / 2; | 189 | zPend = (uint32_t)(zPend - 2) / 2; |
190 | /* bbox: unsigned div is easier */ | ||
190 | }; | 191 | }; |
191 | zPend = 0; | 192 | zPend = 0; |
192 | } | 193 | } |
@@ -219,15 +220,18 @@ void generateMTFValues(EState* s) | |||
219 | zPend--; | 220 | zPend--; |
220 | while (1) { | 221 | while (1) { |
221 | if (zPend & 1) { | 222 | if (zPend & 1) { |
222 | mtfv[wr] = BZ_RUNB; wr++; | 223 | mtfv[wr] = BZ_RUNB; |
224 | wr++; | ||
223 | s->mtfFreq[BZ_RUNB]++; | 225 | s->mtfFreq[BZ_RUNB]++; |
224 | } else { | 226 | } else { |
225 | mtfv[wr] = BZ_RUNA; wr++; | 227 | mtfv[wr] = BZ_RUNA; |
228 | wr++; | ||
226 | s->mtfFreq[BZ_RUNA]++; | 229 | s->mtfFreq[BZ_RUNA]++; |
227 | } | 230 | } |
228 | if (zPend < 2) | 231 | if (zPend < 2) |
229 | break; | 232 | break; |
230 | zPend = (zPend - 2) / 2; | 233 | zPend = (uint32_t)(zPend - 2) / 2; |
234 | /* bbox: unsigned div is easier */ | ||
231 | }; | 235 | }; |
232 | zPend = 0; | 236 | zPend = 0; |
233 | } | 237 | } |
@@ -288,7 +292,7 @@ void sendMTFValues(EState* s) | |||
288 | gs = 0; | 292 | gs = 0; |
289 | while (nPart > 0) { | 293 | while (nPart > 0) { |
290 | tFreq = remF / nPart; | 294 | tFreq = remF / nPart; |
291 | ge = gs-1; | 295 | ge = gs - 1; |
292 | aFreq = 0; | 296 | aFreq = 0; |
293 | while (aFreq < tFreq && ge < alphaSize-1) { | 297 | while (aFreq < tFreq && ge < alphaSize-1) { |
294 | ge++; | 298 | ge++; |
@@ -297,7 +301,7 @@ void sendMTFValues(EState* s) | |||
297 | 301 | ||
298 | if (ge > gs | 302 | if (ge > gs |
299 | && nPart != nGroups && nPart != 1 | 303 | && nPart != nGroups && nPart != 1 |
300 | && ((nGroups - nPart) % 2 == 1) | 304 | && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */ |
301 | ) { | 305 | ) { |
302 | aFreq -= s->mtfFreq[ge]; | 306 | aFreq -= s->mtfFreq[ge]; |
303 | ge--; | 307 | ge--; |
@@ -310,7 +314,7 @@ void sendMTFValues(EState* s) | |||
310 | s->len[nPart-1][v] = BZ_GREATER_ICOST; | 314 | s->len[nPart-1][v] = BZ_GREATER_ICOST; |
311 | 315 | ||
312 | nPart--; | 316 | nPart--; |
313 | gs = ge+1; | 317 | gs = ge + 1; |
314 | remF -= aFreq; | 318 | remF -= aFreq; |
315 | } | 319 | } |
316 | } | 320 | } |
@@ -414,7 +418,7 @@ void sendMTFValues(EState* s) | |||
414 | /* | 418 | /* |
415 | * Increment the symbol frequencies for the selected table. | 419 | * Increment the symbol frequencies for the selected table. |
416 | */ | 420 | */ |
417 | /* 1% faster compress. +800 bytes */ | 421 | /* 1% faster compress. +800 bytes */ |
418 | #if CONFIG_BZIP2_FEATURE_SPEED >= 4 | 422 | #if CONFIG_BZIP2_FEATURE_SPEED >= 4 |
419 | if (nGroups == 6 && 50 == ge-gs+1) { | 423 | if (nGroups == 6 && 50 == ge-gs+1) { |
420 | /*--- fast track the common case ---*/ | 424 | /*--- fast track the common case ---*/ |
diff --git a/archival/bz/huffman.c b/archival/bz/huffman.c index 3f80c9976..02838c496 100644 --- a/archival/bz/huffman.c +++ b/archival/bz/huffman.c | |||
@@ -183,6 +183,8 @@ void BZ2_hbMakeCodeLengths(uint8_t *len, | |||
183 | 183 | ||
184 | for (i = 1; i <= alphaSize; i++) { | 184 | for (i = 1; i <= alphaSize; i++) { |
185 | j = weight[i] >> 8; | 185 | j = weight[i] >> 8; |
186 | /* bbox: yes, it is a signed division. | ||
187 | * don't replace with shift! */ | ||
186 | j = 1 + (j / 2); | 188 | j = 1 + (j / 2); |
187 | weight[i] = j << 8; | 189 | weight[i] = j << 8; |
188 | } | 190 | } |