diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2015-02-02 16:07:07 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2015-02-02 16:07:07 +0100 |
commit | f7f70bf1b3025550ea4ad8d13d977b846a868a06 (patch) | |
tree | 2c18d81eab7e897d982a18667fdafa062a93e033 | |
parent | 7f7ade1964f61172125d9f4fe92f0b07ce8bc7a4 (diff) | |
download | busybox-w32-f7f70bf1b3025550ea4ad8d13d977b846a868a06.tar.gz busybox-w32-f7f70bf1b3025550ea4ad8d13d977b846a868a06.tar.bz2 busybox-w32-f7f70bf1b3025550ea4ad8d13d977b846a868a06.zip |
gzip: speed up and shrink put_16bit()
function old new delta
put_16bit 104 98 -6
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | archival/gzip.c | 41 | ||||
-rw-r--r-- | include/platform.h | 2 |
2 files changed, 36 insertions, 7 deletions
diff --git a/archival/gzip.c b/archival/gzip.c index 46367f9e6..18d795996 100644 --- a/archival/gzip.c +++ b/archival/gzip.c | |||
@@ -417,19 +417,46 @@ static void flush_outbuf(void) | |||
417 | #define put_8bit(c) \ | 417 | #define put_8bit(c) \ |
418 | do { \ | 418 | do { \ |
419 | G1.outbuf[G1.outcnt++] = (c); \ | 419 | G1.outbuf[G1.outcnt++] = (c); \ |
420 | if (G1.outcnt == OUTBUFSIZ) flush_outbuf(); \ | 420 | if (G1.outcnt == OUTBUFSIZ) \ |
421 | flush_outbuf(); \ | ||
421 | } while (0) | 422 | } while (0) |
422 | 423 | ||
423 | /* Output a 16 bit value, lsb first */ | 424 | /* Output a 16 bit value, lsb first */ |
424 | static void put_16bit(ush w) | 425 | static void put_16bit(ush w) |
425 | { | 426 | { |
426 | if (G1.outcnt < OUTBUFSIZ - 2) { | 427 | /* GCC 4.2.1 won't optimize out redundant loads of G1.outcnt |
427 | G1.outbuf[G1.outcnt++] = w; | 428 | * (probably because of fear of aliasing with G1.outbuf[] |
428 | G1.outbuf[G1.outcnt++] = w >> 8; | 429 | * stores), do it explicitly: |
429 | } else { | 430 | */ |
430 | put_8bit(w); | 431 | unsigned outcnt = G1.outcnt; |
431 | put_8bit(w >> 8); | 432 | uch *dst = &G1.outbuf[outcnt]; |
433 | |||
434 | #if BB_UNALIGNED_MEMACCESS_OK && BB_LITTLE_ENDIAN | ||
435 | if (outcnt < OUTBUFSIZ-2) { | ||
436 | /* Common case */ | ||
437 | ush *dst16 = (void*) dst; | ||
438 | *dst16 = w; /* unaligned LSB 16-bit store */ | ||
439 | G1.outcnt = outcnt + 2; | ||
440 | return; | ||
441 | } | ||
442 | *dst = (uch)w; | ||
443 | w >>= 8; | ||
444 | #else | ||
445 | *dst++ = (uch)w; | ||
446 | w >>= 8; | ||
447 | if (outcnt < OUTBUFSIZ-2) { | ||
448 | /* Common case */ | ||
449 | *dst = w; | ||
450 | G1.outcnt = outcnt + 2; | ||
451 | return; | ||
432 | } | 452 | } |
453 | #endif | ||
454 | |||
455 | /* Slowpath: we will need to do flush_outbuf() */ | ||
456 | G1.outcnt++; | ||
457 | if (G1.outcnt == OUTBUFSIZ) | ||
458 | flush_outbuf(); | ||
459 | put_8bit(w); | ||
433 | } | 460 | } |
434 | 461 | ||
435 | static void put_32bit(ulg n) | 462 | static void put_32bit(ulg n) |
diff --git a/include/platform.h b/include/platform.h index 0b0fce182..df9594507 100644 --- a/include/platform.h +++ b/include/platform.h | |||
@@ -217,6 +217,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING; | |||
217 | * a lvalue. This makes it more likely to not swap them by mistake | 217 | * a lvalue. This makes it more likely to not swap them by mistake |
218 | */ | 218 | */ |
219 | #if defined(i386) || defined(__x86_64__) || defined(__powerpc__) | 219 | #if defined(i386) || defined(__x86_64__) || defined(__powerpc__) |
220 | # define BB_UNALIGNED_MEMACCESS_OK 1 | ||
220 | # define move_from_unaligned_int(v, intp) ((v) = *(bb__aliased_int*)(intp)) | 221 | # define move_from_unaligned_int(v, intp) ((v) = *(bb__aliased_int*)(intp)) |
221 | # define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp)) | 222 | # define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp)) |
222 | # define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p)) | 223 | # define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p)) |
@@ -225,6 +226,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING; | |||
225 | # define move_to_unaligned32(u32p, v) (*(bb__aliased_uint32_t*)(u32p) = (v)) | 226 | # define move_to_unaligned32(u32p, v) (*(bb__aliased_uint32_t*)(u32p) = (v)) |
226 | /* #elif ... - add your favorite arch today! */ | 227 | /* #elif ... - add your favorite arch today! */ |
227 | #else | 228 | #else |
229 | # define BB_UNALIGNED_MEMACCESS_OK 0 | ||
228 | /* performs reasonably well (gcc usually inlines memcpy here) */ | 230 | /* performs reasonably well (gcc usually inlines memcpy here) */ |
229 | # define move_from_unaligned_int(v, intp) (memcpy(&(v), (intp), sizeof(int))) | 231 | # define move_from_unaligned_int(v, intp) (memcpy(&(v), (intp), sizeof(int))) |
230 | # define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long))) | 232 | # define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long))) |