commit 4bc3b85894920df5a3102000e1d86e1c3321fc76
tree   56f9a07f7624e0a04d2f0b3b8f61442c707b0870
parent 273abcbf664adc92ef3bc1d9752a2b571623ad52
author    Denys Vlasenko <vda.linux@googlemail.com>  2010-10-16 23:31:15 +0200
committer Denys Vlasenko <vda.linux@googlemail.com>  2010-10-16 23:31:15 +0200
sha512: inline rotr64

function                                             old     new   delta
sha1_process_block64                                 461     446     -15

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
 libbb/sha1.c | 108
 1 file changed, 51 insertions(+), 57 deletions(-)
diff --git a/libbb/sha1.c b/libbb/sha1.c
index 8c67d07bc..7e9b37d57 100644
--- a/libbb/sha1.c
+++ b/libbb/sha1.c
@@ -30,11 +30,29 @@
 
 #include "libbb.h"
 
-#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
-#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
-/* for sha512: */
-#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
+/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
+ * (for rotX32, there is no difference). Why? My guess is that
+ * macro requires clever common subexpression elimination heuristics
+ * in gcc, while inline basically forces it to happen.
+ */
+//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
+static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
+{
+	return (x << n) | (x >> (32 - n));
+}
+//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
+static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
+{
+	return (x >> n) | (x << (32 - n));
+}
+/* rotr64 in needed for sha512 only: */
+//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
+static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
+{
+	return (x >> n) | (x << (64 - n));
+}
 #if BB_LITTLE_ENDIAN
+/* ALWAYS_INLINE below would hurt code size, using plain inline: */
 static inline uint64_t hton64(uint64_t v)
 {
 	return (((uint64_t)htonl(v)) << 32) | htonl(v >> 32);
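The core of the change is this first hunk: the rotate helpers stop being macros and become ALWAYS_INLINE functions (busybox's always-inline attribute wrapper). Below is a minimal standalone sketch of the two forms being compared; the macro name and the test harness are illustrative, not from the commit:

```c
#include <stdint.h>
#include <stdio.h>

/* Macro form removed by the patch: x and n are textually duplicated,
 * so gcc has to rediscover the shared subexpressions at every use. */
#define ROTR64_MACRO(x, n) (((x) >> (n)) | ((x) << (64 - (n))))

/* Inline-function form added by the patch: arguments are evaluated
 * once, so the common subexpressions are explicit. */
static inline uint64_t rotr64(uint64_t x, unsigned n)
{
	return (x >> n) | (x << (64 - n));
}

int main(void)
{
	uint64_t v = 0x0123456789abcdefULL;

	/* Both forms must agree for 0 < n < 64 (n == 0 would shift
	 * by 64, which is undefined behavior in C). */
	printf("macro:  %016llx\n", (unsigned long long)ROTR64_MACRO(v, 8));
	printf("inline: %016llx\n", (unsigned long long)rotr64(v, 8));
	return 0;
}
```

Per the commit message, forcing the subexpression sharing this way is worth 15 bytes in sha1_process_block64 with gcc 4.2.1, while the 32-bit rotates compile the same either way.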
@@ -44,14 +62,6 @@ static inline uint64_t hton64(uint64_t v)
 #endif
 #define ntoh64(v) hton64(v)
 
-/* To check alignment gcc has an appropriate operator.  Other
-   compilers don't.  */
-#if defined(__GNUC__) && __GNUC__ >= 2
-# define UNALIGNED_P(p,type) (((uintptr_t) p) % __alignof__(type) != 0)
-#else
-# define UNALIGNED_P(p,type) (((uintptr_t) p) % sizeof(type) != 0)
-#endif
-
 
 /* Some arch headers have conflicting defines */
 #undef ch
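For context, the hton64 helper that this hunk leaves in place builds a 64-bit byte swap from two 32-bit htonl calls. A hedged standalone sketch follows; the main harness is mine, and the big-endian branch (where busybox presumably makes hton64 a no-op, since this form only works where htonl actually swaps) lies outside the hunk:

```c
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>   /* htonl */

/* As in sha1.c: swap each 32-bit half with htonl, then exchange the
 * halves.  htonl(v) truncates v to its low 32 bits, so the low half
 * ends up byte-swapped in the high position and vice versa.  Guarded
 * by #if BB_LITTLE_ENDIAN in the real file. */
static inline uint64_t hton64(uint64_t v)
{
	return (((uint64_t)htonl(v)) << 32) | htonl(v >> 32);
}

int main(void)
{
	uint64_t v = 0x1122334455667788ULL;
	/* On a little-endian host this prints 8877665544332211. */
	printf("%016llx\n", (unsigned long long)hton64(v));
	return 0;
}
```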
@@ -65,11 +75,8 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 	uint32_t W[80], a, b, c, d, e;
 	const uint32_t *words = (uint32_t*) ctx->wbuffer;
 
-	for (t = 0; t < 16; ++t) {
-		W[t] = ntohl(*words);
-		words++;
-	}
-
+	for (t = 0; t < 16; ++t)
+		W[t] = ntohl(words[t]);
 	for (/*t = 16*/; t < 80; ++t) {
 		uint32_t T = W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16];
 		W[t] = rotl32(T, 1);
@@ -190,11 +197,8 @@ static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
 #define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
 
 	/* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
-	for (t = 0; t < 16; ++t) {
-		W[t] = ntohl(*words);
-		words++;
-	}
-
+	for (t = 0; t < 16; ++t)
+		W[t] = ntohl(words[t]);
 	for (/*t = 16*/; t < 64; ++t)
 		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 
@@ -269,10 +273,8 @@ static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
 #define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
 
 	/* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
-	for (t = 0; t < 16; ++t) {
-		W[t] = ntoh64(*words);
-		words++;
-	}
+	for (t = 0; t < 16; ++t)
+		W[t] = ntoh64(words[t]);
 	for (/*t = 16*/; t < 80; ++t)
 		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 
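The same mechanical rewrite lands in all three process_block functions above: the 16-word load loop drops its separate pointer increment in favor of direct indexing, leaving one induction variable instead of two. A small illustrative sketch of the two shapes (the function names and harness are mine, not busybox's):

```c
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <arpa/inet.h>   /* ntohl */

/* Shape removed by the patch: walk a pointer and bump it each step. */
static void load_w_old(uint32_t W[16], const uint32_t *words)
{
	unsigned t;
	for (t = 0; t < 16; ++t) {
		W[t] = ntohl(*words);
		words++;
	}
}

/* Shape added by the patch: index directly off the loop counter, so
 * the compiler tracks only t instead of both t and words. */
static void load_w_new(uint32_t W[16], const uint32_t *words)
{
	unsigned t;
	for (t = 0; t < 16; ++t)
		W[t] = ntohl(words[t]);
}

int main(void)
{
	uint32_t in[16] = { 0x01020304 }, a[16], b[16];
	load_w_old(a, in);
	load_w_new(b, in);
	printf("%s\n", memcmp(a, b, sizeof(a)) == 0 ? "match" : "differ");
	return 0;
}
```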
@@ -363,18 +365,19 @@ void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
 /* Used also for sha256 */
 void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len)
 {
-#if 0
 	unsigned bufpos = ctx->total64 & 63;
-	unsigned add = 64 - bufpos;
+	unsigned remaining;
 
 	ctx->total64 += len;
+#if 0
+	remaining = 64 - bufpos;
 
 	/* Hash whole blocks */
-	while (len >= add) {
-		memcpy(ctx->wbuffer + bufpos, buffer, add);
-		buffer = (const char *)buffer + add;
-		len -= add;
-		add = 64;
+	while (len >= remaining) {
+		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
+		buffer = (const char *)buffer + remaining;
+		len -= remaining;
+		remaining = 64;
 		bufpos = 0;
 		ctx->process_block(ctx);
 	}
@@ -383,12 +386,8 @@ void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len)
 	memcpy(ctx->wbuffer + bufpos, buffer, len);
 #else
 	/* Tiny bit smaller code */
-	unsigned bufpos = ctx->total64 & 63;
-
-	ctx->total64 += len;
-
 	while (1) {
-		unsigned remaining = 64 - bufpos;
+		remaining = 64 - bufpos;
 		if (remaining > len)
 			remaining = len;
 		/* Copy data into aligned buffer */
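What survives in sha1_hash is the unified `while (1)` buffering loop (the "Tiny bit smaller code" branch): copy as much input as fits into the 64-byte work buffer, process the buffer whenever it fills exactly, and stop otherwise. A self-contained sketch of the pattern under illustrative names, with a stub in place of the real block function; the loop-exit trick mirrors what the final hunks show for sha512_hash, scaled to 64 bytes:

```c
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct ctx {
	uint8_t  wbuffer[64];   /* one SHA-1/SHA-256 block */
	uint64_t total64;       /* bytes hashed so far */
};

/* Stub standing in for sha1_process_block64/sha256_process_block64. */
static void process_block64(struct ctx *c)
{
	(void)c;
	printf("process one 64-byte block\n");
}

/* Same control flow as the retained branch of sha1_hash(): bufpos is
 * how much of wbuffer is already occupied. */
static void hash_update(struct ctx *c, const void *buffer, size_t len)
{
	unsigned bufpos = c->total64 & 63;
	unsigned remaining;

	c->total64 += len;
	while (1) {
		remaining = 64 - bufpos;
		if (remaining > len)
			remaining = len;
		/* Copy data into the aligned buffer. */
		memcpy(c->wbuffer + bufpos, buffer, remaining);
		len -= remaining;
		buffer = (const char *)buffer + remaining;
		bufpos += remaining;
		/* Unsigned trick for "if (bufpos != 64) break; bufpos = 0;":
		 * any partial fill wraps to a huge nonzero value. */
		bufpos -= 64;
		if (bufpos != 0)
			break;
		/* Buffer is filled up, process it; bufpos is already 0. */
		process_block64(c);
	}
}

int main(void)
{
	struct ctx c = { {0}, 0 };
	uint8_t data[200] = {0};
	hash_update(&c, data, sizeof(data));  /* prints the stub 3 times */
	return 0;
}
```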
@@ -409,20 +408,24 @@ void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len)
 
 void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
 {
-#if 0
 	unsigned bufpos = ctx->total64[0] & 127;
-	unsigned add = 128 - bufpos;
+	unsigned remaining;
 
+	/* First increment the byte count.  FIPS 180-2 specifies the possible
+	   length of the file up to 2^128 _bits_.
+	   We compute the number of _bytes_ and convert to bits later. */
 	ctx->total64[0] += len;
 	if (ctx->total64[0] < len)
 		ctx->total64[1]++;
+#if 0
+	remaining = 128 - bufpos;
 
 	/* Hash whole blocks */
-	while (len >= add) {
-		memcpy(ctx->wbuffer + bufpos, buffer, add);
-		buffer = (const char *)buffer + add;
-		len -= add;
-		add = 128;
+	while (len >= remaining) {
+		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
+		buffer = (const char *)buffer + remaining;
+		len -= remaining;
+		remaining = 128;
 		bufpos = 0;
 		sha512_process_block128(ctx);
 	}
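One detail worth noting in the hunk above: sha512 keeps its byte count in two 64-bit halves, and the carry out of the low half is detected via unsigned wraparound; after `total64[0] += len`, the sum is smaller than `len` exactly when the addition overflowed. A small sketch of that idiom (names illustrative):

```c
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* 128-bit byte counter kept as two uint64_t halves, as in sha512_ctx_t. */
static void add_len(uint64_t total64[2], uint64_t len)
{
	total64[0] += len;
	/* Unsigned overflow wraps, so sum < addend iff a carry occurred. */
	if (total64[0] < len)
		total64[1]++;
}

int main(void)
{
	uint64_t total[2] = { UINT64_MAX - 5, 0 };
	add_len(total, 10);   /* wraps the low half */
	/* prints high=1 low=4 */
	printf("high=%" PRIu64 " low=%" PRIu64 "\n", total[1], total[0]);
	return 0;
}
```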
@@ -430,20 +433,11 @@ void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
 	/* Save last, partial blosk */
 	memcpy(ctx->wbuffer + bufpos, buffer, len);
 #else
-	unsigned bufpos = ctx->total64[0] & 127;
-
-	/* First increment the byte count.  FIPS 180-2 specifies the possible
-	   length of the file up to 2^128 _bits_.
-	   We compute the number of _bytes_ and convert to bits later. */
-	ctx->total64[0] += len;
-	if (ctx->total64[0] < len)
-		ctx->total64[1]++;
-
 	while (1) {
-		unsigned remaining = 128 - bufpos;
+		remaining = 128 - bufpos;
 		if (remaining > len)
 			remaining = len;
-		/* Copy data into aligned buffer. */
+		/* Copy data into aligned buffer */
 		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
 		len -= remaining;
 		buffer = (const char *)buffer + remaining;
@@ -452,7 +446,7 @@ void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
 		bufpos -= 128;
 		if (bufpos != 0)
 			break;
-		/* Buffer is filled up, process it. */
+		/* Buffer is filled up, process it */
 		sha512_process_block128(ctx);
 		/*bufpos = 0; - already is */
 	}
