diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2010-10-16 23:31:15 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2010-10-16 23:31:15 +0200 |
commit | 4bc3b85894920df5a3102000e1d86e1c3321fc76 (patch) | |
tree | 56f9a07f7624e0a04d2f0b3b8f61442c707b0870 | |
parent | 273abcbf664adc92ef3bc1d9752a2b571623ad52 (diff) | |
download | busybox-w32-4bc3b85894920df5a3102000e1d86e1c3321fc76.tar.gz busybox-w32-4bc3b85894920df5a3102000e1d86e1c3321fc76.tar.bz2 busybox-w32-4bc3b85894920df5a3102000e1d86e1c3321fc76.zip |
sha512: inline rotr64
function old new delta
sha1_process_block64 461 446 -15
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/sha1.c | 108 |
1 file changed, 51 insertions, 57 deletions
diff --git a/libbb/sha1.c b/libbb/sha1.c index 8c67d07bc..7e9b37d57 100644 --- a/libbb/sha1.c +++ b/libbb/sha1.c | |||
@@ -30,11 +30,29 @@ | |||
30 | 30 | ||
31 | #include "libbb.h" | 31 | #include "libbb.h" |
32 | 32 | ||
33 | #define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n)))) | 33 | /* gcc 4.2.1 optimizes rotr64 better with inline than with macro |
34 | #define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n)))) | 34 | * (for rotX32, there is no difference). Why? My guess is that |
35 | /* for sha512: */ | 35 | * macro requires clever common subexpression elimination heuristics |
36 | #define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n)))) | 36 | * in gcc, while inline basically forces it to happen. |
37 | */ | ||
38 | //#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n)))) | ||
39 | static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n) | ||
40 | { | ||
41 | return (x << n) | (x >> (32 - n)); | ||
42 | } | ||
43 | //#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n)))) | ||
44 | static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n) | ||
45 | { | ||
46 | return (x >> n) | (x << (32 - n)); | ||
47 | } | ||
48 | /* rotr64 is needed for sha512 only: */ | ||
49 | //#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n)))) | ||
50 | static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n) | ||
51 | { | ||
52 | return (x >> n) | (x << (64 - n)); | ||
53 | } | ||
37 | #if BB_LITTLE_ENDIAN | 54 | #if BB_LITTLE_ENDIAN |
55 | /* ALWAYS_INLINE below would hurt code size, using plain inline: */ | ||
38 | static inline uint64_t hton64(uint64_t v) | 56 | static inline uint64_t hton64(uint64_t v) |
39 | { | 57 | { |
40 | return (((uint64_t)htonl(v)) << 32) | htonl(v >> 32); | 58 | return (((uint64_t)htonl(v)) << 32) | htonl(v >> 32); |
@@ -44,14 +62,6 @@ static inline uint64_t hton64(uint64_t v) | |||
44 | #endif | 62 | #endif |
45 | #define ntoh64(v) hton64(v) | 63 | #define ntoh64(v) hton64(v) |
46 | 64 | ||
47 | /* To check alignment gcc has an appropriate operator. Other | ||
48 | compilers don't. */ | ||
49 | #if defined(__GNUC__) && __GNUC__ >= 2 | ||
50 | # define UNALIGNED_P(p,type) (((uintptr_t) p) % __alignof__(type) != 0) | ||
51 | #else | ||
52 | # define UNALIGNED_P(p,type) (((uintptr_t) p) % sizeof(type) != 0) | ||
53 | #endif | ||
54 | |||
55 | 65 | ||
56 | /* Some arch headers have conflicting defines */ | 66 | /* Some arch headers have conflicting defines */ |
57 | #undef ch | 67 | #undef ch |
@@ -65,11 +75,8 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) | |||
65 | uint32_t W[80], a, b, c, d, e; | 75 | uint32_t W[80], a, b, c, d, e; |
66 | const uint32_t *words = (uint32_t*) ctx->wbuffer; | 76 | const uint32_t *words = (uint32_t*) ctx->wbuffer; |
67 | 77 | ||
68 | for (t = 0; t < 16; ++t) { | 78 | for (t = 0; t < 16; ++t) |
69 | W[t] = ntohl(*words); | 79 | W[t] = ntohl(words[t]); |
70 | words++; | ||
71 | } | ||
72 | |||
73 | for (/*t = 16*/; t < 80; ++t) { | 80 | for (/*t = 16*/; t < 80; ++t) { |
74 | uint32_t T = W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16]; | 81 | uint32_t T = W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16]; |
75 | W[t] = rotl32(T, 1); | 82 | W[t] = rotl32(T, 1); |
@@ -190,11 +197,8 @@ static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx) | |||
190 | #define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10)) | 197 | #define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10)) |
191 | 198 | ||
192 | /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */ | 199 | /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */ |
193 | for (t = 0; t < 16; ++t) { | 200 | for (t = 0; t < 16; ++t) |
194 | W[t] = ntohl(*words); | 201 | W[t] = ntohl(words[t]); |
195 | words++; | ||
196 | } | ||
197 | |||
198 | for (/*t = 16*/; t < 64; ++t) | 202 | for (/*t = 16*/; t < 64; ++t) |
199 | W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16]; | 203 | W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16]; |
200 | 204 | ||
@@ -269,10 +273,8 @@ static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx) | |||
269 | #define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6)) | 273 | #define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6)) |
270 | 274 | ||
271 | /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */ | 275 | /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */ |
272 | for (t = 0; t < 16; ++t) { | 276 | for (t = 0; t < 16; ++t) |
273 | W[t] = ntoh64(*words); | 277 | W[t] = ntoh64(words[t]); |
274 | words++; | ||
275 | } | ||
276 | for (/*t = 16*/; t < 80; ++t) | 278 | for (/*t = 16*/; t < 80; ++t) |
277 | W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16]; | 279 | W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16]; |
278 | 280 | ||
@@ -363,18 +365,19 @@ void FAST_FUNC sha512_begin(sha512_ctx_t *ctx) | |||
363 | /* Used also for sha256 */ | 365 | /* Used also for sha256 */ |
364 | void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len) | 366 | void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len) |
365 | { | 367 | { |
366 | #if 0 | ||
367 | unsigned bufpos = ctx->total64 & 63; | 368 | unsigned bufpos = ctx->total64 & 63; |
368 | unsigned add = 64 - bufpos; | 369 | unsigned remaining; |
369 | 370 | ||
370 | ctx->total64 += len; | 371 | ctx->total64 += len; |
372 | #if 0 | ||
373 | remaining = 64 - bufpos; | ||
371 | 374 | ||
372 | /* Hash whole blocks */ | 375 | /* Hash whole blocks */ |
373 | while (len >= add) { | 376 | while (len >= remaining) { |
374 | memcpy(ctx->wbuffer + bufpos, buffer, add); | 377 | memcpy(ctx->wbuffer + bufpos, buffer, remaining); |
375 | buffer = (const char *)buffer + add; | 378 | buffer = (const char *)buffer + remaining; |
376 | len -= add; | 379 | len -= remaining; |
377 | add = 64; | 380 | remaining = 64; |
378 | bufpos = 0; | 381 | bufpos = 0; |
379 | ctx->process_block(ctx); | 382 | ctx->process_block(ctx); |
380 | } | 383 | } |
@@ -383,12 +386,8 @@ void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len) | |||
383 | memcpy(ctx->wbuffer + bufpos, buffer, len); | 386 | memcpy(ctx->wbuffer + bufpos, buffer, len); |
384 | #else | 387 | #else |
385 | /* Tiny bit smaller code */ | 388 | /* Tiny bit smaller code */ |
386 | unsigned bufpos = ctx->total64 & 63; | ||
387 | |||
388 | ctx->total64 += len; | ||
389 | |||
390 | while (1) { | 389 | while (1) { |
391 | unsigned remaining = 64 - bufpos; | 390 | remaining = 64 - bufpos; |
392 | if (remaining > len) | 391 | if (remaining > len) |
393 | remaining = len; | 392 | remaining = len; |
394 | /* Copy data into aligned buffer */ | 393 | /* Copy data into aligned buffer */ |
@@ -409,20 +408,24 @@ void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len) | |||
409 | 408 | ||
410 | void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len) | 409 | void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len) |
411 | { | 410 | { |
412 | #if 0 | ||
413 | unsigned bufpos = ctx->total64[0] & 127; | 411 | unsigned bufpos = ctx->total64[0] & 127; |
414 | unsigned add = 128 - bufpos; | 412 | unsigned remaining; |
415 | 413 | ||
414 | /* First increment the byte count. FIPS 180-2 specifies the possible | ||
415 | length of the file up to 2^128 _bits_. | ||
416 | We compute the number of _bytes_ and convert to bits later. */ | ||
416 | ctx->total64[0] += len; | 417 | ctx->total64[0] += len; |
417 | if (ctx->total64[0] < len) | 418 | if (ctx->total64[0] < len) |
418 | ctx->total64[1]++; | 419 | ctx->total64[1]++; |
420 | #if 0 | ||
421 | remaining = 128 - bufpos; | ||
419 | 422 | ||
420 | /* Hash whole blocks */ | 423 | /* Hash whole blocks */ |
421 | while (len >= add) { | 424 | while (len >= remaining) { |
422 | memcpy(ctx->wbuffer + bufpos, buffer, add); | 425 | memcpy(ctx->wbuffer + bufpos, buffer, remaining); |
423 | buffer = (const char *)buffer + add; | 426 | buffer = (const char *)buffer + remaining; |
424 | len -= add; | 427 | len -= remaining; |
425 | add = 128; | 428 | remaining = 128; |
426 | bufpos = 0; | 429 | bufpos = 0; |
427 | sha512_process_block128(ctx); | 430 | sha512_process_block128(ctx); |
428 | } | 431 | } |
@@ -430,20 +433,11 @@ void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len) | |||
430 | /* Save last, partial block */ | 433 | /* Save last, partial block */ |
431 | memcpy(ctx->wbuffer + bufpos, buffer, len); | 434 | memcpy(ctx->wbuffer + bufpos, buffer, len); |
432 | #else | 435 | #else |
433 | unsigned bufpos = ctx->total64[0] & 127; | ||
434 | |||
435 | /* First increment the byte count. FIPS 180-2 specifies the possible | ||
436 | length of the file up to 2^128 _bits_. | ||
437 | We compute the number of _bytes_ and convert to bits later. */ | ||
438 | ctx->total64[0] += len; | ||
439 | if (ctx->total64[0] < len) | ||
440 | ctx->total64[1]++; | ||
441 | |||
442 | while (1) { | 436 | while (1) { |
443 | unsigned remaining = 128 - bufpos; | 437 | remaining = 128 - bufpos; |
444 | if (remaining > len) | 438 | if (remaining > len) |
445 | remaining = len; | 439 | remaining = len; |
446 | /* Copy data into aligned buffer. */ | 440 | /* Copy data into aligned buffer */ |
447 | memcpy(ctx->wbuffer + bufpos, buffer, remaining); | 441 | memcpy(ctx->wbuffer + bufpos, buffer, remaining); |
448 | len -= remaining; | 442 | len -= remaining; |
449 | buffer = (const char *)buffer + remaining; | 443 | buffer = (const char *)buffer + remaining; |
@@ -452,7 +446,7 @@ void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len) | |||
452 | bufpos -= 128; | 446 | bufpos -= 128; |
453 | if (bufpos != 0) | 447 | if (bufpos != 0) |
454 | break; | 448 | break; |
455 | /* Buffer is filled up, process it. */ | 449 | /* Buffer is filled up, process it */ |
456 | sha512_process_block128(ctx); | 450 | sha512_process_block128(ctx); |
457 | /*bufpos = 0; - already is */ | 451 | /*bufpos = 0; - already is */ |
458 | } | 452 | } |