aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2010-10-16 23:31:15 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2010-10-16 23:31:15 +0200
commit 4bc3b85894920df5a3102000e1d86e1c3321fc76 (patch)
tree 56f9a07f7624e0a04d2f0b3b8f61442c707b0870
parent 273abcbf664adc92ef3bc1d9752a2b571623ad52 (diff)
download busybox-w32-4bc3b85894920df5a3102000e1d86e1c3321fc76.tar.gz
busybox-w32-4bc3b85894920df5a3102000e1d86e1c3321fc76.tar.bz2
busybox-w32-4bc3b85894920df5a3102000e1d86e1c3321fc76.zip
sha512: inline rotr64
function                                             old     new   delta
sha1_process_block64                                 461     446     -15

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- libbb/sha1.c 108
1 files changed, 51 insertions, 57 deletions
diff --git a/libbb/sha1.c b/libbb/sha1.c
index 8c67d07bc..7e9b37d57 100644
--- a/libbb/sha1.c
+++ b/libbb/sha1.c
@@ -30,11 +30,29 @@
30 30
31#include "libbb.h" 31#include "libbb.h"
32 32
33#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n)))) 33/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
34#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n)))) 34 * (for rotX32, there is no difference). Why? My guess is that
35/* for sha512: */ 35 * macro requires clever common subexpression elimination heuristics
36#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n)))) 36 * in gcc, while inline basically forces it to happen.
37 */
38//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
39static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
40{
41 return (x << n) | (x >> (32 - n));
42}
43//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
44static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
45{
46 return (x >> n) | (x << (32 - n));
47}
48/* rotr64 is needed for sha512 only: */
49//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
50static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
51{
52 return (x >> n) | (x << (64 - n));
53}
37#if BB_LITTLE_ENDIAN 54#if BB_LITTLE_ENDIAN
55/* ALWAYS_INLINE below would hurt code size, using plain inline: */
38static inline uint64_t hton64(uint64_t v) 56static inline uint64_t hton64(uint64_t v)
39{ 57{
40 return (((uint64_t)htonl(v)) << 32) | htonl(v >> 32); 58 return (((uint64_t)htonl(v)) << 32) | htonl(v >> 32);
@@ -44,14 +62,6 @@ static inline uint64_t hton64(uint64_t v)
44#endif 62#endif
45#define ntoh64(v) hton64(v) 63#define ntoh64(v) hton64(v)
46 64
47/* To check alignment gcc has an appropriate operator. Other
48 compilers don't. */
49#if defined(__GNUC__) && __GNUC__ >= 2
50# define UNALIGNED_P(p,type) (((uintptr_t) p) % __alignof__(type) != 0)
51#else
52# define UNALIGNED_P(p,type) (((uintptr_t) p) % sizeof(type) != 0)
53#endif
54
55 65
56/* Some arch headers have conflicting defines */ 66/* Some arch headers have conflicting defines */
57#undef ch 67#undef ch
@@ -65,11 +75,8 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
65 uint32_t W[80], a, b, c, d, e; 75 uint32_t W[80], a, b, c, d, e;
66 const uint32_t *words = (uint32_t*) ctx->wbuffer; 76 const uint32_t *words = (uint32_t*) ctx->wbuffer;
67 77
68 for (t = 0; t < 16; ++t) { 78 for (t = 0; t < 16; ++t)
69 W[t] = ntohl(*words); 79 W[t] = ntohl(words[t]);
70 words++;
71 }
72
73 for (/*t = 16*/; t < 80; ++t) { 80 for (/*t = 16*/; t < 80; ++t) {
74 uint32_t T = W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16]; 81 uint32_t T = W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16];
75 W[t] = rotl32(T, 1); 82 W[t] = rotl32(T, 1);
@@ -190,11 +197,8 @@ static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
190#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10)) 197#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
191 198
192 /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */ 199 /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
193 for (t = 0; t < 16; ++t) { 200 for (t = 0; t < 16; ++t)
194 W[t] = ntohl(*words); 201 W[t] = ntohl(words[t]);
195 words++;
196 }
197
198 for (/*t = 16*/; t < 64; ++t) 202 for (/*t = 16*/; t < 64; ++t)
199 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16]; 203 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
200 204
@@ -269,10 +273,8 @@ static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
269#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6)) 273#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
270 274
271 /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */ 275 /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
272 for (t = 0; t < 16; ++t) { 276 for (t = 0; t < 16; ++t)
273 W[t] = ntoh64(*words); 277 W[t] = ntoh64(words[t]);
274 words++;
275 }
276 for (/*t = 16*/; t < 80; ++t) 278 for (/*t = 16*/; t < 80; ++t)
277 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16]; 279 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
278 280
@@ -363,18 +365,19 @@ void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
363/* Used also for sha256 */ 365/* Used also for sha256 */
364void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len) 366void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len)
365{ 367{
366#if 0
367 unsigned bufpos = ctx->total64 & 63; 368 unsigned bufpos = ctx->total64 & 63;
368 unsigned add = 64 - bufpos; 369 unsigned remaining;
369 370
370 ctx->total64 += len; 371 ctx->total64 += len;
372#if 0
373 remaining = 64 - bufpos;
371 374
372 /* Hash whole blocks */ 375 /* Hash whole blocks */
373 while (len >= add) { 376 while (len >= remaining) {
374 memcpy(ctx->wbuffer + bufpos, buffer, add); 377 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
375 buffer = (const char *)buffer + add; 378 buffer = (const char *)buffer + remaining;
376 len -= add; 379 len -= remaining;
377 add = 64; 380 remaining = 64;
378 bufpos = 0; 381 bufpos = 0;
379 ctx->process_block(ctx); 382 ctx->process_block(ctx);
380 } 383 }
@@ -383,12 +386,8 @@ void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len)
383 memcpy(ctx->wbuffer + bufpos, buffer, len); 386 memcpy(ctx->wbuffer + bufpos, buffer, len);
384#else 387#else
385 /* Tiny bit smaller code */ 388 /* Tiny bit smaller code */
386 unsigned bufpos = ctx->total64 & 63;
387
388 ctx->total64 += len;
389
390 while (1) { 389 while (1) {
391 unsigned remaining = 64 - bufpos; 390 remaining = 64 - bufpos;
392 if (remaining > len) 391 if (remaining > len)
393 remaining = len; 392 remaining = len;
394 /* Copy data into aligned buffer */ 393 /* Copy data into aligned buffer */
@@ -409,20 +408,24 @@ void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len)
409 408
410void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len) 409void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
411{ 410{
412#if 0
413 unsigned bufpos = ctx->total64[0] & 127; 411 unsigned bufpos = ctx->total64[0] & 127;
414 unsigned add = 128 - bufpos; 412 unsigned remaining;
415 413
414 /* First increment the byte count. FIPS 180-2 specifies the possible
415 length of the file up to 2^128 _bits_.
416 We compute the number of _bytes_ and convert to bits later. */
416 ctx->total64[0] += len; 417 ctx->total64[0] += len;
417 if (ctx->total64[0] < len) 418 if (ctx->total64[0] < len)
418 ctx->total64[1]++; 419 ctx->total64[1]++;
420#if 0
421 remaining = 128 - bufpos;
419 422
420 /* Hash whole blocks */ 423 /* Hash whole blocks */
421 while (len >= add) { 424 while (len >= remaining) {
422 memcpy(ctx->wbuffer + bufpos, buffer, add); 425 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
423 buffer = (const char *)buffer + add; 426 buffer = (const char *)buffer + remaining;
424 len -= add; 427 len -= remaining;
425 add = 128; 428 remaining = 128;
426 bufpos = 0; 429 bufpos = 0;
427 sha512_process_block128(ctx); 430 sha512_process_block128(ctx);
428 } 431 }
@@ -430,20 +433,11 @@ void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
430 /* Save last, partial block */ 433 /* Save last, partial block */
431 memcpy(ctx->wbuffer + bufpos, buffer, len); 434 memcpy(ctx->wbuffer + bufpos, buffer, len);
432#else 435#else
433 unsigned bufpos = ctx->total64[0] & 127;
434
435 /* First increment the byte count. FIPS 180-2 specifies the possible
436 length of the file up to 2^128 _bits_.
437 We compute the number of _bytes_ and convert to bits later. */
438 ctx->total64[0] += len;
439 if (ctx->total64[0] < len)
440 ctx->total64[1]++;
441
442 while (1) { 436 while (1) {
443 unsigned remaining = 128 - bufpos; 437 remaining = 128 - bufpos;
444 if (remaining > len) 438 if (remaining > len)
445 remaining = len; 439 remaining = len;
446 /* Copy data into aligned buffer. */ 440 /* Copy data into aligned buffer */
447 memcpy(ctx->wbuffer + bufpos, buffer, remaining); 441 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
448 len -= remaining; 442 len -= remaining;
449 buffer = (const char *)buffer + remaining; 443 buffer = (const char *)buffer + remaining;
@@ -452,7 +446,7 @@ void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
452 bufpos -= 128; 446 bufpos -= 128;
453 if (bufpos != 0) 447 if (bufpos != 0)
454 break; 448 break;
455 /* Buffer is filled up, process it. */ 449 /* Buffer is filled up, process it */
456 sha512_process_block128(ctx); 450 sha512_process_block128(ctx);
457 /*bufpos = 0; - already is */ 451 /*bufpos = 0; - already is */
458 } 452 }