Diffstat (limited to 'libbb/sha1.c')
-rw-r--r--  libbb/sha1.c | 204
1 file changed, 127 insertions(+), 77 deletions(-)
diff --git a/libbb/sha1.c b/libbb/sha1.c
index beeb70cf6..d79291148 100644
--- a/libbb/sha1.c
+++ b/libbb/sha1.c
@@ -30,11 +30,29 @@
 
 #include "libbb.h"
 
-#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
-#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
-/* for sha512: */
-#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
+/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
+ * (for rotX32, there is no difference). Why? My guess is that
+ * macro requires clever common subexpression elimination heuristics
+ * in gcc, while inline basically forces it to happen.
+ */
+//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
+static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
+{
+    return (x << n) | (x >> (32 - n));
+}
+//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
+static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
+{
+    return (x >> n) | (x << (32 - n));
+}
+/* rotr64 is needed for sha512 only: */
+//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
+static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
+{
+    return (x >> n) | (x << (64 - n));
+}
 #if BB_LITTLE_ENDIAN
+/* ALWAYS_INLINE below would hurt code size, using plain inline: */
 static inline uint64_t hton64(uint64_t v)
 {
     return (((uint64_t)htonl(v)) << 32) | htonl(v >> 32);
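The new comment argues that gcc 4.2.1 emits better sha512 code when rotr64 is an inline function rather than a macro. A minimal standalone sketch (illustrative names, not part of the commit) contrasting the two forms; both reduce to a single rotate instruction at -O2 on x86, and n must stay within 1..63 to avoid undefined shift behavior:

#include <stdint.h>

#define ROTR64_MACRO(x,n) (((x) >> (n)) | ((x) << (64 - (n))))

static inline uint64_t rotr64_inline(uint64_t x, unsigned n)
{
    /* x and n are evaluated once here; the macro repeats both and
     * relies on the compiler's common subexpression elimination */
    return (x >> n) | (x << (64 - n));
}

uint64_t demo_macro(uint64_t x)  { return ROTR64_MACRO(x, 19); }
uint64_t demo_inline(uint64_t x) { return rotr64_inline(x, 19); }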
@@ -44,14 +62,6 @@ static inline uint64_t hton64(uint64_t v)
 #endif
 #define ntoh64(v) hton64(v)
 
-/* To check alignment gcc has an appropriate operator. Other
-   compilers don't. */
-#if defined(__GNUC__) && __GNUC__ >= 2
-# define UNALIGNED_P(p,type) (((uintptr_t) p) % __alignof__(type) != 0)
-#else
-# define UNALIGNED_P(p,type) (((uintptr_t) p) % sizeof(type) != 0)
-#endif
-
 
 /* Some arch headers have conflicting defines */
 #undef ch
@@ -65,11 +75,8 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
     uint32_t W[80], a, b, c, d, e;
     const uint32_t *words = (uint32_t*) ctx->wbuffer;
 
-    for (t = 0; t < 16; ++t) {
-        W[t] = ntohl(*words);
-        words++;
-    }
-
+    for (t = 0; t < 16; ++t)
+        W[t] = ntohl(words[t]);
     for (/*t = 16*/; t < 80; ++t) {
         uint32_t T = W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16];
         W[t] = rotl32(T, 1);
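The load loop now indexes words[t] instead of bumping the pointer; behavior is identical, since words[t] is *(words + t), and the single-statement body drops its braces. For context, a self-contained sketch of the full SHA-1 message schedule this loop feeds (illustrative helper, not the commit's code):

#include <stdint.h>
#include <arpa/inet.h>  /* ntohl */

static void sha1_schedule(uint32_t W[80], const uint32_t words[16])
{
    int t;
    /* Load the 16 big-endian input words */
    for (t = 0; t < 16; ++t)
        W[t] = ntohl(words[t]);
    /* Expand to 80 words per FIPS 180-2:6.1.2 step 2 */
    for (/*t = 16*/; t < 80; ++t) {
        uint32_t T = W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16];
        W[t] = (T << 1) | (T >> 31); /* rotl32(T, 1) */
    }
}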
@@ -190,11 +197,8 @@ static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
 #define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
 
     /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
-    for (t = 0; t < 16; ++t) {
-        W[t] = ntohl(*words);
-        words++;
-    }
-
+    for (t = 0; t < 16; ++t)
+        W[t] = ntohl(words[t]);
     for (/*t = 16*/; t < 64; ++t)
         W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 
@@ -269,10 +273,8 @@ static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
 #define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
 
     /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
-    for (t = 0; t < 16; ++t) {
-        W[t] = ntoh64(*words);
-        words++;
-    }
+    for (t = 0; t < 16; ++t)
+        W[t] = ntoh64(words[t]);
     for (/*t = 16*/; t < 80; ++t)
         W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 
@@ -327,7 +329,9 @@ static const uint32_t init256[] = {
     0x510e527f,
     0x9b05688c,
     0x1f83d9ab,
-    0x5be0cd19
+    0x5be0cd19,
+    0,
+    0,
 };
 static const uint32_t init512_lo[] = {
     0xf3bcc908,
@@ -337,7 +341,9 @@ static const uint32_t init512_lo[] = {
     0xade682d1,
     0x2b3e6c1f,
     0xfb41bd6b,
-    0x137e2179
+    0x137e2179,
+    0,
+    0,
 };
 
 /* Initialize structure containing state of computation.
@@ -345,7 +351,7 @@ static const uint32_t init512_lo[] = {
 void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
 {
     memcpy(ctx->hash, init256, sizeof(init256));
-    ctx->total64 = 0;
+    /*ctx->total64 = 0; - done by extending init256 with two 32-bit zeros */
     ctx->process_block = sha256_process_block64;
 }
 
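The commented-out assignment works because init256[] now carries two trailing zeros: a single memcpy initializes the hash state and clears the 64-bit byte counter stored right behind it. A hedged sketch of the layout assumption (the struct below is illustrative, not busybox's actual sha256_ctx_t; the trick requires total64 to directly follow hash[] with no padding):

#include <stdint.h>
#include <string.h>

typedef struct {
    uint8_t  wbuffer[64];
    uint32_t hash[8];
    uint64_t total64;   /* must directly follow hash[], no padding */
} demo_sha256_ctx;

static const uint32_t demo_init256[8 + 2] = {
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
    0, 0,   /* spill past hash[8] and zero total64 */
};

static void demo_sha256_begin(demo_sha256_ctx *ctx)
{
    /* copies 40 bytes: 32 for hash[], 8 for the adjacent total64 */
    memcpy(ctx->hash, demo_init256, sizeof(demo_init256));
}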
@@ -354,36 +360,61 @@ void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
 void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
 {
     int i;
-    for (i = 0; i < 8; i++)
+    /* Two extra iterations zero out ctx->total64[] */
+    for (i = 0; i < 8+2; i++)
         ctx->hash[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
-    ctx->total64[0] = ctx->total64[1] = 0;
+    /*ctx->total64[0] = ctx->total64[1] = 0; - already done */
 }
 
 
 /* Used also for sha256 */
-void FAST_FUNC sha1_hash(const void *buffer, size_t len, sha1_ctx_t *ctx)
+void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len)
 {
-    unsigned in_buf = ctx->total64 & 63;
-    unsigned add = 64 - in_buf;
+    unsigned bufpos = ctx->total64 & 63;
+    unsigned remaining;
 
     ctx->total64 += len;
-
-    while (len >= add) { /* transfer whole blocks while possible */
-        memcpy(ctx->wbuffer + in_buf, buffer, add);
-        buffer = (const char *)buffer + add;
-        len -= add;
-        add = 64;
-        in_buf = 0;
+#if 0
+    remaining = 64 - bufpos;
+
+    /* Hash whole blocks */
+    while (len >= remaining) {
+        memcpy(ctx->wbuffer + bufpos, buffer, remaining);
+        buffer = (const char *)buffer + remaining;
+        len -= remaining;
+        remaining = 64;
+        bufpos = 0;
         ctx->process_block(ctx);
     }
 
-    memcpy(ctx->wbuffer + in_buf, buffer, len);
+    /* Save last, partial block */
+    memcpy(ctx->wbuffer + bufpos, buffer, len);
+#else
+    /* Tiny bit smaller code */
+    while (1) {
+        remaining = 64 - bufpos;
+        if (remaining > len)
+            remaining = len;
+        /* Copy data into aligned buffer */
+        memcpy(ctx->wbuffer + bufpos, buffer, remaining);
+        len -= remaining;
+        buffer = (const char *)buffer + remaining;
+        bufpos += remaining;
+        /* clever way to do "if (bufpos != 64) break; ... ; bufpos = 0;" */
+        bufpos -= 64;
+        if (bufpos != 0)
+            break;
+        /* Buffer is filled up, process it */
+        ctx->process_block(ctx);
+        /*bufpos = 0; - already is */
+    }
+#endif
 }
 
-void FAST_FUNC sha512_hash(const void *buffer, size_t len, sha512_ctx_t *ctx)
+void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
 {
-    unsigned in_buf = ctx->total64[0] & 127;
-    unsigned add = 128 - in_buf;
+    unsigned bufpos = ctx->total64[0] & 127;
+    unsigned remaining;
 
     /* First increment the byte count. FIPS 180-2 specifies the possible
        length of the file up to 2^128 _bits_.
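The "clever way" comment rewards a second look: because bufpos and remaining are unsigned, bufpos - 64 wraps to a nonzero value for every partial fill, and lands on exactly the 0 the next iteration needs when the buffer was completely filled. A quick self-check sketch (editorial, not from the commit) over every position the loop can produce:

#include <assert.h>

int main(void)
{
    unsigned bufpos;
    for (bufpos = 0; bufpos <= 64; bufpos++) {
        /* straightforward form */
        unsigned a = bufpos;
        int stop_a = (a != 64);
        if (!stop_a)
            a = 0;
        /* clever form: unsigned wraparound makes (bufpos - 64)
         * nonzero unless bufpos == 64, and then it is already 0 */
        unsigned b = bufpos - 64;
        int stop_b = (b != 0);
        assert(stop_a == stop_b);
        if (!stop_a)
            assert(a == b);
    }
    return 0;
}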
@@ -391,36 +422,57 @@ void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
     ctx->total64[0] += len;
     if (ctx->total64[0] < len)
         ctx->total64[1]++;
-
-    while (len >= add) { /* transfer whole blocks while possible */
-        memcpy(ctx->wbuffer + in_buf, buffer, add);
-        buffer = (const char *)buffer + add;
-        len -= add;
-        add = 128;
-        in_buf = 0;
+#if 0
+    remaining = 128 - bufpos;
+
+    /* Hash whole blocks */
+    while (len >= remaining) {
+        memcpy(ctx->wbuffer + bufpos, buffer, remaining);
+        buffer = (const char *)buffer + remaining;
+        len -= remaining;
+        remaining = 128;
+        bufpos = 0;
         sha512_process_block128(ctx);
     }
 
-    memcpy(ctx->wbuffer + in_buf, buffer, len);
+    /* Save last, partial block */
+    memcpy(ctx->wbuffer + bufpos, buffer, len);
+#else
+    while (1) {
+        remaining = 128 - bufpos;
+        if (remaining > len)
+            remaining = len;
+        /* Copy data into aligned buffer */
+        memcpy(ctx->wbuffer + bufpos, buffer, remaining);
+        len -= remaining;
+        buffer = (const char *)buffer + remaining;
+        bufpos += remaining;
+        /* clever way to do "if (bufpos != 128) break; ... ; bufpos = 0;" */
+        bufpos -= 128;
+        if (bufpos != 0)
+            break;
+        /* Buffer is filled up, process it */
+        sha512_process_block128(ctx);
+        /*bufpos = 0; - already is */
+    }
+#endif
 }
 
 
 /* Used also for sha256 */
-void FAST_FUNC sha1_end(void *resbuf, sha1_ctx_t *ctx)
+void FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
 {
-    unsigned pad, in_buf;
+    unsigned bufpos = ctx->total64 & 63;
 
-    in_buf = ctx->total64 & 63;
     /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
-    ctx->wbuffer[in_buf++] = 0x80;
+    ctx->wbuffer[bufpos++] = 0x80;
 
     /* This loop iterates either once or twice, no more, no less */
     while (1) {
-        pad = 64 - in_buf;
-        memset(ctx->wbuffer + in_buf, 0, pad);
-        in_buf = 0;
+        unsigned remaining = 64 - bufpos;
+        memset(ctx->wbuffer + bufpos, 0, remaining);
         /* Do we have enough space for the length count? */
-        if (pad >= 8) {
+        if (remaining >= 8) {
             /* Store the 64-bit counter of bits in the buffer in BE format */
             uint64_t t = ctx->total64 << 3;
             t = hton64(t);
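The counter update at the top of sha512_hash is a portable 128-bit increment: after the unsigned addition, total64[0] < len holds exactly when the low word wrapped around, so the high word absorbs the carry. As a standalone sketch:

#include <stdint.h>

static void add_to_count128(uint64_t total64[2], uint64_t len)
{
    total64[0] += len;
    if (total64[0] < len)   /* unsigned overflow means a carry */
        total64[1]++;
}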
@@ -428,35 +480,32 @@ void FAST_FUNC sha1_end(void *resbuf, sha1_ctx_t *ctx)
             *(uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
         }
         ctx->process_block(ctx);
-        if (pad >= 8)
+        if (remaining >= 8)
             break;
+        bufpos = 0;
     }
 
-    in_buf = (ctx->process_block == sha1_process_block64) ? 5 : 8;
+    bufpos = (ctx->process_block == sha1_process_block64) ? 5 : 8;
     /* This way we do not impose alignment constraints on resbuf: */
     if (BB_LITTLE_ENDIAN) {
         unsigned i;
-        for (i = 0; i < in_buf; ++i)
+        for (i = 0; i < bufpos; ++i)
             ctx->hash[i] = htonl(ctx->hash[i]);
     }
-    memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * in_buf);
+    memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * bufpos);
 }
 
-void FAST_FUNC sha512_end(void *resbuf, sha512_ctx_t *ctx)
+void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
 {
-    unsigned pad, in_buf;
+    unsigned bufpos = ctx->total64[0] & 127;
 
-    in_buf = ctx->total64[0] & 127;
-    /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0...
-     * (FIPS 180-2:5.1.2)
-     */
-    ctx->wbuffer[in_buf++] = 0x80;
+    /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
+    ctx->wbuffer[bufpos++] = 0x80;
 
     while (1) {
-        pad = 128 - in_buf;
-        memset(ctx->wbuffer + in_buf, 0, pad);
-        in_buf = 0;
-        if (pad >= 16) {
+        unsigned remaining = 128 - bufpos;
+        memset(ctx->wbuffer + bufpos, 0, remaining);
+        if (remaining >= 16) {
             /* Store the 128-bit counter of bits in the buffer in BE format */
             uint64_t t;
             t = ctx->total64[0] << 3;
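The "once or twice, no more, no less" claim in sha1_end is easy to verify: after the 0x80 padding byte there is room for the 8-byte bit count in the same block only when at most 55 data bytes were buffered; otherwise the count goes into a second, all-zero block (the new bufpos = 0 line resets the position for that second pass). A small sketch of the resulting block count (illustrative helper name, not from the commit):

static unsigned sha1_final_blocks(unsigned buffered /* 0..63 */)
{
    unsigned bufpos = buffered + 1;     /* after the 0x80 byte */
    return (64 - bufpos >= 8) ? 1 : 2;  /* room for the length? */
}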
@@ -467,8 +516,9 @@ void FAST_FUNC sha512_end(void *resbuf, sha512_ctx_t *ctx)
             *(uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
         }
         sha512_process_block128(ctx);
-        if (pad >= 16)
+        if (remaining >= 16)
             break;
+        bufpos = 0;
     }
 
     if (BB_LITTLE_ENDIAN) {
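Note that all four public entry points now take the context first: sha1_hash(ctx, buffer, len) instead of sha1_hash(buffer, len, ctx), and likewise for sha512_hash and the _end functions. A hedged usage sketch for callers, assuming the libbb declarations are in scope:

#include <stdint.h>
#include <stddef.h>

/* Hash one buffer with the reordered API (context first) */
static void demo_sha1(const void *buffer, size_t len, uint8_t digest[20])
{
    sha1_ctx_t ctx;

    sha1_begin(&ctx);
    sha1_hash(&ctx, buffer, len);
    sha1_end(&ctx, digest);
}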