diff options
Diffstat (limited to '')
| -rw-r--r-- | src/lib/libcrypto/arch/amd64/Makefile.inc | 3 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/sha1_amd64.c | 8 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/sha1_amd64_shani.S | 170 |
3 files changed, 179 insertions, 2 deletions
diff --git a/src/lib/libcrypto/arch/amd64/Makefile.inc b/src/lib/libcrypto/arch/amd64/Makefile.inc index 33c7dbba26..f8f829cca1 100644 --- a/src/lib/libcrypto/arch/amd64/Makefile.inc +++ b/src/lib/libcrypto/arch/amd64/Makefile.inc | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | # $OpenBSD: Makefile.inc,v 1.34 2024/12/04 13:13:33 jsing Exp $ | 1 | # $OpenBSD: Makefile.inc,v 1.35 2024/12/06 11:57:17 jsing Exp $ |
| 2 | 2 | ||
| 3 | # amd64-specific libcrypto build rules | 3 | # amd64-specific libcrypto build rules |
| 4 | 4 | ||
| @@ -51,6 +51,7 @@ SSLASM+= rc4 rc4-x86_64 | |||
| 51 | CFLAGS+= -DSHA1_ASM | 51 | CFLAGS+= -DSHA1_ASM |
| 52 | SRCS+= sha1_amd64.c | 52 | SRCS+= sha1_amd64.c |
| 53 | SRCS+= sha1_amd64_generic.S | 53 | SRCS+= sha1_amd64_generic.S |
| 54 | SRCS+= sha1_amd64_shani.S | ||
| 54 | CFLAGS+= -DSHA256_ASM | 55 | CFLAGS+= -DSHA256_ASM |
| 55 | SRCS+= sha256_amd64.c | 56 | SRCS+= sha256_amd64.c |
| 56 | SRCS+= sha256_amd64_generic.S | 57 | SRCS+= sha256_amd64_generic.S |
diff --git a/src/lib/libcrypto/sha/sha1_amd64.c b/src/lib/libcrypto/sha/sha1_amd64.c index b3d4ab1263..2976cc7e6e 100644 --- a/src/lib/libcrypto/sha/sha1_amd64.c +++ b/src/lib/libcrypto/sha/sha1_amd64.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: sha1_amd64.c,v 1.1 2024/12/04 13:13:33 jsing Exp $ */ | 1 | /* $OpenBSD: sha1_amd64.c,v 1.2 2024/12/06 11:57:18 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -20,9 +20,15 @@ | |||
| 20 | #include "crypto_arch.h" | 20 | #include "crypto_arch.h" |
| 21 | 21 | ||
| 22 | void sha1_block_generic(SHA_CTX *ctx, const void *in, size_t num); | 22 | void sha1_block_generic(SHA_CTX *ctx, const void *in, size_t num); |
| 23 | void sha1_block_shani(SHA_CTX *ctx, const void *in, size_t num); | ||
| 23 | 24 | ||
| 24 | void | 25 | void |
| 25 | sha1_block_data_order(SHA_CTX *ctx, const void *in, size_t num) | 26 | sha1_block_data_order(SHA_CTX *ctx, const void *in, size_t num) |
| 26 | { | 27 | { |
| 28 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_SHA) != 0) { | ||
| 29 | sha1_block_shani(ctx, in, num); | ||
| 30 | return; | ||
| 31 | } | ||
| 32 | |||
| 27 | sha1_block_generic(ctx, in, num); | 33 | sha1_block_generic(ctx, in, num); |
| 28 | } | 34 | } |
diff --git a/src/lib/libcrypto/sha/sha1_amd64_shani.S b/src/lib/libcrypto/sha/sha1_amd64_shani.S new file mode 100644 index 0000000000..d7699d10f1 --- /dev/null +++ b/src/lib/libcrypto/sha/sha1_amd64_shani.S | |||
| @@ -0,0 +1,170 @@ | |||
| 1 | /* $OpenBSD: sha1_amd64_shani.S,v 1.1 2024/12/06 11:57:18 jsing Exp $ */ | ||
| 2 | /* | ||
| 3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> | ||
| 4 | * | ||
| 5 | * Permission to use, copy, modify, and distribute this software for any | ||
| 6 | * purpose with or without fee is hereby granted, provided that the above | ||
| 7 | * copyright notice and this permission notice appear in all copies. | ||
| 8 | * | ||
| 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #ifdef __CET__ | ||
| 19 | #include <cet.h> | ||
| 20 | #else | ||
| 21 | #define _CET_ENDBR | ||
| 22 | #endif | ||
| 23 | |||
| 24 | /* | ||
| 25 | * SHA-1 implementation using the Intel SHA extensions: | ||
| 26 | * | ||
| 27 | * https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html | ||
| 28 | */ | ||
| 29 | |||
| 30 | #define ctx %rdi | ||
| 31 | #define in %rsi | ||
| 32 | #define num %rdx | ||
| 33 | |||
| 34 | #define end %rbx | ||
| 35 | |||
| 36 | #define xabcd_save %xmm0 | ||
| 37 | #define xe_save %xmm1 | ||
| 38 | |||
| 39 | #define xabcd %xmm2 | ||
| 40 | #define xe0 %xmm3 | ||
| 41 | #define xe1 %xmm4 | ||
| 42 | |||
| 43 | #define xmsg0 %xmm5 | ||
| 44 | #define xmsg1 %xmm6 | ||
| 45 | #define xmsg2 %xmm7 | ||
| 46 | #define xmsg3 %xmm8 | ||
| 47 | |||
| 48 | #define xshufmask %xmm9 | ||
| 49 | |||
| 50 | |||
| 51 | #define sha1_message_schedule_load(idx, m, xmsg) \ | ||
| 52 | movdqu (idx*16)(m), xmsg; \ | ||
| 53 | pshufb xshufmask, xmsg; | ||
| 54 | |||
| 55 | #define sha1_message_schedule_update(xm0, xm1, xm2, xm3) \ | ||
| 56 | sha1msg1 xm1, xm0; \ | ||
| 57 | pxor xm2, xm0; \ | ||
| 58 | sha1msg2 xm3, xm0; | ||
| 59 | |||
| 60 | #define sha1_shani_round(fn, xmsg, xe, xe_next) \ | ||
| 61 | sha1nexte xmsg, xe; \ | ||
| 62 | movdqa xabcd, xe_next; \ | ||
| 63 | sha1rnds4 fn, xe, xabcd; | ||
| 64 | |||
| 65 | #define sha1_shani_round_load(fn, idx, m, xmsg, xe, xe_next) \ | ||
| 66 | sha1_message_schedule_load(idx, m, xmsg); \ | ||
| 67 | sha1_shani_round(fn, xmsg, xe, xe_next); | ||
| 68 | |||
| 69 | #define sha1_shani_round_update(fn, xm0, xm1, xm2, xm3, xe, xe_next) \ | ||
| 70 | sha1_message_schedule_update(xm0, xm1, xm2, xm3); \ | ||
| 71 | sha1_shani_round(fn, xm0, xe, xe_next); | ||
| 72 | |||
| 73 | |||
| 74 | .text | ||
| 75 | |||
| 76 | /* | ||
| 77 | * void sha1_block_shani(SHA_CTX *ctx, const void *in, size_t num); | ||
| 78 | * | ||
| 79 | * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num | ||
| 80 | */ | ||
| 81 | .align 16 | ||
| 82 | .globl sha1_block_shani | ||
| 83 | .type sha1_block_shani,@function | ||
| 84 | sha1_block_shani: | ||
| 85 | _CET_ENDBR | ||
| 86 | |||
| 87 | /* Save callee save registers. */ | ||
| 88 | pushq %rbx | ||
| 89 | |||
| 90 | /* Compute end of message. */ | ||
| 91 | shlq $6, num | ||
| 92 | leaq (in, num, 1), end | ||
| 93 | |||
| 94 | /* Load endian shuffle mask. */ | ||
| 95 | movdqa shufmask(%rip), xshufmask | ||
| 96 | |||
| 97 | /* Load current hash state from context. */ | ||
| 98 | movdqu (0*16)(ctx), xabcd | ||
| 99 | pshufd $0x1b, xabcd, xabcd /* dcba -> abcd */ | ||
| 100 | pxor xe0, xe0 | ||
| 101 | pinsrd $3, (1*16)(ctx), xe0 /* e */ | ||
| 102 | |||
| 103 | jmp .Lshani_block_loop | ||
| 104 | |||
| 105 | .align 16 | ||
| 106 | .Lshani_block_loop: | ||
| 107 | /* Save state for accumulation. */ | ||
| 108 | movdqa xabcd, xabcd_save | ||
| 109 | movdqa xe0, xe_save | ||
| 110 | |||
| 111 | /* Rounds 0 through 15 (four rounds at a time). */ | ||
| 112 | sha1_message_schedule_load(0, in, xmsg0); | ||
| 113 | paddd xmsg0, xe0 | ||
| 114 | movdqa xabcd, xe1 | ||
| 115 | sha1rnds4 $0, xe0, xabcd | ||
| 116 | |||
| 117 | sha1_shani_round_load($0, 1, in, xmsg1, xe1, xe0); | ||
| 118 | sha1_shani_round_load($0, 2, in, xmsg2, xe0, xe1); | ||
| 119 | sha1_shani_round_load($0, 3, in, xmsg3, xe1, xe0); | ||
| 120 | |||
| 121 | /* Rounds 16 through 79 (four rounds at a time). */ | ||
| 122 | sha1_shani_round_update($0, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1) | ||
| 123 | sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0) | ||
| 124 | sha1_shani_round_update($1, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1) | ||
| 125 | sha1_shani_round_update($1, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0) | ||
| 126 | |||
| 127 | sha1_shani_round_update($1, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1) | ||
| 128 | sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0) | ||
| 129 | sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1) | ||
| 130 | sha1_shani_round_update($2, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0) | ||
| 131 | |||
| 132 | sha1_shani_round_update($2, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1) | ||
| 133 | sha1_shani_round_update($2, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0) | ||
| 134 | sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1) | ||
| 135 | sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0) | ||
| 136 | |||
| 137 | sha1_shani_round_update($3, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1) | ||
| 138 | sha1_shani_round_update($3, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0) | ||
| 139 | sha1_shani_round_update($3, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1) | ||
| 140 | sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0) | ||
| 141 | |||
| 142 | /* Accumulate hash state. */ | ||
| 143 | paddd xabcd_save, xabcd | ||
| 144 | sha1nexte xe_save, xe0 | ||
| 145 | |||
| 146 | addq $64, in | ||
| 147 | cmpq end, in | ||
| 148 | jb .Lshani_block_loop | ||
| 149 | |||
| 150 | /* Update stored hash context. */ | ||
| 151 | pshufd $0x1b, xabcd, xabcd /* abcd -> dcba */ | ||
| 152 | movdqu xabcd, (0*16)(ctx) | ||
| 153 | pextrd $3, xe0, (1*16)(ctx) /* e */ | ||
| 154 | |||
| 155 | /* Restore callee save registers. */ | ||
| 156 | popq %rbx | ||
| 157 | |||
| 158 | ret | ||
| 159 | |||
| 160 | .rodata | ||
| 161 | |||
| 162 | /* | ||
| 163 | * Shuffle mask - reverses all 16 bytes of a message load, which byte swaps each | ||
| 164 | * 32-bit word (endian conversion) and reverses the order of the four words. | ||
| 165 | */ | ||
| 166 | .align 16 | ||
| 167 | .type shufmask,@object | ||
| 168 | shufmask: | ||
| 169 | .octa 0x000102030405060708090a0b0c0d0e0f | ||
| 170 | .size shufmask,.-shufmask | ||
