diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-06 00:55:52 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-06 00:56:13 +0100 |
commit | 4f40735c87f8292a87c066b3b7099b0be007cf59 (patch) | |
tree | 26cd4b1adea86e0b7301a2899a8a4b38937541f1 | |
parent | 31c1c310772fa6c897ee1585ea15fc38f3ab3dff (diff) | |
download | busybox-w32-4f40735c87f8292a87c066b3b7099b0be007cf59.tar.gz busybox-w32-4f40735c87f8292a87c066b3b7099b0be007cf59.tar.bz2 busybox-w32-4f40735c87f8292a87c066b3b7099b0be007cf59.zip |
libbb/sha256: code shrink in 32-bit x86
function old new delta
sha256_process_block64_shaNI 722 713 -9
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/hash_md5_sha256_x86-32_shaNI.S | 93 |
1 file changed, 48 insertions, 45 deletions
diff --git a/libbb/hash_md5_sha256_x86-32_shaNI.S b/libbb/hash_md5_sha256_x86-32_shaNI.S index 632dab7e6..417da37d8 100644 --- a/libbb/hash_md5_sha256_x86-32_shaNI.S +++ b/libbb/hash_md5_sha256_x86-32_shaNI.S | |||
@@ -31,7 +31,7 @@ | |||
31 | #define MSGTMP1 %xmm4 | 31 | #define MSGTMP1 %xmm4 |
32 | #define MSGTMP2 %xmm5 | 32 | #define MSGTMP2 %xmm5 |
33 | #define MSGTMP3 %xmm6 | 33 | #define MSGTMP3 %xmm6 |
34 | #define MSGTMP4 %xmm7 | 34 | #define XMMTMP4 %xmm7 |
35 | 35 | ||
36 | .balign 8 # allow decoders to fetch at least 3 first insns | 36 | .balign 8 # allow decoders to fetch at least 3 first insns |
37 | sha256_process_block64_shaNI: | 37 | sha256_process_block64_shaNI: |
@@ -45,10 +45,12 @@ sha256_process_block64_shaNI: | |||
45 | 45 | ||
46 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ | 46 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ |
47 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ | 47 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ |
48 | mova128 STATE0, MSGTMP4 | 48 | mova128 STATE0, XMMTMP4 |
49 | palignr $8, STATE1, STATE0 /* ABEF */ | 49 | palignr $8, STATE1, STATE0 /* ABEF */ |
50 | pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ | 50 | pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */ |
51 | 51 | ||
52 | /* XMMTMP4 holds flip mask from here... */ | ||
53 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP4 | ||
52 | movl $K256+8*16, SHA256CONSTANTS | 54 | movl $K256+8*16, SHA256CONSTANTS |
53 | 55 | ||
54 | /* Save hash values for addition after rounds */ | 56 | /* Save hash values for addition after rounds */ |
@@ -57,7 +59,7 @@ sha256_process_block64_shaNI: | |||
57 | 59 | ||
58 | /* Rounds 0-3 */ | 60 | /* Rounds 0-3 */ |
59 | movu128 0*16(DATA_PTR), MSG | 61 | movu128 0*16(DATA_PTR), MSG |
60 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG | 62 | pshufb XMMTMP4, MSG |
61 | mova128 MSG, MSGTMP0 | 63 | mova128 MSG, MSGTMP0 |
62 | paddd 0*16-8*16(SHA256CONSTANTS), MSG | 64 | paddd 0*16-8*16(SHA256CONSTANTS), MSG |
63 | sha256rnds2 STATE0, STATE1 | 65 | sha256rnds2 STATE0, STATE1 |
@@ -66,7 +68,7 @@ sha256_process_block64_shaNI: | |||
66 | 68 | ||
67 | /* Rounds 4-7 */ | 69 | /* Rounds 4-7 */ |
68 | movu128 1*16(DATA_PTR), MSG | 70 | movu128 1*16(DATA_PTR), MSG |
69 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG | 71 | pshufb XMMTMP4, MSG |
70 | mova128 MSG, MSGTMP1 | 72 | mova128 MSG, MSGTMP1 |
71 | paddd 1*16-8*16(SHA256CONSTANTS), MSG | 73 | paddd 1*16-8*16(SHA256CONSTANTS), MSG |
72 | sha256rnds2 STATE0, STATE1 | 74 | sha256rnds2 STATE0, STATE1 |
@@ -76,7 +78,7 @@ sha256_process_block64_shaNI: | |||
76 | 78 | ||
77 | /* Rounds 8-11 */ | 79 | /* Rounds 8-11 */ |
78 | movu128 2*16(DATA_PTR), MSG | 80 | movu128 2*16(DATA_PTR), MSG |
79 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG | 81 | pshufb XMMTMP4, MSG |
80 | mova128 MSG, MSGTMP2 | 82 | mova128 MSG, MSGTMP2 |
81 | paddd 2*16-8*16(SHA256CONSTANTS), MSG | 83 | paddd 2*16-8*16(SHA256CONSTANTS), MSG |
82 | sha256rnds2 STATE0, STATE1 | 84 | sha256rnds2 STATE0, STATE1 |
@@ -86,13 +88,14 @@ sha256_process_block64_shaNI: | |||
86 | 88 | ||
87 | /* Rounds 12-15 */ | 89 | /* Rounds 12-15 */ |
88 | movu128 3*16(DATA_PTR), MSG | 90 | movu128 3*16(DATA_PTR), MSG |
89 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG | 91 | pshufb XMMTMP4, MSG |
92 | /* ...to here */ | ||
90 | mova128 MSG, MSGTMP3 | 93 | mova128 MSG, MSGTMP3 |
91 | paddd 3*16-8*16(SHA256CONSTANTS), MSG | 94 | paddd 3*16-8*16(SHA256CONSTANTS), MSG |
92 | sha256rnds2 STATE0, STATE1 | 95 | sha256rnds2 STATE0, STATE1 |
93 | mova128 MSGTMP3, MSGTMP4 | 96 | mova128 MSGTMP3, XMMTMP4 |
94 | palignr $4, MSGTMP2, MSGTMP4 | 97 | palignr $4, MSGTMP2, XMMTMP4 |
95 | paddd MSGTMP4, MSGTMP0 | 98 | paddd XMMTMP4, MSGTMP0 |
96 | sha256msg2 MSGTMP3, MSGTMP0 | 99 | sha256msg2 MSGTMP3, MSGTMP0 |
97 | shuf128_32 $0x0E, MSG, MSG | 100 | shuf128_32 $0x0E, MSG, MSG |
98 | sha256rnds2 STATE1, STATE0 | 101 | sha256rnds2 STATE1, STATE0 |
@@ -102,9 +105,9 @@ sha256_process_block64_shaNI: | |||
102 | mova128 MSGTMP0, MSG | 105 | mova128 MSGTMP0, MSG |
103 | paddd 4*16-8*16(SHA256CONSTANTS), MSG | 106 | paddd 4*16-8*16(SHA256CONSTANTS), MSG |
104 | sha256rnds2 STATE0, STATE1 | 107 | sha256rnds2 STATE0, STATE1 |
105 | mova128 MSGTMP0, MSGTMP4 | 108 | mova128 MSGTMP0, XMMTMP4 |
106 | palignr $4, MSGTMP3, MSGTMP4 | 109 | palignr $4, MSGTMP3, XMMTMP4 |
107 | paddd MSGTMP4, MSGTMP1 | 110 | paddd XMMTMP4, MSGTMP1 |
108 | sha256msg2 MSGTMP0, MSGTMP1 | 111 | sha256msg2 MSGTMP0, MSGTMP1 |
109 | shuf128_32 $0x0E, MSG, MSG | 112 | shuf128_32 $0x0E, MSG, MSG |
110 | sha256rnds2 STATE1, STATE0 | 113 | sha256rnds2 STATE1, STATE0 |
@@ -114,9 +117,9 @@ sha256_process_block64_shaNI: | |||
114 | mova128 MSGTMP1, MSG | 117 | mova128 MSGTMP1, MSG |
115 | paddd 5*16-8*16(SHA256CONSTANTS), MSG | 118 | paddd 5*16-8*16(SHA256CONSTANTS), MSG |
116 | sha256rnds2 STATE0, STATE1 | 119 | sha256rnds2 STATE0, STATE1 |
117 | mova128 MSGTMP1, MSGTMP4 | 120 | mova128 MSGTMP1, XMMTMP4 |
118 | palignr $4, MSGTMP0, MSGTMP4 | 121 | palignr $4, MSGTMP0, XMMTMP4 |
119 | paddd MSGTMP4, MSGTMP2 | 122 | paddd XMMTMP4, MSGTMP2 |
120 | sha256msg2 MSGTMP1, MSGTMP2 | 123 | sha256msg2 MSGTMP1, MSGTMP2 |
121 | shuf128_32 $0x0E, MSG, MSG | 124 | shuf128_32 $0x0E, MSG, MSG |
122 | sha256rnds2 STATE1, STATE0 | 125 | sha256rnds2 STATE1, STATE0 |
@@ -126,9 +129,9 @@ sha256_process_block64_shaNI: | |||
126 | mova128 MSGTMP2, MSG | 129 | mova128 MSGTMP2, MSG |
127 | paddd 6*16-8*16(SHA256CONSTANTS), MSG | 130 | paddd 6*16-8*16(SHA256CONSTANTS), MSG |
128 | sha256rnds2 STATE0, STATE1 | 131 | sha256rnds2 STATE0, STATE1 |
129 | mova128 MSGTMP2, MSGTMP4 | 132 | mova128 MSGTMP2, XMMTMP4 |
130 | palignr $4, MSGTMP1, MSGTMP4 | 133 | palignr $4, MSGTMP1, XMMTMP4 |
131 | paddd MSGTMP4, MSGTMP3 | 134 | paddd XMMTMP4, MSGTMP3 |
132 | sha256msg2 MSGTMP2, MSGTMP3 | 135 | sha256msg2 MSGTMP2, MSGTMP3 |
133 | shuf128_32 $0x0E, MSG, MSG | 136 | shuf128_32 $0x0E, MSG, MSG |
134 | sha256rnds2 STATE1, STATE0 | 137 | sha256rnds2 STATE1, STATE0 |
@@ -138,9 +141,9 @@ sha256_process_block64_shaNI: | |||
138 | mova128 MSGTMP3, MSG | 141 | mova128 MSGTMP3, MSG |
139 | paddd 7*16-8*16(SHA256CONSTANTS), MSG | 142 | paddd 7*16-8*16(SHA256CONSTANTS), MSG |
140 | sha256rnds2 STATE0, STATE1 | 143 | sha256rnds2 STATE0, STATE1 |
141 | mova128 MSGTMP3, MSGTMP4 | 144 | mova128 MSGTMP3, XMMTMP4 |
142 | palignr $4, MSGTMP2, MSGTMP4 | 145 | palignr $4, MSGTMP2, XMMTMP4 |
143 | paddd MSGTMP4, MSGTMP0 | 146 | paddd XMMTMP4, MSGTMP0 |
144 | sha256msg2 MSGTMP3, MSGTMP0 | 147 | sha256msg2 MSGTMP3, MSGTMP0 |
145 | shuf128_32 $0x0E, MSG, MSG | 148 | shuf128_32 $0x0E, MSG, MSG |
146 | sha256rnds2 STATE1, STATE0 | 149 | sha256rnds2 STATE1, STATE0 |
@@ -150,9 +153,9 @@ sha256_process_block64_shaNI: | |||
150 | mova128 MSGTMP0, MSG | 153 | mova128 MSGTMP0, MSG |
151 | paddd 8*16-8*16(SHA256CONSTANTS), MSG | 154 | paddd 8*16-8*16(SHA256CONSTANTS), MSG |
152 | sha256rnds2 STATE0, STATE1 | 155 | sha256rnds2 STATE0, STATE1 |
153 | mova128 MSGTMP0, MSGTMP4 | 156 | mova128 MSGTMP0, XMMTMP4 |
154 | palignr $4, MSGTMP3, MSGTMP4 | 157 | palignr $4, MSGTMP3, XMMTMP4 |
155 | paddd MSGTMP4, MSGTMP1 | 158 | paddd XMMTMP4, MSGTMP1 |
156 | sha256msg2 MSGTMP0, MSGTMP1 | 159 | sha256msg2 MSGTMP0, MSGTMP1 |
157 | shuf128_32 $0x0E, MSG, MSG | 160 | shuf128_32 $0x0E, MSG, MSG |
158 | sha256rnds2 STATE1, STATE0 | 161 | sha256rnds2 STATE1, STATE0 |
@@ -162,9 +165,9 @@ sha256_process_block64_shaNI: | |||
162 | mova128 MSGTMP1, MSG | 165 | mova128 MSGTMP1, MSG |
163 | paddd 9*16-8*16(SHA256CONSTANTS), MSG | 166 | paddd 9*16-8*16(SHA256CONSTANTS), MSG |
164 | sha256rnds2 STATE0, STATE1 | 167 | sha256rnds2 STATE0, STATE1 |
165 | mova128 MSGTMP1, MSGTMP4 | 168 | mova128 MSGTMP1, XMMTMP4 |
166 | palignr $4, MSGTMP0, MSGTMP4 | 169 | palignr $4, MSGTMP0, XMMTMP4 |
167 | paddd MSGTMP4, MSGTMP2 | 170 | paddd XMMTMP4, MSGTMP2 |
168 | sha256msg2 MSGTMP1, MSGTMP2 | 171 | sha256msg2 MSGTMP1, MSGTMP2 |
169 | shuf128_32 $0x0E, MSG, MSG | 172 | shuf128_32 $0x0E, MSG, MSG |
170 | sha256rnds2 STATE1, STATE0 | 173 | sha256rnds2 STATE1, STATE0 |
@@ -174,9 +177,9 @@ sha256_process_block64_shaNI: | |||
174 | mova128 MSGTMP2, MSG | 177 | mova128 MSGTMP2, MSG |
175 | paddd 10*16-8*16(SHA256CONSTANTS), MSG | 178 | paddd 10*16-8*16(SHA256CONSTANTS), MSG |
176 | sha256rnds2 STATE0, STATE1 | 179 | sha256rnds2 STATE0, STATE1 |
177 | mova128 MSGTMP2, MSGTMP4 | 180 | mova128 MSGTMP2, XMMTMP4 |
178 | palignr $4, MSGTMP1, MSGTMP4 | 181 | palignr $4, MSGTMP1, XMMTMP4 |
179 | paddd MSGTMP4, MSGTMP3 | 182 | paddd XMMTMP4, MSGTMP3 |
180 | sha256msg2 MSGTMP2, MSGTMP3 | 183 | sha256msg2 MSGTMP2, MSGTMP3 |
181 | shuf128_32 $0x0E, MSG, MSG | 184 | shuf128_32 $0x0E, MSG, MSG |
182 | sha256rnds2 STATE1, STATE0 | 185 | sha256rnds2 STATE1, STATE0 |
@@ -186,9 +189,9 @@ sha256_process_block64_shaNI: | |||
186 | mova128 MSGTMP3, MSG | 189 | mova128 MSGTMP3, MSG |
187 | paddd 11*16-8*16(SHA256CONSTANTS), MSG | 190 | paddd 11*16-8*16(SHA256CONSTANTS), MSG |
188 | sha256rnds2 STATE0, STATE1 | 191 | sha256rnds2 STATE0, STATE1 |
189 | mova128 MSGTMP3, MSGTMP4 | 192 | mova128 MSGTMP3, XMMTMP4 |
190 | palignr $4, MSGTMP2, MSGTMP4 | 193 | palignr $4, MSGTMP2, XMMTMP4 |
191 | paddd MSGTMP4, MSGTMP0 | 194 | paddd XMMTMP4, MSGTMP0 |
192 | sha256msg2 MSGTMP3, MSGTMP0 | 195 | sha256msg2 MSGTMP3, MSGTMP0 |
193 | shuf128_32 $0x0E, MSG, MSG | 196 | shuf128_32 $0x0E, MSG, MSG |
194 | sha256rnds2 STATE1, STATE0 | 197 | sha256rnds2 STATE1, STATE0 |
@@ -198,9 +201,9 @@ sha256_process_block64_shaNI: | |||
198 | mova128 MSGTMP0, MSG | 201 | mova128 MSGTMP0, MSG |
199 | paddd 12*16-8*16(SHA256CONSTANTS), MSG | 202 | paddd 12*16-8*16(SHA256CONSTANTS), MSG |
200 | sha256rnds2 STATE0, STATE1 | 203 | sha256rnds2 STATE0, STATE1 |
201 | mova128 MSGTMP0, MSGTMP4 | 204 | mova128 MSGTMP0, XMMTMP4 |
202 | palignr $4, MSGTMP3, MSGTMP4 | 205 | palignr $4, MSGTMP3, XMMTMP4 |
203 | paddd MSGTMP4, MSGTMP1 | 206 | paddd XMMTMP4, MSGTMP1 |
204 | sha256msg2 MSGTMP0, MSGTMP1 | 207 | sha256msg2 MSGTMP0, MSGTMP1 |
205 | shuf128_32 $0x0E, MSG, MSG | 208 | shuf128_32 $0x0E, MSG, MSG |
206 | sha256rnds2 STATE1, STATE0 | 209 | sha256rnds2 STATE1, STATE0 |
@@ -210,9 +213,9 @@ sha256_process_block64_shaNI: | |||
210 | mova128 MSGTMP1, MSG | 213 | mova128 MSGTMP1, MSG |
211 | paddd 13*16-8*16(SHA256CONSTANTS), MSG | 214 | paddd 13*16-8*16(SHA256CONSTANTS), MSG |
212 | sha256rnds2 STATE0, STATE1 | 215 | sha256rnds2 STATE0, STATE1 |
213 | mova128 MSGTMP1, MSGTMP4 | 216 | mova128 MSGTMP1, XMMTMP4 |
214 | palignr $4, MSGTMP0, MSGTMP4 | 217 | palignr $4, MSGTMP0, XMMTMP4 |
215 | paddd MSGTMP4, MSGTMP2 | 218 | paddd XMMTMP4, MSGTMP2 |
216 | sha256msg2 MSGTMP1, MSGTMP2 | 219 | sha256msg2 MSGTMP1, MSGTMP2 |
217 | shuf128_32 $0x0E, MSG, MSG | 220 | shuf128_32 $0x0E, MSG, MSG |
218 | sha256rnds2 STATE1, STATE0 | 221 | sha256rnds2 STATE1, STATE0 |
@@ -221,9 +224,9 @@ sha256_process_block64_shaNI: | |||
221 | mova128 MSGTMP2, MSG | 224 | mova128 MSGTMP2, MSG |
222 | paddd 14*16-8*16(SHA256CONSTANTS), MSG | 225 | paddd 14*16-8*16(SHA256CONSTANTS), MSG |
223 | sha256rnds2 STATE0, STATE1 | 226 | sha256rnds2 STATE0, STATE1 |
224 | mova128 MSGTMP2, MSGTMP4 | 227 | mova128 MSGTMP2, XMMTMP4 |
225 | palignr $4, MSGTMP1, MSGTMP4 | 228 | palignr $4, MSGTMP1, XMMTMP4 |
226 | paddd MSGTMP4, MSGTMP3 | 229 | paddd XMMTMP4, MSGTMP3 |
227 | sha256msg2 MSGTMP2, MSGTMP3 | 230 | sha256msg2 MSGTMP2, MSGTMP3 |
228 | shuf128_32 $0x0E, MSG, MSG | 231 | shuf128_32 $0x0E, MSG, MSG |
229 | sha256rnds2 STATE1, STATE0 | 232 | sha256rnds2 STATE1, STATE0 |
@@ -242,9 +245,9 @@ sha256_process_block64_shaNI: | |||
242 | /* Write hash values back in the correct order */ | 245 | /* Write hash values back in the correct order */ |
243 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ | 246 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ |
244 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ | 247 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ |
245 | mova128 STATE0, MSGTMP4 | 248 | mova128 STATE0, XMMTMP4 |
246 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ | 249 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ |
247 | palignr $8, MSGTMP4, STATE1 /* HGFE */ | 250 | palignr $8, XMMTMP4, STATE1 /* HGFE */ |
248 | 251 | ||
249 | movu128 STATE0, 76+0*16(%eax) | 252 | movu128 STATE0, 76+0*16(%eax) |
250 | movu128 STATE1, 76+1*16(%eax) | 253 | movu128 STATE1, 76+1*16(%eax) |