diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-09 00:33:39 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-09 00:33:39 +0100 |
commit | c0ff0d4528d718c20b9ca2290bd10d59e9f794a3 (patch) | |
tree | 7c0879d37c70fef047a55ef72f4aff5dcf88ab12 | |
parent | eb8d5f3b8f3c91f3ed82a52b4ce52a154c146ede (diff) | |
download | busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.tar.gz busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.tar.bz2 busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.zip |
libbb/sha256: code shrink in 32-bit x86
function                                             old     new   delta
sha256_process_block64_shaNI                         713     697     -16
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/hash_md5_sha256_x86-32_shaNI.S | 130 | ||||
-rw-r--r-- | libbb/hash_md5_sha256_x86-64_shaNI.S | 107 |
2 files changed, 114 insertions, 123 deletions
diff --git a/libbb/hash_md5_sha256_x86-32_shaNI.S b/libbb/hash_md5_sha256_x86-32_shaNI.S index 39e2baf41..a849dfcc2 100644 --- a/libbb/hash_md5_sha256_x86-32_shaNI.S +++ b/libbb/hash_md5_sha256_x86-32_shaNI.S | |||
@@ -31,35 +31,27 @@ | |||
31 | #define MSGTMP1 %xmm4 | 31 | #define MSGTMP1 %xmm4 |
32 | #define MSGTMP2 %xmm5 | 32 | #define MSGTMP2 %xmm5 |
33 | #define MSGTMP3 %xmm6 | 33 | #define MSGTMP3 %xmm6 |
34 | #define XMMTMP4 %xmm7 | ||
35 | 34 | ||
36 | .balign 8 # allow decoders to fetch at least 3 first insns | 35 | #define XMMTMP %xmm7 |
37 | sha256_process_block64_shaNI: | ||
38 | pushl %ebp | ||
39 | movl %esp, %ebp | ||
40 | subl $32, %esp | ||
41 | andl $~0xF, %esp # paddd needs aligned memory operand | ||
42 | 36 | ||
37 | .balign 8 # allow decoders to fetch at least 2 first insns | ||
38 | sha256_process_block64_shaNI: | ||
43 | movu128 76+0*16(%eax), STATE0 | 39 | movu128 76+0*16(%eax), STATE0 |
44 | movu128 76+1*16(%eax), STATE1 | 40 | movu128 76+1*16(%eax), STATE1 |
45 | 41 | ||
46 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ | 42 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ |
47 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ | 43 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ |
48 | mova128 STATE0, XMMTMP4 | 44 | mova128 STATE0, XMMTMP |
49 | palignr $8, STATE1, STATE0 /* ABEF */ | 45 | palignr $8, STATE1, STATE0 /* ABEF */ |
50 | pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */ | 46 | pblendw $0xF0, XMMTMP, STATE1 /* CDGH */ |
51 | 47 | ||
52 | /* XMMTMP4 holds flip mask from here... */ | 48 | /* XMMTMP holds flip mask from here... */ |
53 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP4 | 49 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP |
54 | movl $K256+8*16, SHA256CONSTANTS | 50 | movl $K256+8*16, SHA256CONSTANTS |
55 | 51 | ||
56 | /* Save hash values for addition after rounds */ | ||
57 | mova128 STATE0, 0*16(%esp) | ||
58 | mova128 STATE1, 1*16(%esp) | ||
59 | |||
60 | /* Rounds 0-3 */ | 52 | /* Rounds 0-3 */ |
61 | movu128 0*16(DATA_PTR), MSG | 53 | movu128 0*16(DATA_PTR), MSG |
62 | pshufb XMMTMP4, MSG | 54 | pshufb XMMTMP, MSG |
63 | mova128 MSG, MSGTMP0 | 55 | mova128 MSG, MSGTMP0 |
64 | paddd 0*16-8*16(SHA256CONSTANTS), MSG | 56 | paddd 0*16-8*16(SHA256CONSTANTS), MSG |
65 | sha256rnds2 STATE0, STATE1 | 57 | sha256rnds2 STATE0, STATE1 |
@@ -68,7 +60,7 @@ sha256_process_block64_shaNI: | |||
68 | 60 | ||
69 | /* Rounds 4-7 */ | 61 | /* Rounds 4-7 */ |
70 | movu128 1*16(DATA_PTR), MSG | 62 | movu128 1*16(DATA_PTR), MSG |
71 | pshufb XMMTMP4, MSG | 63 | pshufb XMMTMP, MSG |
72 | mova128 MSG, MSGTMP1 | 64 | mova128 MSG, MSGTMP1 |
73 | paddd 1*16-8*16(SHA256CONSTANTS), MSG | 65 | paddd 1*16-8*16(SHA256CONSTANTS), MSG |
74 | sha256rnds2 STATE0, STATE1 | 66 | sha256rnds2 STATE0, STATE1 |
@@ -78,7 +70,7 @@ sha256_process_block64_shaNI: | |||
78 | 70 | ||
79 | /* Rounds 8-11 */ | 71 | /* Rounds 8-11 */ |
80 | movu128 2*16(DATA_PTR), MSG | 72 | movu128 2*16(DATA_PTR), MSG |
81 | pshufb XMMTMP4, MSG | 73 | pshufb XMMTMP, MSG |
82 | mova128 MSG, MSGTMP2 | 74 | mova128 MSG, MSGTMP2 |
83 | paddd 2*16-8*16(SHA256CONSTANTS), MSG | 75 | paddd 2*16-8*16(SHA256CONSTANTS), MSG |
84 | sha256rnds2 STATE0, STATE1 | 76 | sha256rnds2 STATE0, STATE1 |
@@ -88,14 +80,14 @@ sha256_process_block64_shaNI: | |||
88 | 80 | ||
89 | /* Rounds 12-15 */ | 81 | /* Rounds 12-15 */ |
90 | movu128 3*16(DATA_PTR), MSG | 82 | movu128 3*16(DATA_PTR), MSG |
91 | pshufb XMMTMP4, MSG | 83 | pshufb XMMTMP, MSG |
92 | /* ...to here */ | 84 | /* ...to here */ |
93 | mova128 MSG, MSGTMP3 | 85 | mova128 MSG, MSGTMP3 |
94 | paddd 3*16-8*16(SHA256CONSTANTS), MSG | 86 | paddd 3*16-8*16(SHA256CONSTANTS), MSG |
95 | sha256rnds2 STATE0, STATE1 | 87 | sha256rnds2 STATE0, STATE1 |
96 | mova128 MSGTMP3, XMMTMP4 | 88 | mova128 MSGTMP3, XMMTMP |
97 | palignr $4, MSGTMP2, XMMTMP4 | 89 | palignr $4, MSGTMP2, XMMTMP |
98 | paddd XMMTMP4, MSGTMP0 | 90 | paddd XMMTMP, MSGTMP0 |
99 | sha256msg2 MSGTMP3, MSGTMP0 | 91 | sha256msg2 MSGTMP3, MSGTMP0 |
100 | shuf128_32 $0x0E, MSG, MSG | 92 | shuf128_32 $0x0E, MSG, MSG |
101 | sha256rnds2 STATE1, STATE0 | 93 | sha256rnds2 STATE1, STATE0 |
@@ -105,9 +97,9 @@ sha256_process_block64_shaNI: | |||
105 | mova128 MSGTMP0, MSG | 97 | mova128 MSGTMP0, MSG |
106 | paddd 4*16-8*16(SHA256CONSTANTS), MSG | 98 | paddd 4*16-8*16(SHA256CONSTANTS), MSG |
107 | sha256rnds2 STATE0, STATE1 | 99 | sha256rnds2 STATE0, STATE1 |
108 | mova128 MSGTMP0, XMMTMP4 | 100 | mova128 MSGTMP0, XMMTMP |
109 | palignr $4, MSGTMP3, XMMTMP4 | 101 | palignr $4, MSGTMP3, XMMTMP |
110 | paddd XMMTMP4, MSGTMP1 | 102 | paddd XMMTMP, MSGTMP1 |
111 | sha256msg2 MSGTMP0, MSGTMP1 | 103 | sha256msg2 MSGTMP0, MSGTMP1 |
112 | shuf128_32 $0x0E, MSG, MSG | 104 | shuf128_32 $0x0E, MSG, MSG |
113 | sha256rnds2 STATE1, STATE0 | 105 | sha256rnds2 STATE1, STATE0 |
@@ -117,9 +109,9 @@ sha256_process_block64_shaNI: | |||
117 | mova128 MSGTMP1, MSG | 109 | mova128 MSGTMP1, MSG |
118 | paddd 5*16-8*16(SHA256CONSTANTS), MSG | 110 | paddd 5*16-8*16(SHA256CONSTANTS), MSG |
119 | sha256rnds2 STATE0, STATE1 | 111 | sha256rnds2 STATE0, STATE1 |
120 | mova128 MSGTMP1, XMMTMP4 | 112 | mova128 MSGTMP1, XMMTMP |
121 | palignr $4, MSGTMP0, XMMTMP4 | 113 | palignr $4, MSGTMP0, XMMTMP |
122 | paddd XMMTMP4, MSGTMP2 | 114 | paddd XMMTMP, MSGTMP2 |
123 | sha256msg2 MSGTMP1, MSGTMP2 | 115 | sha256msg2 MSGTMP1, MSGTMP2 |
124 | shuf128_32 $0x0E, MSG, MSG | 116 | shuf128_32 $0x0E, MSG, MSG |
125 | sha256rnds2 STATE1, STATE0 | 117 | sha256rnds2 STATE1, STATE0 |
@@ -129,9 +121,9 @@ sha256_process_block64_shaNI: | |||
129 | mova128 MSGTMP2, MSG | 121 | mova128 MSGTMP2, MSG |
130 | paddd 6*16-8*16(SHA256CONSTANTS), MSG | 122 | paddd 6*16-8*16(SHA256CONSTANTS), MSG |
131 | sha256rnds2 STATE0, STATE1 | 123 | sha256rnds2 STATE0, STATE1 |
132 | mova128 MSGTMP2, XMMTMP4 | 124 | mova128 MSGTMP2, XMMTMP |
133 | palignr $4, MSGTMP1, XMMTMP4 | 125 | palignr $4, MSGTMP1, XMMTMP |
134 | paddd XMMTMP4, MSGTMP3 | 126 | paddd XMMTMP, MSGTMP3 |
135 | sha256msg2 MSGTMP2, MSGTMP3 | 127 | sha256msg2 MSGTMP2, MSGTMP3 |
136 | shuf128_32 $0x0E, MSG, MSG | 128 | shuf128_32 $0x0E, MSG, MSG |
137 | sha256rnds2 STATE1, STATE0 | 129 | sha256rnds2 STATE1, STATE0 |
@@ -141,9 +133,9 @@ sha256_process_block64_shaNI: | |||
141 | mova128 MSGTMP3, MSG | 133 | mova128 MSGTMP3, MSG |
142 | paddd 7*16-8*16(SHA256CONSTANTS), MSG | 134 | paddd 7*16-8*16(SHA256CONSTANTS), MSG |
143 | sha256rnds2 STATE0, STATE1 | 135 | sha256rnds2 STATE0, STATE1 |
144 | mova128 MSGTMP3, XMMTMP4 | 136 | mova128 MSGTMP3, XMMTMP |
145 | palignr $4, MSGTMP2, XMMTMP4 | 137 | palignr $4, MSGTMP2, XMMTMP |
146 | paddd XMMTMP4, MSGTMP0 | 138 | paddd XMMTMP, MSGTMP0 |
147 | sha256msg2 MSGTMP3, MSGTMP0 | 139 | sha256msg2 MSGTMP3, MSGTMP0 |
148 | shuf128_32 $0x0E, MSG, MSG | 140 | shuf128_32 $0x0E, MSG, MSG |
149 | sha256rnds2 STATE1, STATE0 | 141 | sha256rnds2 STATE1, STATE0 |
@@ -153,9 +145,9 @@ sha256_process_block64_shaNI: | |||
153 | mova128 MSGTMP0, MSG | 145 | mova128 MSGTMP0, MSG |
154 | paddd 8*16-8*16(SHA256CONSTANTS), MSG | 146 | paddd 8*16-8*16(SHA256CONSTANTS), MSG |
155 | sha256rnds2 STATE0, STATE1 | 147 | sha256rnds2 STATE0, STATE1 |
156 | mova128 MSGTMP0, XMMTMP4 | 148 | mova128 MSGTMP0, XMMTMP |
157 | palignr $4, MSGTMP3, XMMTMP4 | 149 | palignr $4, MSGTMP3, XMMTMP |
158 | paddd XMMTMP4, MSGTMP1 | 150 | paddd XMMTMP, MSGTMP1 |
159 | sha256msg2 MSGTMP0, MSGTMP1 | 151 | sha256msg2 MSGTMP0, MSGTMP1 |
160 | shuf128_32 $0x0E, MSG, MSG | 152 | shuf128_32 $0x0E, MSG, MSG |
161 | sha256rnds2 STATE1, STATE0 | 153 | sha256rnds2 STATE1, STATE0 |
@@ -165,9 +157,9 @@ sha256_process_block64_shaNI: | |||
165 | mova128 MSGTMP1, MSG | 157 | mova128 MSGTMP1, MSG |
166 | paddd 9*16-8*16(SHA256CONSTANTS), MSG | 158 | paddd 9*16-8*16(SHA256CONSTANTS), MSG |
167 | sha256rnds2 STATE0, STATE1 | 159 | sha256rnds2 STATE0, STATE1 |
168 | mova128 MSGTMP1, XMMTMP4 | 160 | mova128 MSGTMP1, XMMTMP |
169 | palignr $4, MSGTMP0, XMMTMP4 | 161 | palignr $4, MSGTMP0, XMMTMP |
170 | paddd XMMTMP4, MSGTMP2 | 162 | paddd XMMTMP, MSGTMP2 |
171 | sha256msg2 MSGTMP1, MSGTMP2 | 163 | sha256msg2 MSGTMP1, MSGTMP2 |
172 | shuf128_32 $0x0E, MSG, MSG | 164 | shuf128_32 $0x0E, MSG, MSG |
173 | sha256rnds2 STATE1, STATE0 | 165 | sha256rnds2 STATE1, STATE0 |
@@ -177,9 +169,9 @@ sha256_process_block64_shaNI: | |||
177 | mova128 MSGTMP2, MSG | 169 | mova128 MSGTMP2, MSG |
178 | paddd 10*16-8*16(SHA256CONSTANTS), MSG | 170 | paddd 10*16-8*16(SHA256CONSTANTS), MSG |
179 | sha256rnds2 STATE0, STATE1 | 171 | sha256rnds2 STATE0, STATE1 |
180 | mova128 MSGTMP2, XMMTMP4 | 172 | mova128 MSGTMP2, XMMTMP |
181 | palignr $4, MSGTMP1, XMMTMP4 | 173 | palignr $4, MSGTMP1, XMMTMP |
182 | paddd XMMTMP4, MSGTMP3 | 174 | paddd XMMTMP, MSGTMP3 |
183 | sha256msg2 MSGTMP2, MSGTMP3 | 175 | sha256msg2 MSGTMP2, MSGTMP3 |
184 | shuf128_32 $0x0E, MSG, MSG | 176 | shuf128_32 $0x0E, MSG, MSG |
185 | sha256rnds2 STATE1, STATE0 | 177 | sha256rnds2 STATE1, STATE0 |
@@ -189,9 +181,9 @@ sha256_process_block64_shaNI: | |||
189 | mova128 MSGTMP3, MSG | 181 | mova128 MSGTMP3, MSG |
190 | paddd 11*16-8*16(SHA256CONSTANTS), MSG | 182 | paddd 11*16-8*16(SHA256CONSTANTS), MSG |
191 | sha256rnds2 STATE0, STATE1 | 183 | sha256rnds2 STATE0, STATE1 |
192 | mova128 MSGTMP3, XMMTMP4 | 184 | mova128 MSGTMP3, XMMTMP |
193 | palignr $4, MSGTMP2, XMMTMP4 | 185 | palignr $4, MSGTMP2, XMMTMP |
194 | paddd XMMTMP4, MSGTMP0 | 186 | paddd XMMTMP, MSGTMP0 |
195 | sha256msg2 MSGTMP3, MSGTMP0 | 187 | sha256msg2 MSGTMP3, MSGTMP0 |
196 | shuf128_32 $0x0E, MSG, MSG | 188 | shuf128_32 $0x0E, MSG, MSG |
197 | sha256rnds2 STATE1, STATE0 | 189 | sha256rnds2 STATE1, STATE0 |
@@ -201,9 +193,9 @@ sha256_process_block64_shaNI: | |||
201 | mova128 MSGTMP0, MSG | 193 | mova128 MSGTMP0, MSG |
202 | paddd 12*16-8*16(SHA256CONSTANTS), MSG | 194 | paddd 12*16-8*16(SHA256CONSTANTS), MSG |
203 | sha256rnds2 STATE0, STATE1 | 195 | sha256rnds2 STATE0, STATE1 |
204 | mova128 MSGTMP0, XMMTMP4 | 196 | mova128 MSGTMP0, XMMTMP |
205 | palignr $4, MSGTMP3, XMMTMP4 | 197 | palignr $4, MSGTMP3, XMMTMP |
206 | paddd XMMTMP4, MSGTMP1 | 198 | paddd XMMTMP, MSGTMP1 |
207 | sha256msg2 MSGTMP0, MSGTMP1 | 199 | sha256msg2 MSGTMP0, MSGTMP1 |
208 | shuf128_32 $0x0E, MSG, MSG | 200 | shuf128_32 $0x0E, MSG, MSG |
209 | sha256rnds2 STATE1, STATE0 | 201 | sha256rnds2 STATE1, STATE0 |
@@ -213,9 +205,9 @@ sha256_process_block64_shaNI: | |||
213 | mova128 MSGTMP1, MSG | 205 | mova128 MSGTMP1, MSG |
214 | paddd 13*16-8*16(SHA256CONSTANTS), MSG | 206 | paddd 13*16-8*16(SHA256CONSTANTS), MSG |
215 | sha256rnds2 STATE0, STATE1 | 207 | sha256rnds2 STATE0, STATE1 |
216 | mova128 MSGTMP1, XMMTMP4 | 208 | mova128 MSGTMP1, XMMTMP |
217 | palignr $4, MSGTMP0, XMMTMP4 | 209 | palignr $4, MSGTMP0, XMMTMP |
218 | paddd XMMTMP4, MSGTMP2 | 210 | paddd XMMTMP, MSGTMP2 |
219 | sha256msg2 MSGTMP1, MSGTMP2 | 211 | sha256msg2 MSGTMP1, MSGTMP2 |
220 | shuf128_32 $0x0E, MSG, MSG | 212 | shuf128_32 $0x0E, MSG, MSG |
221 | sha256rnds2 STATE1, STATE0 | 213 | sha256rnds2 STATE1, STATE0 |
@@ -224,9 +216,9 @@ sha256_process_block64_shaNI: | |||
224 | mova128 MSGTMP2, MSG | 216 | mova128 MSGTMP2, MSG |
225 | paddd 14*16-8*16(SHA256CONSTANTS), MSG | 217 | paddd 14*16-8*16(SHA256CONSTANTS), MSG |
226 | sha256rnds2 STATE0, STATE1 | 218 | sha256rnds2 STATE0, STATE1 |
227 | mova128 MSGTMP2, XMMTMP4 | 219 | mova128 MSGTMP2, XMMTMP |
228 | palignr $4, MSGTMP1, XMMTMP4 | 220 | palignr $4, MSGTMP1, XMMTMP |
229 | paddd XMMTMP4, MSGTMP3 | 221 | paddd XMMTMP, MSGTMP3 |
230 | sha256msg2 MSGTMP2, MSGTMP3 | 222 | sha256msg2 MSGTMP2, MSGTMP3 |
231 | shuf128_32 $0x0E, MSG, MSG | 223 | shuf128_32 $0x0E, MSG, MSG |
232 | sha256rnds2 STATE1, STATE0 | 224 | sha256rnds2 STATE1, STATE0 |
@@ -238,22 +230,20 @@ sha256_process_block64_shaNI: | |||
238 | shuf128_32 $0x0E, MSG, MSG | 230 | shuf128_32 $0x0E, MSG, MSG |
239 | sha256rnds2 STATE1, STATE0 | 231 | sha256rnds2 STATE1, STATE0 |
240 | 232 | ||
241 | /* Add current hash values with previously saved */ | ||
242 | paddd 0*16(%esp), STATE0 | ||
243 | paddd 1*16(%esp), STATE1 | ||
244 | |||
245 | /* Write hash values back in the correct order */ | 233 | /* Write hash values back in the correct order */ |
246 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ | 234 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ |
247 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ | 235 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ |
248 | mova128 STATE0, XMMTMP4 | 236 | mova128 STATE0, XMMTMP |
249 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ | 237 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ |
250 | palignr $8, XMMTMP4, STATE1 /* HGFE */ | 238 | palignr $8, XMMTMP, STATE1 /* HGFE */ |
251 | 239 | /* add current hash values to previous ones */ | |
240 | movu128 76+0*16(%eax), XMMTMP | ||
241 | paddd XMMTMP, STATE0 | ||
242 | movu128 76+1*16(%eax), XMMTMP | ||
252 | movu128 STATE0, 76+0*16(%eax) | 243 | movu128 STATE0, 76+0*16(%eax) |
244 | paddd XMMTMP, STATE1 | ||
253 | movu128 STATE1, 76+1*16(%eax) | 245 | movu128 STATE1, 76+1*16(%eax) |
254 | 246 | ||
255 | movl %ebp, %esp | ||
256 | popl %ebp | ||
257 | ret | 247 | ret |
258 | .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI | 248 | .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI |
259 | 249 | ||
diff --git a/libbb/hash_md5_sha256_x86-64_shaNI.S b/libbb/hash_md5_sha256_x86-64_shaNI.S index c6c931341..b5c950a9a 100644 --- a/libbb/hash_md5_sha256_x86-64_shaNI.S +++ b/libbb/hash_md5_sha256_x86-64_shaNI.S | |||
@@ -31,7 +31,8 @@ | |||
31 | #define MSGTMP1 %xmm4 | 31 | #define MSGTMP1 %xmm4 |
32 | #define MSGTMP2 %xmm5 | 32 | #define MSGTMP2 %xmm5 |
33 | #define MSGTMP3 %xmm6 | 33 | #define MSGTMP3 %xmm6 |
34 | #define XMMTMP4 %xmm7 | 34 | |
35 | #define XMMTMP %xmm7 | ||
35 | 36 | ||
36 | #define ABEF_SAVE %xmm9 | 37 | #define ABEF_SAVE %xmm9 |
37 | #define CDGH_SAVE %xmm10 | 38 | #define CDGH_SAVE %xmm10 |
@@ -41,14 +42,14 @@ sha256_process_block64_shaNI: | |||
41 | movu128 80+0*16(%rdi), STATE0 | 42 | movu128 80+0*16(%rdi), STATE0 |
42 | movu128 80+1*16(%rdi), STATE1 | 43 | movu128 80+1*16(%rdi), STATE1 |
43 | 44 | ||
44 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ | 45 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ |
45 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ | 46 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ |
46 | mova128 STATE0, XMMTMP4 | 47 | mova128 STATE0, XMMTMP |
47 | palignr $8, STATE1, STATE0 /* ABEF */ | 48 | palignr $8, STATE1, STATE0 /* ABEF */ |
48 | pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */ | 49 | pblendw $0xF0, XMMTMP, STATE1 /* CDGH */ |
49 | 50 | ||
50 | /* XMMTMP4 holds flip mask from here... */ | 51 | /* XMMTMP holds flip mask from here... */ |
51 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP4 | 52 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP |
52 | leaq K256+8*16(%rip), SHA256CONSTANTS | 53 | leaq K256+8*16(%rip), SHA256CONSTANTS |
53 | 54 | ||
54 | /* Save hash values for addition after rounds */ | 55 | /* Save hash values for addition after rounds */ |
@@ -57,7 +58,7 @@ sha256_process_block64_shaNI: | |||
57 | 58 | ||
58 | /* Rounds 0-3 */ | 59 | /* Rounds 0-3 */ |
59 | movu128 0*16(DATA_PTR), MSG | 60 | movu128 0*16(DATA_PTR), MSG |
60 | pshufb XMMTMP4, MSG | 61 | pshufb XMMTMP, MSG |
61 | mova128 MSG, MSGTMP0 | 62 | mova128 MSG, MSGTMP0 |
62 | paddd 0*16-8*16(SHA256CONSTANTS), MSG | 63 | paddd 0*16-8*16(SHA256CONSTANTS), MSG |
63 | sha256rnds2 STATE0, STATE1 | 64 | sha256rnds2 STATE0, STATE1 |
@@ -66,7 +67,7 @@ sha256_process_block64_shaNI: | |||
66 | 67 | ||
67 | /* Rounds 4-7 */ | 68 | /* Rounds 4-7 */ |
68 | movu128 1*16(DATA_PTR), MSG | 69 | movu128 1*16(DATA_PTR), MSG |
69 | pshufb XMMTMP4, MSG | 70 | pshufb XMMTMP, MSG |
70 | mova128 MSG, MSGTMP1 | 71 | mova128 MSG, MSGTMP1 |
71 | paddd 1*16-8*16(SHA256CONSTANTS), MSG | 72 | paddd 1*16-8*16(SHA256CONSTANTS), MSG |
72 | sha256rnds2 STATE0, STATE1 | 73 | sha256rnds2 STATE0, STATE1 |
@@ -76,7 +77,7 @@ sha256_process_block64_shaNI: | |||
76 | 77 | ||
77 | /* Rounds 8-11 */ | 78 | /* Rounds 8-11 */ |
78 | movu128 2*16(DATA_PTR), MSG | 79 | movu128 2*16(DATA_PTR), MSG |
79 | pshufb XMMTMP4, MSG | 80 | pshufb XMMTMP, MSG |
80 | mova128 MSG, MSGTMP2 | 81 | mova128 MSG, MSGTMP2 |
81 | paddd 2*16-8*16(SHA256CONSTANTS), MSG | 82 | paddd 2*16-8*16(SHA256CONSTANTS), MSG |
82 | sha256rnds2 STATE0, STATE1 | 83 | sha256rnds2 STATE0, STATE1 |
@@ -86,14 +87,14 @@ sha256_process_block64_shaNI: | |||
86 | 87 | ||
87 | /* Rounds 12-15 */ | 88 | /* Rounds 12-15 */ |
88 | movu128 3*16(DATA_PTR), MSG | 89 | movu128 3*16(DATA_PTR), MSG |
89 | pshufb XMMTMP4, MSG | 90 | pshufb XMMTMP, MSG |
90 | /* ...to here */ | 91 | /* ...to here */ |
91 | mova128 MSG, MSGTMP3 | 92 | mova128 MSG, MSGTMP3 |
92 | paddd 3*16-8*16(SHA256CONSTANTS), MSG | 93 | paddd 3*16-8*16(SHA256CONSTANTS), MSG |
93 | sha256rnds2 STATE0, STATE1 | 94 | sha256rnds2 STATE0, STATE1 |
94 | mova128 MSGTMP3, XMMTMP4 | 95 | mova128 MSGTMP3, XMMTMP |
95 | palignr $4, MSGTMP2, XMMTMP4 | 96 | palignr $4, MSGTMP2, XMMTMP |
96 | paddd XMMTMP4, MSGTMP0 | 97 | paddd XMMTMP, MSGTMP0 |
97 | sha256msg2 MSGTMP3, MSGTMP0 | 98 | sha256msg2 MSGTMP3, MSGTMP0 |
98 | shuf128_32 $0x0E, MSG, MSG | 99 | shuf128_32 $0x0E, MSG, MSG |
99 | sha256rnds2 STATE1, STATE0 | 100 | sha256rnds2 STATE1, STATE0 |
@@ -103,9 +104,9 @@ sha256_process_block64_shaNI: | |||
103 | mova128 MSGTMP0, MSG | 104 | mova128 MSGTMP0, MSG |
104 | paddd 4*16-8*16(SHA256CONSTANTS), MSG | 105 | paddd 4*16-8*16(SHA256CONSTANTS), MSG |
105 | sha256rnds2 STATE0, STATE1 | 106 | sha256rnds2 STATE0, STATE1 |
106 | mova128 MSGTMP0, XMMTMP4 | 107 | mova128 MSGTMP0, XMMTMP |
107 | palignr $4, MSGTMP3, XMMTMP4 | 108 | palignr $4, MSGTMP3, XMMTMP |
108 | paddd XMMTMP4, MSGTMP1 | 109 | paddd XMMTMP, MSGTMP1 |
109 | sha256msg2 MSGTMP0, MSGTMP1 | 110 | sha256msg2 MSGTMP0, MSGTMP1 |
110 | shuf128_32 $0x0E, MSG, MSG | 111 | shuf128_32 $0x0E, MSG, MSG |
111 | sha256rnds2 STATE1, STATE0 | 112 | sha256rnds2 STATE1, STATE0 |
@@ -115,9 +116,9 @@ sha256_process_block64_shaNI: | |||
115 | mova128 MSGTMP1, MSG | 116 | mova128 MSGTMP1, MSG |
116 | paddd 5*16-8*16(SHA256CONSTANTS), MSG | 117 | paddd 5*16-8*16(SHA256CONSTANTS), MSG |
117 | sha256rnds2 STATE0, STATE1 | 118 | sha256rnds2 STATE0, STATE1 |
118 | mova128 MSGTMP1, XMMTMP4 | 119 | mova128 MSGTMP1, XMMTMP |
119 | palignr $4, MSGTMP0, XMMTMP4 | 120 | palignr $4, MSGTMP0, XMMTMP |
120 | paddd XMMTMP4, MSGTMP2 | 121 | paddd XMMTMP, MSGTMP2 |
121 | sha256msg2 MSGTMP1, MSGTMP2 | 122 | sha256msg2 MSGTMP1, MSGTMP2 |
122 | shuf128_32 $0x0E, MSG, MSG | 123 | shuf128_32 $0x0E, MSG, MSG |
123 | sha256rnds2 STATE1, STATE0 | 124 | sha256rnds2 STATE1, STATE0 |
@@ -127,9 +128,9 @@ sha256_process_block64_shaNI: | |||
127 | mova128 MSGTMP2, MSG | 128 | mova128 MSGTMP2, MSG |
128 | paddd 6*16-8*16(SHA256CONSTANTS), MSG | 129 | paddd 6*16-8*16(SHA256CONSTANTS), MSG |
129 | sha256rnds2 STATE0, STATE1 | 130 | sha256rnds2 STATE0, STATE1 |
130 | mova128 MSGTMP2, XMMTMP4 | 131 | mova128 MSGTMP2, XMMTMP |
131 | palignr $4, MSGTMP1, XMMTMP4 | 132 | palignr $4, MSGTMP1, XMMTMP |
132 | paddd XMMTMP4, MSGTMP3 | 133 | paddd XMMTMP, MSGTMP3 |
133 | sha256msg2 MSGTMP2, MSGTMP3 | 134 | sha256msg2 MSGTMP2, MSGTMP3 |
134 | shuf128_32 $0x0E, MSG, MSG | 135 | shuf128_32 $0x0E, MSG, MSG |
135 | sha256rnds2 STATE1, STATE0 | 136 | sha256rnds2 STATE1, STATE0 |
@@ -139,9 +140,9 @@ sha256_process_block64_shaNI: | |||
139 | mova128 MSGTMP3, MSG | 140 | mova128 MSGTMP3, MSG |
140 | paddd 7*16-8*16(SHA256CONSTANTS), MSG | 141 | paddd 7*16-8*16(SHA256CONSTANTS), MSG |
141 | sha256rnds2 STATE0, STATE1 | 142 | sha256rnds2 STATE0, STATE1 |
142 | mova128 MSGTMP3, XMMTMP4 | 143 | mova128 MSGTMP3, XMMTMP |
143 | palignr $4, MSGTMP2, XMMTMP4 | 144 | palignr $4, MSGTMP2, XMMTMP |
144 | paddd XMMTMP4, MSGTMP0 | 145 | paddd XMMTMP, MSGTMP0 |
145 | sha256msg2 MSGTMP3, MSGTMP0 | 146 | sha256msg2 MSGTMP3, MSGTMP0 |
146 | shuf128_32 $0x0E, MSG, MSG | 147 | shuf128_32 $0x0E, MSG, MSG |
147 | sha256rnds2 STATE1, STATE0 | 148 | sha256rnds2 STATE1, STATE0 |
@@ -151,9 +152,9 @@ sha256_process_block64_shaNI: | |||
151 | mova128 MSGTMP0, MSG | 152 | mova128 MSGTMP0, MSG |
152 | paddd 8*16-8*16(SHA256CONSTANTS), MSG | 153 | paddd 8*16-8*16(SHA256CONSTANTS), MSG |
153 | sha256rnds2 STATE0, STATE1 | 154 | sha256rnds2 STATE0, STATE1 |
154 | mova128 MSGTMP0, XMMTMP4 | 155 | mova128 MSGTMP0, XMMTMP |
155 | palignr $4, MSGTMP3, XMMTMP4 | 156 | palignr $4, MSGTMP3, XMMTMP |
156 | paddd XMMTMP4, MSGTMP1 | 157 | paddd XMMTMP, MSGTMP1 |
157 | sha256msg2 MSGTMP0, MSGTMP1 | 158 | sha256msg2 MSGTMP0, MSGTMP1 |
158 | shuf128_32 $0x0E, MSG, MSG | 159 | shuf128_32 $0x0E, MSG, MSG |
159 | sha256rnds2 STATE1, STATE0 | 160 | sha256rnds2 STATE1, STATE0 |
@@ -163,9 +164,9 @@ sha256_process_block64_shaNI: | |||
163 | mova128 MSGTMP1, MSG | 164 | mova128 MSGTMP1, MSG |
164 | paddd 9*16-8*16(SHA256CONSTANTS), MSG | 165 | paddd 9*16-8*16(SHA256CONSTANTS), MSG |
165 | sha256rnds2 STATE0, STATE1 | 166 | sha256rnds2 STATE0, STATE1 |
166 | mova128 MSGTMP1, XMMTMP4 | 167 | mova128 MSGTMP1, XMMTMP |
167 | palignr $4, MSGTMP0, XMMTMP4 | 168 | palignr $4, MSGTMP0, XMMTMP |
168 | paddd XMMTMP4, MSGTMP2 | 169 | paddd XMMTMP, MSGTMP2 |
169 | sha256msg2 MSGTMP1, MSGTMP2 | 170 | sha256msg2 MSGTMP1, MSGTMP2 |
170 | shuf128_32 $0x0E, MSG, MSG | 171 | shuf128_32 $0x0E, MSG, MSG |
171 | sha256rnds2 STATE1, STATE0 | 172 | sha256rnds2 STATE1, STATE0 |
@@ -175,9 +176,9 @@ sha256_process_block64_shaNI: | |||
175 | mova128 MSGTMP2, MSG | 176 | mova128 MSGTMP2, MSG |
176 | paddd 10*16-8*16(SHA256CONSTANTS), MSG | 177 | paddd 10*16-8*16(SHA256CONSTANTS), MSG |
177 | sha256rnds2 STATE0, STATE1 | 178 | sha256rnds2 STATE0, STATE1 |
178 | mova128 MSGTMP2, XMMTMP4 | 179 | mova128 MSGTMP2, XMMTMP |
179 | palignr $4, MSGTMP1, XMMTMP4 | 180 | palignr $4, MSGTMP1, XMMTMP |
180 | paddd XMMTMP4, MSGTMP3 | 181 | paddd XMMTMP, MSGTMP3 |
181 | sha256msg2 MSGTMP2, MSGTMP3 | 182 | sha256msg2 MSGTMP2, MSGTMP3 |
182 | shuf128_32 $0x0E, MSG, MSG | 183 | shuf128_32 $0x0E, MSG, MSG |
183 | sha256rnds2 STATE1, STATE0 | 184 | sha256rnds2 STATE1, STATE0 |
@@ -187,9 +188,9 @@ sha256_process_block64_shaNI: | |||
187 | mova128 MSGTMP3, MSG | 188 | mova128 MSGTMP3, MSG |
188 | paddd 11*16-8*16(SHA256CONSTANTS), MSG | 189 | paddd 11*16-8*16(SHA256CONSTANTS), MSG |
189 | sha256rnds2 STATE0, STATE1 | 190 | sha256rnds2 STATE0, STATE1 |
190 | mova128 MSGTMP3, XMMTMP4 | 191 | mova128 MSGTMP3, XMMTMP |
191 | palignr $4, MSGTMP2, XMMTMP4 | 192 | palignr $4, MSGTMP2, XMMTMP |
192 | paddd XMMTMP4, MSGTMP0 | 193 | paddd XMMTMP, MSGTMP0 |
193 | sha256msg2 MSGTMP3, MSGTMP0 | 194 | sha256msg2 MSGTMP3, MSGTMP0 |
194 | shuf128_32 $0x0E, MSG, MSG | 195 | shuf128_32 $0x0E, MSG, MSG |
195 | sha256rnds2 STATE1, STATE0 | 196 | sha256rnds2 STATE1, STATE0 |
@@ -199,9 +200,9 @@ sha256_process_block64_shaNI: | |||
199 | mova128 MSGTMP0, MSG | 200 | mova128 MSGTMP0, MSG |
200 | paddd 12*16-8*16(SHA256CONSTANTS), MSG | 201 | paddd 12*16-8*16(SHA256CONSTANTS), MSG |
201 | sha256rnds2 STATE0, STATE1 | 202 | sha256rnds2 STATE0, STATE1 |
202 | mova128 MSGTMP0, XMMTMP4 | 203 | mova128 MSGTMP0, XMMTMP |
203 | palignr $4, MSGTMP3, XMMTMP4 | 204 | palignr $4, MSGTMP3, XMMTMP |
204 | paddd XMMTMP4, MSGTMP1 | 205 | paddd XMMTMP, MSGTMP1 |
205 | sha256msg2 MSGTMP0, MSGTMP1 | 206 | sha256msg2 MSGTMP0, MSGTMP1 |
206 | shuf128_32 $0x0E, MSG, MSG | 207 | shuf128_32 $0x0E, MSG, MSG |
207 | sha256rnds2 STATE1, STATE0 | 208 | sha256rnds2 STATE1, STATE0 |
@@ -211,9 +212,9 @@ sha256_process_block64_shaNI: | |||
211 | mova128 MSGTMP1, MSG | 212 | mova128 MSGTMP1, MSG |
212 | paddd 13*16-8*16(SHA256CONSTANTS), MSG | 213 | paddd 13*16-8*16(SHA256CONSTANTS), MSG |
213 | sha256rnds2 STATE0, STATE1 | 214 | sha256rnds2 STATE0, STATE1 |
214 | mova128 MSGTMP1, XMMTMP4 | 215 | mova128 MSGTMP1, XMMTMP |
215 | palignr $4, MSGTMP0, XMMTMP4 | 216 | palignr $4, MSGTMP0, XMMTMP |
216 | paddd XMMTMP4, MSGTMP2 | 217 | paddd XMMTMP, MSGTMP2 |
217 | sha256msg2 MSGTMP1, MSGTMP2 | 218 | sha256msg2 MSGTMP1, MSGTMP2 |
218 | shuf128_32 $0x0E, MSG, MSG | 219 | shuf128_32 $0x0E, MSG, MSG |
219 | sha256rnds2 STATE1, STATE0 | 220 | sha256rnds2 STATE1, STATE0 |
@@ -222,9 +223,9 @@ sha256_process_block64_shaNI: | |||
222 | mova128 MSGTMP2, MSG | 223 | mova128 MSGTMP2, MSG |
223 | paddd 14*16-8*16(SHA256CONSTANTS), MSG | 224 | paddd 14*16-8*16(SHA256CONSTANTS), MSG |
224 | sha256rnds2 STATE0, STATE1 | 225 | sha256rnds2 STATE0, STATE1 |
225 | mova128 MSGTMP2, XMMTMP4 | 226 | mova128 MSGTMP2, XMMTMP |
226 | palignr $4, MSGTMP1, XMMTMP4 | 227 | palignr $4, MSGTMP1, XMMTMP |
227 | paddd XMMTMP4, MSGTMP3 | 228 | paddd XMMTMP, MSGTMP3 |
228 | sha256msg2 MSGTMP2, MSGTMP3 | 229 | sha256msg2 MSGTMP2, MSGTMP3 |
229 | shuf128_32 $0x0E, MSG, MSG | 230 | shuf128_32 $0x0E, MSG, MSG |
230 | sha256rnds2 STATE1, STATE0 | 231 | sha256rnds2 STATE1, STATE0 |
@@ -241,11 +242,11 @@ sha256_process_block64_shaNI: | |||
241 | paddd CDGH_SAVE, STATE1 | 242 | paddd CDGH_SAVE, STATE1 |
242 | 243 | ||
243 | /* Write hash values back in the correct order */ | 244 | /* Write hash values back in the correct order */ |
244 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ | 245 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ |
245 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ | 246 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ |
246 | mova128 STATE0, XMMTMP4 | 247 | mova128 STATE0, XMMTMP |
247 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ | 248 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ |
248 | palignr $8, XMMTMP4, STATE1 /* HGFE */ | 249 | palignr $8, XMMTMP, STATE1 /* HGFE */ |
249 | 250 | ||
250 | movu128 STATE0, 80+0*16(%rdi) | 251 | movu128 STATE0, 80+0*16(%rdi) |
251 | movu128 STATE1, 80+1*16(%rdi) | 252 | movu128 STATE1, 80+1*16(%rdi) |