diff options
Diffstat (limited to 'libbb/hash_md5_sha256_x86-32_shaNI.S')
-rw-r--r-- | libbb/hash_md5_sha256_x86-32_shaNI.S | 130 |
1 file changed, 60 insertions, 70 deletions
diff --git a/libbb/hash_md5_sha256_x86-32_shaNI.S b/libbb/hash_md5_sha256_x86-32_shaNI.S
index 39e2baf41..a849dfcc2 100644
--- a/libbb/hash_md5_sha256_x86-32_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-32_shaNI.S
@@ -31,35 +31,27 @@ | |||
31 | #define MSGTMP1 %xmm4 | 31 | #define MSGTMP1 %xmm4 |
32 | #define MSGTMP2 %xmm5 | 32 | #define MSGTMP2 %xmm5 |
33 | #define MSGTMP3 %xmm6 | 33 | #define MSGTMP3 %xmm6 |
34 | #define XMMTMP4 %xmm7 | ||
35 | 34 | ||
36 | .balign 8 # allow decoders to fetch at least 3 first insns | 35 | #define XMMTMP %xmm7 |
37 | sha256_process_block64_shaNI: | ||
38 | pushl %ebp | ||
39 | movl %esp, %ebp | ||
40 | subl $32, %esp | ||
41 | andl $~0xF, %esp # paddd needs aligned memory operand | ||
42 | 36 | ||
37 | .balign 8 # allow decoders to fetch at least 2 first insns | ||
38 | sha256_process_block64_shaNI: | ||
43 | movu128 76+0*16(%eax), STATE0 | 39 | movu128 76+0*16(%eax), STATE0 |
44 | movu128 76+1*16(%eax), STATE1 | 40 | movu128 76+1*16(%eax), STATE1 |
45 | 41 | ||
46 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ | 42 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ |
47 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ | 43 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ |
48 | mova128 STATE0, XMMTMP4 | 44 | mova128 STATE0, XMMTMP |
49 | palignr $8, STATE1, STATE0 /* ABEF */ | 45 | palignr $8, STATE1, STATE0 /* ABEF */ |
50 | pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */ | 46 | pblendw $0xF0, XMMTMP, STATE1 /* CDGH */ |
51 | 47 | ||
52 | /* XMMTMP4 holds flip mask from here... */ | 48 | /* XMMTMP holds flip mask from here... */ |
53 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP4 | 49 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP |
54 | movl $K256+8*16, SHA256CONSTANTS | 50 | movl $K256+8*16, SHA256CONSTANTS |
55 | 51 | ||
56 | /* Save hash values for addition after rounds */ | ||
57 | mova128 STATE0, 0*16(%esp) | ||
58 | mova128 STATE1, 1*16(%esp) | ||
59 | |||
60 | /* Rounds 0-3 */ | 52 | /* Rounds 0-3 */ |
61 | movu128 0*16(DATA_PTR), MSG | 53 | movu128 0*16(DATA_PTR), MSG |
62 | pshufb XMMTMP4, MSG | 54 | pshufb XMMTMP, MSG |
63 | mova128 MSG, MSGTMP0 | 55 | mova128 MSG, MSGTMP0 |
64 | paddd 0*16-8*16(SHA256CONSTANTS), MSG | 56 | paddd 0*16-8*16(SHA256CONSTANTS), MSG |
65 | sha256rnds2 STATE0, STATE1 | 57 | sha256rnds2 STATE0, STATE1 |
@@ -68,7 +60,7 @@ sha256_process_block64_shaNI: | |||
68 | 60 | ||
69 | /* Rounds 4-7 */ | 61 | /* Rounds 4-7 */ |
70 | movu128 1*16(DATA_PTR), MSG | 62 | movu128 1*16(DATA_PTR), MSG |
71 | pshufb XMMTMP4, MSG | 63 | pshufb XMMTMP, MSG |
72 | mova128 MSG, MSGTMP1 | 64 | mova128 MSG, MSGTMP1 |
73 | paddd 1*16-8*16(SHA256CONSTANTS), MSG | 65 | paddd 1*16-8*16(SHA256CONSTANTS), MSG |
74 | sha256rnds2 STATE0, STATE1 | 66 | sha256rnds2 STATE0, STATE1 |
@@ -78,7 +70,7 @@ sha256_process_block64_shaNI: | |||
78 | 70 | ||
79 | /* Rounds 8-11 */ | 71 | /* Rounds 8-11 */ |
80 | movu128 2*16(DATA_PTR), MSG | 72 | movu128 2*16(DATA_PTR), MSG |
81 | pshufb XMMTMP4, MSG | 73 | pshufb XMMTMP, MSG |
82 | mova128 MSG, MSGTMP2 | 74 | mova128 MSG, MSGTMP2 |
83 | paddd 2*16-8*16(SHA256CONSTANTS), MSG | 75 | paddd 2*16-8*16(SHA256CONSTANTS), MSG |
84 | sha256rnds2 STATE0, STATE1 | 76 | sha256rnds2 STATE0, STATE1 |
@@ -88,14 +80,14 @@ sha256_process_block64_shaNI: | |||
88 | 80 | ||
89 | /* Rounds 12-15 */ | 81 | /* Rounds 12-15 */ |
90 | movu128 3*16(DATA_PTR), MSG | 82 | movu128 3*16(DATA_PTR), MSG |
91 | pshufb XMMTMP4, MSG | 83 | pshufb XMMTMP, MSG |
92 | /* ...to here */ | 84 | /* ...to here */ |
93 | mova128 MSG, MSGTMP3 | 85 | mova128 MSG, MSGTMP3 |
94 | paddd 3*16-8*16(SHA256CONSTANTS), MSG | 86 | paddd 3*16-8*16(SHA256CONSTANTS), MSG |
95 | sha256rnds2 STATE0, STATE1 | 87 | sha256rnds2 STATE0, STATE1 |
96 | mova128 MSGTMP3, XMMTMP4 | 88 | mova128 MSGTMP3, XMMTMP |
97 | palignr $4, MSGTMP2, XMMTMP4 | 89 | palignr $4, MSGTMP2, XMMTMP |
98 | paddd XMMTMP4, MSGTMP0 | 90 | paddd XMMTMP, MSGTMP0 |
99 | sha256msg2 MSGTMP3, MSGTMP0 | 91 | sha256msg2 MSGTMP3, MSGTMP0 |
100 | shuf128_32 $0x0E, MSG, MSG | 92 | shuf128_32 $0x0E, MSG, MSG |
101 | sha256rnds2 STATE1, STATE0 | 93 | sha256rnds2 STATE1, STATE0 |
@@ -105,9 +97,9 @@ sha256_process_block64_shaNI: | |||
105 | mova128 MSGTMP0, MSG | 97 | mova128 MSGTMP0, MSG |
106 | paddd 4*16-8*16(SHA256CONSTANTS), MSG | 98 | paddd 4*16-8*16(SHA256CONSTANTS), MSG |
107 | sha256rnds2 STATE0, STATE1 | 99 | sha256rnds2 STATE0, STATE1 |
108 | mova128 MSGTMP0, XMMTMP4 | 100 | mova128 MSGTMP0, XMMTMP |
109 | palignr $4, MSGTMP3, XMMTMP4 | 101 | palignr $4, MSGTMP3, XMMTMP |
110 | paddd XMMTMP4, MSGTMP1 | 102 | paddd XMMTMP, MSGTMP1 |
111 | sha256msg2 MSGTMP0, MSGTMP1 | 103 | sha256msg2 MSGTMP0, MSGTMP1 |
112 | shuf128_32 $0x0E, MSG, MSG | 104 | shuf128_32 $0x0E, MSG, MSG |
113 | sha256rnds2 STATE1, STATE0 | 105 | sha256rnds2 STATE1, STATE0 |
@@ -117,9 +109,9 @@ sha256_process_block64_shaNI: | |||
117 | mova128 MSGTMP1, MSG | 109 | mova128 MSGTMP1, MSG |
118 | paddd 5*16-8*16(SHA256CONSTANTS), MSG | 110 | paddd 5*16-8*16(SHA256CONSTANTS), MSG |
119 | sha256rnds2 STATE0, STATE1 | 111 | sha256rnds2 STATE0, STATE1 |
120 | mova128 MSGTMP1, XMMTMP4 | 112 | mova128 MSGTMP1, XMMTMP |
121 | palignr $4, MSGTMP0, XMMTMP4 | 113 | palignr $4, MSGTMP0, XMMTMP |
122 | paddd XMMTMP4, MSGTMP2 | 114 | paddd XMMTMP, MSGTMP2 |
123 | sha256msg2 MSGTMP1, MSGTMP2 | 115 | sha256msg2 MSGTMP1, MSGTMP2 |
124 | shuf128_32 $0x0E, MSG, MSG | 116 | shuf128_32 $0x0E, MSG, MSG |
125 | sha256rnds2 STATE1, STATE0 | 117 | sha256rnds2 STATE1, STATE0 |
@@ -129,9 +121,9 @@ sha256_process_block64_shaNI: | |||
129 | mova128 MSGTMP2, MSG | 121 | mova128 MSGTMP2, MSG |
130 | paddd 6*16-8*16(SHA256CONSTANTS), MSG | 122 | paddd 6*16-8*16(SHA256CONSTANTS), MSG |
131 | sha256rnds2 STATE0, STATE1 | 123 | sha256rnds2 STATE0, STATE1 |
132 | mova128 MSGTMP2, XMMTMP4 | 124 | mova128 MSGTMP2, XMMTMP |
133 | palignr $4, MSGTMP1, XMMTMP4 | 125 | palignr $4, MSGTMP1, XMMTMP |
134 | paddd XMMTMP4, MSGTMP3 | 126 | paddd XMMTMP, MSGTMP3 |
135 | sha256msg2 MSGTMP2, MSGTMP3 | 127 | sha256msg2 MSGTMP2, MSGTMP3 |
136 | shuf128_32 $0x0E, MSG, MSG | 128 | shuf128_32 $0x0E, MSG, MSG |
137 | sha256rnds2 STATE1, STATE0 | 129 | sha256rnds2 STATE1, STATE0 |
@@ -141,9 +133,9 @@ sha256_process_block64_shaNI: | |||
141 | mova128 MSGTMP3, MSG | 133 | mova128 MSGTMP3, MSG |
142 | paddd 7*16-8*16(SHA256CONSTANTS), MSG | 134 | paddd 7*16-8*16(SHA256CONSTANTS), MSG |
143 | sha256rnds2 STATE0, STATE1 | 135 | sha256rnds2 STATE0, STATE1 |
144 | mova128 MSGTMP3, XMMTMP4 | 136 | mova128 MSGTMP3, XMMTMP |
145 | palignr $4, MSGTMP2, XMMTMP4 | 137 | palignr $4, MSGTMP2, XMMTMP |
146 | paddd XMMTMP4, MSGTMP0 | 138 | paddd XMMTMP, MSGTMP0 |
147 | sha256msg2 MSGTMP3, MSGTMP0 | 139 | sha256msg2 MSGTMP3, MSGTMP0 |
148 | shuf128_32 $0x0E, MSG, MSG | 140 | shuf128_32 $0x0E, MSG, MSG |
149 | sha256rnds2 STATE1, STATE0 | 141 | sha256rnds2 STATE1, STATE0 |
@@ -153,9 +145,9 @@ sha256_process_block64_shaNI: | |||
153 | mova128 MSGTMP0, MSG | 145 | mova128 MSGTMP0, MSG |
154 | paddd 8*16-8*16(SHA256CONSTANTS), MSG | 146 | paddd 8*16-8*16(SHA256CONSTANTS), MSG |
155 | sha256rnds2 STATE0, STATE1 | 147 | sha256rnds2 STATE0, STATE1 |
156 | mova128 MSGTMP0, XMMTMP4 | 148 | mova128 MSGTMP0, XMMTMP |
157 | palignr $4, MSGTMP3, XMMTMP4 | 149 | palignr $4, MSGTMP3, XMMTMP |
158 | paddd XMMTMP4, MSGTMP1 | 150 | paddd XMMTMP, MSGTMP1 |
159 | sha256msg2 MSGTMP0, MSGTMP1 | 151 | sha256msg2 MSGTMP0, MSGTMP1 |
160 | shuf128_32 $0x0E, MSG, MSG | 152 | shuf128_32 $0x0E, MSG, MSG |
161 | sha256rnds2 STATE1, STATE0 | 153 | sha256rnds2 STATE1, STATE0 |
@@ -165,9 +157,9 @@ sha256_process_block64_shaNI: | |||
165 | mova128 MSGTMP1, MSG | 157 | mova128 MSGTMP1, MSG |
166 | paddd 9*16-8*16(SHA256CONSTANTS), MSG | 158 | paddd 9*16-8*16(SHA256CONSTANTS), MSG |
167 | sha256rnds2 STATE0, STATE1 | 159 | sha256rnds2 STATE0, STATE1 |
168 | mova128 MSGTMP1, XMMTMP4 | 160 | mova128 MSGTMP1, XMMTMP |
169 | palignr $4, MSGTMP0, XMMTMP4 | 161 | palignr $4, MSGTMP0, XMMTMP |
170 | paddd XMMTMP4, MSGTMP2 | 162 | paddd XMMTMP, MSGTMP2 |
171 | sha256msg2 MSGTMP1, MSGTMP2 | 163 | sha256msg2 MSGTMP1, MSGTMP2 |
172 | shuf128_32 $0x0E, MSG, MSG | 164 | shuf128_32 $0x0E, MSG, MSG |
173 | sha256rnds2 STATE1, STATE0 | 165 | sha256rnds2 STATE1, STATE0 |
@@ -177,9 +169,9 @@ sha256_process_block64_shaNI: | |||
177 | mova128 MSGTMP2, MSG | 169 | mova128 MSGTMP2, MSG |
178 | paddd 10*16-8*16(SHA256CONSTANTS), MSG | 170 | paddd 10*16-8*16(SHA256CONSTANTS), MSG |
179 | sha256rnds2 STATE0, STATE1 | 171 | sha256rnds2 STATE0, STATE1 |
180 | mova128 MSGTMP2, XMMTMP4 | 172 | mova128 MSGTMP2, XMMTMP |
181 | palignr $4, MSGTMP1, XMMTMP4 | 173 | palignr $4, MSGTMP1, XMMTMP |
182 | paddd XMMTMP4, MSGTMP3 | 174 | paddd XMMTMP, MSGTMP3 |
183 | sha256msg2 MSGTMP2, MSGTMP3 | 175 | sha256msg2 MSGTMP2, MSGTMP3 |
184 | shuf128_32 $0x0E, MSG, MSG | 176 | shuf128_32 $0x0E, MSG, MSG |
185 | sha256rnds2 STATE1, STATE0 | 177 | sha256rnds2 STATE1, STATE0 |
@@ -189,9 +181,9 @@ sha256_process_block64_shaNI: | |||
189 | mova128 MSGTMP3, MSG | 181 | mova128 MSGTMP3, MSG |
190 | paddd 11*16-8*16(SHA256CONSTANTS), MSG | 182 | paddd 11*16-8*16(SHA256CONSTANTS), MSG |
191 | sha256rnds2 STATE0, STATE1 | 183 | sha256rnds2 STATE0, STATE1 |
192 | mova128 MSGTMP3, XMMTMP4 | 184 | mova128 MSGTMP3, XMMTMP |
193 | palignr $4, MSGTMP2, XMMTMP4 | 185 | palignr $4, MSGTMP2, XMMTMP |
194 | paddd XMMTMP4, MSGTMP0 | 186 | paddd XMMTMP, MSGTMP0 |
195 | sha256msg2 MSGTMP3, MSGTMP0 | 187 | sha256msg2 MSGTMP3, MSGTMP0 |
196 | shuf128_32 $0x0E, MSG, MSG | 188 | shuf128_32 $0x0E, MSG, MSG |
197 | sha256rnds2 STATE1, STATE0 | 189 | sha256rnds2 STATE1, STATE0 |
@@ -201,9 +193,9 @@ sha256_process_block64_shaNI: | |||
201 | mova128 MSGTMP0, MSG | 193 | mova128 MSGTMP0, MSG |
202 | paddd 12*16-8*16(SHA256CONSTANTS), MSG | 194 | paddd 12*16-8*16(SHA256CONSTANTS), MSG |
203 | sha256rnds2 STATE0, STATE1 | 195 | sha256rnds2 STATE0, STATE1 |
204 | mova128 MSGTMP0, XMMTMP4 | 196 | mova128 MSGTMP0, XMMTMP |
205 | palignr $4, MSGTMP3, XMMTMP4 | 197 | palignr $4, MSGTMP3, XMMTMP |
206 | paddd XMMTMP4, MSGTMP1 | 198 | paddd XMMTMP, MSGTMP1 |
207 | sha256msg2 MSGTMP0, MSGTMP1 | 199 | sha256msg2 MSGTMP0, MSGTMP1 |
208 | shuf128_32 $0x0E, MSG, MSG | 200 | shuf128_32 $0x0E, MSG, MSG |
209 | sha256rnds2 STATE1, STATE0 | 201 | sha256rnds2 STATE1, STATE0 |
@@ -213,9 +205,9 @@ sha256_process_block64_shaNI: | |||
213 | mova128 MSGTMP1, MSG | 205 | mova128 MSGTMP1, MSG |
214 | paddd 13*16-8*16(SHA256CONSTANTS), MSG | 206 | paddd 13*16-8*16(SHA256CONSTANTS), MSG |
215 | sha256rnds2 STATE0, STATE1 | 207 | sha256rnds2 STATE0, STATE1 |
216 | mova128 MSGTMP1, XMMTMP4 | 208 | mova128 MSGTMP1, XMMTMP |
217 | palignr $4, MSGTMP0, XMMTMP4 | 209 | palignr $4, MSGTMP0, XMMTMP |
218 | paddd XMMTMP4, MSGTMP2 | 210 | paddd XMMTMP, MSGTMP2 |
219 | sha256msg2 MSGTMP1, MSGTMP2 | 211 | sha256msg2 MSGTMP1, MSGTMP2 |
220 | shuf128_32 $0x0E, MSG, MSG | 212 | shuf128_32 $0x0E, MSG, MSG |
221 | sha256rnds2 STATE1, STATE0 | 213 | sha256rnds2 STATE1, STATE0 |
@@ -224,9 +216,9 @@ sha256_process_block64_shaNI: | |||
224 | mova128 MSGTMP2, MSG | 216 | mova128 MSGTMP2, MSG |
225 | paddd 14*16-8*16(SHA256CONSTANTS), MSG | 217 | paddd 14*16-8*16(SHA256CONSTANTS), MSG |
226 | sha256rnds2 STATE0, STATE1 | 218 | sha256rnds2 STATE0, STATE1 |
227 | mova128 MSGTMP2, XMMTMP4 | 219 | mova128 MSGTMP2, XMMTMP |
228 | palignr $4, MSGTMP1, XMMTMP4 | 220 | palignr $4, MSGTMP1, XMMTMP |
229 | paddd XMMTMP4, MSGTMP3 | 221 | paddd XMMTMP, MSGTMP3 |
230 | sha256msg2 MSGTMP2, MSGTMP3 | 222 | sha256msg2 MSGTMP2, MSGTMP3 |
231 | shuf128_32 $0x0E, MSG, MSG | 223 | shuf128_32 $0x0E, MSG, MSG |
232 | sha256rnds2 STATE1, STATE0 | 224 | sha256rnds2 STATE1, STATE0 |
@@ -238,22 +230,20 @@ sha256_process_block64_shaNI: | |||
238 | shuf128_32 $0x0E, MSG, MSG | 230 | shuf128_32 $0x0E, MSG, MSG |
239 | sha256rnds2 STATE1, STATE0 | 231 | sha256rnds2 STATE1, STATE0 |
240 | 232 | ||
241 | /* Add current hash values with previously saved */ | ||
242 | paddd 0*16(%esp), STATE0 | ||
243 | paddd 1*16(%esp), STATE1 | ||
244 | |||
245 | /* Write hash values back in the correct order */ | 233 | /* Write hash values back in the correct order */ |
246 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ | 234 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ |
247 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ | 235 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ |
248 | mova128 STATE0, XMMTMP4 | 236 | mova128 STATE0, XMMTMP |
249 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ | 237 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ |
250 | palignr $8, XMMTMP4, STATE1 /* HGFE */ | 238 | palignr $8, XMMTMP, STATE1 /* HGFE */ |
251 | 239 | /* add current hash values to previous ones */ | |
240 | movu128 76+0*16(%eax), XMMTMP | ||
241 | paddd XMMTMP, STATE0 | ||
242 | movu128 76+1*16(%eax), XMMTMP | ||
252 | movu128 STATE0, 76+0*16(%eax) | 243 | movu128 STATE0, 76+0*16(%eax) |
244 | paddd XMMTMP, STATE1 | ||
253 | movu128 STATE1, 76+1*16(%eax) | 245 | movu128 STATE1, 76+1*16(%eax) |
254 | 246 | ||
255 | movl %ebp, %esp | ||
256 | popl %ebp | ||
257 | ret | 247 | ret |
258 | .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI | 248 | .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI |
259 | 249 | ||