diff options
Diffstat (limited to 'libbb/hash_md5_sha256_x86-32_shaNI.S')
-rw-r--r-- | libbb/hash_md5_sha256_x86-32_shaNI.S | 92 |
1 file changed, 48 insertions, 44 deletions
diff --git a/libbb/hash_md5_sha256_x86-32_shaNI.S b/libbb/hash_md5_sha256_x86-32_shaNI.S
index aa68193bd..3905bad9a 100644
--- a/libbb/hash_md5_sha256_x86-32_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-32_shaNI.S | |||
@@ -4,7 +4,7 @@ | |||
4 | // We use shorter insns, even though they are for "wrong" | 4 | // We use shorter insns, even though they are for "wrong" |
5 | // data type (fp, not int). | 5 | // data type (fp, not int). |
6 | // For Intel, there is no penalty for doing it at all | 6 | // For Intel, there is no penalty for doing it at all |
7 | // (CPUs which do have such penalty do not support SHA1 insns). | 7 | // (CPUs which do have such penalty do not support SHA insns). |
8 | // For AMD, the penalty is one extra cycle | 8 | // For AMD, the penalty is one extra cycle |
9 | // (allegedly: I failed to find measurable difference). | 9 | // (allegedly: I failed to find measurable difference). |
10 | 10 | ||
@@ -15,6 +15,10 @@ | |||
15 | //#define shuf128_32 pshufd | 15 | //#define shuf128_32 pshufd |
16 | #define shuf128_32 shufps | 16 | #define shuf128_32 shufps |
17 | 17 | ||
18 | // pshufb and palignr are SSSE3 insns. | ||
19 | // We do not check SSSE3 in cpuid, | ||
20 | // all SHA-capable CPUs support it as well. | ||
21 | |||
18 | .section .text.sha256_process_block64_shaNI, "ax", @progbits | 22 | .section .text.sha256_process_block64_shaNI, "ax", @progbits |
19 | .globl sha256_process_block64_shaNI | 23 | .globl sha256_process_block64_shaNI |
20 | .hidden sha256_process_block64_shaNI | 24 | .hidden sha256_process_block64_shaNI |
@@ -39,12 +43,13 @@ | |||
39 | .balign 8 # allow decoders to fetch at least 2 first insns | 43 | .balign 8 # allow decoders to fetch at least 2 first insns |
40 | sha256_process_block64_shaNI: | 44 | sha256_process_block64_shaNI: |
41 | 45 | ||
42 | movu128 76+0*16(%eax), XMMTMP /* DCBA (msb-to-lsb: 3,2,1,0) */ | 46 | movu128 76+0*16(%eax), XMMTMP /* ABCD (little-endian dword order) */ |
43 | movu128 76+1*16(%eax), STATE1 /* HGFE */ | 47 | movu128 76+1*16(%eax), STATE1 /* EFGH */ |
44 | /* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ | 48 | /* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ |
45 | mova128 STATE1, STATE0 | 49 | mova128 STATE1, STATE0 |
46 | shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* ABEF */ | 50 | /* --- -------------- ABCD -- EFGH */ |
47 | shufps SHUF(3,2,3,2), XMMTMP, STATE1 /* CDGH */ | 51 | shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */ |
52 | shufps SHUF(3,2,3,2), XMMTMP, STATE1 /* HGDC */ | ||
48 | 53 | ||
49 | /* XMMTMP holds flip mask from here... */ | 54 | /* XMMTMP holds flip mask from here... */ |
50 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP | 55 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP |
@@ -55,18 +60,18 @@ sha256_process_block64_shaNI: | |||
55 | pshufb XMMTMP, MSG | 60 | pshufb XMMTMP, MSG |
56 | mova128 MSG, MSGTMP0 | 61 | mova128 MSG, MSGTMP0 |
57 | paddd 0*16-8*16(SHA256CONSTANTS), MSG | 62 | paddd 0*16-8*16(SHA256CONSTANTS), MSG |
58 | sha256rnds2 STATE0, STATE1 | 63 | sha256rnds2 MSG, STATE0, STATE1 |
59 | shuf128_32 $0x0E, MSG, MSG | 64 | shuf128_32 $0x0E, MSG, MSG |
60 | sha256rnds2 STATE1, STATE0 | 65 | sha256rnds2 MSG, STATE1, STATE0 |
61 | 66 | ||
62 | /* Rounds 4-7 */ | 67 | /* Rounds 4-7 */ |
63 | movu128 1*16(DATA_PTR), MSG | 68 | movu128 1*16(DATA_PTR), MSG |
64 | pshufb XMMTMP, MSG | 69 | pshufb XMMTMP, MSG |
65 | mova128 MSG, MSGTMP1 | 70 | mova128 MSG, MSGTMP1 |
66 | paddd 1*16-8*16(SHA256CONSTANTS), MSG | 71 | paddd 1*16-8*16(SHA256CONSTANTS), MSG |
67 | sha256rnds2 STATE0, STATE1 | 72 | sha256rnds2 MSG, STATE0, STATE1 |
68 | shuf128_32 $0x0E, MSG, MSG | 73 | shuf128_32 $0x0E, MSG, MSG |
69 | sha256rnds2 STATE1, STATE0 | 74 | sha256rnds2 MSG, STATE1, STATE0 |
70 | sha256msg1 MSGTMP1, MSGTMP0 | 75 | sha256msg1 MSGTMP1, MSGTMP0 |
71 | 76 | ||
72 | /* Rounds 8-11 */ | 77 | /* Rounds 8-11 */ |
@@ -74,9 +79,9 @@ sha256_process_block64_shaNI: | |||
74 | pshufb XMMTMP, MSG | 79 | pshufb XMMTMP, MSG |
75 | mova128 MSG, MSGTMP2 | 80 | mova128 MSG, MSGTMP2 |
76 | paddd 2*16-8*16(SHA256CONSTANTS), MSG | 81 | paddd 2*16-8*16(SHA256CONSTANTS), MSG |
77 | sha256rnds2 STATE0, STATE1 | 82 | sha256rnds2 MSG, STATE0, STATE1 |
78 | shuf128_32 $0x0E, MSG, MSG | 83 | shuf128_32 $0x0E, MSG, MSG |
79 | sha256rnds2 STATE1, STATE0 | 84 | sha256rnds2 MSG, STATE1, STATE0 |
80 | sha256msg1 MSGTMP2, MSGTMP1 | 85 | sha256msg1 MSGTMP2, MSGTMP1 |
81 | 86 | ||
82 | /* Rounds 12-15 */ | 87 | /* Rounds 12-15 */ |
@@ -85,159 +90,158 @@ sha256_process_block64_shaNI: | |||
85 | /* ...to here */ | 90 | /* ...to here */ |
86 | mova128 MSG, MSGTMP3 | 91 | mova128 MSG, MSGTMP3 |
87 | paddd 3*16-8*16(SHA256CONSTANTS), MSG | 92 | paddd 3*16-8*16(SHA256CONSTANTS), MSG |
88 | sha256rnds2 STATE0, STATE1 | 93 | sha256rnds2 MSG, STATE0, STATE1 |
89 | mova128 MSGTMP3, XMMTMP | 94 | mova128 MSGTMP3, XMMTMP |
90 | palignr $4, MSGTMP2, XMMTMP | 95 | palignr $4, MSGTMP2, XMMTMP |
91 | paddd XMMTMP, MSGTMP0 | 96 | paddd XMMTMP, MSGTMP0 |
92 | sha256msg2 MSGTMP3, MSGTMP0 | 97 | sha256msg2 MSGTMP3, MSGTMP0 |
93 | shuf128_32 $0x0E, MSG, MSG | 98 | shuf128_32 $0x0E, MSG, MSG |
94 | sha256rnds2 STATE1, STATE0 | 99 | sha256rnds2 MSG, STATE1, STATE0 |
95 | sha256msg1 MSGTMP3, MSGTMP2 | 100 | sha256msg1 MSGTMP3, MSGTMP2 |
96 | 101 | ||
97 | /* Rounds 16-19 */ | 102 | /* Rounds 16-19 */ |
98 | mova128 MSGTMP0, MSG | 103 | mova128 MSGTMP0, MSG |
99 | paddd 4*16-8*16(SHA256CONSTANTS), MSG | 104 | paddd 4*16-8*16(SHA256CONSTANTS), MSG |
100 | sha256rnds2 STATE0, STATE1 | 105 | sha256rnds2 MSG, STATE0, STATE1 |
101 | mova128 MSGTMP0, XMMTMP | 106 | mova128 MSGTMP0, XMMTMP |
102 | palignr $4, MSGTMP3, XMMTMP | 107 | palignr $4, MSGTMP3, XMMTMP |
103 | paddd XMMTMP, MSGTMP1 | 108 | paddd XMMTMP, MSGTMP1 |
104 | sha256msg2 MSGTMP0, MSGTMP1 | 109 | sha256msg2 MSGTMP0, MSGTMP1 |
105 | shuf128_32 $0x0E, MSG, MSG | 110 | shuf128_32 $0x0E, MSG, MSG |
106 | sha256rnds2 STATE1, STATE0 | 111 | sha256rnds2 MSG, STATE1, STATE0 |
107 | sha256msg1 MSGTMP0, MSGTMP3 | 112 | sha256msg1 MSGTMP0, MSGTMP3 |
108 | 113 | ||
109 | /* Rounds 20-23 */ | 114 | /* Rounds 20-23 */ |
110 | mova128 MSGTMP1, MSG | 115 | mova128 MSGTMP1, MSG |
111 | paddd 5*16-8*16(SHA256CONSTANTS), MSG | 116 | paddd 5*16-8*16(SHA256CONSTANTS), MSG |
112 | sha256rnds2 STATE0, STATE1 | 117 | sha256rnds2 MSG, STATE0, STATE1 |
113 | mova128 MSGTMP1, XMMTMP | 118 | mova128 MSGTMP1, XMMTMP |
114 | palignr $4, MSGTMP0, XMMTMP | 119 | palignr $4, MSGTMP0, XMMTMP |
115 | paddd XMMTMP, MSGTMP2 | 120 | paddd XMMTMP, MSGTMP2 |
116 | sha256msg2 MSGTMP1, MSGTMP2 | 121 | sha256msg2 MSGTMP1, MSGTMP2 |
117 | shuf128_32 $0x0E, MSG, MSG | 122 | shuf128_32 $0x0E, MSG, MSG |
118 | sha256rnds2 STATE1, STATE0 | 123 | sha256rnds2 MSG, STATE1, STATE0 |
119 | sha256msg1 MSGTMP1, MSGTMP0 | 124 | sha256msg1 MSGTMP1, MSGTMP0 |
120 | 125 | ||
121 | /* Rounds 24-27 */ | 126 | /* Rounds 24-27 */ |
122 | mova128 MSGTMP2, MSG | 127 | mova128 MSGTMP2, MSG |
123 | paddd 6*16-8*16(SHA256CONSTANTS), MSG | 128 | paddd 6*16-8*16(SHA256CONSTANTS), MSG |
124 | sha256rnds2 STATE0, STATE1 | 129 | sha256rnds2 MSG, STATE0, STATE1 |
125 | mova128 MSGTMP2, XMMTMP | 130 | mova128 MSGTMP2, XMMTMP |
126 | palignr $4, MSGTMP1, XMMTMP | 131 | palignr $4, MSGTMP1, XMMTMP |
127 | paddd XMMTMP, MSGTMP3 | 132 | paddd XMMTMP, MSGTMP3 |
128 | sha256msg2 MSGTMP2, MSGTMP3 | 133 | sha256msg2 MSGTMP2, MSGTMP3 |
129 | shuf128_32 $0x0E, MSG, MSG | 134 | shuf128_32 $0x0E, MSG, MSG |
130 | sha256rnds2 STATE1, STATE0 | 135 | sha256rnds2 MSG, STATE1, STATE0 |
131 | sha256msg1 MSGTMP2, MSGTMP1 | 136 | sha256msg1 MSGTMP2, MSGTMP1 |
132 | 137 | ||
133 | /* Rounds 28-31 */ | 138 | /* Rounds 28-31 */ |
134 | mova128 MSGTMP3, MSG | 139 | mova128 MSGTMP3, MSG |
135 | paddd 7*16-8*16(SHA256CONSTANTS), MSG | 140 | paddd 7*16-8*16(SHA256CONSTANTS), MSG |
136 | sha256rnds2 STATE0, STATE1 | 141 | sha256rnds2 MSG, STATE0, STATE1 |
137 | mova128 MSGTMP3, XMMTMP | 142 | mova128 MSGTMP3, XMMTMP |
138 | palignr $4, MSGTMP2, XMMTMP | 143 | palignr $4, MSGTMP2, XMMTMP |
139 | paddd XMMTMP, MSGTMP0 | 144 | paddd XMMTMP, MSGTMP0 |
140 | sha256msg2 MSGTMP3, MSGTMP0 | 145 | sha256msg2 MSGTMP3, MSGTMP0 |
141 | shuf128_32 $0x0E, MSG, MSG | 146 | shuf128_32 $0x0E, MSG, MSG |
142 | sha256rnds2 STATE1, STATE0 | 147 | sha256rnds2 MSG, STATE1, STATE0 |
143 | sha256msg1 MSGTMP3, MSGTMP2 | 148 | sha256msg1 MSGTMP3, MSGTMP2 |
144 | 149 | ||
145 | /* Rounds 32-35 */ | 150 | /* Rounds 32-35 */ |
146 | mova128 MSGTMP0, MSG | 151 | mova128 MSGTMP0, MSG |
147 | paddd 8*16-8*16(SHA256CONSTANTS), MSG | 152 | paddd 8*16-8*16(SHA256CONSTANTS), MSG |
148 | sha256rnds2 STATE0, STATE1 | 153 | sha256rnds2 MSG, STATE0, STATE1 |
149 | mova128 MSGTMP0, XMMTMP | 154 | mova128 MSGTMP0, XMMTMP |
150 | palignr $4, MSGTMP3, XMMTMP | 155 | palignr $4, MSGTMP3, XMMTMP |
151 | paddd XMMTMP, MSGTMP1 | 156 | paddd XMMTMP, MSGTMP1 |
152 | sha256msg2 MSGTMP0, MSGTMP1 | 157 | sha256msg2 MSGTMP0, MSGTMP1 |
153 | shuf128_32 $0x0E, MSG, MSG | 158 | shuf128_32 $0x0E, MSG, MSG |
154 | sha256rnds2 STATE1, STATE0 | 159 | sha256rnds2 MSG, STATE1, STATE0 |
155 | sha256msg1 MSGTMP0, MSGTMP3 | 160 | sha256msg1 MSGTMP0, MSGTMP3 |
156 | 161 | ||
157 | /* Rounds 36-39 */ | 162 | /* Rounds 36-39 */ |
158 | mova128 MSGTMP1, MSG | 163 | mova128 MSGTMP1, MSG |
159 | paddd 9*16-8*16(SHA256CONSTANTS), MSG | 164 | paddd 9*16-8*16(SHA256CONSTANTS), MSG |
160 | sha256rnds2 STATE0, STATE1 | 165 | sha256rnds2 MSG, STATE0, STATE1 |
161 | mova128 MSGTMP1, XMMTMP | 166 | mova128 MSGTMP1, XMMTMP |
162 | palignr $4, MSGTMP0, XMMTMP | 167 | palignr $4, MSGTMP0, XMMTMP |
163 | paddd XMMTMP, MSGTMP2 | 168 | paddd XMMTMP, MSGTMP2 |
164 | sha256msg2 MSGTMP1, MSGTMP2 | 169 | sha256msg2 MSGTMP1, MSGTMP2 |
165 | shuf128_32 $0x0E, MSG, MSG | 170 | shuf128_32 $0x0E, MSG, MSG |
166 | sha256rnds2 STATE1, STATE0 | 171 | sha256rnds2 MSG, STATE1, STATE0 |
167 | sha256msg1 MSGTMP1, MSGTMP0 | 172 | sha256msg1 MSGTMP1, MSGTMP0 |
168 | 173 | ||
169 | /* Rounds 40-43 */ | 174 | /* Rounds 40-43 */ |
170 | mova128 MSGTMP2, MSG | 175 | mova128 MSGTMP2, MSG |
171 | paddd 10*16-8*16(SHA256CONSTANTS), MSG | 176 | paddd 10*16-8*16(SHA256CONSTANTS), MSG |
172 | sha256rnds2 STATE0, STATE1 | 177 | sha256rnds2 MSG, STATE0, STATE1 |
173 | mova128 MSGTMP2, XMMTMP | 178 | mova128 MSGTMP2, XMMTMP |
174 | palignr $4, MSGTMP1, XMMTMP | 179 | palignr $4, MSGTMP1, XMMTMP |
175 | paddd XMMTMP, MSGTMP3 | 180 | paddd XMMTMP, MSGTMP3 |
176 | sha256msg2 MSGTMP2, MSGTMP3 | 181 | sha256msg2 MSGTMP2, MSGTMP3 |
177 | shuf128_32 $0x0E, MSG, MSG | 182 | shuf128_32 $0x0E, MSG, MSG |
178 | sha256rnds2 STATE1, STATE0 | 183 | sha256rnds2 MSG, STATE1, STATE0 |
179 | sha256msg1 MSGTMP2, MSGTMP1 | 184 | sha256msg1 MSGTMP2, MSGTMP1 |
180 | 185 | ||
181 | /* Rounds 44-47 */ | 186 | /* Rounds 44-47 */ |
182 | mova128 MSGTMP3, MSG | 187 | mova128 MSGTMP3, MSG |
183 | paddd 11*16-8*16(SHA256CONSTANTS), MSG | 188 | paddd 11*16-8*16(SHA256CONSTANTS), MSG |
184 | sha256rnds2 STATE0, STATE1 | 189 | sha256rnds2 MSG, STATE0, STATE1 |
185 | mova128 MSGTMP3, XMMTMP | 190 | mova128 MSGTMP3, XMMTMP |
186 | palignr $4, MSGTMP2, XMMTMP | 191 | palignr $4, MSGTMP2, XMMTMP |
187 | paddd XMMTMP, MSGTMP0 | 192 | paddd XMMTMP, MSGTMP0 |
188 | sha256msg2 MSGTMP3, MSGTMP0 | 193 | sha256msg2 MSGTMP3, MSGTMP0 |
189 | shuf128_32 $0x0E, MSG, MSG | 194 | shuf128_32 $0x0E, MSG, MSG |
190 | sha256rnds2 STATE1, STATE0 | 195 | sha256rnds2 MSG, STATE1, STATE0 |
191 | sha256msg1 MSGTMP3, MSGTMP2 | 196 | sha256msg1 MSGTMP3, MSGTMP2 |
192 | 197 | ||
193 | /* Rounds 48-51 */ | 198 | /* Rounds 48-51 */ |
194 | mova128 MSGTMP0, MSG | 199 | mova128 MSGTMP0, MSG |
195 | paddd 12*16-8*16(SHA256CONSTANTS), MSG | 200 | paddd 12*16-8*16(SHA256CONSTANTS), MSG |
196 | sha256rnds2 STATE0, STATE1 | 201 | sha256rnds2 MSG, STATE0, STATE1 |
197 | mova128 MSGTMP0, XMMTMP | 202 | mova128 MSGTMP0, XMMTMP |
198 | palignr $4, MSGTMP3, XMMTMP | 203 | palignr $4, MSGTMP3, XMMTMP |
199 | paddd XMMTMP, MSGTMP1 | 204 | paddd XMMTMP, MSGTMP1 |
200 | sha256msg2 MSGTMP0, MSGTMP1 | 205 | sha256msg2 MSGTMP0, MSGTMP1 |
201 | shuf128_32 $0x0E, MSG, MSG | 206 | shuf128_32 $0x0E, MSG, MSG |
202 | sha256rnds2 STATE1, STATE0 | 207 | sha256rnds2 MSG, STATE1, STATE0 |
203 | sha256msg1 MSGTMP0, MSGTMP3 | 208 | sha256msg1 MSGTMP0, MSGTMP3 |
204 | 209 | ||
205 | /* Rounds 52-55 */ | 210 | /* Rounds 52-55 */ |
206 | mova128 MSGTMP1, MSG | 211 | mova128 MSGTMP1, MSG |
207 | paddd 13*16-8*16(SHA256CONSTANTS), MSG | 212 | paddd 13*16-8*16(SHA256CONSTANTS), MSG |
208 | sha256rnds2 STATE0, STATE1 | 213 | sha256rnds2 MSG, STATE0, STATE1 |
209 | mova128 MSGTMP1, XMMTMP | 214 | mova128 MSGTMP1, XMMTMP |
210 | palignr $4, MSGTMP0, XMMTMP | 215 | palignr $4, MSGTMP0, XMMTMP |
211 | paddd XMMTMP, MSGTMP2 | 216 | paddd XMMTMP, MSGTMP2 |
212 | sha256msg2 MSGTMP1, MSGTMP2 | 217 | sha256msg2 MSGTMP1, MSGTMP2 |
213 | shuf128_32 $0x0E, MSG, MSG | 218 | shuf128_32 $0x0E, MSG, MSG |
214 | sha256rnds2 STATE1, STATE0 | 219 | sha256rnds2 MSG, STATE1, STATE0 |
215 | 220 | ||
216 | /* Rounds 56-59 */ | 221 | /* Rounds 56-59 */ |
217 | mova128 MSGTMP2, MSG | 222 | mova128 MSGTMP2, MSG |
218 | paddd 14*16-8*16(SHA256CONSTANTS), MSG | 223 | paddd 14*16-8*16(SHA256CONSTANTS), MSG |
219 | sha256rnds2 STATE0, STATE1 | 224 | sha256rnds2 MSG, STATE0, STATE1 |
220 | mova128 MSGTMP2, XMMTMP | 225 | mova128 MSGTMP2, XMMTMP |
221 | palignr $4, MSGTMP1, XMMTMP | 226 | palignr $4, MSGTMP1, XMMTMP |
222 | paddd XMMTMP, MSGTMP3 | 227 | paddd XMMTMP, MSGTMP3 |
223 | sha256msg2 MSGTMP2, MSGTMP3 | 228 | sha256msg2 MSGTMP2, MSGTMP3 |
224 | shuf128_32 $0x0E, MSG, MSG | 229 | shuf128_32 $0x0E, MSG, MSG |
225 | sha256rnds2 STATE1, STATE0 | 230 | sha256rnds2 MSG, STATE1, STATE0 |
226 | 231 | ||
227 | /* Rounds 60-63 */ | 232 | /* Rounds 60-63 */ |
228 | mova128 MSGTMP3, MSG | 233 | mova128 MSGTMP3, MSG |
229 | paddd 15*16-8*16(SHA256CONSTANTS), MSG | 234 | paddd 15*16-8*16(SHA256CONSTANTS), MSG |
230 | sha256rnds2 STATE0, STATE1 | 235 | sha256rnds2 MSG, STATE0, STATE1 |
231 | shuf128_32 $0x0E, MSG, MSG | 236 | shuf128_32 $0x0E, MSG, MSG |
232 | sha256rnds2 STATE1, STATE0 | 237 | sha256rnds2 MSG, STATE1, STATE0 |
233 | 238 | ||
234 | /* Write hash values back in the correct order */ | 239 | /* Write hash values back in the correct order */ |
235 | /* STATE0: ABEF (msb-to-lsb: 3,2,1,0) */ | ||
236 | /* STATE1: CDGH */ | ||
237 | mova128 STATE0, XMMTMP | 240 | mova128 STATE0, XMMTMP |
238 | /* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ | 241 | /* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ |
239 | shufps SHUF(3,2,3,2), STATE1, STATE0 /* DCBA */ | 242 | /* --- -------------- HGDC -- FEBA */ |
240 | shufps SHUF(1,0,1,0), STATE1, XMMTMP /* HGFE */ | 243 | shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */ |
244 | shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */ | ||
241 | /* add current hash values to previous ones */ | 245 | /* add current hash values to previous ones */ |
242 | movu128 76+1*16(%eax), STATE1 | 246 | movu128 76+1*16(%eax), STATE1 |
243 | paddd XMMTMP, STATE1 | 247 | paddd XMMTMP, STATE1 |
@@ -250,7 +254,7 @@ sha256_process_block64_shaNI: | |||
250 | .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI | 254 | .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI |
251 | 255 | ||
252 | .section .rodata.cst256.K256, "aM", @progbits, 256 | 256 | .section .rodata.cst256.K256, "aM", @progbits, 256 |
253 | .balign 16 | 257 | .balign 16 |
254 | K256: | 258 | K256: |
255 | .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | 259 | .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 |
256 | .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | 260 | .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 |
@@ -270,8 +274,8 @@ K256: | |||
270 | .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | 274 | .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 |
271 | 275 | ||
272 | .section .rodata.cst16.PSHUFFLE_BSWAP32_FLIP_MASK, "aM", @progbits, 16 | 276 | .section .rodata.cst16.PSHUFFLE_BSWAP32_FLIP_MASK, "aM", @progbits, 16 |
273 | .balign 16 | 277 | .balign 16 |
274 | PSHUFFLE_BSWAP32_FLIP_MASK: | 278 | PSHUFFLE_BSWAP32_FLIP_MASK: |
275 | .octa 0x0c0d0e0f08090a0b0405060700010203 | 279 | .octa 0x0c0d0e0f08090a0b0405060700010203 |
276 | 280 | ||
277 | #endif | 281 | #endif |