aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2022-02-09 00:33:39 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2022-02-09 00:33:39 +0100
commit c0ff0d4528d718c20b9ca2290bd10d59e9f794a3 (patch)
tree 7c0879d37c70fef047a55ef72f4aff5dcf88ab12
parent eb8d5f3b8f3c91f3ed82a52b4ce52a154c146ede (diff)
download busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.tar.gz
busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.tar.bz2
busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.zip
libbb/sha256: code shrink in 32-bit x86
function                                             old     new   delta
sha256_process_block64_shaNI                         713     697     -16

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- libbb/hash_md5_sha256_x86-32_shaNI.S 130
-rw-r--r-- libbb/hash_md5_sha256_x86-64_shaNI.S 107
2 files changed, 114 insertions, 123 deletions
diff --git a/libbb/hash_md5_sha256_x86-32_shaNI.S b/libbb/hash_md5_sha256_x86-32_shaNI.S
index 39e2baf41..a849dfcc2 100644
--- a/libbb/hash_md5_sha256_x86-32_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-32_shaNI.S
@@ -31,35 +31,27 @@
31#define MSGTMP1 %xmm4 31#define MSGTMP1 %xmm4
32#define MSGTMP2 %xmm5 32#define MSGTMP2 %xmm5
33#define MSGTMP3 %xmm6 33#define MSGTMP3 %xmm6
34#define XMMTMP4 %xmm7
35 34
36 .balign 8 # allow decoders to fetch at least 3 first insns 35#define XMMTMP %xmm7
37sha256_process_block64_shaNI:
38 pushl %ebp
39 movl %esp, %ebp
40 subl $32, %esp
41 andl $~0xF, %esp # paddd needs aligned memory operand
42 36
37 .balign 8 # allow decoders to fetch at least 2 first insns
38sha256_process_block64_shaNI:
43 movu128 76+0*16(%eax), STATE0 39 movu128 76+0*16(%eax), STATE0
44 movu128 76+1*16(%eax), STATE1 40 movu128 76+1*16(%eax), STATE1
45 41
46 shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ 42 shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */
47 shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ 43 shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */
48 mova128 STATE0, XMMTMP4 44 mova128 STATE0, XMMTMP
49 palignr $8, STATE1, STATE0 /* ABEF */ 45 palignr $8, STATE1, STATE0 /* ABEF */
50 pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */ 46 pblendw $0xF0, XMMTMP, STATE1 /* CDGH */
51 47
52/* XMMTMP4 holds flip mask from here... */ 48/* XMMTMP holds flip mask from here... */
53 mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP4 49 mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP
54 movl $K256+8*16, SHA256CONSTANTS 50 movl $K256+8*16, SHA256CONSTANTS
55 51
56 /* Save hash values for addition after rounds */
57 mova128 STATE0, 0*16(%esp)
58 mova128 STATE1, 1*16(%esp)
59
60 /* Rounds 0-3 */ 52 /* Rounds 0-3 */
61 movu128 0*16(DATA_PTR), MSG 53 movu128 0*16(DATA_PTR), MSG
62 pshufb XMMTMP4, MSG 54 pshufb XMMTMP, MSG
63 mova128 MSG, MSGTMP0 55 mova128 MSG, MSGTMP0
64 paddd 0*16-8*16(SHA256CONSTANTS), MSG 56 paddd 0*16-8*16(SHA256CONSTANTS), MSG
65 sha256rnds2 STATE0, STATE1 57 sha256rnds2 STATE0, STATE1
@@ -68,7 +60,7 @@ sha256_process_block64_shaNI:
68 60
69 /* Rounds 4-7 */ 61 /* Rounds 4-7 */
70 movu128 1*16(DATA_PTR), MSG 62 movu128 1*16(DATA_PTR), MSG
71 pshufb XMMTMP4, MSG 63 pshufb XMMTMP, MSG
72 mova128 MSG, MSGTMP1 64 mova128 MSG, MSGTMP1
73 paddd 1*16-8*16(SHA256CONSTANTS), MSG 65 paddd 1*16-8*16(SHA256CONSTANTS), MSG
74 sha256rnds2 STATE0, STATE1 66 sha256rnds2 STATE0, STATE1
@@ -78,7 +70,7 @@ sha256_process_block64_shaNI:
78 70
79 /* Rounds 8-11 */ 71 /* Rounds 8-11 */
80 movu128 2*16(DATA_PTR), MSG 72 movu128 2*16(DATA_PTR), MSG
81 pshufb XMMTMP4, MSG 73 pshufb XMMTMP, MSG
82 mova128 MSG, MSGTMP2 74 mova128 MSG, MSGTMP2
83 paddd 2*16-8*16(SHA256CONSTANTS), MSG 75 paddd 2*16-8*16(SHA256CONSTANTS), MSG
84 sha256rnds2 STATE0, STATE1 76 sha256rnds2 STATE0, STATE1
@@ -88,14 +80,14 @@ sha256_process_block64_shaNI:
88 80
89 /* Rounds 12-15 */ 81 /* Rounds 12-15 */
90 movu128 3*16(DATA_PTR), MSG 82 movu128 3*16(DATA_PTR), MSG
91 pshufb XMMTMP4, MSG 83 pshufb XMMTMP, MSG
92/* ...to here */ 84/* ...to here */
93 mova128 MSG, MSGTMP3 85 mova128 MSG, MSGTMP3
94 paddd 3*16-8*16(SHA256CONSTANTS), MSG 86 paddd 3*16-8*16(SHA256CONSTANTS), MSG
95 sha256rnds2 STATE0, STATE1 87 sha256rnds2 STATE0, STATE1
96 mova128 MSGTMP3, XMMTMP4 88 mova128 MSGTMP3, XMMTMP
97 palignr $4, MSGTMP2, XMMTMP4 89 palignr $4, MSGTMP2, XMMTMP
98 paddd XMMTMP4, MSGTMP0 90 paddd XMMTMP, MSGTMP0
99 sha256msg2 MSGTMP3, MSGTMP0 91 sha256msg2 MSGTMP3, MSGTMP0
100 shuf128_32 $0x0E, MSG, MSG 92 shuf128_32 $0x0E, MSG, MSG
101 sha256rnds2 STATE1, STATE0 93 sha256rnds2 STATE1, STATE0
@@ -105,9 +97,9 @@ sha256_process_block64_shaNI:
105 mova128 MSGTMP0, MSG 97 mova128 MSGTMP0, MSG
106 paddd 4*16-8*16(SHA256CONSTANTS), MSG 98 paddd 4*16-8*16(SHA256CONSTANTS), MSG
107 sha256rnds2 STATE0, STATE1 99 sha256rnds2 STATE0, STATE1
108 mova128 MSGTMP0, XMMTMP4 100 mova128 MSGTMP0, XMMTMP
109 palignr $4, MSGTMP3, XMMTMP4 101 palignr $4, MSGTMP3, XMMTMP
110 paddd XMMTMP4, MSGTMP1 102 paddd XMMTMP, MSGTMP1
111 sha256msg2 MSGTMP0, MSGTMP1 103 sha256msg2 MSGTMP0, MSGTMP1
112 shuf128_32 $0x0E, MSG, MSG 104 shuf128_32 $0x0E, MSG, MSG
113 sha256rnds2 STATE1, STATE0 105 sha256rnds2 STATE1, STATE0
@@ -117,9 +109,9 @@ sha256_process_block64_shaNI:
117 mova128 MSGTMP1, MSG 109 mova128 MSGTMP1, MSG
118 paddd 5*16-8*16(SHA256CONSTANTS), MSG 110 paddd 5*16-8*16(SHA256CONSTANTS), MSG
119 sha256rnds2 STATE0, STATE1 111 sha256rnds2 STATE0, STATE1
120 mova128 MSGTMP1, XMMTMP4 112 mova128 MSGTMP1, XMMTMP
121 palignr $4, MSGTMP0, XMMTMP4 113 palignr $4, MSGTMP0, XMMTMP
122 paddd XMMTMP4, MSGTMP2 114 paddd XMMTMP, MSGTMP2
123 sha256msg2 MSGTMP1, MSGTMP2 115 sha256msg2 MSGTMP1, MSGTMP2
124 shuf128_32 $0x0E, MSG, MSG 116 shuf128_32 $0x0E, MSG, MSG
125 sha256rnds2 STATE1, STATE0 117 sha256rnds2 STATE1, STATE0
@@ -129,9 +121,9 @@ sha256_process_block64_shaNI:
129 mova128 MSGTMP2, MSG 121 mova128 MSGTMP2, MSG
130 paddd 6*16-8*16(SHA256CONSTANTS), MSG 122 paddd 6*16-8*16(SHA256CONSTANTS), MSG
131 sha256rnds2 STATE0, STATE1 123 sha256rnds2 STATE0, STATE1
132 mova128 MSGTMP2, XMMTMP4 124 mova128 MSGTMP2, XMMTMP
133 palignr $4, MSGTMP1, XMMTMP4 125 palignr $4, MSGTMP1, XMMTMP
134 paddd XMMTMP4, MSGTMP3 126 paddd XMMTMP, MSGTMP3
135 sha256msg2 MSGTMP2, MSGTMP3 127 sha256msg2 MSGTMP2, MSGTMP3
136 shuf128_32 $0x0E, MSG, MSG 128 shuf128_32 $0x0E, MSG, MSG
137 sha256rnds2 STATE1, STATE0 129 sha256rnds2 STATE1, STATE0
@@ -141,9 +133,9 @@ sha256_process_block64_shaNI:
141 mova128 MSGTMP3, MSG 133 mova128 MSGTMP3, MSG
142 paddd 7*16-8*16(SHA256CONSTANTS), MSG 134 paddd 7*16-8*16(SHA256CONSTANTS), MSG
143 sha256rnds2 STATE0, STATE1 135 sha256rnds2 STATE0, STATE1
144 mova128 MSGTMP3, XMMTMP4 136 mova128 MSGTMP3, XMMTMP
145 palignr $4, MSGTMP2, XMMTMP4 137 palignr $4, MSGTMP2, XMMTMP
146 paddd XMMTMP4, MSGTMP0 138 paddd XMMTMP, MSGTMP0
147 sha256msg2 MSGTMP3, MSGTMP0 139 sha256msg2 MSGTMP3, MSGTMP0
148 shuf128_32 $0x0E, MSG, MSG 140 shuf128_32 $0x0E, MSG, MSG
149 sha256rnds2 STATE1, STATE0 141 sha256rnds2 STATE1, STATE0
@@ -153,9 +145,9 @@ sha256_process_block64_shaNI:
153 mova128 MSGTMP0, MSG 145 mova128 MSGTMP0, MSG
154 paddd 8*16-8*16(SHA256CONSTANTS), MSG 146 paddd 8*16-8*16(SHA256CONSTANTS), MSG
155 sha256rnds2 STATE0, STATE1 147 sha256rnds2 STATE0, STATE1
156 mova128 MSGTMP0, XMMTMP4 148 mova128 MSGTMP0, XMMTMP
157 palignr $4, MSGTMP3, XMMTMP4 149 palignr $4, MSGTMP3, XMMTMP
158 paddd XMMTMP4, MSGTMP1 150 paddd XMMTMP, MSGTMP1
159 sha256msg2 MSGTMP0, MSGTMP1 151 sha256msg2 MSGTMP0, MSGTMP1
160 shuf128_32 $0x0E, MSG, MSG 152 shuf128_32 $0x0E, MSG, MSG
161 sha256rnds2 STATE1, STATE0 153 sha256rnds2 STATE1, STATE0
@@ -165,9 +157,9 @@ sha256_process_block64_shaNI:
165 mova128 MSGTMP1, MSG 157 mova128 MSGTMP1, MSG
166 paddd 9*16-8*16(SHA256CONSTANTS), MSG 158 paddd 9*16-8*16(SHA256CONSTANTS), MSG
167 sha256rnds2 STATE0, STATE1 159 sha256rnds2 STATE0, STATE1
168 mova128 MSGTMP1, XMMTMP4 160 mova128 MSGTMP1, XMMTMP
169 palignr $4, MSGTMP0, XMMTMP4 161 palignr $4, MSGTMP0, XMMTMP
170 paddd XMMTMP4, MSGTMP2 162 paddd XMMTMP, MSGTMP2
171 sha256msg2 MSGTMP1, MSGTMP2 163 sha256msg2 MSGTMP1, MSGTMP2
172 shuf128_32 $0x0E, MSG, MSG 164 shuf128_32 $0x0E, MSG, MSG
173 sha256rnds2 STATE1, STATE0 165 sha256rnds2 STATE1, STATE0
@@ -177,9 +169,9 @@ sha256_process_block64_shaNI:
177 mova128 MSGTMP2, MSG 169 mova128 MSGTMP2, MSG
178 paddd 10*16-8*16(SHA256CONSTANTS), MSG 170 paddd 10*16-8*16(SHA256CONSTANTS), MSG
179 sha256rnds2 STATE0, STATE1 171 sha256rnds2 STATE0, STATE1
180 mova128 MSGTMP2, XMMTMP4 172 mova128 MSGTMP2, XMMTMP
181 palignr $4, MSGTMP1, XMMTMP4 173 palignr $4, MSGTMP1, XMMTMP
182 paddd XMMTMP4, MSGTMP3 174 paddd XMMTMP, MSGTMP3
183 sha256msg2 MSGTMP2, MSGTMP3 175 sha256msg2 MSGTMP2, MSGTMP3
184 shuf128_32 $0x0E, MSG, MSG 176 shuf128_32 $0x0E, MSG, MSG
185 sha256rnds2 STATE1, STATE0 177 sha256rnds2 STATE1, STATE0
@@ -189,9 +181,9 @@ sha256_process_block64_shaNI:
189 mova128 MSGTMP3, MSG 181 mova128 MSGTMP3, MSG
190 paddd 11*16-8*16(SHA256CONSTANTS), MSG 182 paddd 11*16-8*16(SHA256CONSTANTS), MSG
191 sha256rnds2 STATE0, STATE1 183 sha256rnds2 STATE0, STATE1
192 mova128 MSGTMP3, XMMTMP4 184 mova128 MSGTMP3, XMMTMP
193 palignr $4, MSGTMP2, XMMTMP4 185 palignr $4, MSGTMP2, XMMTMP
194 paddd XMMTMP4, MSGTMP0 186 paddd XMMTMP, MSGTMP0
195 sha256msg2 MSGTMP3, MSGTMP0 187 sha256msg2 MSGTMP3, MSGTMP0
196 shuf128_32 $0x0E, MSG, MSG 188 shuf128_32 $0x0E, MSG, MSG
197 sha256rnds2 STATE1, STATE0 189 sha256rnds2 STATE1, STATE0
@@ -201,9 +193,9 @@ sha256_process_block64_shaNI:
201 mova128 MSGTMP0, MSG 193 mova128 MSGTMP0, MSG
202 paddd 12*16-8*16(SHA256CONSTANTS), MSG 194 paddd 12*16-8*16(SHA256CONSTANTS), MSG
203 sha256rnds2 STATE0, STATE1 195 sha256rnds2 STATE0, STATE1
204 mova128 MSGTMP0, XMMTMP4 196 mova128 MSGTMP0, XMMTMP
205 palignr $4, MSGTMP3, XMMTMP4 197 palignr $4, MSGTMP3, XMMTMP
206 paddd XMMTMP4, MSGTMP1 198 paddd XMMTMP, MSGTMP1
207 sha256msg2 MSGTMP0, MSGTMP1 199 sha256msg2 MSGTMP0, MSGTMP1
208 shuf128_32 $0x0E, MSG, MSG 200 shuf128_32 $0x0E, MSG, MSG
209 sha256rnds2 STATE1, STATE0 201 sha256rnds2 STATE1, STATE0
@@ -213,9 +205,9 @@ sha256_process_block64_shaNI:
213 mova128 MSGTMP1, MSG 205 mova128 MSGTMP1, MSG
214 paddd 13*16-8*16(SHA256CONSTANTS), MSG 206 paddd 13*16-8*16(SHA256CONSTANTS), MSG
215 sha256rnds2 STATE0, STATE1 207 sha256rnds2 STATE0, STATE1
216 mova128 MSGTMP1, XMMTMP4 208 mova128 MSGTMP1, XMMTMP
217 palignr $4, MSGTMP0, XMMTMP4 209 palignr $4, MSGTMP0, XMMTMP
218 paddd XMMTMP4, MSGTMP2 210 paddd XMMTMP, MSGTMP2
219 sha256msg2 MSGTMP1, MSGTMP2 211 sha256msg2 MSGTMP1, MSGTMP2
220 shuf128_32 $0x0E, MSG, MSG 212 shuf128_32 $0x0E, MSG, MSG
221 sha256rnds2 STATE1, STATE0 213 sha256rnds2 STATE1, STATE0
@@ -224,9 +216,9 @@ sha256_process_block64_shaNI:
224 mova128 MSGTMP2, MSG 216 mova128 MSGTMP2, MSG
225 paddd 14*16-8*16(SHA256CONSTANTS), MSG 217 paddd 14*16-8*16(SHA256CONSTANTS), MSG
226 sha256rnds2 STATE0, STATE1 218 sha256rnds2 STATE0, STATE1
227 mova128 MSGTMP2, XMMTMP4 219 mova128 MSGTMP2, XMMTMP
228 palignr $4, MSGTMP1, XMMTMP4 220 palignr $4, MSGTMP1, XMMTMP
229 paddd XMMTMP4, MSGTMP3 221 paddd XMMTMP, MSGTMP3
230 sha256msg2 MSGTMP2, MSGTMP3 222 sha256msg2 MSGTMP2, MSGTMP3
231 shuf128_32 $0x0E, MSG, MSG 223 shuf128_32 $0x0E, MSG, MSG
232 sha256rnds2 STATE1, STATE0 224 sha256rnds2 STATE1, STATE0
@@ -238,22 +230,20 @@ sha256_process_block64_shaNI:
238 shuf128_32 $0x0E, MSG, MSG 230 shuf128_32 $0x0E, MSG, MSG
239 sha256rnds2 STATE1, STATE0 231 sha256rnds2 STATE1, STATE0
240 232
241 /* Add current hash values with previously saved */
242 paddd 0*16(%esp), STATE0
243 paddd 1*16(%esp), STATE1
244
245 /* Write hash values back in the correct order */ 233 /* Write hash values back in the correct order */
246 shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ 234 shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */
247 shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ 235 shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */
248 mova128 STATE0, XMMTMP4 236 mova128 STATE0, XMMTMP
249 pblendw $0xF0, STATE1, STATE0 /* DCBA */ 237 pblendw $0xF0, STATE1, STATE0 /* DCBA */
250 palignr $8, XMMTMP4, STATE1 /* HGFE */ 238 palignr $8, XMMTMP, STATE1 /* HGFE */
251 239 /* add current hash values to previous ones */
240 movu128 76+0*16(%eax), XMMTMP
241 paddd XMMTMP, STATE0
242 movu128 76+1*16(%eax), XMMTMP
252 movu128 STATE0, 76+0*16(%eax) 243 movu128 STATE0, 76+0*16(%eax)
244 paddd XMMTMP, STATE1
253 movu128 STATE1, 76+1*16(%eax) 245 movu128 STATE1, 76+1*16(%eax)
254 246
255 movl %ebp, %esp
256 popl %ebp
257 ret 247 ret
258 .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI 248 .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI
259 249
diff --git a/libbb/hash_md5_sha256_x86-64_shaNI.S b/libbb/hash_md5_sha256_x86-64_shaNI.S
index c6c931341..b5c950a9a 100644
--- a/libbb/hash_md5_sha256_x86-64_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-64_shaNI.S
@@ -31,7 +31,8 @@
31#define MSGTMP1 %xmm4 31#define MSGTMP1 %xmm4
32#define MSGTMP2 %xmm5 32#define MSGTMP2 %xmm5
33#define MSGTMP3 %xmm6 33#define MSGTMP3 %xmm6
34#define XMMTMP4 %xmm7 34
35#define XMMTMP %xmm7
35 36
36#define ABEF_SAVE %xmm9 37#define ABEF_SAVE %xmm9
37#define CDGH_SAVE %xmm10 38#define CDGH_SAVE %xmm10
@@ -41,14 +42,14 @@ sha256_process_block64_shaNI:
41 movu128 80+0*16(%rdi), STATE0 42 movu128 80+0*16(%rdi), STATE0
42 movu128 80+1*16(%rdi), STATE1 43 movu128 80+1*16(%rdi), STATE1
43 44
44 shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ 45 shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */
45 shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ 46 shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */
46 mova128 STATE0, XMMTMP4 47 mova128 STATE0, XMMTMP
47 palignr $8, STATE1, STATE0 /* ABEF */ 48 palignr $8, STATE1, STATE0 /* ABEF */
48 pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */ 49 pblendw $0xF0, XMMTMP, STATE1 /* CDGH */
49 50
50/* XMMTMP4 holds flip mask from here... */ 51/* XMMTMP holds flip mask from here... */
51 mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP4 52 mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP
52 leaq K256+8*16(%rip), SHA256CONSTANTS 53 leaq K256+8*16(%rip), SHA256CONSTANTS
53 54
54 /* Save hash values for addition after rounds */ 55 /* Save hash values for addition after rounds */
@@ -57,7 +58,7 @@ sha256_process_block64_shaNI:
57 58
58 /* Rounds 0-3 */ 59 /* Rounds 0-3 */
59 movu128 0*16(DATA_PTR), MSG 60 movu128 0*16(DATA_PTR), MSG
60 pshufb XMMTMP4, MSG 61 pshufb XMMTMP, MSG
61 mova128 MSG, MSGTMP0 62 mova128 MSG, MSGTMP0
62 paddd 0*16-8*16(SHA256CONSTANTS), MSG 63 paddd 0*16-8*16(SHA256CONSTANTS), MSG
63 sha256rnds2 STATE0, STATE1 64 sha256rnds2 STATE0, STATE1
@@ -66,7 +67,7 @@ sha256_process_block64_shaNI:
66 67
67 /* Rounds 4-7 */ 68 /* Rounds 4-7 */
68 movu128 1*16(DATA_PTR), MSG 69 movu128 1*16(DATA_PTR), MSG
69 pshufb XMMTMP4, MSG 70 pshufb XMMTMP, MSG
70 mova128 MSG, MSGTMP1 71 mova128 MSG, MSGTMP1
71 paddd 1*16-8*16(SHA256CONSTANTS), MSG 72 paddd 1*16-8*16(SHA256CONSTANTS), MSG
72 sha256rnds2 STATE0, STATE1 73 sha256rnds2 STATE0, STATE1
@@ -76,7 +77,7 @@ sha256_process_block64_shaNI:
76 77
77 /* Rounds 8-11 */ 78 /* Rounds 8-11 */
78 movu128 2*16(DATA_PTR), MSG 79 movu128 2*16(DATA_PTR), MSG
79 pshufb XMMTMP4, MSG 80 pshufb XMMTMP, MSG
80 mova128 MSG, MSGTMP2 81 mova128 MSG, MSGTMP2
81 paddd 2*16-8*16(SHA256CONSTANTS), MSG 82 paddd 2*16-8*16(SHA256CONSTANTS), MSG
82 sha256rnds2 STATE0, STATE1 83 sha256rnds2 STATE0, STATE1
@@ -86,14 +87,14 @@ sha256_process_block64_shaNI:
86 87
87 /* Rounds 12-15 */ 88 /* Rounds 12-15 */
88 movu128 3*16(DATA_PTR), MSG 89 movu128 3*16(DATA_PTR), MSG
89 pshufb XMMTMP4, MSG 90 pshufb XMMTMP, MSG
90/* ...to here */ 91/* ...to here */
91 mova128 MSG, MSGTMP3 92 mova128 MSG, MSGTMP3
92 paddd 3*16-8*16(SHA256CONSTANTS), MSG 93 paddd 3*16-8*16(SHA256CONSTANTS), MSG
93 sha256rnds2 STATE0, STATE1 94 sha256rnds2 STATE0, STATE1
94 mova128 MSGTMP3, XMMTMP4 95 mova128 MSGTMP3, XMMTMP
95 palignr $4, MSGTMP2, XMMTMP4 96 palignr $4, MSGTMP2, XMMTMP
96 paddd XMMTMP4, MSGTMP0 97 paddd XMMTMP, MSGTMP0
97 sha256msg2 MSGTMP3, MSGTMP0 98 sha256msg2 MSGTMP3, MSGTMP0
98 shuf128_32 $0x0E, MSG, MSG 99 shuf128_32 $0x0E, MSG, MSG
99 sha256rnds2 STATE1, STATE0 100 sha256rnds2 STATE1, STATE0
@@ -103,9 +104,9 @@ sha256_process_block64_shaNI:
103 mova128 MSGTMP0, MSG 104 mova128 MSGTMP0, MSG
104 paddd 4*16-8*16(SHA256CONSTANTS), MSG 105 paddd 4*16-8*16(SHA256CONSTANTS), MSG
105 sha256rnds2 STATE0, STATE1 106 sha256rnds2 STATE0, STATE1
106 mova128 MSGTMP0, XMMTMP4 107 mova128 MSGTMP0, XMMTMP
107 palignr $4, MSGTMP3, XMMTMP4 108 palignr $4, MSGTMP3, XMMTMP
108 paddd XMMTMP4, MSGTMP1 109 paddd XMMTMP, MSGTMP1
109 sha256msg2 MSGTMP0, MSGTMP1 110 sha256msg2 MSGTMP0, MSGTMP1
110 shuf128_32 $0x0E, MSG, MSG 111 shuf128_32 $0x0E, MSG, MSG
111 sha256rnds2 STATE1, STATE0 112 sha256rnds2 STATE1, STATE0
@@ -115,9 +116,9 @@ sha256_process_block64_shaNI:
115 mova128 MSGTMP1, MSG 116 mova128 MSGTMP1, MSG
116 paddd 5*16-8*16(SHA256CONSTANTS), MSG 117 paddd 5*16-8*16(SHA256CONSTANTS), MSG
117 sha256rnds2 STATE0, STATE1 118 sha256rnds2 STATE0, STATE1
118 mova128 MSGTMP1, XMMTMP4 119 mova128 MSGTMP1, XMMTMP
119 palignr $4, MSGTMP0, XMMTMP4 120 palignr $4, MSGTMP0, XMMTMP
120 paddd XMMTMP4, MSGTMP2 121 paddd XMMTMP, MSGTMP2
121 sha256msg2 MSGTMP1, MSGTMP2 122 sha256msg2 MSGTMP1, MSGTMP2
122 shuf128_32 $0x0E, MSG, MSG 123 shuf128_32 $0x0E, MSG, MSG
123 sha256rnds2 STATE1, STATE0 124 sha256rnds2 STATE1, STATE0
@@ -127,9 +128,9 @@ sha256_process_block64_shaNI:
127 mova128 MSGTMP2, MSG 128 mova128 MSGTMP2, MSG
128 paddd 6*16-8*16(SHA256CONSTANTS), MSG 129 paddd 6*16-8*16(SHA256CONSTANTS), MSG
129 sha256rnds2 STATE0, STATE1 130 sha256rnds2 STATE0, STATE1
130 mova128 MSGTMP2, XMMTMP4 131 mova128 MSGTMP2, XMMTMP
131 palignr $4, MSGTMP1, XMMTMP4 132 palignr $4, MSGTMP1, XMMTMP
132 paddd XMMTMP4, MSGTMP3 133 paddd XMMTMP, MSGTMP3
133 sha256msg2 MSGTMP2, MSGTMP3 134 sha256msg2 MSGTMP2, MSGTMP3
134 shuf128_32 $0x0E, MSG, MSG 135 shuf128_32 $0x0E, MSG, MSG
135 sha256rnds2 STATE1, STATE0 136 sha256rnds2 STATE1, STATE0
@@ -139,9 +140,9 @@ sha256_process_block64_shaNI:
139 mova128 MSGTMP3, MSG 140 mova128 MSGTMP3, MSG
140 paddd 7*16-8*16(SHA256CONSTANTS), MSG 141 paddd 7*16-8*16(SHA256CONSTANTS), MSG
141 sha256rnds2 STATE0, STATE1 142 sha256rnds2 STATE0, STATE1
142 mova128 MSGTMP3, XMMTMP4 143 mova128 MSGTMP3, XMMTMP
143 palignr $4, MSGTMP2, XMMTMP4 144 palignr $4, MSGTMP2, XMMTMP
144 paddd XMMTMP4, MSGTMP0 145 paddd XMMTMP, MSGTMP0
145 sha256msg2 MSGTMP3, MSGTMP0 146 sha256msg2 MSGTMP3, MSGTMP0
146 shuf128_32 $0x0E, MSG, MSG 147 shuf128_32 $0x0E, MSG, MSG
147 sha256rnds2 STATE1, STATE0 148 sha256rnds2 STATE1, STATE0
@@ -151,9 +152,9 @@ sha256_process_block64_shaNI:
151 mova128 MSGTMP0, MSG 152 mova128 MSGTMP0, MSG
152 paddd 8*16-8*16(SHA256CONSTANTS), MSG 153 paddd 8*16-8*16(SHA256CONSTANTS), MSG
153 sha256rnds2 STATE0, STATE1 154 sha256rnds2 STATE0, STATE1
154 mova128 MSGTMP0, XMMTMP4 155 mova128 MSGTMP0, XMMTMP
155 palignr $4, MSGTMP3, XMMTMP4 156 palignr $4, MSGTMP3, XMMTMP
156 paddd XMMTMP4, MSGTMP1 157 paddd XMMTMP, MSGTMP1
157 sha256msg2 MSGTMP0, MSGTMP1 158 sha256msg2 MSGTMP0, MSGTMP1
158 shuf128_32 $0x0E, MSG, MSG 159 shuf128_32 $0x0E, MSG, MSG
159 sha256rnds2 STATE1, STATE0 160 sha256rnds2 STATE1, STATE0
@@ -163,9 +164,9 @@ sha256_process_block64_shaNI:
163 mova128 MSGTMP1, MSG 164 mova128 MSGTMP1, MSG
164 paddd 9*16-8*16(SHA256CONSTANTS), MSG 165 paddd 9*16-8*16(SHA256CONSTANTS), MSG
165 sha256rnds2 STATE0, STATE1 166 sha256rnds2 STATE0, STATE1
166 mova128 MSGTMP1, XMMTMP4 167 mova128 MSGTMP1, XMMTMP
167 palignr $4, MSGTMP0, XMMTMP4 168 palignr $4, MSGTMP0, XMMTMP
168 paddd XMMTMP4, MSGTMP2 169 paddd XMMTMP, MSGTMP2
169 sha256msg2 MSGTMP1, MSGTMP2 170 sha256msg2 MSGTMP1, MSGTMP2
170 shuf128_32 $0x0E, MSG, MSG 171 shuf128_32 $0x0E, MSG, MSG
171 sha256rnds2 STATE1, STATE0 172 sha256rnds2 STATE1, STATE0
@@ -175,9 +176,9 @@ sha256_process_block64_shaNI:
175 mova128 MSGTMP2, MSG 176 mova128 MSGTMP2, MSG
176 paddd 10*16-8*16(SHA256CONSTANTS), MSG 177 paddd 10*16-8*16(SHA256CONSTANTS), MSG
177 sha256rnds2 STATE0, STATE1 178 sha256rnds2 STATE0, STATE1
178 mova128 MSGTMP2, XMMTMP4 179 mova128 MSGTMP2, XMMTMP
179 palignr $4, MSGTMP1, XMMTMP4 180 palignr $4, MSGTMP1, XMMTMP
180 paddd XMMTMP4, MSGTMP3 181 paddd XMMTMP, MSGTMP3
181 sha256msg2 MSGTMP2, MSGTMP3 182 sha256msg2 MSGTMP2, MSGTMP3
182 shuf128_32 $0x0E, MSG, MSG 183 shuf128_32 $0x0E, MSG, MSG
183 sha256rnds2 STATE1, STATE0 184 sha256rnds2 STATE1, STATE0
@@ -187,9 +188,9 @@ sha256_process_block64_shaNI:
187 mova128 MSGTMP3, MSG 188 mova128 MSGTMP3, MSG
188 paddd 11*16-8*16(SHA256CONSTANTS), MSG 189 paddd 11*16-8*16(SHA256CONSTANTS), MSG
189 sha256rnds2 STATE0, STATE1 190 sha256rnds2 STATE0, STATE1
190 mova128 MSGTMP3, XMMTMP4 191 mova128 MSGTMP3, XMMTMP
191 palignr $4, MSGTMP2, XMMTMP4 192 palignr $4, MSGTMP2, XMMTMP
192 paddd XMMTMP4, MSGTMP0 193 paddd XMMTMP, MSGTMP0
193 sha256msg2 MSGTMP3, MSGTMP0 194 sha256msg2 MSGTMP3, MSGTMP0
194 shuf128_32 $0x0E, MSG, MSG 195 shuf128_32 $0x0E, MSG, MSG
195 sha256rnds2 STATE1, STATE0 196 sha256rnds2 STATE1, STATE0
@@ -199,9 +200,9 @@ sha256_process_block64_shaNI:
199 mova128 MSGTMP0, MSG 200 mova128 MSGTMP0, MSG
200 paddd 12*16-8*16(SHA256CONSTANTS), MSG 201 paddd 12*16-8*16(SHA256CONSTANTS), MSG
201 sha256rnds2 STATE0, STATE1 202 sha256rnds2 STATE0, STATE1
202 mova128 MSGTMP0, XMMTMP4 203 mova128 MSGTMP0, XMMTMP
203 palignr $4, MSGTMP3, XMMTMP4 204 palignr $4, MSGTMP3, XMMTMP
204 paddd XMMTMP4, MSGTMP1 205 paddd XMMTMP, MSGTMP1
205 sha256msg2 MSGTMP0, MSGTMP1 206 sha256msg2 MSGTMP0, MSGTMP1
206 shuf128_32 $0x0E, MSG, MSG 207 shuf128_32 $0x0E, MSG, MSG
207 sha256rnds2 STATE1, STATE0 208 sha256rnds2 STATE1, STATE0
@@ -211,9 +212,9 @@ sha256_process_block64_shaNI:
211 mova128 MSGTMP1, MSG 212 mova128 MSGTMP1, MSG
212 paddd 13*16-8*16(SHA256CONSTANTS), MSG 213 paddd 13*16-8*16(SHA256CONSTANTS), MSG
213 sha256rnds2 STATE0, STATE1 214 sha256rnds2 STATE0, STATE1
214 mova128 MSGTMP1, XMMTMP4 215 mova128 MSGTMP1, XMMTMP
215 palignr $4, MSGTMP0, XMMTMP4 216 palignr $4, MSGTMP0, XMMTMP
216 paddd XMMTMP4, MSGTMP2 217 paddd XMMTMP, MSGTMP2
217 sha256msg2 MSGTMP1, MSGTMP2 218 sha256msg2 MSGTMP1, MSGTMP2
218 shuf128_32 $0x0E, MSG, MSG 219 shuf128_32 $0x0E, MSG, MSG
219 sha256rnds2 STATE1, STATE0 220 sha256rnds2 STATE1, STATE0
@@ -222,9 +223,9 @@ sha256_process_block64_shaNI:
222 mova128 MSGTMP2, MSG 223 mova128 MSGTMP2, MSG
223 paddd 14*16-8*16(SHA256CONSTANTS), MSG 224 paddd 14*16-8*16(SHA256CONSTANTS), MSG
224 sha256rnds2 STATE0, STATE1 225 sha256rnds2 STATE0, STATE1
225 mova128 MSGTMP2, XMMTMP4 226 mova128 MSGTMP2, XMMTMP
226 palignr $4, MSGTMP1, XMMTMP4 227 palignr $4, MSGTMP1, XMMTMP
227 paddd XMMTMP4, MSGTMP3 228 paddd XMMTMP, MSGTMP3
228 sha256msg2 MSGTMP2, MSGTMP3 229 sha256msg2 MSGTMP2, MSGTMP3
229 shuf128_32 $0x0E, MSG, MSG 230 shuf128_32 $0x0E, MSG, MSG
230 sha256rnds2 STATE1, STATE0 231 sha256rnds2 STATE1, STATE0
@@ -241,11 +242,11 @@ sha256_process_block64_shaNI:
241 paddd CDGH_SAVE, STATE1 242 paddd CDGH_SAVE, STATE1
242 243
243 /* Write hash values back in the correct order */ 244 /* Write hash values back in the correct order */
244 shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ 245 shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */
245 shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ 246 shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */
246 mova128 STATE0, XMMTMP4 247 mova128 STATE0, XMMTMP
247 pblendw $0xF0, STATE1, STATE0 /* DCBA */ 248 pblendw $0xF0, STATE1, STATE0 /* DCBA */
248 palignr $8, XMMTMP4, STATE1 /* HGFE */ 249 palignr $8, XMMTMP, STATE1 /* HGFE */
249 250
250 movu128 STATE0, 80+0*16(%rdi) 251 movu128 STATE0, 80+0*16(%rdi)
251 movu128 STATE1, 80+1*16(%rdi) 252 movu128 STATE1, 80+1*16(%rdi)