diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-06 00:30:03 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-06 00:33:42 +0100 |
commit | 31c1c310772fa6c897ee1585ea15fc38f3ab3dff (patch) | |
tree | 7fb122b079d32981ee6bf7f7565f14f570ae6ac6 | |
parent | a1429fbb8ca373efc01939d599f6f65969b1a366 (diff) | |
download | busybox-w32-31c1c310772fa6c897ee1585ea15fc38f3ab3dff.tar.gz busybox-w32-31c1c310772fa6c897ee1585ea15fc38f3ab3dff.tar.bz2 busybox-w32-31c1c310772fa6c897ee1585ea15fc38f3ab3dff.zip |
libbb/sha256: code shrink in 64-bit x86
function                                             old     new   delta
sha256_process_block64_shaNI                         706     701      -5
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/hash_md5_sha256_x86-64_shaNI.S | 96 |
1 file changed, 48 insertions, 48 deletions
diff --git a/libbb/hash_md5_sha256_x86-64_shaNI.S b/libbb/hash_md5_sha256_x86-64_shaNI.S
index f3df541e4..dbf391135 100644
--- a/libbb/hash_md5_sha256_x86-64_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-64_shaNI.S
@@ -31,9 +31,7 @@ | |||
31 | #define MSGTMP1 %xmm4 | 31 | #define MSGTMP1 %xmm4 |
32 | #define MSGTMP2 %xmm5 | 32 | #define MSGTMP2 %xmm5 |
33 | #define MSGTMP3 %xmm6 | 33 | #define MSGTMP3 %xmm6 |
34 | #define MSGTMP4 %xmm7 | 34 | #define XMMTMP4 %xmm7 |
35 | |||
36 | #define SHUF_MASK %xmm8 | ||
37 | 35 | ||
38 | #define ABEF_SAVE %xmm9 | 36 | #define ABEF_SAVE %xmm9 |
39 | #define CDGH_SAVE %xmm10 | 37 | #define CDGH_SAVE %xmm10 |
@@ -45,11 +43,12 @@ sha256_process_block64_shaNI: | |||
45 | 43 | ||
46 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ | 44 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ |
47 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ | 45 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ |
48 | mova128 STATE0, MSGTMP4 | 46 | mova128 STATE0, XMMTMP4 |
49 | palignr $8, STATE1, STATE0 /* ABEF */ | 47 | palignr $8, STATE1, STATE0 /* ABEF */ |
50 | pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ | 48 | pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */ |
51 | 49 | ||
52 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), SHUF_MASK | 50 | /* XMMTMP4 holds flip mask from here... */ |
51 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP4 | ||
53 | leaq K256+8*16(%rip), SHA256CONSTANTS | 52 | leaq K256+8*16(%rip), SHA256CONSTANTS |
54 | 53 | ||
55 | /* Save hash values for addition after rounds */ | 54 | /* Save hash values for addition after rounds */ |
@@ -58,7 +57,7 @@ sha256_process_block64_shaNI: | |||
58 | 57 | ||
59 | /* Rounds 0-3 */ | 58 | /* Rounds 0-3 */ |
60 | movu128 0*16(DATA_PTR), MSG | 59 | movu128 0*16(DATA_PTR), MSG |
61 | pshufb SHUF_MASK, MSG | 60 | pshufb XMMTMP4, MSG |
62 | mova128 MSG, MSGTMP0 | 61 | mova128 MSG, MSGTMP0 |
63 | paddd 0*16-8*16(SHA256CONSTANTS), MSG | 62 | paddd 0*16-8*16(SHA256CONSTANTS), MSG |
64 | sha256rnds2 STATE0, STATE1 | 63 | sha256rnds2 STATE0, STATE1 |
@@ -67,7 +66,7 @@ sha256_process_block64_shaNI: | |||
67 | 66 | ||
68 | /* Rounds 4-7 */ | 67 | /* Rounds 4-7 */ |
69 | movu128 1*16(DATA_PTR), MSG | 68 | movu128 1*16(DATA_PTR), MSG |
70 | pshufb SHUF_MASK, MSG | 69 | pshufb XMMTMP4, MSG |
71 | mova128 MSG, MSGTMP1 | 70 | mova128 MSG, MSGTMP1 |
72 | paddd 1*16-8*16(SHA256CONSTANTS), MSG | 71 | paddd 1*16-8*16(SHA256CONSTANTS), MSG |
73 | sha256rnds2 STATE0, STATE1 | 72 | sha256rnds2 STATE0, STATE1 |
@@ -77,7 +76,7 @@ sha256_process_block64_shaNI: | |||
77 | 76 | ||
78 | /* Rounds 8-11 */ | 77 | /* Rounds 8-11 */ |
79 | movu128 2*16(DATA_PTR), MSG | 78 | movu128 2*16(DATA_PTR), MSG |
80 | pshufb SHUF_MASK, MSG | 79 | pshufb XMMTMP4, MSG |
81 | mova128 MSG, MSGTMP2 | 80 | mova128 MSG, MSGTMP2 |
82 | paddd 2*16-8*16(SHA256CONSTANTS), MSG | 81 | paddd 2*16-8*16(SHA256CONSTANTS), MSG |
83 | sha256rnds2 STATE0, STATE1 | 82 | sha256rnds2 STATE0, STATE1 |
@@ -87,13 +86,14 @@ sha256_process_block64_shaNI: | |||
87 | 86 | ||
88 | /* Rounds 12-15 */ | 87 | /* Rounds 12-15 */ |
89 | movu128 3*16(DATA_PTR), MSG | 88 | movu128 3*16(DATA_PTR), MSG |
90 | pshufb SHUF_MASK, MSG | 89 | pshufb XMMTMP4, MSG |
90 | /* ...to here */ | ||
91 | mova128 MSG, MSGTMP3 | 91 | mova128 MSG, MSGTMP3 |
92 | paddd 3*16-8*16(SHA256CONSTANTS), MSG | 92 | paddd 3*16-8*16(SHA256CONSTANTS), MSG |
93 | sha256rnds2 STATE0, STATE1 | 93 | sha256rnds2 STATE0, STATE1 |
94 | mova128 MSGTMP3, MSGTMP4 | 94 | mova128 MSGTMP3, XMMTMP4 |
95 | palignr $4, MSGTMP2, MSGTMP4 | 95 | palignr $4, MSGTMP2, XMMTMP4 |
96 | paddd MSGTMP4, MSGTMP0 | 96 | paddd XMMTMP4, MSGTMP0 |
97 | sha256msg2 MSGTMP3, MSGTMP0 | 97 | sha256msg2 MSGTMP3, MSGTMP0 |
98 | shuf128_32 $0x0E, MSG, MSG | 98 | shuf128_32 $0x0E, MSG, MSG |
99 | sha256rnds2 STATE1, STATE0 | 99 | sha256rnds2 STATE1, STATE0 |
@@ -103,9 +103,9 @@ sha256_process_block64_shaNI: | |||
103 | mova128 MSGTMP0, MSG | 103 | mova128 MSGTMP0, MSG |
104 | paddd 4*16-8*16(SHA256CONSTANTS), MSG | 104 | paddd 4*16-8*16(SHA256CONSTANTS), MSG |
105 | sha256rnds2 STATE0, STATE1 | 105 | sha256rnds2 STATE0, STATE1 |
106 | mova128 MSGTMP0, MSGTMP4 | 106 | mova128 MSGTMP0, XMMTMP4 |
107 | palignr $4, MSGTMP3, MSGTMP4 | 107 | palignr $4, MSGTMP3, XMMTMP4 |
108 | paddd MSGTMP4, MSGTMP1 | 108 | paddd XMMTMP4, MSGTMP1 |
109 | sha256msg2 MSGTMP0, MSGTMP1 | 109 | sha256msg2 MSGTMP0, MSGTMP1 |
110 | shuf128_32 $0x0E, MSG, MSG | 110 | shuf128_32 $0x0E, MSG, MSG |
111 | sha256rnds2 STATE1, STATE0 | 111 | sha256rnds2 STATE1, STATE0 |
@@ -115,9 +115,9 @@ sha256_process_block64_shaNI: | |||
115 | mova128 MSGTMP1, MSG | 115 | mova128 MSGTMP1, MSG |
116 | paddd 5*16-8*16(SHA256CONSTANTS), MSG | 116 | paddd 5*16-8*16(SHA256CONSTANTS), MSG |
117 | sha256rnds2 STATE0, STATE1 | 117 | sha256rnds2 STATE0, STATE1 |
118 | mova128 MSGTMP1, MSGTMP4 | 118 | mova128 MSGTMP1, XMMTMP4 |
119 | palignr $4, MSGTMP0, MSGTMP4 | 119 | palignr $4, MSGTMP0, XMMTMP4 |
120 | paddd MSGTMP4, MSGTMP2 | 120 | paddd XMMTMP4, MSGTMP2 |
121 | sha256msg2 MSGTMP1, MSGTMP2 | 121 | sha256msg2 MSGTMP1, MSGTMP2 |
122 | shuf128_32 $0x0E, MSG, MSG | 122 | shuf128_32 $0x0E, MSG, MSG |
123 | sha256rnds2 STATE1, STATE0 | 123 | sha256rnds2 STATE1, STATE0 |
@@ -127,9 +127,9 @@ sha256_process_block64_shaNI: | |||
127 | mova128 MSGTMP2, MSG | 127 | mova128 MSGTMP2, MSG |
128 | paddd 6*16-8*16(SHA256CONSTANTS), MSG | 128 | paddd 6*16-8*16(SHA256CONSTANTS), MSG |
129 | sha256rnds2 STATE0, STATE1 | 129 | sha256rnds2 STATE0, STATE1 |
130 | mova128 MSGTMP2, MSGTMP4 | 130 | mova128 MSGTMP2, XMMTMP4 |
131 | palignr $4, MSGTMP1, MSGTMP4 | 131 | palignr $4, MSGTMP1, XMMTMP4 |
132 | paddd MSGTMP4, MSGTMP3 | 132 | paddd XMMTMP4, MSGTMP3 |
133 | sha256msg2 MSGTMP2, MSGTMP3 | 133 | sha256msg2 MSGTMP2, MSGTMP3 |
134 | shuf128_32 $0x0E, MSG, MSG | 134 | shuf128_32 $0x0E, MSG, MSG |
135 | sha256rnds2 STATE1, STATE0 | 135 | sha256rnds2 STATE1, STATE0 |
@@ -139,9 +139,9 @@ sha256_process_block64_shaNI: | |||
139 | mova128 MSGTMP3, MSG | 139 | mova128 MSGTMP3, MSG |
140 | paddd 7*16-8*16(SHA256CONSTANTS), MSG | 140 | paddd 7*16-8*16(SHA256CONSTANTS), MSG |
141 | sha256rnds2 STATE0, STATE1 | 141 | sha256rnds2 STATE0, STATE1 |
142 | mova128 MSGTMP3, MSGTMP4 | 142 | mova128 MSGTMP3, XMMTMP4 |
143 | palignr $4, MSGTMP2, MSGTMP4 | 143 | palignr $4, MSGTMP2, XMMTMP4 |
144 | paddd MSGTMP4, MSGTMP0 | 144 | paddd XMMTMP4, MSGTMP0 |
145 | sha256msg2 MSGTMP3, MSGTMP0 | 145 | sha256msg2 MSGTMP3, MSGTMP0 |
146 | shuf128_32 $0x0E, MSG, MSG | 146 | shuf128_32 $0x0E, MSG, MSG |
147 | sha256rnds2 STATE1, STATE0 | 147 | sha256rnds2 STATE1, STATE0 |
@@ -151,9 +151,9 @@ sha256_process_block64_shaNI: | |||
151 | mova128 MSGTMP0, MSG | 151 | mova128 MSGTMP0, MSG |
152 | paddd 8*16-8*16(SHA256CONSTANTS), MSG | 152 | paddd 8*16-8*16(SHA256CONSTANTS), MSG |
153 | sha256rnds2 STATE0, STATE1 | 153 | sha256rnds2 STATE0, STATE1 |
154 | mova128 MSGTMP0, MSGTMP4 | 154 | mova128 MSGTMP0, XMMTMP4 |
155 | palignr $4, MSGTMP3, MSGTMP4 | 155 | palignr $4, MSGTMP3, XMMTMP4 |
156 | paddd MSGTMP4, MSGTMP1 | 156 | paddd XMMTMP4, MSGTMP1 |
157 | sha256msg2 MSGTMP0, MSGTMP1 | 157 | sha256msg2 MSGTMP0, MSGTMP1 |
158 | shuf128_32 $0x0E, MSG, MSG | 158 | shuf128_32 $0x0E, MSG, MSG |
159 | sha256rnds2 STATE1, STATE0 | 159 | sha256rnds2 STATE1, STATE0 |
@@ -163,9 +163,9 @@ sha256_process_block64_shaNI: | |||
163 | mova128 MSGTMP1, MSG | 163 | mova128 MSGTMP1, MSG |
164 | paddd 9*16-8*16(SHA256CONSTANTS), MSG | 164 | paddd 9*16-8*16(SHA256CONSTANTS), MSG |
165 | sha256rnds2 STATE0, STATE1 | 165 | sha256rnds2 STATE0, STATE1 |
166 | mova128 MSGTMP1, MSGTMP4 | 166 | mova128 MSGTMP1, XMMTMP4 |
167 | palignr $4, MSGTMP0, MSGTMP4 | 167 | palignr $4, MSGTMP0, XMMTMP4 |
168 | paddd MSGTMP4, MSGTMP2 | 168 | paddd XMMTMP4, MSGTMP2 |
169 | sha256msg2 MSGTMP1, MSGTMP2 | 169 | sha256msg2 MSGTMP1, MSGTMP2 |
170 | shuf128_32 $0x0E, MSG, MSG | 170 | shuf128_32 $0x0E, MSG, MSG |
171 | sha256rnds2 STATE1, STATE0 | 171 | sha256rnds2 STATE1, STATE0 |
@@ -175,9 +175,9 @@ sha256_process_block64_shaNI: | |||
175 | mova128 MSGTMP2, MSG | 175 | mova128 MSGTMP2, MSG |
176 | paddd 10*16-8*16(SHA256CONSTANTS), MSG | 176 | paddd 10*16-8*16(SHA256CONSTANTS), MSG |
177 | sha256rnds2 STATE0, STATE1 | 177 | sha256rnds2 STATE0, STATE1 |
178 | mova128 MSGTMP2, MSGTMP4 | 178 | mova128 MSGTMP2, XMMTMP4 |
179 | palignr $4, MSGTMP1, MSGTMP4 | 179 | palignr $4, MSGTMP1, XMMTMP4 |
180 | paddd MSGTMP4, MSGTMP3 | 180 | paddd XMMTMP4, MSGTMP3 |
181 | sha256msg2 MSGTMP2, MSGTMP3 | 181 | sha256msg2 MSGTMP2, MSGTMP3 |
182 | shuf128_32 $0x0E, MSG, MSG | 182 | shuf128_32 $0x0E, MSG, MSG |
183 | sha256rnds2 STATE1, STATE0 | 183 | sha256rnds2 STATE1, STATE0 |
@@ -187,9 +187,9 @@ sha256_process_block64_shaNI: | |||
187 | mova128 MSGTMP3, MSG | 187 | mova128 MSGTMP3, MSG |
188 | paddd 11*16-8*16(SHA256CONSTANTS), MSG | 188 | paddd 11*16-8*16(SHA256CONSTANTS), MSG |
189 | sha256rnds2 STATE0, STATE1 | 189 | sha256rnds2 STATE0, STATE1 |
190 | mova128 MSGTMP3, MSGTMP4 | 190 | mova128 MSGTMP3, XMMTMP4 |
191 | palignr $4, MSGTMP2, MSGTMP4 | 191 | palignr $4, MSGTMP2, XMMTMP4 |
192 | paddd MSGTMP4, MSGTMP0 | 192 | paddd XMMTMP4, MSGTMP0 |
193 | sha256msg2 MSGTMP3, MSGTMP0 | 193 | sha256msg2 MSGTMP3, MSGTMP0 |
194 | shuf128_32 $0x0E, MSG, MSG | 194 | shuf128_32 $0x0E, MSG, MSG |
195 | sha256rnds2 STATE1, STATE0 | 195 | sha256rnds2 STATE1, STATE0 |
@@ -199,9 +199,9 @@ sha256_process_block64_shaNI: | |||
199 | mova128 MSGTMP0, MSG | 199 | mova128 MSGTMP0, MSG |
200 | paddd 12*16-8*16(SHA256CONSTANTS), MSG | 200 | paddd 12*16-8*16(SHA256CONSTANTS), MSG |
201 | sha256rnds2 STATE0, STATE1 | 201 | sha256rnds2 STATE0, STATE1 |
202 | mova128 MSGTMP0, MSGTMP4 | 202 | mova128 MSGTMP0, XMMTMP4 |
203 | palignr $4, MSGTMP3, MSGTMP4 | 203 | palignr $4, MSGTMP3, XMMTMP4 |
204 | paddd MSGTMP4, MSGTMP1 | 204 | paddd XMMTMP4, MSGTMP1 |
205 | sha256msg2 MSGTMP0, MSGTMP1 | 205 | sha256msg2 MSGTMP0, MSGTMP1 |
206 | shuf128_32 $0x0E, MSG, MSG | 206 | shuf128_32 $0x0E, MSG, MSG |
207 | sha256rnds2 STATE1, STATE0 | 207 | sha256rnds2 STATE1, STATE0 |
@@ -211,9 +211,9 @@ sha256_process_block64_shaNI: | |||
211 | mova128 MSGTMP1, MSG | 211 | mova128 MSGTMP1, MSG |
212 | paddd 13*16-8*16(SHA256CONSTANTS), MSG | 212 | paddd 13*16-8*16(SHA256CONSTANTS), MSG |
213 | sha256rnds2 STATE0, STATE1 | 213 | sha256rnds2 STATE0, STATE1 |
214 | mova128 MSGTMP1, MSGTMP4 | 214 | mova128 MSGTMP1, XMMTMP4 |
215 | palignr $4, MSGTMP0, MSGTMP4 | 215 | palignr $4, MSGTMP0, XMMTMP4 |
216 | paddd MSGTMP4, MSGTMP2 | 216 | paddd XMMTMP4, MSGTMP2 |
217 | sha256msg2 MSGTMP1, MSGTMP2 | 217 | sha256msg2 MSGTMP1, MSGTMP2 |
218 | shuf128_32 $0x0E, MSG, MSG | 218 | shuf128_32 $0x0E, MSG, MSG |
219 | sha256rnds2 STATE1, STATE0 | 219 | sha256rnds2 STATE1, STATE0 |
@@ -222,9 +222,9 @@ sha256_process_block64_shaNI: | |||
222 | mova128 MSGTMP2, MSG | 222 | mova128 MSGTMP2, MSG |
223 | paddd 14*16-8*16(SHA256CONSTANTS), MSG | 223 | paddd 14*16-8*16(SHA256CONSTANTS), MSG |
224 | sha256rnds2 STATE0, STATE1 | 224 | sha256rnds2 STATE0, STATE1 |
225 | mova128 MSGTMP2, MSGTMP4 | 225 | mova128 MSGTMP2, XMMTMP4 |
226 | palignr $4, MSGTMP1, MSGTMP4 | 226 | palignr $4, MSGTMP1, XMMTMP4 |
227 | paddd MSGTMP4, MSGTMP3 | 227 | paddd XMMTMP4, MSGTMP3 |
228 | sha256msg2 MSGTMP2, MSGTMP3 | 228 | sha256msg2 MSGTMP2, MSGTMP3 |
229 | shuf128_32 $0x0E, MSG, MSG | 229 | shuf128_32 $0x0E, MSG, MSG |
230 | sha256rnds2 STATE1, STATE0 | 230 | sha256rnds2 STATE1, STATE0 |
@@ -243,9 +243,9 @@ sha256_process_block64_shaNI: | |||
243 | /* Write hash values back in the correct order */ | 243 | /* Write hash values back in the correct order */ |
244 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ | 244 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ |
245 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ | 245 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ |
246 | mova128 STATE0, MSGTMP4 | 246 | mova128 STATE0, XMMTMP4 |
247 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ | 247 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ |
248 | palignr $8, MSGTMP4, STATE1 /* HGFE */ | 248 | palignr $8, XMMTMP4, STATE1 /* HGFE */ |
249 | 249 | ||
250 | movu128 STATE0, 80+0*16(%rdi) | 250 | movu128 STATE0, 80+0*16(%rdi) |
251 | movu128 STATE1, 80+1*16(%rdi) | 251 | movu128 STATE1, 80+1*16(%rdi) |