diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-09 00:33:39 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-09 00:33:39 +0100 |
commit | c0ff0d4528d718c20b9ca2290bd10d59e9f794a3 (patch) | |
tree | 7c0879d37c70fef047a55ef72f4aff5dcf88ab12 /libbb/hash_md5_sha256_x86-64_shaNI.S | |
parent | eb8d5f3b8f3c91f3ed82a52b4ce52a154c146ede (diff) | |
download | busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.tar.gz busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.tar.bz2 busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.zip |
libbb/sha256: code shrink in 32-bit x86
function                                             old     new   delta
sha256_process_block64_shaNI                         713     697     -16
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to '')
-rw-r--r-- | libbb/hash_md5_sha256_x86-64_shaNI.S | 107 |
1 files changed, 54 insertions, 53 deletions
diff --git a/libbb/hash_md5_sha256_x86-64_shaNI.S b/libbb/hash_md5_sha256_x86-64_shaNI.S index c6c931341..b5c950a9a 100644 --- a/libbb/hash_md5_sha256_x86-64_shaNI.S +++ b/libbb/hash_md5_sha256_x86-64_shaNI.S | |||
@@ -31,7 +31,8 @@ | |||
31 | #define MSGTMP1 %xmm4 | 31 | #define MSGTMP1 %xmm4 |
32 | #define MSGTMP2 %xmm5 | 32 | #define MSGTMP2 %xmm5 |
33 | #define MSGTMP3 %xmm6 | 33 | #define MSGTMP3 %xmm6 |
34 | #define XMMTMP4 %xmm7 | 34 | |
35 | #define XMMTMP %xmm7 | ||
35 | 36 | ||
36 | #define ABEF_SAVE %xmm9 | 37 | #define ABEF_SAVE %xmm9 |
37 | #define CDGH_SAVE %xmm10 | 38 | #define CDGH_SAVE %xmm10 |
@@ -41,14 +42,14 @@ sha256_process_block64_shaNI: | |||
41 | movu128 80+0*16(%rdi), STATE0 | 42 | movu128 80+0*16(%rdi), STATE0 |
42 | movu128 80+1*16(%rdi), STATE1 | 43 | movu128 80+1*16(%rdi), STATE1 |
43 | 44 | ||
44 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ | 45 | shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ |
45 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ | 46 | shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ |
46 | mova128 STATE0, XMMTMP4 | 47 | mova128 STATE0, XMMTMP |
47 | palignr $8, STATE1, STATE0 /* ABEF */ | 48 | palignr $8, STATE1, STATE0 /* ABEF */ |
48 | pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */ | 49 | pblendw $0xF0, XMMTMP, STATE1 /* CDGH */ |
49 | 50 | ||
50 | /* XMMTMP4 holds flip mask from here... */ | 51 | /* XMMTMP holds flip mask from here... */ |
51 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP4 | 52 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP |
52 | leaq K256+8*16(%rip), SHA256CONSTANTS | 53 | leaq K256+8*16(%rip), SHA256CONSTANTS |
53 | 54 | ||
54 | /* Save hash values for addition after rounds */ | 55 | /* Save hash values for addition after rounds */ |
@@ -57,7 +58,7 @@ sha256_process_block64_shaNI: | |||
57 | 58 | ||
58 | /* Rounds 0-3 */ | 59 | /* Rounds 0-3 */ |
59 | movu128 0*16(DATA_PTR), MSG | 60 | movu128 0*16(DATA_PTR), MSG |
60 | pshufb XMMTMP4, MSG | 61 | pshufb XMMTMP, MSG |
61 | mova128 MSG, MSGTMP0 | 62 | mova128 MSG, MSGTMP0 |
62 | paddd 0*16-8*16(SHA256CONSTANTS), MSG | 63 | paddd 0*16-8*16(SHA256CONSTANTS), MSG |
63 | sha256rnds2 STATE0, STATE1 | 64 | sha256rnds2 STATE0, STATE1 |
@@ -66,7 +67,7 @@ sha256_process_block64_shaNI: | |||
66 | 67 | ||
67 | /* Rounds 4-7 */ | 68 | /* Rounds 4-7 */ |
68 | movu128 1*16(DATA_PTR), MSG | 69 | movu128 1*16(DATA_PTR), MSG |
69 | pshufb XMMTMP4, MSG | 70 | pshufb XMMTMP, MSG |
70 | mova128 MSG, MSGTMP1 | 71 | mova128 MSG, MSGTMP1 |
71 | paddd 1*16-8*16(SHA256CONSTANTS), MSG | 72 | paddd 1*16-8*16(SHA256CONSTANTS), MSG |
72 | sha256rnds2 STATE0, STATE1 | 73 | sha256rnds2 STATE0, STATE1 |
@@ -76,7 +77,7 @@ sha256_process_block64_shaNI: | |||
76 | 77 | ||
77 | /* Rounds 8-11 */ | 78 | /* Rounds 8-11 */ |
78 | movu128 2*16(DATA_PTR), MSG | 79 | movu128 2*16(DATA_PTR), MSG |
79 | pshufb XMMTMP4, MSG | 80 | pshufb XMMTMP, MSG |
80 | mova128 MSG, MSGTMP2 | 81 | mova128 MSG, MSGTMP2 |
81 | paddd 2*16-8*16(SHA256CONSTANTS), MSG | 82 | paddd 2*16-8*16(SHA256CONSTANTS), MSG |
82 | sha256rnds2 STATE0, STATE1 | 83 | sha256rnds2 STATE0, STATE1 |
@@ -86,14 +87,14 @@ sha256_process_block64_shaNI: | |||
86 | 87 | ||
87 | /* Rounds 12-15 */ | 88 | /* Rounds 12-15 */ |
88 | movu128 3*16(DATA_PTR), MSG | 89 | movu128 3*16(DATA_PTR), MSG |
89 | pshufb XMMTMP4, MSG | 90 | pshufb XMMTMP, MSG |
90 | /* ...to here */ | 91 | /* ...to here */ |
91 | mova128 MSG, MSGTMP3 | 92 | mova128 MSG, MSGTMP3 |
92 | paddd 3*16-8*16(SHA256CONSTANTS), MSG | 93 | paddd 3*16-8*16(SHA256CONSTANTS), MSG |
93 | sha256rnds2 STATE0, STATE1 | 94 | sha256rnds2 STATE0, STATE1 |
94 | mova128 MSGTMP3, XMMTMP4 | 95 | mova128 MSGTMP3, XMMTMP |
95 | palignr $4, MSGTMP2, XMMTMP4 | 96 | palignr $4, MSGTMP2, XMMTMP |
96 | paddd XMMTMP4, MSGTMP0 | 97 | paddd XMMTMP, MSGTMP0 |
97 | sha256msg2 MSGTMP3, MSGTMP0 | 98 | sha256msg2 MSGTMP3, MSGTMP0 |
98 | shuf128_32 $0x0E, MSG, MSG | 99 | shuf128_32 $0x0E, MSG, MSG |
99 | sha256rnds2 STATE1, STATE0 | 100 | sha256rnds2 STATE1, STATE0 |
@@ -103,9 +104,9 @@ sha256_process_block64_shaNI: | |||
103 | mova128 MSGTMP0, MSG | 104 | mova128 MSGTMP0, MSG |
104 | paddd 4*16-8*16(SHA256CONSTANTS), MSG | 105 | paddd 4*16-8*16(SHA256CONSTANTS), MSG |
105 | sha256rnds2 STATE0, STATE1 | 106 | sha256rnds2 STATE0, STATE1 |
106 | mova128 MSGTMP0, XMMTMP4 | 107 | mova128 MSGTMP0, XMMTMP |
107 | palignr $4, MSGTMP3, XMMTMP4 | 108 | palignr $4, MSGTMP3, XMMTMP |
108 | paddd XMMTMP4, MSGTMP1 | 109 | paddd XMMTMP, MSGTMP1 |
109 | sha256msg2 MSGTMP0, MSGTMP1 | 110 | sha256msg2 MSGTMP0, MSGTMP1 |
110 | shuf128_32 $0x0E, MSG, MSG | 111 | shuf128_32 $0x0E, MSG, MSG |
111 | sha256rnds2 STATE1, STATE0 | 112 | sha256rnds2 STATE1, STATE0 |
@@ -115,9 +116,9 @@ sha256_process_block64_shaNI: | |||
115 | mova128 MSGTMP1, MSG | 116 | mova128 MSGTMP1, MSG |
116 | paddd 5*16-8*16(SHA256CONSTANTS), MSG | 117 | paddd 5*16-8*16(SHA256CONSTANTS), MSG |
117 | sha256rnds2 STATE0, STATE1 | 118 | sha256rnds2 STATE0, STATE1 |
118 | mova128 MSGTMP1, XMMTMP4 | 119 | mova128 MSGTMP1, XMMTMP |
119 | palignr $4, MSGTMP0, XMMTMP4 | 120 | palignr $4, MSGTMP0, XMMTMP |
120 | paddd XMMTMP4, MSGTMP2 | 121 | paddd XMMTMP, MSGTMP2 |
121 | sha256msg2 MSGTMP1, MSGTMP2 | 122 | sha256msg2 MSGTMP1, MSGTMP2 |
122 | shuf128_32 $0x0E, MSG, MSG | 123 | shuf128_32 $0x0E, MSG, MSG |
123 | sha256rnds2 STATE1, STATE0 | 124 | sha256rnds2 STATE1, STATE0 |
@@ -127,9 +128,9 @@ sha256_process_block64_shaNI: | |||
127 | mova128 MSGTMP2, MSG | 128 | mova128 MSGTMP2, MSG |
128 | paddd 6*16-8*16(SHA256CONSTANTS), MSG | 129 | paddd 6*16-8*16(SHA256CONSTANTS), MSG |
129 | sha256rnds2 STATE0, STATE1 | 130 | sha256rnds2 STATE0, STATE1 |
130 | mova128 MSGTMP2, XMMTMP4 | 131 | mova128 MSGTMP2, XMMTMP |
131 | palignr $4, MSGTMP1, XMMTMP4 | 132 | palignr $4, MSGTMP1, XMMTMP |
132 | paddd XMMTMP4, MSGTMP3 | 133 | paddd XMMTMP, MSGTMP3 |
133 | sha256msg2 MSGTMP2, MSGTMP3 | 134 | sha256msg2 MSGTMP2, MSGTMP3 |
134 | shuf128_32 $0x0E, MSG, MSG | 135 | shuf128_32 $0x0E, MSG, MSG |
135 | sha256rnds2 STATE1, STATE0 | 136 | sha256rnds2 STATE1, STATE0 |
@@ -139,9 +140,9 @@ sha256_process_block64_shaNI: | |||
139 | mova128 MSGTMP3, MSG | 140 | mova128 MSGTMP3, MSG |
140 | paddd 7*16-8*16(SHA256CONSTANTS), MSG | 141 | paddd 7*16-8*16(SHA256CONSTANTS), MSG |
141 | sha256rnds2 STATE0, STATE1 | 142 | sha256rnds2 STATE0, STATE1 |
142 | mova128 MSGTMP3, XMMTMP4 | 143 | mova128 MSGTMP3, XMMTMP |
143 | palignr $4, MSGTMP2, XMMTMP4 | 144 | palignr $4, MSGTMP2, XMMTMP |
144 | paddd XMMTMP4, MSGTMP0 | 145 | paddd XMMTMP, MSGTMP0 |
145 | sha256msg2 MSGTMP3, MSGTMP0 | 146 | sha256msg2 MSGTMP3, MSGTMP0 |
146 | shuf128_32 $0x0E, MSG, MSG | 147 | shuf128_32 $0x0E, MSG, MSG |
147 | sha256rnds2 STATE1, STATE0 | 148 | sha256rnds2 STATE1, STATE0 |
@@ -151,9 +152,9 @@ sha256_process_block64_shaNI: | |||
151 | mova128 MSGTMP0, MSG | 152 | mova128 MSGTMP0, MSG |
152 | paddd 8*16-8*16(SHA256CONSTANTS), MSG | 153 | paddd 8*16-8*16(SHA256CONSTANTS), MSG |
153 | sha256rnds2 STATE0, STATE1 | 154 | sha256rnds2 STATE0, STATE1 |
154 | mova128 MSGTMP0, XMMTMP4 | 155 | mova128 MSGTMP0, XMMTMP |
155 | palignr $4, MSGTMP3, XMMTMP4 | 156 | palignr $4, MSGTMP3, XMMTMP |
156 | paddd XMMTMP4, MSGTMP1 | 157 | paddd XMMTMP, MSGTMP1 |
157 | sha256msg2 MSGTMP0, MSGTMP1 | 158 | sha256msg2 MSGTMP0, MSGTMP1 |
158 | shuf128_32 $0x0E, MSG, MSG | 159 | shuf128_32 $0x0E, MSG, MSG |
159 | sha256rnds2 STATE1, STATE0 | 160 | sha256rnds2 STATE1, STATE0 |
@@ -163,9 +164,9 @@ sha256_process_block64_shaNI: | |||
163 | mova128 MSGTMP1, MSG | 164 | mova128 MSGTMP1, MSG |
164 | paddd 9*16-8*16(SHA256CONSTANTS), MSG | 165 | paddd 9*16-8*16(SHA256CONSTANTS), MSG |
165 | sha256rnds2 STATE0, STATE1 | 166 | sha256rnds2 STATE0, STATE1 |
166 | mova128 MSGTMP1, XMMTMP4 | 167 | mova128 MSGTMP1, XMMTMP |
167 | palignr $4, MSGTMP0, XMMTMP4 | 168 | palignr $4, MSGTMP0, XMMTMP |
168 | paddd XMMTMP4, MSGTMP2 | 169 | paddd XMMTMP, MSGTMP2 |
169 | sha256msg2 MSGTMP1, MSGTMP2 | 170 | sha256msg2 MSGTMP1, MSGTMP2 |
170 | shuf128_32 $0x0E, MSG, MSG | 171 | shuf128_32 $0x0E, MSG, MSG |
171 | sha256rnds2 STATE1, STATE0 | 172 | sha256rnds2 STATE1, STATE0 |
@@ -175,9 +176,9 @@ sha256_process_block64_shaNI: | |||
175 | mova128 MSGTMP2, MSG | 176 | mova128 MSGTMP2, MSG |
176 | paddd 10*16-8*16(SHA256CONSTANTS), MSG | 177 | paddd 10*16-8*16(SHA256CONSTANTS), MSG |
177 | sha256rnds2 STATE0, STATE1 | 178 | sha256rnds2 STATE0, STATE1 |
178 | mova128 MSGTMP2, XMMTMP4 | 179 | mova128 MSGTMP2, XMMTMP |
179 | palignr $4, MSGTMP1, XMMTMP4 | 180 | palignr $4, MSGTMP1, XMMTMP |
180 | paddd XMMTMP4, MSGTMP3 | 181 | paddd XMMTMP, MSGTMP3 |
181 | sha256msg2 MSGTMP2, MSGTMP3 | 182 | sha256msg2 MSGTMP2, MSGTMP3 |
182 | shuf128_32 $0x0E, MSG, MSG | 183 | shuf128_32 $0x0E, MSG, MSG |
183 | sha256rnds2 STATE1, STATE0 | 184 | sha256rnds2 STATE1, STATE0 |
@@ -187,9 +188,9 @@ sha256_process_block64_shaNI: | |||
187 | mova128 MSGTMP3, MSG | 188 | mova128 MSGTMP3, MSG |
188 | paddd 11*16-8*16(SHA256CONSTANTS), MSG | 189 | paddd 11*16-8*16(SHA256CONSTANTS), MSG |
189 | sha256rnds2 STATE0, STATE1 | 190 | sha256rnds2 STATE0, STATE1 |
190 | mova128 MSGTMP3, XMMTMP4 | 191 | mova128 MSGTMP3, XMMTMP |
191 | palignr $4, MSGTMP2, XMMTMP4 | 192 | palignr $4, MSGTMP2, XMMTMP |
192 | paddd XMMTMP4, MSGTMP0 | 193 | paddd XMMTMP, MSGTMP0 |
193 | sha256msg2 MSGTMP3, MSGTMP0 | 194 | sha256msg2 MSGTMP3, MSGTMP0 |
194 | shuf128_32 $0x0E, MSG, MSG | 195 | shuf128_32 $0x0E, MSG, MSG |
195 | sha256rnds2 STATE1, STATE0 | 196 | sha256rnds2 STATE1, STATE0 |
@@ -199,9 +200,9 @@ sha256_process_block64_shaNI: | |||
199 | mova128 MSGTMP0, MSG | 200 | mova128 MSGTMP0, MSG |
200 | paddd 12*16-8*16(SHA256CONSTANTS), MSG | 201 | paddd 12*16-8*16(SHA256CONSTANTS), MSG |
201 | sha256rnds2 STATE0, STATE1 | 202 | sha256rnds2 STATE0, STATE1 |
202 | mova128 MSGTMP0, XMMTMP4 | 203 | mova128 MSGTMP0, XMMTMP |
203 | palignr $4, MSGTMP3, XMMTMP4 | 204 | palignr $4, MSGTMP3, XMMTMP |
204 | paddd XMMTMP4, MSGTMP1 | 205 | paddd XMMTMP, MSGTMP1 |
205 | sha256msg2 MSGTMP0, MSGTMP1 | 206 | sha256msg2 MSGTMP0, MSGTMP1 |
206 | shuf128_32 $0x0E, MSG, MSG | 207 | shuf128_32 $0x0E, MSG, MSG |
207 | sha256rnds2 STATE1, STATE0 | 208 | sha256rnds2 STATE1, STATE0 |
@@ -211,9 +212,9 @@ sha256_process_block64_shaNI: | |||
211 | mova128 MSGTMP1, MSG | 212 | mova128 MSGTMP1, MSG |
212 | paddd 13*16-8*16(SHA256CONSTANTS), MSG | 213 | paddd 13*16-8*16(SHA256CONSTANTS), MSG |
213 | sha256rnds2 STATE0, STATE1 | 214 | sha256rnds2 STATE0, STATE1 |
214 | mova128 MSGTMP1, XMMTMP4 | 215 | mova128 MSGTMP1, XMMTMP |
215 | palignr $4, MSGTMP0, XMMTMP4 | 216 | palignr $4, MSGTMP0, XMMTMP |
216 | paddd XMMTMP4, MSGTMP2 | 217 | paddd XMMTMP, MSGTMP2 |
217 | sha256msg2 MSGTMP1, MSGTMP2 | 218 | sha256msg2 MSGTMP1, MSGTMP2 |
218 | shuf128_32 $0x0E, MSG, MSG | 219 | shuf128_32 $0x0E, MSG, MSG |
219 | sha256rnds2 STATE1, STATE0 | 220 | sha256rnds2 STATE1, STATE0 |
@@ -222,9 +223,9 @@ sha256_process_block64_shaNI: | |||
222 | mova128 MSGTMP2, MSG | 223 | mova128 MSGTMP2, MSG |
223 | paddd 14*16-8*16(SHA256CONSTANTS), MSG | 224 | paddd 14*16-8*16(SHA256CONSTANTS), MSG |
224 | sha256rnds2 STATE0, STATE1 | 225 | sha256rnds2 STATE0, STATE1 |
225 | mova128 MSGTMP2, XMMTMP4 | 226 | mova128 MSGTMP2, XMMTMP |
226 | palignr $4, MSGTMP1, XMMTMP4 | 227 | palignr $4, MSGTMP1, XMMTMP |
227 | paddd XMMTMP4, MSGTMP3 | 228 | paddd XMMTMP, MSGTMP3 |
228 | sha256msg2 MSGTMP2, MSGTMP3 | 229 | sha256msg2 MSGTMP2, MSGTMP3 |
229 | shuf128_32 $0x0E, MSG, MSG | 230 | shuf128_32 $0x0E, MSG, MSG |
230 | sha256rnds2 STATE1, STATE0 | 231 | sha256rnds2 STATE1, STATE0 |
@@ -241,11 +242,11 @@ sha256_process_block64_shaNI: | |||
241 | paddd CDGH_SAVE, STATE1 | 242 | paddd CDGH_SAVE, STATE1 |
242 | 243 | ||
243 | /* Write hash values back in the correct order */ | 244 | /* Write hash values back in the correct order */ |
244 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ | 245 | shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ |
245 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ | 246 | shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ |
246 | mova128 STATE0, XMMTMP4 | 247 | mova128 STATE0, XMMTMP |
247 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ | 248 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ |
248 | palignr $8, XMMTMP4, STATE1 /* HGFE */ | 249 | palignr $8, XMMTMP, STATE1 /* HGFE */ |
249 | 250 | ||
250 | movu128 STATE0, 80+0*16(%rdi) | 251 | movu128 STATE0, 80+0*16(%rdi) |
251 | movu128 STATE1, 80+1*16(%rdi) | 252 | movu128 STATE1, 80+1*16(%rdi) |