diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-03 15:11:23 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2022-02-03 15:11:23 +0100 |
commit | de6cb4bed82356db72af81890c7c26d7e85fb50d (patch) | |
tree | 628767b49cc561c03e22ed3d734b8bccfa3a623c | |
parent | 6472ac942898437e040171cec991de1c0b962f72 (diff) | |
download | busybox-w32-de6cb4bed82356db72af81890c7c26d7e85fb50d.tar.gz busybox-w32-de6cb4bed82356db72af81890c7c26d7e85fb50d.tar.bz2 busybox-w32-de6cb4bed82356db72af81890c7c26d7e85fb50d.zip |
libbb/sha256: code shrink in 32-bit x86
function old new delta
sha256_process_block64_shaNI 747 722 -25
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/hash_md5_sha256_x86-32_shaNI.S | 35 |
1 file changed, 17 insertions, 18 deletions
diff --git a/libbb/hash_md5_sha256_x86-32_shaNI.S b/libbb/hash_md5_sha256_x86-32_shaNI.S index 56e37fa38..632dab7e6 100644 --- a/libbb/hash_md5_sha256_x86-32_shaNI.S +++ b/libbb/hash_md5_sha256_x86-32_shaNI.S | |||
@@ -49,8 +49,7 @@ sha256_process_block64_shaNI: | |||
49 | palignr $8, STATE1, STATE0 /* ABEF */ | 49 | palignr $8, STATE1, STATE0 /* ABEF */ |
50 | pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ | 50 | pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ |
51 | 51 | ||
52 | # mova128 PSHUFFLE_BSWAP32_FLIP_MASK, SHUF_MASK | 52 | movl $K256+8*16, SHA256CONSTANTS |
53 | lea K256, SHA256CONSTANTS | ||
54 | 53 | ||
55 | /* Save hash values for addition after rounds */ | 54 | /* Save hash values for addition after rounds */ |
56 | mova128 STATE0, 0*16(%esp) | 55 | mova128 STATE0, 0*16(%esp) |
@@ -60,7 +59,7 @@ sha256_process_block64_shaNI: | |||
60 | movu128 0*16(DATA_PTR), MSG | 59 | movu128 0*16(DATA_PTR), MSG |
61 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG | 60 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG |
62 | mova128 MSG, MSGTMP0 | 61 | mova128 MSG, MSGTMP0 |
63 | paddd 0*16(SHA256CONSTANTS), MSG | 62 | paddd 0*16-8*16(SHA256CONSTANTS), MSG |
64 | sha256rnds2 STATE0, STATE1 | 63 | sha256rnds2 STATE0, STATE1 |
65 | shuf128_32 $0x0E, MSG, MSG | 64 | shuf128_32 $0x0E, MSG, MSG |
66 | sha256rnds2 STATE1, STATE0 | 65 | sha256rnds2 STATE1, STATE0 |
@@ -69,7 +68,7 @@ sha256_process_block64_shaNI: | |||
69 | movu128 1*16(DATA_PTR), MSG | 68 | movu128 1*16(DATA_PTR), MSG |
70 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG | 69 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG |
71 | mova128 MSG, MSGTMP1 | 70 | mova128 MSG, MSGTMP1 |
72 | paddd 1*16(SHA256CONSTANTS), MSG | 71 | paddd 1*16-8*16(SHA256CONSTANTS), MSG |
73 | sha256rnds2 STATE0, STATE1 | 72 | sha256rnds2 STATE0, STATE1 |
74 | shuf128_32 $0x0E, MSG, MSG | 73 | shuf128_32 $0x0E, MSG, MSG |
75 | sha256rnds2 STATE1, STATE0 | 74 | sha256rnds2 STATE1, STATE0 |
@@ -79,7 +78,7 @@ sha256_process_block64_shaNI: | |||
79 | movu128 2*16(DATA_PTR), MSG | 78 | movu128 2*16(DATA_PTR), MSG |
80 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG | 79 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG |
81 | mova128 MSG, MSGTMP2 | 80 | mova128 MSG, MSGTMP2 |
82 | paddd 2*16(SHA256CONSTANTS), MSG | 81 | paddd 2*16-8*16(SHA256CONSTANTS), MSG |
83 | sha256rnds2 STATE0, STATE1 | 82 | sha256rnds2 STATE0, STATE1 |
84 | shuf128_32 $0x0E, MSG, MSG | 83 | shuf128_32 $0x0E, MSG, MSG |
85 | sha256rnds2 STATE1, STATE0 | 84 | sha256rnds2 STATE1, STATE0 |
@@ -89,7 +88,7 @@ sha256_process_block64_shaNI: | |||
89 | movu128 3*16(DATA_PTR), MSG | 88 | movu128 3*16(DATA_PTR), MSG |
90 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG | 89 | pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG |
91 | mova128 MSG, MSGTMP3 | 90 | mova128 MSG, MSGTMP3 |
92 | paddd 3*16(SHA256CONSTANTS), MSG | 91 | paddd 3*16-8*16(SHA256CONSTANTS), MSG |
93 | sha256rnds2 STATE0, STATE1 | 92 | sha256rnds2 STATE0, STATE1 |
94 | mova128 MSGTMP3, MSGTMP4 | 93 | mova128 MSGTMP3, MSGTMP4 |
95 | palignr $4, MSGTMP2, MSGTMP4 | 94 | palignr $4, MSGTMP2, MSGTMP4 |
@@ -101,7 +100,7 @@ sha256_process_block64_shaNI: | |||
101 | 100 | ||
102 | /* Rounds 16-19 */ | 101 | /* Rounds 16-19 */ |
103 | mova128 MSGTMP0, MSG | 102 | mova128 MSGTMP0, MSG |
104 | paddd 4*16(SHA256CONSTANTS), MSG | 103 | paddd 4*16-8*16(SHA256CONSTANTS), MSG |
105 | sha256rnds2 STATE0, STATE1 | 104 | sha256rnds2 STATE0, STATE1 |
106 | mova128 MSGTMP0, MSGTMP4 | 105 | mova128 MSGTMP0, MSGTMP4 |
107 | palignr $4, MSGTMP3, MSGTMP4 | 106 | palignr $4, MSGTMP3, MSGTMP4 |
@@ -113,7 +112,7 @@ sha256_process_block64_shaNI: | |||
113 | 112 | ||
114 | /* Rounds 20-23 */ | 113 | /* Rounds 20-23 */ |
115 | mova128 MSGTMP1, MSG | 114 | mova128 MSGTMP1, MSG |
116 | paddd 5*16(SHA256CONSTANTS), MSG | 115 | paddd 5*16-8*16(SHA256CONSTANTS), MSG |
117 | sha256rnds2 STATE0, STATE1 | 116 | sha256rnds2 STATE0, STATE1 |
118 | mova128 MSGTMP1, MSGTMP4 | 117 | mova128 MSGTMP1, MSGTMP4 |
119 | palignr $4, MSGTMP0, MSGTMP4 | 118 | palignr $4, MSGTMP0, MSGTMP4 |
@@ -125,7 +124,7 @@ sha256_process_block64_shaNI: | |||
125 | 124 | ||
126 | /* Rounds 24-27 */ | 125 | /* Rounds 24-27 */ |
127 | mova128 MSGTMP2, MSG | 126 | mova128 MSGTMP2, MSG |
128 | paddd 6*16(SHA256CONSTANTS), MSG | 127 | paddd 6*16-8*16(SHA256CONSTANTS), MSG |
129 | sha256rnds2 STATE0, STATE1 | 128 | sha256rnds2 STATE0, STATE1 |
130 | mova128 MSGTMP2, MSGTMP4 | 129 | mova128 MSGTMP2, MSGTMP4 |
131 | palignr $4, MSGTMP1, MSGTMP4 | 130 | palignr $4, MSGTMP1, MSGTMP4 |
@@ -137,7 +136,7 @@ sha256_process_block64_shaNI: | |||
137 | 136 | ||
138 | /* Rounds 28-31 */ | 137 | /* Rounds 28-31 */ |
139 | mova128 MSGTMP3, MSG | 138 | mova128 MSGTMP3, MSG |
140 | paddd 7*16(SHA256CONSTANTS), MSG | 139 | paddd 7*16-8*16(SHA256CONSTANTS), MSG |
141 | sha256rnds2 STATE0, STATE1 | 140 | sha256rnds2 STATE0, STATE1 |
142 | mova128 MSGTMP3, MSGTMP4 | 141 | mova128 MSGTMP3, MSGTMP4 |
143 | palignr $4, MSGTMP2, MSGTMP4 | 142 | palignr $4, MSGTMP2, MSGTMP4 |
@@ -149,7 +148,7 @@ sha256_process_block64_shaNI: | |||
149 | 148 | ||
150 | /* Rounds 32-35 */ | 149 | /* Rounds 32-35 */ |
151 | mova128 MSGTMP0, MSG | 150 | mova128 MSGTMP0, MSG |
152 | paddd 8*16(SHA256CONSTANTS), MSG | 151 | paddd 8*16-8*16(SHA256CONSTANTS), MSG |
153 | sha256rnds2 STATE0, STATE1 | 152 | sha256rnds2 STATE0, STATE1 |
154 | mova128 MSGTMP0, MSGTMP4 | 153 | mova128 MSGTMP0, MSGTMP4 |
155 | palignr $4, MSGTMP3, MSGTMP4 | 154 | palignr $4, MSGTMP3, MSGTMP4 |
@@ -161,7 +160,7 @@ sha256_process_block64_shaNI: | |||
161 | 160 | ||
162 | /* Rounds 36-39 */ | 161 | /* Rounds 36-39 */ |
163 | mova128 MSGTMP1, MSG | 162 | mova128 MSGTMP1, MSG |
164 | paddd 9*16(SHA256CONSTANTS), MSG | 163 | paddd 9*16-8*16(SHA256CONSTANTS), MSG |
165 | sha256rnds2 STATE0, STATE1 | 164 | sha256rnds2 STATE0, STATE1 |
166 | mova128 MSGTMP1, MSGTMP4 | 165 | mova128 MSGTMP1, MSGTMP4 |
167 | palignr $4, MSGTMP0, MSGTMP4 | 166 | palignr $4, MSGTMP0, MSGTMP4 |
@@ -173,7 +172,7 @@ sha256_process_block64_shaNI: | |||
173 | 172 | ||
174 | /* Rounds 40-43 */ | 173 | /* Rounds 40-43 */ |
175 | mova128 MSGTMP2, MSG | 174 | mova128 MSGTMP2, MSG |
176 | paddd 10*16(SHA256CONSTANTS), MSG | 175 | paddd 10*16-8*16(SHA256CONSTANTS), MSG |
177 | sha256rnds2 STATE0, STATE1 | 176 | sha256rnds2 STATE0, STATE1 |
178 | mova128 MSGTMP2, MSGTMP4 | 177 | mova128 MSGTMP2, MSGTMP4 |
179 | palignr $4, MSGTMP1, MSGTMP4 | 178 | palignr $4, MSGTMP1, MSGTMP4 |
@@ -185,7 +184,7 @@ sha256_process_block64_shaNI: | |||
185 | 184 | ||
186 | /* Rounds 44-47 */ | 185 | /* Rounds 44-47 */ |
187 | mova128 MSGTMP3, MSG | 186 | mova128 MSGTMP3, MSG |
188 | paddd 11*16(SHA256CONSTANTS), MSG | 187 | paddd 11*16-8*16(SHA256CONSTANTS), MSG |
189 | sha256rnds2 STATE0, STATE1 | 188 | sha256rnds2 STATE0, STATE1 |
190 | mova128 MSGTMP3, MSGTMP4 | 189 | mova128 MSGTMP3, MSGTMP4 |
191 | palignr $4, MSGTMP2, MSGTMP4 | 190 | palignr $4, MSGTMP2, MSGTMP4 |
@@ -197,7 +196,7 @@ sha256_process_block64_shaNI: | |||
197 | 196 | ||
198 | /* Rounds 48-51 */ | 197 | /* Rounds 48-51 */ |
199 | mova128 MSGTMP0, MSG | 198 | mova128 MSGTMP0, MSG |
200 | paddd 12*16(SHA256CONSTANTS), MSG | 199 | paddd 12*16-8*16(SHA256CONSTANTS), MSG |
201 | sha256rnds2 STATE0, STATE1 | 200 | sha256rnds2 STATE0, STATE1 |
202 | mova128 MSGTMP0, MSGTMP4 | 201 | mova128 MSGTMP0, MSGTMP4 |
203 | palignr $4, MSGTMP3, MSGTMP4 | 202 | palignr $4, MSGTMP3, MSGTMP4 |
@@ -209,7 +208,7 @@ sha256_process_block64_shaNI: | |||
209 | 208 | ||
210 | /* Rounds 52-55 */ | 209 | /* Rounds 52-55 */ |
211 | mova128 MSGTMP1, MSG | 210 | mova128 MSGTMP1, MSG |
212 | paddd 13*16(SHA256CONSTANTS), MSG | 211 | paddd 13*16-8*16(SHA256CONSTANTS), MSG |
213 | sha256rnds2 STATE0, STATE1 | 212 | sha256rnds2 STATE0, STATE1 |
214 | mova128 MSGTMP1, MSGTMP4 | 213 | mova128 MSGTMP1, MSGTMP4 |
215 | palignr $4, MSGTMP0, MSGTMP4 | 214 | palignr $4, MSGTMP0, MSGTMP4 |
@@ -220,7 +219,7 @@ sha256_process_block64_shaNI: | |||
220 | 219 | ||
221 | /* Rounds 56-59 */ | 220 | /* Rounds 56-59 */ |
222 | mova128 MSGTMP2, MSG | 221 | mova128 MSGTMP2, MSG |
223 | paddd 14*16(SHA256CONSTANTS), MSG | 222 | paddd 14*16-8*16(SHA256CONSTANTS), MSG |
224 | sha256rnds2 STATE0, STATE1 | 223 | sha256rnds2 STATE0, STATE1 |
225 | mova128 MSGTMP2, MSGTMP4 | 224 | mova128 MSGTMP2, MSGTMP4 |
226 | palignr $4, MSGTMP1, MSGTMP4 | 225 | palignr $4, MSGTMP1, MSGTMP4 |
@@ -231,7 +230,7 @@ sha256_process_block64_shaNI: | |||
231 | 230 | ||
232 | /* Rounds 60-63 */ | 231 | /* Rounds 60-63 */ |
233 | mova128 MSGTMP3, MSG | 232 | mova128 MSGTMP3, MSG |
234 | paddd 15*16(SHA256CONSTANTS), MSG | 233 | paddd 15*16-8*16(SHA256CONSTANTS), MSG |
235 | sha256rnds2 STATE0, STATE1 | 234 | sha256rnds2 STATE0, STATE1 |
236 | shuf128_32 $0x0E, MSG, MSG | 235 | shuf128_32 $0x0E, MSG, MSG |
237 | sha256rnds2 STATE1, STATE0 | 236 | sha256rnds2 STATE1, STATE0 |