about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2022-02-06 00:55:52 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2022-02-06 00:56:13 +0100
commit 4f40735c87f8292a87c066b3b7099b0be007cf59 (patch)
tree 26cd4b1adea86e0b7301a2899a8a4b38937541f1
parent 31c1c310772fa6c897ee1585ea15fc38f3ab3dff (diff)
download: busybox-w32-4f40735c87f8292a87c066b3b7099b0be007cf59.tar.gz
busybox-w32-4f40735c87f8292a87c066b3b7099b0be007cf59.tar.bz2
busybox-w32-4f40735c87f8292a87c066b3b7099b0be007cf59.zip
libbb/sha256: code shrink in 32-bit x86
function                                             old     new   delta
sha256_process_block64_shaNI                         722     713      -9

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- libbb/hash_md5_sha256_x86-32_shaNI.S | 93
1 file changed, 48 insertions, 45 deletions
diff --git a/libbb/hash_md5_sha256_x86-32_shaNI.S b/libbb/hash_md5_sha256_x86-32_shaNI.S
index 632dab7e6..417da37d8 100644
--- a/libbb/hash_md5_sha256_x86-32_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-32_shaNI.S
@@ -31,7 +31,7 @@
31#define MSGTMP1 %xmm4 31#define MSGTMP1 %xmm4
32#define MSGTMP2 %xmm5 32#define MSGTMP2 %xmm5
33#define MSGTMP3 %xmm6 33#define MSGTMP3 %xmm6
34#define MSGTMP4 %xmm7 34#define XMMTMP4 %xmm7
35 35
36 .balign 8 # allow decoders to fetch at least 3 first insns 36 .balign 8 # allow decoders to fetch at least 3 first insns
37sha256_process_block64_shaNI: 37sha256_process_block64_shaNI:
@@ -45,10 +45,12 @@ sha256_process_block64_shaNI:
45 45
46 shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ 46 shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */
47 shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ 47 shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */
48 mova128 STATE0, MSGTMP4 48 mova128 STATE0, XMMTMP4
49 palignr $8, STATE1, STATE0 /* ABEF */ 49 palignr $8, STATE1, STATE0 /* ABEF */
50 pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ 50 pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */
51 51
52/* XMMTMP4 holds flip mask from here... */
53 mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP4
52 movl $K256+8*16, SHA256CONSTANTS 54 movl $K256+8*16, SHA256CONSTANTS
53 55
54 /* Save hash values for addition after rounds */ 56 /* Save hash values for addition after rounds */
@@ -57,7 +59,7 @@ sha256_process_block64_shaNI:
57 59
58 /* Rounds 0-3 */ 60 /* Rounds 0-3 */
59 movu128 0*16(DATA_PTR), MSG 61 movu128 0*16(DATA_PTR), MSG
60 pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG 62 pshufb XMMTMP4, MSG
61 mova128 MSG, MSGTMP0 63 mova128 MSG, MSGTMP0
62 paddd 0*16-8*16(SHA256CONSTANTS), MSG 64 paddd 0*16-8*16(SHA256CONSTANTS), MSG
63 sha256rnds2 STATE0, STATE1 65 sha256rnds2 STATE0, STATE1
@@ -66,7 +68,7 @@ sha256_process_block64_shaNI:
66 68
67 /* Rounds 4-7 */ 69 /* Rounds 4-7 */
68 movu128 1*16(DATA_PTR), MSG 70 movu128 1*16(DATA_PTR), MSG
69 pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG 71 pshufb XMMTMP4, MSG
70 mova128 MSG, MSGTMP1 72 mova128 MSG, MSGTMP1
71 paddd 1*16-8*16(SHA256CONSTANTS), MSG 73 paddd 1*16-8*16(SHA256CONSTANTS), MSG
72 sha256rnds2 STATE0, STATE1 74 sha256rnds2 STATE0, STATE1
@@ -76,7 +78,7 @@ sha256_process_block64_shaNI:
76 78
77 /* Rounds 8-11 */ 79 /* Rounds 8-11 */
78 movu128 2*16(DATA_PTR), MSG 80 movu128 2*16(DATA_PTR), MSG
79 pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG 81 pshufb XMMTMP4, MSG
80 mova128 MSG, MSGTMP2 82 mova128 MSG, MSGTMP2
81 paddd 2*16-8*16(SHA256CONSTANTS), MSG 83 paddd 2*16-8*16(SHA256CONSTANTS), MSG
82 sha256rnds2 STATE0, STATE1 84 sha256rnds2 STATE0, STATE1
@@ -86,13 +88,14 @@ sha256_process_block64_shaNI:
86 88
87 /* Rounds 12-15 */ 89 /* Rounds 12-15 */
88 movu128 3*16(DATA_PTR), MSG 90 movu128 3*16(DATA_PTR), MSG
89 pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG 91 pshufb XMMTMP4, MSG
92/* ...to here */
90 mova128 MSG, MSGTMP3 93 mova128 MSG, MSGTMP3
91 paddd 3*16-8*16(SHA256CONSTANTS), MSG 94 paddd 3*16-8*16(SHA256CONSTANTS), MSG
92 sha256rnds2 STATE0, STATE1 95 sha256rnds2 STATE0, STATE1
93 mova128 MSGTMP3, MSGTMP4 96 mova128 MSGTMP3, XMMTMP4
94 palignr $4, MSGTMP2, MSGTMP4 97 palignr $4, MSGTMP2, XMMTMP4
95 paddd MSGTMP4, MSGTMP0 98 paddd XMMTMP4, MSGTMP0
96 sha256msg2 MSGTMP3, MSGTMP0 99 sha256msg2 MSGTMP3, MSGTMP0
97 shuf128_32 $0x0E, MSG, MSG 100 shuf128_32 $0x0E, MSG, MSG
98 sha256rnds2 STATE1, STATE0 101 sha256rnds2 STATE1, STATE0
@@ -102,9 +105,9 @@ sha256_process_block64_shaNI:
102 mova128 MSGTMP0, MSG 105 mova128 MSGTMP0, MSG
103 paddd 4*16-8*16(SHA256CONSTANTS), MSG 106 paddd 4*16-8*16(SHA256CONSTANTS), MSG
104 sha256rnds2 STATE0, STATE1 107 sha256rnds2 STATE0, STATE1
105 mova128 MSGTMP0, MSGTMP4 108 mova128 MSGTMP0, XMMTMP4
106 palignr $4, MSGTMP3, MSGTMP4 109 palignr $4, MSGTMP3, XMMTMP4
107 paddd MSGTMP4, MSGTMP1 110 paddd XMMTMP4, MSGTMP1
108 sha256msg2 MSGTMP0, MSGTMP1 111 sha256msg2 MSGTMP0, MSGTMP1
109 shuf128_32 $0x0E, MSG, MSG 112 shuf128_32 $0x0E, MSG, MSG
110 sha256rnds2 STATE1, STATE0 113 sha256rnds2 STATE1, STATE0
@@ -114,9 +117,9 @@ sha256_process_block64_shaNI:
114 mova128 MSGTMP1, MSG 117 mova128 MSGTMP1, MSG
115 paddd 5*16-8*16(SHA256CONSTANTS), MSG 118 paddd 5*16-8*16(SHA256CONSTANTS), MSG
116 sha256rnds2 STATE0, STATE1 119 sha256rnds2 STATE0, STATE1
117 mova128 MSGTMP1, MSGTMP4 120 mova128 MSGTMP1, XMMTMP4
118 palignr $4, MSGTMP0, MSGTMP4 121 palignr $4, MSGTMP0, XMMTMP4
119 paddd MSGTMP4, MSGTMP2 122 paddd XMMTMP4, MSGTMP2
120 sha256msg2 MSGTMP1, MSGTMP2 123 sha256msg2 MSGTMP1, MSGTMP2
121 shuf128_32 $0x0E, MSG, MSG 124 shuf128_32 $0x0E, MSG, MSG
122 sha256rnds2 STATE1, STATE0 125 sha256rnds2 STATE1, STATE0
@@ -126,9 +129,9 @@ sha256_process_block64_shaNI:
126 mova128 MSGTMP2, MSG 129 mova128 MSGTMP2, MSG
127 paddd 6*16-8*16(SHA256CONSTANTS), MSG 130 paddd 6*16-8*16(SHA256CONSTANTS), MSG
128 sha256rnds2 STATE0, STATE1 131 sha256rnds2 STATE0, STATE1
129 mova128 MSGTMP2, MSGTMP4 132 mova128 MSGTMP2, XMMTMP4
130 palignr $4, MSGTMP1, MSGTMP4 133 palignr $4, MSGTMP1, XMMTMP4
131 paddd MSGTMP4, MSGTMP3 134 paddd XMMTMP4, MSGTMP3
132 sha256msg2 MSGTMP2, MSGTMP3 135 sha256msg2 MSGTMP2, MSGTMP3
133 shuf128_32 $0x0E, MSG, MSG 136 shuf128_32 $0x0E, MSG, MSG
134 sha256rnds2 STATE1, STATE0 137 sha256rnds2 STATE1, STATE0
@@ -138,9 +141,9 @@ sha256_process_block64_shaNI:
138 mova128 MSGTMP3, MSG 141 mova128 MSGTMP3, MSG
139 paddd 7*16-8*16(SHA256CONSTANTS), MSG 142 paddd 7*16-8*16(SHA256CONSTANTS), MSG
140 sha256rnds2 STATE0, STATE1 143 sha256rnds2 STATE0, STATE1
141 mova128 MSGTMP3, MSGTMP4 144 mova128 MSGTMP3, XMMTMP4
142 palignr $4, MSGTMP2, MSGTMP4 145 palignr $4, MSGTMP2, XMMTMP4
143 paddd MSGTMP4, MSGTMP0 146 paddd XMMTMP4, MSGTMP0
144 sha256msg2 MSGTMP3, MSGTMP0 147 sha256msg2 MSGTMP3, MSGTMP0
145 shuf128_32 $0x0E, MSG, MSG 148 shuf128_32 $0x0E, MSG, MSG
146 sha256rnds2 STATE1, STATE0 149 sha256rnds2 STATE1, STATE0
@@ -150,9 +153,9 @@ sha256_process_block64_shaNI:
150 mova128 MSGTMP0, MSG 153 mova128 MSGTMP0, MSG
151 paddd 8*16-8*16(SHA256CONSTANTS), MSG 154 paddd 8*16-8*16(SHA256CONSTANTS), MSG
152 sha256rnds2 STATE0, STATE1 155 sha256rnds2 STATE0, STATE1
153 mova128 MSGTMP0, MSGTMP4 156 mova128 MSGTMP0, XMMTMP4
154 palignr $4, MSGTMP3, MSGTMP4 157 palignr $4, MSGTMP3, XMMTMP4
155 paddd MSGTMP4, MSGTMP1 158 paddd XMMTMP4, MSGTMP1
156 sha256msg2 MSGTMP0, MSGTMP1 159 sha256msg2 MSGTMP0, MSGTMP1
157 shuf128_32 $0x0E, MSG, MSG 160 shuf128_32 $0x0E, MSG, MSG
158 sha256rnds2 STATE1, STATE0 161 sha256rnds2 STATE1, STATE0
@@ -162,9 +165,9 @@ sha256_process_block64_shaNI:
162 mova128 MSGTMP1, MSG 165 mova128 MSGTMP1, MSG
163 paddd 9*16-8*16(SHA256CONSTANTS), MSG 166 paddd 9*16-8*16(SHA256CONSTANTS), MSG
164 sha256rnds2 STATE0, STATE1 167 sha256rnds2 STATE0, STATE1
165 mova128 MSGTMP1, MSGTMP4 168 mova128 MSGTMP1, XMMTMP4
166 palignr $4, MSGTMP0, MSGTMP4 169 palignr $4, MSGTMP0, XMMTMP4
167 paddd MSGTMP4, MSGTMP2 170 paddd XMMTMP4, MSGTMP2
168 sha256msg2 MSGTMP1, MSGTMP2 171 sha256msg2 MSGTMP1, MSGTMP2
169 shuf128_32 $0x0E, MSG, MSG 172 shuf128_32 $0x0E, MSG, MSG
170 sha256rnds2 STATE1, STATE0 173 sha256rnds2 STATE1, STATE0
@@ -174,9 +177,9 @@ sha256_process_block64_shaNI:
174 mova128 MSGTMP2, MSG 177 mova128 MSGTMP2, MSG
175 paddd 10*16-8*16(SHA256CONSTANTS), MSG 178 paddd 10*16-8*16(SHA256CONSTANTS), MSG
176 sha256rnds2 STATE0, STATE1 179 sha256rnds2 STATE0, STATE1
177 mova128 MSGTMP2, MSGTMP4 180 mova128 MSGTMP2, XMMTMP4
178 palignr $4, MSGTMP1, MSGTMP4 181 palignr $4, MSGTMP1, XMMTMP4
179 paddd MSGTMP4, MSGTMP3 182 paddd XMMTMP4, MSGTMP3
180 sha256msg2 MSGTMP2, MSGTMP3 183 sha256msg2 MSGTMP2, MSGTMP3
181 shuf128_32 $0x0E, MSG, MSG 184 shuf128_32 $0x0E, MSG, MSG
182 sha256rnds2 STATE1, STATE0 185 sha256rnds2 STATE1, STATE0
@@ -186,9 +189,9 @@ sha256_process_block64_shaNI:
186 mova128 MSGTMP3, MSG 189 mova128 MSGTMP3, MSG
187 paddd 11*16-8*16(SHA256CONSTANTS), MSG 190 paddd 11*16-8*16(SHA256CONSTANTS), MSG
188 sha256rnds2 STATE0, STATE1 191 sha256rnds2 STATE0, STATE1
189 mova128 MSGTMP3, MSGTMP4 192 mova128 MSGTMP3, XMMTMP4
190 palignr $4, MSGTMP2, MSGTMP4 193 palignr $4, MSGTMP2, XMMTMP4
191 paddd MSGTMP4, MSGTMP0 194 paddd XMMTMP4, MSGTMP0
192 sha256msg2 MSGTMP3, MSGTMP0 195 sha256msg2 MSGTMP3, MSGTMP0
193 shuf128_32 $0x0E, MSG, MSG 196 shuf128_32 $0x0E, MSG, MSG
194 sha256rnds2 STATE1, STATE0 197 sha256rnds2 STATE1, STATE0
@@ -198,9 +201,9 @@ sha256_process_block64_shaNI:
198 mova128 MSGTMP0, MSG 201 mova128 MSGTMP0, MSG
199 paddd 12*16-8*16(SHA256CONSTANTS), MSG 202 paddd 12*16-8*16(SHA256CONSTANTS), MSG
200 sha256rnds2 STATE0, STATE1 203 sha256rnds2 STATE0, STATE1
201 mova128 MSGTMP0, MSGTMP4 204 mova128 MSGTMP0, XMMTMP4
202 palignr $4, MSGTMP3, MSGTMP4 205 palignr $4, MSGTMP3, XMMTMP4
203 paddd MSGTMP4, MSGTMP1 206 paddd XMMTMP4, MSGTMP1
204 sha256msg2 MSGTMP0, MSGTMP1 207 sha256msg2 MSGTMP0, MSGTMP1
205 shuf128_32 $0x0E, MSG, MSG 208 shuf128_32 $0x0E, MSG, MSG
206 sha256rnds2 STATE1, STATE0 209 sha256rnds2 STATE1, STATE0
@@ -210,9 +213,9 @@ sha256_process_block64_shaNI:
210 mova128 MSGTMP1, MSG 213 mova128 MSGTMP1, MSG
211 paddd 13*16-8*16(SHA256CONSTANTS), MSG 214 paddd 13*16-8*16(SHA256CONSTANTS), MSG
212 sha256rnds2 STATE0, STATE1 215 sha256rnds2 STATE0, STATE1
213 mova128 MSGTMP1, MSGTMP4 216 mova128 MSGTMP1, XMMTMP4
214 palignr $4, MSGTMP0, MSGTMP4 217 palignr $4, MSGTMP0, XMMTMP4
215 paddd MSGTMP4, MSGTMP2 218 paddd XMMTMP4, MSGTMP2
216 sha256msg2 MSGTMP1, MSGTMP2 219 sha256msg2 MSGTMP1, MSGTMP2
217 shuf128_32 $0x0E, MSG, MSG 220 shuf128_32 $0x0E, MSG, MSG
218 sha256rnds2 STATE1, STATE0 221 sha256rnds2 STATE1, STATE0
@@ -221,9 +224,9 @@ sha256_process_block64_shaNI:
221 mova128 MSGTMP2, MSG 224 mova128 MSGTMP2, MSG
222 paddd 14*16-8*16(SHA256CONSTANTS), MSG 225 paddd 14*16-8*16(SHA256CONSTANTS), MSG
223 sha256rnds2 STATE0, STATE1 226 sha256rnds2 STATE0, STATE1
224 mova128 MSGTMP2, MSGTMP4 227 mova128 MSGTMP2, XMMTMP4
225 palignr $4, MSGTMP1, MSGTMP4 228 palignr $4, MSGTMP1, XMMTMP4
226 paddd MSGTMP4, MSGTMP3 229 paddd XMMTMP4, MSGTMP3
227 sha256msg2 MSGTMP2, MSGTMP3 230 sha256msg2 MSGTMP2, MSGTMP3
228 shuf128_32 $0x0E, MSG, MSG 231 shuf128_32 $0x0E, MSG, MSG
229 sha256rnds2 STATE1, STATE0 232 sha256rnds2 STATE1, STATE0
@@ -242,9 +245,9 @@ sha256_process_block64_shaNI:
242 /* Write hash values back in the correct order */ 245 /* Write hash values back in the correct order */
243 shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ 246 shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */
244 shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ 247 shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */
245 mova128 STATE0, MSGTMP4 248 mova128 STATE0, XMMTMP4
246 pblendw $0xF0, STATE1, STATE0 /* DCBA */ 249 pblendw $0xF0, STATE1, STATE0 /* DCBA */
247 palignr $8, MSGTMP4, STATE1 /* HGFE */ 250 palignr $8, XMMTMP4, STATE1 /* HGFE */
248 251
249 movu128 STATE0, 76+0*16(%eax) 252 movu128 STATE0, 76+0*16(%eax)
250 movu128 STATE1, 76+1*16(%eax) 253 movu128 STATE1, 76+1*16(%eax)