about summary refs log tree commit diff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2022-02-06 00:30:03 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2022-02-06 00:33:42 +0100
commit 31c1c310772fa6c897ee1585ea15fc38f3ab3dff (patch)
tree 7fb122b079d32981ee6bf7f7565f14f570ae6ac6
parent a1429fbb8ca373efc01939d599f6f65969b1a366 (diff)
download busybox-w32-31c1c310772fa6c897ee1585ea15fc38f3ab3dff.tar.gz
busybox-w32-31c1c310772fa6c897ee1585ea15fc38f3ab3dff.tar.bz2
busybox-w32-31c1c310772fa6c897ee1585ea15fc38f3ab3dff.zip
libbb/sha256: code shrink in 64-bit x86
function                                             old     new   delta
sha256_process_block64_shaNI                         706     701      -5

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- libbb/hash_md5_sha256_x86-64_shaNI.S 96
1 files changed, 48 insertions, 48 deletions
diff --git a/libbb/hash_md5_sha256_x86-64_shaNI.S b/libbb/hash_md5_sha256_x86-64_shaNI.S
index f3df541e4..dbf391135 100644
--- a/libbb/hash_md5_sha256_x86-64_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-64_shaNI.S
@@ -31,9 +31,7 @@
31#define MSGTMP1 %xmm4 31#define MSGTMP1 %xmm4
32#define MSGTMP2 %xmm5 32#define MSGTMP2 %xmm5
33#define MSGTMP3 %xmm6 33#define MSGTMP3 %xmm6
34#define MSGTMP4 %xmm7 34#define XMMTMP4 %xmm7
35
36#define SHUF_MASK %xmm8
37 35
38#define ABEF_SAVE %xmm9 36#define ABEF_SAVE %xmm9
39#define CDGH_SAVE %xmm10 37#define CDGH_SAVE %xmm10
@@ -45,11 +43,12 @@ sha256_process_block64_shaNI:
45 43
46 shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ 44 shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */
47 shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ 45 shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */
48 mova128 STATE0, MSGTMP4 46 mova128 STATE0, XMMTMP4
49 palignr $8, STATE1, STATE0 /* ABEF */ 47 palignr $8, STATE1, STATE0 /* ABEF */
50 pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ 48 pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */
51 49
52 mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), SHUF_MASK 50/* XMMTMP4 holds flip mask from here... */
51 mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP4
53 leaq K256+8*16(%rip), SHA256CONSTANTS 52 leaq K256+8*16(%rip), SHA256CONSTANTS
54 53
55 /* Save hash values for addition after rounds */ 54 /* Save hash values for addition after rounds */
@@ -58,7 +57,7 @@ sha256_process_block64_shaNI:
58 57
59 /* Rounds 0-3 */ 58 /* Rounds 0-3 */
60 movu128 0*16(DATA_PTR), MSG 59 movu128 0*16(DATA_PTR), MSG
61 pshufb SHUF_MASK, MSG 60 pshufb XMMTMP4, MSG
62 mova128 MSG, MSGTMP0 61 mova128 MSG, MSGTMP0
63 paddd 0*16-8*16(SHA256CONSTANTS), MSG 62 paddd 0*16-8*16(SHA256CONSTANTS), MSG
64 sha256rnds2 STATE0, STATE1 63 sha256rnds2 STATE0, STATE1
@@ -67,7 +66,7 @@ sha256_process_block64_shaNI:
67 66
68 /* Rounds 4-7 */ 67 /* Rounds 4-7 */
69 movu128 1*16(DATA_PTR), MSG 68 movu128 1*16(DATA_PTR), MSG
70 pshufb SHUF_MASK, MSG 69 pshufb XMMTMP4, MSG
71 mova128 MSG, MSGTMP1 70 mova128 MSG, MSGTMP1
72 paddd 1*16-8*16(SHA256CONSTANTS), MSG 71 paddd 1*16-8*16(SHA256CONSTANTS), MSG
73 sha256rnds2 STATE0, STATE1 72 sha256rnds2 STATE0, STATE1
@@ -77,7 +76,7 @@ sha256_process_block64_shaNI:
77 76
78 /* Rounds 8-11 */ 77 /* Rounds 8-11 */
79 movu128 2*16(DATA_PTR), MSG 78 movu128 2*16(DATA_PTR), MSG
80 pshufb SHUF_MASK, MSG 79 pshufb XMMTMP4, MSG
81 mova128 MSG, MSGTMP2 80 mova128 MSG, MSGTMP2
82 paddd 2*16-8*16(SHA256CONSTANTS), MSG 81 paddd 2*16-8*16(SHA256CONSTANTS), MSG
83 sha256rnds2 STATE0, STATE1 82 sha256rnds2 STATE0, STATE1
@@ -87,13 +86,14 @@ sha256_process_block64_shaNI:
87 86
88 /* Rounds 12-15 */ 87 /* Rounds 12-15 */
89 movu128 3*16(DATA_PTR), MSG 88 movu128 3*16(DATA_PTR), MSG
90 pshufb SHUF_MASK, MSG 89 pshufb XMMTMP4, MSG
90/* ...to here */
91 mova128 MSG, MSGTMP3 91 mova128 MSG, MSGTMP3
92 paddd 3*16-8*16(SHA256CONSTANTS), MSG 92 paddd 3*16-8*16(SHA256CONSTANTS), MSG
93 sha256rnds2 STATE0, STATE1 93 sha256rnds2 STATE0, STATE1
94 mova128 MSGTMP3, MSGTMP4 94 mova128 MSGTMP3, XMMTMP4
95 palignr $4, MSGTMP2, MSGTMP4 95 palignr $4, MSGTMP2, XMMTMP4
96 paddd MSGTMP4, MSGTMP0 96 paddd XMMTMP4, MSGTMP0
97 sha256msg2 MSGTMP3, MSGTMP0 97 sha256msg2 MSGTMP3, MSGTMP0
98 shuf128_32 $0x0E, MSG, MSG 98 shuf128_32 $0x0E, MSG, MSG
99 sha256rnds2 STATE1, STATE0 99 sha256rnds2 STATE1, STATE0
@@ -103,9 +103,9 @@ sha256_process_block64_shaNI:
103 mova128 MSGTMP0, MSG 103 mova128 MSGTMP0, MSG
104 paddd 4*16-8*16(SHA256CONSTANTS), MSG 104 paddd 4*16-8*16(SHA256CONSTANTS), MSG
105 sha256rnds2 STATE0, STATE1 105 sha256rnds2 STATE0, STATE1
106 mova128 MSGTMP0, MSGTMP4 106 mova128 MSGTMP0, XMMTMP4
107 palignr $4, MSGTMP3, MSGTMP4 107 palignr $4, MSGTMP3, XMMTMP4
108 paddd MSGTMP4, MSGTMP1 108 paddd XMMTMP4, MSGTMP1
109 sha256msg2 MSGTMP0, MSGTMP1 109 sha256msg2 MSGTMP0, MSGTMP1
110 shuf128_32 $0x0E, MSG, MSG 110 shuf128_32 $0x0E, MSG, MSG
111 sha256rnds2 STATE1, STATE0 111 sha256rnds2 STATE1, STATE0
@@ -115,9 +115,9 @@ sha256_process_block64_shaNI:
115 mova128 MSGTMP1, MSG 115 mova128 MSGTMP1, MSG
116 paddd 5*16-8*16(SHA256CONSTANTS), MSG 116 paddd 5*16-8*16(SHA256CONSTANTS), MSG
117 sha256rnds2 STATE0, STATE1 117 sha256rnds2 STATE0, STATE1
118 mova128 MSGTMP1, MSGTMP4 118 mova128 MSGTMP1, XMMTMP4
119 palignr $4, MSGTMP0, MSGTMP4 119 palignr $4, MSGTMP0, XMMTMP4
120 paddd MSGTMP4, MSGTMP2 120 paddd XMMTMP4, MSGTMP2
121 sha256msg2 MSGTMP1, MSGTMP2 121 sha256msg2 MSGTMP1, MSGTMP2
122 shuf128_32 $0x0E, MSG, MSG 122 shuf128_32 $0x0E, MSG, MSG
123 sha256rnds2 STATE1, STATE0 123 sha256rnds2 STATE1, STATE0
@@ -127,9 +127,9 @@ sha256_process_block64_shaNI:
127 mova128 MSGTMP2, MSG 127 mova128 MSGTMP2, MSG
128 paddd 6*16-8*16(SHA256CONSTANTS), MSG 128 paddd 6*16-8*16(SHA256CONSTANTS), MSG
129 sha256rnds2 STATE0, STATE1 129 sha256rnds2 STATE0, STATE1
130 mova128 MSGTMP2, MSGTMP4 130 mova128 MSGTMP2, XMMTMP4
131 palignr $4, MSGTMP1, MSGTMP4 131 palignr $4, MSGTMP1, XMMTMP4
132 paddd MSGTMP4, MSGTMP3 132 paddd XMMTMP4, MSGTMP3
133 sha256msg2 MSGTMP2, MSGTMP3 133 sha256msg2 MSGTMP2, MSGTMP3
134 shuf128_32 $0x0E, MSG, MSG 134 shuf128_32 $0x0E, MSG, MSG
135 sha256rnds2 STATE1, STATE0 135 sha256rnds2 STATE1, STATE0
@@ -139,9 +139,9 @@ sha256_process_block64_shaNI:
139 mova128 MSGTMP3, MSG 139 mova128 MSGTMP3, MSG
140 paddd 7*16-8*16(SHA256CONSTANTS), MSG 140 paddd 7*16-8*16(SHA256CONSTANTS), MSG
141 sha256rnds2 STATE0, STATE1 141 sha256rnds2 STATE0, STATE1
142 mova128 MSGTMP3, MSGTMP4 142 mova128 MSGTMP3, XMMTMP4
143 palignr $4, MSGTMP2, MSGTMP4 143 palignr $4, MSGTMP2, XMMTMP4
144 paddd MSGTMP4, MSGTMP0 144 paddd XMMTMP4, MSGTMP0
145 sha256msg2 MSGTMP3, MSGTMP0 145 sha256msg2 MSGTMP3, MSGTMP0
146 shuf128_32 $0x0E, MSG, MSG 146 shuf128_32 $0x0E, MSG, MSG
147 sha256rnds2 STATE1, STATE0 147 sha256rnds2 STATE1, STATE0
@@ -151,9 +151,9 @@ sha256_process_block64_shaNI:
151 mova128 MSGTMP0, MSG 151 mova128 MSGTMP0, MSG
152 paddd 8*16-8*16(SHA256CONSTANTS), MSG 152 paddd 8*16-8*16(SHA256CONSTANTS), MSG
153 sha256rnds2 STATE0, STATE1 153 sha256rnds2 STATE0, STATE1
154 mova128 MSGTMP0, MSGTMP4 154 mova128 MSGTMP0, XMMTMP4
155 palignr $4, MSGTMP3, MSGTMP4 155 palignr $4, MSGTMP3, XMMTMP4
156 paddd MSGTMP4, MSGTMP1 156 paddd XMMTMP4, MSGTMP1
157 sha256msg2 MSGTMP0, MSGTMP1 157 sha256msg2 MSGTMP0, MSGTMP1
158 shuf128_32 $0x0E, MSG, MSG 158 shuf128_32 $0x0E, MSG, MSG
159 sha256rnds2 STATE1, STATE0 159 sha256rnds2 STATE1, STATE0
@@ -163,9 +163,9 @@ sha256_process_block64_shaNI:
163 mova128 MSGTMP1, MSG 163 mova128 MSGTMP1, MSG
164 paddd 9*16-8*16(SHA256CONSTANTS), MSG 164 paddd 9*16-8*16(SHA256CONSTANTS), MSG
165 sha256rnds2 STATE0, STATE1 165 sha256rnds2 STATE0, STATE1
166 mova128 MSGTMP1, MSGTMP4 166 mova128 MSGTMP1, XMMTMP4
167 palignr $4, MSGTMP0, MSGTMP4 167 palignr $4, MSGTMP0, XMMTMP4
168 paddd MSGTMP4, MSGTMP2 168 paddd XMMTMP4, MSGTMP2
169 sha256msg2 MSGTMP1, MSGTMP2 169 sha256msg2 MSGTMP1, MSGTMP2
170 shuf128_32 $0x0E, MSG, MSG 170 shuf128_32 $0x0E, MSG, MSG
171 sha256rnds2 STATE1, STATE0 171 sha256rnds2 STATE1, STATE0
@@ -175,9 +175,9 @@ sha256_process_block64_shaNI:
175 mova128 MSGTMP2, MSG 175 mova128 MSGTMP2, MSG
176 paddd 10*16-8*16(SHA256CONSTANTS), MSG 176 paddd 10*16-8*16(SHA256CONSTANTS), MSG
177 sha256rnds2 STATE0, STATE1 177 sha256rnds2 STATE0, STATE1
178 mova128 MSGTMP2, MSGTMP4 178 mova128 MSGTMP2, XMMTMP4
179 palignr $4, MSGTMP1, MSGTMP4 179 palignr $4, MSGTMP1, XMMTMP4
180 paddd MSGTMP4, MSGTMP3 180 paddd XMMTMP4, MSGTMP3
181 sha256msg2 MSGTMP2, MSGTMP3 181 sha256msg2 MSGTMP2, MSGTMP3
182 shuf128_32 $0x0E, MSG, MSG 182 shuf128_32 $0x0E, MSG, MSG
183 sha256rnds2 STATE1, STATE0 183 sha256rnds2 STATE1, STATE0
@@ -187,9 +187,9 @@ sha256_process_block64_shaNI:
187 mova128 MSGTMP3, MSG 187 mova128 MSGTMP3, MSG
188 paddd 11*16-8*16(SHA256CONSTANTS), MSG 188 paddd 11*16-8*16(SHA256CONSTANTS), MSG
189 sha256rnds2 STATE0, STATE1 189 sha256rnds2 STATE0, STATE1
190 mova128 MSGTMP3, MSGTMP4 190 mova128 MSGTMP3, XMMTMP4
191 palignr $4, MSGTMP2, MSGTMP4 191 palignr $4, MSGTMP2, XMMTMP4
192 paddd MSGTMP4, MSGTMP0 192 paddd XMMTMP4, MSGTMP0
193 sha256msg2 MSGTMP3, MSGTMP0 193 sha256msg2 MSGTMP3, MSGTMP0
194 shuf128_32 $0x0E, MSG, MSG 194 shuf128_32 $0x0E, MSG, MSG
195 sha256rnds2 STATE1, STATE0 195 sha256rnds2 STATE1, STATE0
@@ -199,9 +199,9 @@ sha256_process_block64_shaNI:
199 mova128 MSGTMP0, MSG 199 mova128 MSGTMP0, MSG
200 paddd 12*16-8*16(SHA256CONSTANTS), MSG 200 paddd 12*16-8*16(SHA256CONSTANTS), MSG
201 sha256rnds2 STATE0, STATE1 201 sha256rnds2 STATE0, STATE1
202 mova128 MSGTMP0, MSGTMP4 202 mova128 MSGTMP0, XMMTMP4
203 palignr $4, MSGTMP3, MSGTMP4 203 palignr $4, MSGTMP3, XMMTMP4
204 paddd MSGTMP4, MSGTMP1 204 paddd XMMTMP4, MSGTMP1
205 sha256msg2 MSGTMP0, MSGTMP1 205 sha256msg2 MSGTMP0, MSGTMP1
206 shuf128_32 $0x0E, MSG, MSG 206 shuf128_32 $0x0E, MSG, MSG
207 sha256rnds2 STATE1, STATE0 207 sha256rnds2 STATE1, STATE0
@@ -211,9 +211,9 @@ sha256_process_block64_shaNI:
211 mova128 MSGTMP1, MSG 211 mova128 MSGTMP1, MSG
212 paddd 13*16-8*16(SHA256CONSTANTS), MSG 212 paddd 13*16-8*16(SHA256CONSTANTS), MSG
213 sha256rnds2 STATE0, STATE1 213 sha256rnds2 STATE0, STATE1
214 mova128 MSGTMP1, MSGTMP4 214 mova128 MSGTMP1, XMMTMP4
215 palignr $4, MSGTMP0, MSGTMP4 215 palignr $4, MSGTMP0, XMMTMP4
216 paddd MSGTMP4, MSGTMP2 216 paddd XMMTMP4, MSGTMP2
217 sha256msg2 MSGTMP1, MSGTMP2 217 sha256msg2 MSGTMP1, MSGTMP2
218 shuf128_32 $0x0E, MSG, MSG 218 shuf128_32 $0x0E, MSG, MSG
219 sha256rnds2 STATE1, STATE0 219 sha256rnds2 STATE1, STATE0
@@ -222,9 +222,9 @@ sha256_process_block64_shaNI:
222 mova128 MSGTMP2, MSG 222 mova128 MSGTMP2, MSG
223 paddd 14*16-8*16(SHA256CONSTANTS), MSG 223 paddd 14*16-8*16(SHA256CONSTANTS), MSG
224 sha256rnds2 STATE0, STATE1 224 sha256rnds2 STATE0, STATE1
225 mova128 MSGTMP2, MSGTMP4 225 mova128 MSGTMP2, XMMTMP4
226 palignr $4, MSGTMP1, MSGTMP4 226 palignr $4, MSGTMP1, XMMTMP4
227 paddd MSGTMP4, MSGTMP3 227 paddd XMMTMP4, MSGTMP3
228 sha256msg2 MSGTMP2, MSGTMP3 228 sha256msg2 MSGTMP2, MSGTMP3
229 shuf128_32 $0x0E, MSG, MSG 229 shuf128_32 $0x0E, MSG, MSG
230 sha256rnds2 STATE1, STATE0 230 sha256rnds2 STATE1, STATE0
@@ -243,9 +243,9 @@ sha256_process_block64_shaNI:
243 /* Write hash values back in the correct order */ 243 /* Write hash values back in the correct order */
244 shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ 244 shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */
245 shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ 245 shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */
246 mova128 STATE0, MSGTMP4 246 mova128 STATE0, XMMTMP4
247 pblendw $0xF0, STATE1, STATE0 /* DCBA */ 247 pblendw $0xF0, STATE1, STATE0 /* DCBA */
248 palignr $8, MSGTMP4, STATE1 /* HGFE */ 248 palignr $8, XMMTMP4, STATE1 /* HGFE */
249 249
250 movu128 STATE0, 80+0*16(%rdi) 250 movu128 STATE0, 80+0*16(%rdi)
251 movu128 STATE1, 80+1*16(%rdi) 251 movu128 STATE1, 80+1*16(%rdi)