aboutsummaryrefslogtreecommitdiff
path: root/libbb/hash_md5_sha256_x86-64_shaNI.S
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2022-02-09 00:33:39 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2022-02-09 00:33:39 +0100
commit c0ff0d4528d718c20b9ca2290bd10d59e9f794a3 (patch)
tree 7c0879d37c70fef047a55ef72f4aff5dcf88ab12 /libbb/hash_md5_sha256_x86-64_shaNI.S
parent eb8d5f3b8f3c91f3ed82a52b4ce52a154c146ede (diff)
download busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.tar.gz
busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.tar.bz2
busybox-w32-c0ff0d4528d718c20b9ca2290bd10d59e9f794a3.zip
libbb/sha256: code shrink in 32-bit x86
function                                             old     new   delta
sha256_process_block64_shaNI                         713     697     -16

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to '')
-rw-r--r-- libbb/hash_md5_sha256_x86-64_shaNI.S 107
1 files changed, 54 insertions, 53 deletions
diff --git a/libbb/hash_md5_sha256_x86-64_shaNI.S b/libbb/hash_md5_sha256_x86-64_shaNI.S
index c6c931341..b5c950a9a 100644
--- a/libbb/hash_md5_sha256_x86-64_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-64_shaNI.S
@@ -31,7 +31,8 @@
31#define MSGTMP1 %xmm4 31#define MSGTMP1 %xmm4
32#define MSGTMP2 %xmm5 32#define MSGTMP2 %xmm5
33#define MSGTMP3 %xmm6 33#define MSGTMP3 %xmm6
34#define XMMTMP4 %xmm7 34
35#define XMMTMP %xmm7
35 36
36#define ABEF_SAVE %xmm9 37#define ABEF_SAVE %xmm9
37#define CDGH_SAVE %xmm10 38#define CDGH_SAVE %xmm10
@@ -41,14 +42,14 @@ sha256_process_block64_shaNI:
41 movu128 80+0*16(%rdi), STATE0 42 movu128 80+0*16(%rdi), STATE0
42 movu128 80+1*16(%rdi), STATE1 43 movu128 80+1*16(%rdi), STATE1
43 44
44 shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */ 45 shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */
45 shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */ 46 shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */
46 mova128 STATE0, XMMTMP4 47 mova128 STATE0, XMMTMP
47 palignr $8, STATE1, STATE0 /* ABEF */ 48 palignr $8, STATE1, STATE0 /* ABEF */
48 pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */ 49 pblendw $0xF0, XMMTMP, STATE1 /* CDGH */
49 50
50/* XMMTMP4 holds flip mask from here... */ 51/* XMMTMP holds flip mask from here... */
51 mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP4 52 mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP
52 leaq K256+8*16(%rip), SHA256CONSTANTS 53 leaq K256+8*16(%rip), SHA256CONSTANTS
53 54
54 /* Save hash values for addition after rounds */ 55 /* Save hash values for addition after rounds */
@@ -57,7 +58,7 @@ sha256_process_block64_shaNI:
57 58
58 /* Rounds 0-3 */ 59 /* Rounds 0-3 */
59 movu128 0*16(DATA_PTR), MSG 60 movu128 0*16(DATA_PTR), MSG
60 pshufb XMMTMP4, MSG 61 pshufb XMMTMP, MSG
61 mova128 MSG, MSGTMP0 62 mova128 MSG, MSGTMP0
62 paddd 0*16-8*16(SHA256CONSTANTS), MSG 63 paddd 0*16-8*16(SHA256CONSTANTS), MSG
63 sha256rnds2 STATE0, STATE1 64 sha256rnds2 STATE0, STATE1
@@ -66,7 +67,7 @@ sha256_process_block64_shaNI:
66 67
67 /* Rounds 4-7 */ 68 /* Rounds 4-7 */
68 movu128 1*16(DATA_PTR), MSG 69 movu128 1*16(DATA_PTR), MSG
69 pshufb XMMTMP4, MSG 70 pshufb XMMTMP, MSG
70 mova128 MSG, MSGTMP1 71 mova128 MSG, MSGTMP1
71 paddd 1*16-8*16(SHA256CONSTANTS), MSG 72 paddd 1*16-8*16(SHA256CONSTANTS), MSG
72 sha256rnds2 STATE0, STATE1 73 sha256rnds2 STATE0, STATE1
@@ -76,7 +77,7 @@ sha256_process_block64_shaNI:
76 77
77 /* Rounds 8-11 */ 78 /* Rounds 8-11 */
78 movu128 2*16(DATA_PTR), MSG 79 movu128 2*16(DATA_PTR), MSG
79 pshufb XMMTMP4, MSG 80 pshufb XMMTMP, MSG
80 mova128 MSG, MSGTMP2 81 mova128 MSG, MSGTMP2
81 paddd 2*16-8*16(SHA256CONSTANTS), MSG 82 paddd 2*16-8*16(SHA256CONSTANTS), MSG
82 sha256rnds2 STATE0, STATE1 83 sha256rnds2 STATE0, STATE1
@@ -86,14 +87,14 @@ sha256_process_block64_shaNI:
86 87
87 /* Rounds 12-15 */ 88 /* Rounds 12-15 */
88 movu128 3*16(DATA_PTR), MSG 89 movu128 3*16(DATA_PTR), MSG
89 pshufb XMMTMP4, MSG 90 pshufb XMMTMP, MSG
90/* ...to here */ 91/* ...to here */
91 mova128 MSG, MSGTMP3 92 mova128 MSG, MSGTMP3
92 paddd 3*16-8*16(SHA256CONSTANTS), MSG 93 paddd 3*16-8*16(SHA256CONSTANTS), MSG
93 sha256rnds2 STATE0, STATE1 94 sha256rnds2 STATE0, STATE1
94 mova128 MSGTMP3, XMMTMP4 95 mova128 MSGTMP3, XMMTMP
95 palignr $4, MSGTMP2, XMMTMP4 96 palignr $4, MSGTMP2, XMMTMP
96 paddd XMMTMP4, MSGTMP0 97 paddd XMMTMP, MSGTMP0
97 sha256msg2 MSGTMP3, MSGTMP0 98 sha256msg2 MSGTMP3, MSGTMP0
98 shuf128_32 $0x0E, MSG, MSG 99 shuf128_32 $0x0E, MSG, MSG
99 sha256rnds2 STATE1, STATE0 100 sha256rnds2 STATE1, STATE0
@@ -103,9 +104,9 @@ sha256_process_block64_shaNI:
103 mova128 MSGTMP0, MSG 104 mova128 MSGTMP0, MSG
104 paddd 4*16-8*16(SHA256CONSTANTS), MSG 105 paddd 4*16-8*16(SHA256CONSTANTS), MSG
105 sha256rnds2 STATE0, STATE1 106 sha256rnds2 STATE0, STATE1
106 mova128 MSGTMP0, XMMTMP4 107 mova128 MSGTMP0, XMMTMP
107 palignr $4, MSGTMP3, XMMTMP4 108 palignr $4, MSGTMP3, XMMTMP
108 paddd XMMTMP4, MSGTMP1 109 paddd XMMTMP, MSGTMP1
109 sha256msg2 MSGTMP0, MSGTMP1 110 sha256msg2 MSGTMP0, MSGTMP1
110 shuf128_32 $0x0E, MSG, MSG 111 shuf128_32 $0x0E, MSG, MSG
111 sha256rnds2 STATE1, STATE0 112 sha256rnds2 STATE1, STATE0
@@ -115,9 +116,9 @@ sha256_process_block64_shaNI:
115 mova128 MSGTMP1, MSG 116 mova128 MSGTMP1, MSG
116 paddd 5*16-8*16(SHA256CONSTANTS), MSG 117 paddd 5*16-8*16(SHA256CONSTANTS), MSG
117 sha256rnds2 STATE0, STATE1 118 sha256rnds2 STATE0, STATE1
118 mova128 MSGTMP1, XMMTMP4 119 mova128 MSGTMP1, XMMTMP
119 palignr $4, MSGTMP0, XMMTMP4 120 palignr $4, MSGTMP0, XMMTMP
120 paddd XMMTMP4, MSGTMP2 121 paddd XMMTMP, MSGTMP2
121 sha256msg2 MSGTMP1, MSGTMP2 122 sha256msg2 MSGTMP1, MSGTMP2
122 shuf128_32 $0x0E, MSG, MSG 123 shuf128_32 $0x0E, MSG, MSG
123 sha256rnds2 STATE1, STATE0 124 sha256rnds2 STATE1, STATE0
@@ -127,9 +128,9 @@ sha256_process_block64_shaNI:
127 mova128 MSGTMP2, MSG 128 mova128 MSGTMP2, MSG
128 paddd 6*16-8*16(SHA256CONSTANTS), MSG 129 paddd 6*16-8*16(SHA256CONSTANTS), MSG
129 sha256rnds2 STATE0, STATE1 130 sha256rnds2 STATE0, STATE1
130 mova128 MSGTMP2, XMMTMP4 131 mova128 MSGTMP2, XMMTMP
131 palignr $4, MSGTMP1, XMMTMP4 132 palignr $4, MSGTMP1, XMMTMP
132 paddd XMMTMP4, MSGTMP3 133 paddd XMMTMP, MSGTMP3
133 sha256msg2 MSGTMP2, MSGTMP3 134 sha256msg2 MSGTMP2, MSGTMP3
134 shuf128_32 $0x0E, MSG, MSG 135 shuf128_32 $0x0E, MSG, MSG
135 sha256rnds2 STATE1, STATE0 136 sha256rnds2 STATE1, STATE0
@@ -139,9 +140,9 @@ sha256_process_block64_shaNI:
139 mova128 MSGTMP3, MSG 140 mova128 MSGTMP3, MSG
140 paddd 7*16-8*16(SHA256CONSTANTS), MSG 141 paddd 7*16-8*16(SHA256CONSTANTS), MSG
141 sha256rnds2 STATE0, STATE1 142 sha256rnds2 STATE0, STATE1
142 mova128 MSGTMP3, XMMTMP4 143 mova128 MSGTMP3, XMMTMP
143 palignr $4, MSGTMP2, XMMTMP4 144 palignr $4, MSGTMP2, XMMTMP
144 paddd XMMTMP4, MSGTMP0 145 paddd XMMTMP, MSGTMP0
145 sha256msg2 MSGTMP3, MSGTMP0 146 sha256msg2 MSGTMP3, MSGTMP0
146 shuf128_32 $0x0E, MSG, MSG 147 shuf128_32 $0x0E, MSG, MSG
147 sha256rnds2 STATE1, STATE0 148 sha256rnds2 STATE1, STATE0
@@ -151,9 +152,9 @@ sha256_process_block64_shaNI:
151 mova128 MSGTMP0, MSG 152 mova128 MSGTMP0, MSG
152 paddd 8*16-8*16(SHA256CONSTANTS), MSG 153 paddd 8*16-8*16(SHA256CONSTANTS), MSG
153 sha256rnds2 STATE0, STATE1 154 sha256rnds2 STATE0, STATE1
154 mova128 MSGTMP0, XMMTMP4 155 mova128 MSGTMP0, XMMTMP
155 palignr $4, MSGTMP3, XMMTMP4 156 palignr $4, MSGTMP3, XMMTMP
156 paddd XMMTMP4, MSGTMP1 157 paddd XMMTMP, MSGTMP1
157 sha256msg2 MSGTMP0, MSGTMP1 158 sha256msg2 MSGTMP0, MSGTMP1
158 shuf128_32 $0x0E, MSG, MSG 159 shuf128_32 $0x0E, MSG, MSG
159 sha256rnds2 STATE1, STATE0 160 sha256rnds2 STATE1, STATE0
@@ -163,9 +164,9 @@ sha256_process_block64_shaNI:
163 mova128 MSGTMP1, MSG 164 mova128 MSGTMP1, MSG
164 paddd 9*16-8*16(SHA256CONSTANTS), MSG 165 paddd 9*16-8*16(SHA256CONSTANTS), MSG
165 sha256rnds2 STATE0, STATE1 166 sha256rnds2 STATE0, STATE1
166 mova128 MSGTMP1, XMMTMP4 167 mova128 MSGTMP1, XMMTMP
167 palignr $4, MSGTMP0, XMMTMP4 168 palignr $4, MSGTMP0, XMMTMP
168 paddd XMMTMP4, MSGTMP2 169 paddd XMMTMP, MSGTMP2
169 sha256msg2 MSGTMP1, MSGTMP2 170 sha256msg2 MSGTMP1, MSGTMP2
170 shuf128_32 $0x0E, MSG, MSG 171 shuf128_32 $0x0E, MSG, MSG
171 sha256rnds2 STATE1, STATE0 172 sha256rnds2 STATE1, STATE0
@@ -175,9 +176,9 @@ sha256_process_block64_shaNI:
175 mova128 MSGTMP2, MSG 176 mova128 MSGTMP2, MSG
176 paddd 10*16-8*16(SHA256CONSTANTS), MSG 177 paddd 10*16-8*16(SHA256CONSTANTS), MSG
177 sha256rnds2 STATE0, STATE1 178 sha256rnds2 STATE0, STATE1
178 mova128 MSGTMP2, XMMTMP4 179 mova128 MSGTMP2, XMMTMP
179 palignr $4, MSGTMP1, XMMTMP4 180 palignr $4, MSGTMP1, XMMTMP
180 paddd XMMTMP4, MSGTMP3 181 paddd XMMTMP, MSGTMP3
181 sha256msg2 MSGTMP2, MSGTMP3 182 sha256msg2 MSGTMP2, MSGTMP3
182 shuf128_32 $0x0E, MSG, MSG 183 shuf128_32 $0x0E, MSG, MSG
183 sha256rnds2 STATE1, STATE0 184 sha256rnds2 STATE1, STATE0
@@ -187,9 +188,9 @@ sha256_process_block64_shaNI:
187 mova128 MSGTMP3, MSG 188 mova128 MSGTMP3, MSG
188 paddd 11*16-8*16(SHA256CONSTANTS), MSG 189 paddd 11*16-8*16(SHA256CONSTANTS), MSG
189 sha256rnds2 STATE0, STATE1 190 sha256rnds2 STATE0, STATE1
190 mova128 MSGTMP3, XMMTMP4 191 mova128 MSGTMP3, XMMTMP
191 palignr $4, MSGTMP2, XMMTMP4 192 palignr $4, MSGTMP2, XMMTMP
192 paddd XMMTMP4, MSGTMP0 193 paddd XMMTMP, MSGTMP0
193 sha256msg2 MSGTMP3, MSGTMP0 194 sha256msg2 MSGTMP3, MSGTMP0
194 shuf128_32 $0x0E, MSG, MSG 195 shuf128_32 $0x0E, MSG, MSG
195 sha256rnds2 STATE1, STATE0 196 sha256rnds2 STATE1, STATE0
@@ -199,9 +200,9 @@ sha256_process_block64_shaNI:
199 mova128 MSGTMP0, MSG 200 mova128 MSGTMP0, MSG
200 paddd 12*16-8*16(SHA256CONSTANTS), MSG 201 paddd 12*16-8*16(SHA256CONSTANTS), MSG
201 sha256rnds2 STATE0, STATE1 202 sha256rnds2 STATE0, STATE1
202 mova128 MSGTMP0, XMMTMP4 203 mova128 MSGTMP0, XMMTMP
203 palignr $4, MSGTMP3, XMMTMP4 204 palignr $4, MSGTMP3, XMMTMP
204 paddd XMMTMP4, MSGTMP1 205 paddd XMMTMP, MSGTMP1
205 sha256msg2 MSGTMP0, MSGTMP1 206 sha256msg2 MSGTMP0, MSGTMP1
206 shuf128_32 $0x0E, MSG, MSG 207 shuf128_32 $0x0E, MSG, MSG
207 sha256rnds2 STATE1, STATE0 208 sha256rnds2 STATE1, STATE0
@@ -211,9 +212,9 @@ sha256_process_block64_shaNI:
211 mova128 MSGTMP1, MSG 212 mova128 MSGTMP1, MSG
212 paddd 13*16-8*16(SHA256CONSTANTS), MSG 213 paddd 13*16-8*16(SHA256CONSTANTS), MSG
213 sha256rnds2 STATE0, STATE1 214 sha256rnds2 STATE0, STATE1
214 mova128 MSGTMP1, XMMTMP4 215 mova128 MSGTMP1, XMMTMP
215 palignr $4, MSGTMP0, XMMTMP4 216 palignr $4, MSGTMP0, XMMTMP
216 paddd XMMTMP4, MSGTMP2 217 paddd XMMTMP, MSGTMP2
217 sha256msg2 MSGTMP1, MSGTMP2 218 sha256msg2 MSGTMP1, MSGTMP2
218 shuf128_32 $0x0E, MSG, MSG 219 shuf128_32 $0x0E, MSG, MSG
219 sha256rnds2 STATE1, STATE0 220 sha256rnds2 STATE1, STATE0
@@ -222,9 +223,9 @@ sha256_process_block64_shaNI:
222 mova128 MSGTMP2, MSG 223 mova128 MSGTMP2, MSG
223 paddd 14*16-8*16(SHA256CONSTANTS), MSG 224 paddd 14*16-8*16(SHA256CONSTANTS), MSG
224 sha256rnds2 STATE0, STATE1 225 sha256rnds2 STATE0, STATE1
225 mova128 MSGTMP2, XMMTMP4 226 mova128 MSGTMP2, XMMTMP
226 palignr $4, MSGTMP1, XMMTMP4 227 palignr $4, MSGTMP1, XMMTMP
227 paddd XMMTMP4, MSGTMP3 228 paddd XMMTMP, MSGTMP3
228 sha256msg2 MSGTMP2, MSGTMP3 229 sha256msg2 MSGTMP2, MSGTMP3
229 shuf128_32 $0x0E, MSG, MSG 230 shuf128_32 $0x0E, MSG, MSG
230 sha256rnds2 STATE1, STATE0 231 sha256rnds2 STATE1, STATE0
@@ -241,11 +242,11 @@ sha256_process_block64_shaNI:
241 paddd CDGH_SAVE, STATE1 242 paddd CDGH_SAVE, STATE1
242 243
243 /* Write hash values back in the correct order */ 244 /* Write hash values back in the correct order */
244 shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */ 245 shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */
245 shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */ 246 shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */
246 mova128 STATE0, XMMTMP4 247 mova128 STATE0, XMMTMP
247 pblendw $0xF0, STATE1, STATE0 /* DCBA */ 248 pblendw $0xF0, STATE1, STATE0 /* DCBA */
248 palignr $8, XMMTMP4, STATE1 /* HGFE */ 249 palignr $8, XMMTMP, STATE1 /* HGFE */
249 250
250 movu128 STATE0, 80+0*16(%rdi) 251 movu128 STATE0, 80+0*16(%rdi)
251 movu128 STATE1, 80+1*16(%rdi) 252 movu128 STATE1, 80+1*16(%rdi)