aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ron Yorston <rmy@pobox.com> 2022-04-07 11:41:52 +0100
committer Ron Yorston <rmy@pobox.com> 2022-04-07 11:41:52 +0100
commit b34845ed2e1583bec6581b6881cc7d6c84454276 (patch)
tree f1ecff71fa8f84e44f0b8794f0d2d33031f69dc8
parent af41de68901d48753eb73491d54931a99d1a13b5 (diff)
parent fc7868602ecf0d761a9a877141add4a9b6918d02 (diff)
download busybox-w32-b34845ed2e1583bec6581b6881cc7d6c84454276.tar.gz
busybox-w32-b34845ed2e1583bec6581b6881cc7d6c84454276.tar.bz2
busybox-w32-b34845ed2e1583bec6581b6881cc7d6c84454276.zip
Merge branch 'busybox'
-rw-r--r-- editors/vi.c 34
-rw-r--r-- libbb/Config.src 7
-rw-r--r-- libbb/hash_md5_sha256_x86-32_shaNI.S 92
-rw-r--r-- libbb/hash_md5_sha256_x86-64_shaNI.S 105
-rw-r--r-- libbb/hash_md5_sha_x86-32_shaNI.S 11
-rw-r--r-- libbb/hash_md5_sha_x86-64.S 10
-rwxr-xr-x libbb/hash_md5_sha_x86-64.S.sh 34
-rw-r--r-- libbb/hash_md5_sha_x86-64_shaNI.S 11
-rw-r--r-- shell/ash.c 16
-rw-r--r-- shell/ash_test/ash-vars/var_bash_repl_unterminated.right 1
-rwxr-xr-x shell/ash_test/ash-vars/var_bash_repl_unterminated.tests 2
-rw-r--r-- shell/hush_test/hush-vars/var_bash_repl_unterminated.right 1
-rwxr-xr-x shell/hush_test/hush-vars/var_bash_repl_unterminated.tests 2
-rw-r--r-- util-linux/taskset.c 3
14 files changed, 196 insertions, 133 deletions
diff --git a/editors/vi.c b/editors/vi.c
index b30369302..dd8dd488a 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -233,6 +233,11 @@
233 233
234#endif 234#endif
235 235
236#if !ENABLE_PLATFORM_MINGW32
237#define isbackspace(c) ((c) == term_orig.c_cc[VERASE] || (c) == 8 || (c) == 127)
238#else
239#define isbackspace(c) ((c) == 8 || (c) == 127)
240#endif
236 241
237enum { 242enum {
238 MAX_TABSTOP = 32, // sanity limit 243 MAX_TABSTOP = 32, // sanity limit
@@ -371,6 +376,7 @@ struct globals {
371 int last_modified_count; // = -1; 376 int last_modified_count; // = -1;
372 int cmdline_filecnt; // how many file names on cmd line 377 int cmdline_filecnt; // how many file names on cmd line
373 int cmdcnt; // repetition count 378 int cmdcnt; // repetition count
379 char *rstart; // start of text in Replace mode
374 unsigned rows, columns; // the terminal screen is this size 380 unsigned rows, columns; // the terminal screen is this size
375#if ENABLE_FEATURE_VI_ASK_TERMINAL 381#if ENABLE_FEATURE_VI_ASK_TERMINAL
376 int get_rowcol_error; 382 int get_rowcol_error;
@@ -509,6 +515,7 @@ struct globals {
509#define last_modified_count (G.last_modified_count) 515#define last_modified_count (G.last_modified_count)
510#define cmdline_filecnt (G.cmdline_filecnt ) 516#define cmdline_filecnt (G.cmdline_filecnt )
511#define cmdcnt (G.cmdcnt ) 517#define cmdcnt (G.cmdcnt )
518#define rstart (G.rstart )
512#define rows (G.rows ) 519#define rows (G.rows )
513#define columns (G.columns ) 520#define columns (G.columns )
514#define crow (G.crow ) 521#define crow (G.crow )
@@ -1272,11 +1279,7 @@ static char *get_input_line(const char *prompt)
1272 c = get_one_char(); 1279 c = get_one_char();
1273 if (c == '\n' || c == '\r' || c == 27) 1280 if (c == '\n' || c == '\r' || c == 27)
1274 break; // this is end of input 1281 break; // this is end of input
1275#if !ENABLE_PLATFORM_MINGW32 1282 if (isbackspace(c)) {
1276 if (c == term_orig.c_cc[VERASE] || c == 8 || c == 127) {
1277#else
1278 if (c == 8 || c == 127) {
1279#endif
1280 // user wants to erase prev char 1283 // user wants to erase prev char
1281 write1("\b \b"); // erase char on screen 1284 write1("\b \b"); // erase char on screen
1282 buf[--i] = '\0'; 1285 buf[--i] = '\0';
@@ -2265,12 +2268,16 @@ static char *char_insert(char *p, char c, int undo) // insert the char c at 'p'
2265 p += 1 + stupid_insert(p, ' '); 2268 p += 1 + stupid_insert(p, ' ');
2266 } 2269 }
2267#endif 2270#endif
2268#if !ENABLE_PLATFORM_MINGW32 2271 } else if (isbackspace(c)) {
2269 } else if (c == term_orig.c_cc[VERASE] || c == 8 || c == 127) { // Is this a BS 2272 if (cmd_mode == 2) {
2270#else 2273 // special treatment for backspace in Replace mode
2271 } else if (c == 8 || c == 127) { // Is this a BS 2274 if (p > rstart) {
2275 p--;
2276#if ENABLE_FEATURE_VI_UNDO
2277 undo_pop();
2272#endif 2278#endif
2273 if (p > text) { 2279 }
2280 } else if (p > text) {
2274 p--; 2281 p--;
2275 p = text_hole_delete(p, p, ALLOW_UNDO_QUEUED); // shrink buffer 1 char 2282 p = text_hole_delete(p, p, ALLOW_UNDO_QUEUED); // shrink buffer 1 char
2276 } 2283 }
@@ -3863,9 +3870,9 @@ static void do_cmd(int c)
3863 undo_queue_commit(); 3870 undo_queue_commit();
3864 } else { 3871 } else {
3865 if (1 <= c || Isprint(c)) { 3872 if (1 <= c || Isprint(c)) {
3866 if (c != 27) 3873 if (c != 27 && !isbackspace(c))
3867 dot = yank_delete(dot, dot, PARTIAL, YANKDEL, ALLOW_UNDO); // delete char 3874 dot = yank_delete(dot, dot, PARTIAL, YANKDEL, ALLOW_UNDO);
3868 dot = char_insert(dot, c, ALLOW_UNDO_CHAIN); // insert new char 3875 dot = char_insert(dot, c, ALLOW_UNDO_CHAIN);
3869 } 3876 }
3870 goto dc1; 3877 goto dc1;
3871 } 3878 }
@@ -4424,6 +4431,7 @@ static void do_cmd(int c)
4424 dc5: 4431 dc5:
4425 cmd_mode = 2; 4432 cmd_mode = 2;
4426 undo_queue_commit(); 4433 undo_queue_commit();
4434 rstart = dot;
4427 break; 4435 break;
4428 case KEYCODE_DELETE: 4436 case KEYCODE_DELETE:
4429 if (dot < end - 1) 4437 if (dot < end - 1)
diff --git a/libbb/Config.src b/libbb/Config.src
index 0ecd5bd46..66a3ffa23 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -57,11 +57,12 @@ config SHA1_SMALL
57 range 0 3 57 range 0 3
58 help 58 help
59 Trade binary size versus speed for the sha1 algorithm. 59 Trade binary size versus speed for the sha1 algorithm.
60 With FEATURE_COPYBUF_KB=64:
60 throughput MB/s size of sha1_process_block64 61 throughput MB/s size of sha1_process_block64
61 value 486 x86-64 486 x86-64 62 value 486 x86-64 486 x86-64
62 0 367 375 3657 3502 63 0 440 485 3481 3502
63 1 224 229 654 732 64 1 265 265 641 696
64 2,3 200 195 358 380 65 2,3 220 210 342 364
65 66
66config SHA1_HWACCEL 67config SHA1_HWACCEL
67 bool "SHA1: Use hardware accelerated instructions if possible" 68 bool "SHA1: Use hardware accelerated instructions if possible"
diff --git a/libbb/hash_md5_sha256_x86-32_shaNI.S b/libbb/hash_md5_sha256_x86-32_shaNI.S
index aa68193bd..3905bad9a 100644
--- a/libbb/hash_md5_sha256_x86-32_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-32_shaNI.S
@@ -4,7 +4,7 @@
4// We use shorter insns, even though they are for "wrong" 4// We use shorter insns, even though they are for "wrong"
5// data type (fp, not int). 5// data type (fp, not int).
6// For Intel, there is no penalty for doing it at all 6// For Intel, there is no penalty for doing it at all
7// (CPUs which do have such penalty do not support SHA1 insns). 7// (CPUs which do have such penalty do not support SHA insns).
8// For AMD, the penalty is one extra cycle 8// For AMD, the penalty is one extra cycle
9// (allegedly: I failed to find measurable difference). 9// (allegedly: I failed to find measurable difference).
10 10
@@ -15,6 +15,10 @@
15//#define shuf128_32 pshufd 15//#define shuf128_32 pshufd
16#define shuf128_32 shufps 16#define shuf128_32 shufps
17 17
18// pshufb and palignr are SSSE3 insns.
19// We do not check SSSE3 in cpuid,
20// all SHA-capable CPUs support it as well.
21
18 .section .text.sha256_process_block64_shaNI, "ax", @progbits 22 .section .text.sha256_process_block64_shaNI, "ax", @progbits
19 .globl sha256_process_block64_shaNI 23 .globl sha256_process_block64_shaNI
20 .hidden sha256_process_block64_shaNI 24 .hidden sha256_process_block64_shaNI
@@ -39,12 +43,13 @@
39 .balign 8 # allow decoders to fetch at least 2 first insns 43 .balign 8 # allow decoders to fetch at least 2 first insns
40sha256_process_block64_shaNI: 44sha256_process_block64_shaNI:
41 45
42 movu128 76+0*16(%eax), XMMTMP /* DCBA (msb-to-lsb: 3,2,1,0) */ 46 movu128 76+0*16(%eax), XMMTMP /* ABCD (little-endian dword order) */
43 movu128 76+1*16(%eax), STATE1 /* HGFE */ 47 movu128 76+1*16(%eax), STATE1 /* EFGH */
44/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ 48/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */
45 mova128 STATE1, STATE0 49 mova128 STATE1, STATE0
46 shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* ABEF */ 50 /* --- -------------- ABCD -- EFGH */
47 shufps SHUF(3,2,3,2), XMMTMP, STATE1 /* CDGH */ 51 shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */
52 shufps SHUF(3,2,3,2), XMMTMP, STATE1 /* HGDC */
48 53
49/* XMMTMP holds flip mask from here... */ 54/* XMMTMP holds flip mask from here... */
50 mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP 55 mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP
@@ -55,18 +60,18 @@ sha256_process_block64_shaNI:
55 pshufb XMMTMP, MSG 60 pshufb XMMTMP, MSG
56 mova128 MSG, MSGTMP0 61 mova128 MSG, MSGTMP0
57 paddd 0*16-8*16(SHA256CONSTANTS), MSG 62 paddd 0*16-8*16(SHA256CONSTANTS), MSG
58 sha256rnds2 STATE0, STATE1 63 sha256rnds2 MSG, STATE0, STATE1
59 shuf128_32 $0x0E, MSG, MSG 64 shuf128_32 $0x0E, MSG, MSG
60 sha256rnds2 STATE1, STATE0 65 sha256rnds2 MSG, STATE1, STATE0
61 66
62 /* Rounds 4-7 */ 67 /* Rounds 4-7 */
63 movu128 1*16(DATA_PTR), MSG 68 movu128 1*16(DATA_PTR), MSG
64 pshufb XMMTMP, MSG 69 pshufb XMMTMP, MSG
65 mova128 MSG, MSGTMP1 70 mova128 MSG, MSGTMP1
66 paddd 1*16-8*16(SHA256CONSTANTS), MSG 71 paddd 1*16-8*16(SHA256CONSTANTS), MSG
67 sha256rnds2 STATE0, STATE1 72 sha256rnds2 MSG, STATE0, STATE1
68 shuf128_32 $0x0E, MSG, MSG 73 shuf128_32 $0x0E, MSG, MSG
69 sha256rnds2 STATE1, STATE0 74 sha256rnds2 MSG, STATE1, STATE0
70 sha256msg1 MSGTMP1, MSGTMP0 75 sha256msg1 MSGTMP1, MSGTMP0
71 76
72 /* Rounds 8-11 */ 77 /* Rounds 8-11 */
@@ -74,9 +79,9 @@ sha256_process_block64_shaNI:
74 pshufb XMMTMP, MSG 79 pshufb XMMTMP, MSG
75 mova128 MSG, MSGTMP2 80 mova128 MSG, MSGTMP2
76 paddd 2*16-8*16(SHA256CONSTANTS), MSG 81 paddd 2*16-8*16(SHA256CONSTANTS), MSG
77 sha256rnds2 STATE0, STATE1 82 sha256rnds2 MSG, STATE0, STATE1
78 shuf128_32 $0x0E, MSG, MSG 83 shuf128_32 $0x0E, MSG, MSG
79 sha256rnds2 STATE1, STATE0 84 sha256rnds2 MSG, STATE1, STATE0
80 sha256msg1 MSGTMP2, MSGTMP1 85 sha256msg1 MSGTMP2, MSGTMP1
81 86
82 /* Rounds 12-15 */ 87 /* Rounds 12-15 */
@@ -85,159 +90,158 @@ sha256_process_block64_shaNI:
85/* ...to here */ 90/* ...to here */
86 mova128 MSG, MSGTMP3 91 mova128 MSG, MSGTMP3
87 paddd 3*16-8*16(SHA256CONSTANTS), MSG 92 paddd 3*16-8*16(SHA256CONSTANTS), MSG
88 sha256rnds2 STATE0, STATE1 93 sha256rnds2 MSG, STATE0, STATE1
89 mova128 MSGTMP3, XMMTMP 94 mova128 MSGTMP3, XMMTMP
90 palignr $4, MSGTMP2, XMMTMP 95 palignr $4, MSGTMP2, XMMTMP
91 paddd XMMTMP, MSGTMP0 96 paddd XMMTMP, MSGTMP0
92 sha256msg2 MSGTMP3, MSGTMP0 97 sha256msg2 MSGTMP3, MSGTMP0
93 shuf128_32 $0x0E, MSG, MSG 98 shuf128_32 $0x0E, MSG, MSG
94 sha256rnds2 STATE1, STATE0 99 sha256rnds2 MSG, STATE1, STATE0
95 sha256msg1 MSGTMP3, MSGTMP2 100 sha256msg1 MSGTMP3, MSGTMP2
96 101
97 /* Rounds 16-19 */ 102 /* Rounds 16-19 */
98 mova128 MSGTMP0, MSG 103 mova128 MSGTMP0, MSG
99 paddd 4*16-8*16(SHA256CONSTANTS), MSG 104 paddd 4*16-8*16(SHA256CONSTANTS), MSG
100 sha256rnds2 STATE0, STATE1 105 sha256rnds2 MSG, STATE0, STATE1
101 mova128 MSGTMP0, XMMTMP 106 mova128 MSGTMP0, XMMTMP
102 palignr $4, MSGTMP3, XMMTMP 107 palignr $4, MSGTMP3, XMMTMP
103 paddd XMMTMP, MSGTMP1 108 paddd XMMTMP, MSGTMP1
104 sha256msg2 MSGTMP0, MSGTMP1 109 sha256msg2 MSGTMP0, MSGTMP1
105 shuf128_32 $0x0E, MSG, MSG 110 shuf128_32 $0x0E, MSG, MSG
106 sha256rnds2 STATE1, STATE0 111 sha256rnds2 MSG, STATE1, STATE0
107 sha256msg1 MSGTMP0, MSGTMP3 112 sha256msg1 MSGTMP0, MSGTMP3
108 113
109 /* Rounds 20-23 */ 114 /* Rounds 20-23 */
110 mova128 MSGTMP1, MSG 115 mova128 MSGTMP1, MSG
111 paddd 5*16-8*16(SHA256CONSTANTS), MSG 116 paddd 5*16-8*16(SHA256CONSTANTS), MSG
112 sha256rnds2 STATE0, STATE1 117 sha256rnds2 MSG, STATE0, STATE1
113 mova128 MSGTMP1, XMMTMP 118 mova128 MSGTMP1, XMMTMP
114 palignr $4, MSGTMP0, XMMTMP 119 palignr $4, MSGTMP0, XMMTMP
115 paddd XMMTMP, MSGTMP2 120 paddd XMMTMP, MSGTMP2
116 sha256msg2 MSGTMP1, MSGTMP2 121 sha256msg2 MSGTMP1, MSGTMP2
117 shuf128_32 $0x0E, MSG, MSG 122 shuf128_32 $0x0E, MSG, MSG
118 sha256rnds2 STATE1, STATE0 123 sha256rnds2 MSG, STATE1, STATE0
119 sha256msg1 MSGTMP1, MSGTMP0 124 sha256msg1 MSGTMP1, MSGTMP0
120 125
121 /* Rounds 24-27 */ 126 /* Rounds 24-27 */
122 mova128 MSGTMP2, MSG 127 mova128 MSGTMP2, MSG
123 paddd 6*16-8*16(SHA256CONSTANTS), MSG 128 paddd 6*16-8*16(SHA256CONSTANTS), MSG
124 sha256rnds2 STATE0, STATE1 129 sha256rnds2 MSG, STATE0, STATE1
125 mova128 MSGTMP2, XMMTMP 130 mova128 MSGTMP2, XMMTMP
126 palignr $4, MSGTMP1, XMMTMP 131 palignr $4, MSGTMP1, XMMTMP
127 paddd XMMTMP, MSGTMP3 132 paddd XMMTMP, MSGTMP3
128 sha256msg2 MSGTMP2, MSGTMP3 133 sha256msg2 MSGTMP2, MSGTMP3
129 shuf128_32 $0x0E, MSG, MSG 134 shuf128_32 $0x0E, MSG, MSG
130 sha256rnds2 STATE1, STATE0 135 sha256rnds2 MSG, STATE1, STATE0
131 sha256msg1 MSGTMP2, MSGTMP1 136 sha256msg1 MSGTMP2, MSGTMP1
132 137
133 /* Rounds 28-31 */ 138 /* Rounds 28-31 */
134 mova128 MSGTMP3, MSG 139 mova128 MSGTMP3, MSG
135 paddd 7*16-8*16(SHA256CONSTANTS), MSG 140 paddd 7*16-8*16(SHA256CONSTANTS), MSG
136 sha256rnds2 STATE0, STATE1 141 sha256rnds2 MSG, STATE0, STATE1
137 mova128 MSGTMP3, XMMTMP 142 mova128 MSGTMP3, XMMTMP
138 palignr $4, MSGTMP2, XMMTMP 143 palignr $4, MSGTMP2, XMMTMP
139 paddd XMMTMP, MSGTMP0 144 paddd XMMTMP, MSGTMP0
140 sha256msg2 MSGTMP3, MSGTMP0 145 sha256msg2 MSGTMP3, MSGTMP0
141 shuf128_32 $0x0E, MSG, MSG 146 shuf128_32 $0x0E, MSG, MSG
142 sha256rnds2 STATE1, STATE0 147 sha256rnds2 MSG, STATE1, STATE0
143 sha256msg1 MSGTMP3, MSGTMP2 148 sha256msg1 MSGTMP3, MSGTMP2
144 149
145 /* Rounds 32-35 */ 150 /* Rounds 32-35 */
146 mova128 MSGTMP0, MSG 151 mova128 MSGTMP0, MSG
147 paddd 8*16-8*16(SHA256CONSTANTS), MSG 152 paddd 8*16-8*16(SHA256CONSTANTS), MSG
148 sha256rnds2 STATE0, STATE1 153 sha256rnds2 MSG, STATE0, STATE1
149 mova128 MSGTMP0, XMMTMP 154 mova128 MSGTMP0, XMMTMP
150 palignr $4, MSGTMP3, XMMTMP 155 palignr $4, MSGTMP3, XMMTMP
151 paddd XMMTMP, MSGTMP1 156 paddd XMMTMP, MSGTMP1
152 sha256msg2 MSGTMP0, MSGTMP1 157 sha256msg2 MSGTMP0, MSGTMP1
153 shuf128_32 $0x0E, MSG, MSG 158 shuf128_32 $0x0E, MSG, MSG
154 sha256rnds2 STATE1, STATE0 159 sha256rnds2 MSG, STATE1, STATE0
155 sha256msg1 MSGTMP0, MSGTMP3 160 sha256msg1 MSGTMP0, MSGTMP3
156 161
157 /* Rounds 36-39 */ 162 /* Rounds 36-39 */
158 mova128 MSGTMP1, MSG 163 mova128 MSGTMP1, MSG
159 paddd 9*16-8*16(SHA256CONSTANTS), MSG 164 paddd 9*16-8*16(SHA256CONSTANTS), MSG
160 sha256rnds2 STATE0, STATE1 165 sha256rnds2 MSG, STATE0, STATE1
161 mova128 MSGTMP1, XMMTMP 166 mova128 MSGTMP1, XMMTMP
162 palignr $4, MSGTMP0, XMMTMP 167 palignr $4, MSGTMP0, XMMTMP
163 paddd XMMTMP, MSGTMP2 168 paddd XMMTMP, MSGTMP2
164 sha256msg2 MSGTMP1, MSGTMP2 169 sha256msg2 MSGTMP1, MSGTMP2
165 shuf128_32 $0x0E, MSG, MSG 170 shuf128_32 $0x0E, MSG, MSG
166 sha256rnds2 STATE1, STATE0 171 sha256rnds2 MSG, STATE1, STATE0
167 sha256msg1 MSGTMP1, MSGTMP0 172 sha256msg1 MSGTMP1, MSGTMP0
168 173
169 /* Rounds 40-43 */ 174 /* Rounds 40-43 */
170 mova128 MSGTMP2, MSG 175 mova128 MSGTMP2, MSG
171 paddd 10*16-8*16(SHA256CONSTANTS), MSG 176 paddd 10*16-8*16(SHA256CONSTANTS), MSG
172 sha256rnds2 STATE0, STATE1 177 sha256rnds2 MSG, STATE0, STATE1
173 mova128 MSGTMP2, XMMTMP 178 mova128 MSGTMP2, XMMTMP
174 palignr $4, MSGTMP1, XMMTMP 179 palignr $4, MSGTMP1, XMMTMP
175 paddd XMMTMP, MSGTMP3 180 paddd XMMTMP, MSGTMP3
176 sha256msg2 MSGTMP2, MSGTMP3 181 sha256msg2 MSGTMP2, MSGTMP3
177 shuf128_32 $0x0E, MSG, MSG 182 shuf128_32 $0x0E, MSG, MSG
178 sha256rnds2 STATE1, STATE0 183 sha256rnds2 MSG, STATE1, STATE0
179 sha256msg1 MSGTMP2, MSGTMP1 184 sha256msg1 MSGTMP2, MSGTMP1
180 185
181 /* Rounds 44-47 */ 186 /* Rounds 44-47 */
182 mova128 MSGTMP3, MSG 187 mova128 MSGTMP3, MSG
183 paddd 11*16-8*16(SHA256CONSTANTS), MSG 188 paddd 11*16-8*16(SHA256CONSTANTS), MSG
184 sha256rnds2 STATE0, STATE1 189 sha256rnds2 MSG, STATE0, STATE1
185 mova128 MSGTMP3, XMMTMP 190 mova128 MSGTMP3, XMMTMP
186 palignr $4, MSGTMP2, XMMTMP 191 palignr $4, MSGTMP2, XMMTMP
187 paddd XMMTMP, MSGTMP0 192 paddd XMMTMP, MSGTMP0
188 sha256msg2 MSGTMP3, MSGTMP0 193 sha256msg2 MSGTMP3, MSGTMP0
189 shuf128_32 $0x0E, MSG, MSG 194 shuf128_32 $0x0E, MSG, MSG
190 sha256rnds2 STATE1, STATE0 195 sha256rnds2 MSG, STATE1, STATE0
191 sha256msg1 MSGTMP3, MSGTMP2 196 sha256msg1 MSGTMP3, MSGTMP2
192 197
193 /* Rounds 48-51 */ 198 /* Rounds 48-51 */
194 mova128 MSGTMP0, MSG 199 mova128 MSGTMP0, MSG
195 paddd 12*16-8*16(SHA256CONSTANTS), MSG 200 paddd 12*16-8*16(SHA256CONSTANTS), MSG
196 sha256rnds2 STATE0, STATE1 201 sha256rnds2 MSG, STATE0, STATE1
197 mova128 MSGTMP0, XMMTMP 202 mova128 MSGTMP0, XMMTMP
198 palignr $4, MSGTMP3, XMMTMP 203 palignr $4, MSGTMP3, XMMTMP
199 paddd XMMTMP, MSGTMP1 204 paddd XMMTMP, MSGTMP1
200 sha256msg2 MSGTMP0, MSGTMP1 205 sha256msg2 MSGTMP0, MSGTMP1
201 shuf128_32 $0x0E, MSG, MSG 206 shuf128_32 $0x0E, MSG, MSG
202 sha256rnds2 STATE1, STATE0 207 sha256rnds2 MSG, STATE1, STATE0
203 sha256msg1 MSGTMP0, MSGTMP3 208 sha256msg1 MSGTMP0, MSGTMP3
204 209
205 /* Rounds 52-55 */ 210 /* Rounds 52-55 */
206 mova128 MSGTMP1, MSG 211 mova128 MSGTMP1, MSG
207 paddd 13*16-8*16(SHA256CONSTANTS), MSG 212 paddd 13*16-8*16(SHA256CONSTANTS), MSG
208 sha256rnds2 STATE0, STATE1 213 sha256rnds2 MSG, STATE0, STATE1
209 mova128 MSGTMP1, XMMTMP 214 mova128 MSGTMP1, XMMTMP
210 palignr $4, MSGTMP0, XMMTMP 215 palignr $4, MSGTMP0, XMMTMP
211 paddd XMMTMP, MSGTMP2 216 paddd XMMTMP, MSGTMP2
212 sha256msg2 MSGTMP1, MSGTMP2 217 sha256msg2 MSGTMP1, MSGTMP2
213 shuf128_32 $0x0E, MSG, MSG 218 shuf128_32 $0x0E, MSG, MSG
214 sha256rnds2 STATE1, STATE0 219 sha256rnds2 MSG, STATE1, STATE0
215 220
216 /* Rounds 56-59 */ 221 /* Rounds 56-59 */
217 mova128 MSGTMP2, MSG 222 mova128 MSGTMP2, MSG
218 paddd 14*16-8*16(SHA256CONSTANTS), MSG 223 paddd 14*16-8*16(SHA256CONSTANTS), MSG
219 sha256rnds2 STATE0, STATE1 224 sha256rnds2 MSG, STATE0, STATE1
220 mova128 MSGTMP2, XMMTMP 225 mova128 MSGTMP2, XMMTMP
221 palignr $4, MSGTMP1, XMMTMP 226 palignr $4, MSGTMP1, XMMTMP
222 paddd XMMTMP, MSGTMP3 227 paddd XMMTMP, MSGTMP3
223 sha256msg2 MSGTMP2, MSGTMP3 228 sha256msg2 MSGTMP2, MSGTMP3
224 shuf128_32 $0x0E, MSG, MSG 229 shuf128_32 $0x0E, MSG, MSG
225 sha256rnds2 STATE1, STATE0 230 sha256rnds2 MSG, STATE1, STATE0
226 231
227 /* Rounds 60-63 */ 232 /* Rounds 60-63 */
228 mova128 MSGTMP3, MSG 233 mova128 MSGTMP3, MSG
229 paddd 15*16-8*16(SHA256CONSTANTS), MSG 234 paddd 15*16-8*16(SHA256CONSTANTS), MSG
230 sha256rnds2 STATE0, STATE1 235 sha256rnds2 MSG, STATE0, STATE1
231 shuf128_32 $0x0E, MSG, MSG 236 shuf128_32 $0x0E, MSG, MSG
232 sha256rnds2 STATE1, STATE0 237 sha256rnds2 MSG, STATE1, STATE0
233 238
234 /* Write hash values back in the correct order */ 239 /* Write hash values back in the correct order */
235 /* STATE0: ABEF (msb-to-lsb: 3,2,1,0) */
236 /* STATE1: CDGH */
237 mova128 STATE0, XMMTMP 240 mova128 STATE0, XMMTMP
238/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ 241/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */
239 shufps SHUF(3,2,3,2), STATE1, STATE0 /* DCBA */ 242 /* --- -------------- HGDC -- FEBA */
240 shufps SHUF(1,0,1,0), STATE1, XMMTMP /* HGFE */ 243 shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */
244 shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */
241 /* add current hash values to previous ones */ 245 /* add current hash values to previous ones */
242 movu128 76+1*16(%eax), STATE1 246 movu128 76+1*16(%eax), STATE1
243 paddd XMMTMP, STATE1 247 paddd XMMTMP, STATE1
@@ -250,7 +254,7 @@ sha256_process_block64_shaNI:
250 .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI 254 .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI
251 255
252 .section .rodata.cst256.K256, "aM", @progbits, 256 256 .section .rodata.cst256.K256, "aM", @progbits, 256
253 .balign 16 257 .balign 16
254K256: 258K256:
255 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 259 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
256 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 260 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
@@ -270,8 +274,8 @@ K256:
270 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 274 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
271 275
272 .section .rodata.cst16.PSHUFFLE_BSWAP32_FLIP_MASK, "aM", @progbits, 16 276 .section .rodata.cst16.PSHUFFLE_BSWAP32_FLIP_MASK, "aM", @progbits, 16
273 .balign 16 277 .balign 16
274PSHUFFLE_BSWAP32_FLIP_MASK: 278PSHUFFLE_BSWAP32_FLIP_MASK:
275 .octa 0x0c0d0e0f08090a0b0405060700010203 279 .octa 0x0c0d0e0f08090a0b0405060700010203
276 280
277#endif 281#endif
diff --git a/libbb/hash_md5_sha256_x86-64_shaNI.S b/libbb/hash_md5_sha256_x86-64_shaNI.S
index 4663f750a..082ceafe4 100644
--- a/libbb/hash_md5_sha256_x86-64_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-64_shaNI.S
@@ -4,7 +4,7 @@
4// We use shorter insns, even though they are for "wrong" 4// We use shorter insns, even though they are for "wrong"
5// data type (fp, not int). 5// data type (fp, not int).
6// For Intel, there is no penalty for doing it at all 6// For Intel, there is no penalty for doing it at all
7// (CPUs which do have such penalty do not support SHA1 insns). 7// (CPUs which do have such penalty do not support SHA insns).
8// For AMD, the penalty is one extra cycle 8// For AMD, the penalty is one extra cycle
9// (allegedly: I failed to find measurable difference). 9// (allegedly: I failed to find measurable difference).
10 10
@@ -15,6 +15,10 @@
15//#define shuf128_32 pshufd 15//#define shuf128_32 pshufd
16#define shuf128_32 shufps 16#define shuf128_32 shufps
17 17
18// pshufb and palignr are SSSE3 insns.
19// We do not check SSSE3 in cpuid,
20// all SHA-capable CPUs support it as well.
21
18 .section .text.sha256_process_block64_shaNI, "ax", @progbits 22 .section .text.sha256_process_block64_shaNI, "ax", @progbits
19 .globl sha256_process_block64_shaNI 23 .globl sha256_process_block64_shaNI
20 .hidden sha256_process_block64_shaNI 24 .hidden sha256_process_block64_shaNI
@@ -34,46 +38,47 @@
34 38
35#define XMMTMP %xmm7 39#define XMMTMP %xmm7
36 40
37#define ABEF_SAVE %xmm9 41#define SAVE0 %xmm8
38#define CDGH_SAVE %xmm10 42#define SAVE1 %xmm9
39 43
40#define SHUF(a,b,c,d) $(a+(b<<2)+(c<<4)+(d<<6)) 44#define SHUF(a,b,c,d) $(a+(b<<2)+(c<<4)+(d<<6))
41 45
42 .balign 8 # allow decoders to fetch at least 2 first insns 46 .balign 8 # allow decoders to fetch at least 2 first insns
43sha256_process_block64_shaNI: 47sha256_process_block64_shaNI:
44 48
45 movu128 80+0*16(%rdi), XMMTMP /* DCBA (msb-to-lsb: 3,2,1,0) */ 49 movu128 80+0*16(%rdi), XMMTMP /* ABCD (little-endian dword order) */
46 movu128 80+1*16(%rdi), STATE1 /* HGFE */ 50 movu128 80+1*16(%rdi), STATE1 /* EFGH */
47/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ 51/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */
48 mova128 STATE1, STATE0 52 mova128 STATE1, STATE0
49 shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* ABEF */ 53 /* --- -------------- ABCD -- EFGH */
50 shufps SHUF(3,2,3,2), XMMTMP, STATE1 /* CDGH */ 54 shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */
55 shufps SHUF(3,2,3,2), XMMTMP, STATE1 /* HGDC */
51 56
52/* XMMTMP holds flip mask from here... */ 57/* XMMTMP holds flip mask from here... */
53 mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP 58 mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP
54 leaq K256+8*16(%rip), SHA256CONSTANTS 59 leaq K256+8*16(%rip), SHA256CONSTANTS
55 60
56 /* Save hash values for addition after rounds */ 61 /* Save hash values for addition after rounds */
57 mova128 STATE0, ABEF_SAVE 62 mova128 STATE0, SAVE0
58 mova128 STATE1, CDGH_SAVE 63 mova128 STATE1, SAVE1
59 64
60 /* Rounds 0-3 */ 65 /* Rounds 0-3 */
61 movu128 0*16(DATA_PTR), MSG 66 movu128 0*16(DATA_PTR), MSG
62 pshufb XMMTMP, MSG 67 pshufb XMMTMP, MSG
63 mova128 MSG, MSGTMP0 68 mova128 MSG, MSGTMP0
64 paddd 0*16-8*16(SHA256CONSTANTS), MSG 69 paddd 0*16-8*16(SHA256CONSTANTS), MSG
65 sha256rnds2 STATE0, STATE1 70 sha256rnds2 MSG, STATE0, STATE1
66 shuf128_32 $0x0E, MSG, MSG 71 shuf128_32 $0x0E, MSG, MSG
67 sha256rnds2 STATE1, STATE0 72 sha256rnds2 MSG, STATE1, STATE0
68 73
69 /* Rounds 4-7 */ 74 /* Rounds 4-7 */
70 movu128 1*16(DATA_PTR), MSG 75 movu128 1*16(DATA_PTR), MSG
71 pshufb XMMTMP, MSG 76 pshufb XMMTMP, MSG
72 mova128 MSG, MSGTMP1 77 mova128 MSG, MSGTMP1
73 paddd 1*16-8*16(SHA256CONSTANTS), MSG 78 paddd 1*16-8*16(SHA256CONSTANTS), MSG
74 sha256rnds2 STATE0, STATE1 79 sha256rnds2 MSG, STATE0, STATE1
75 shuf128_32 $0x0E, MSG, MSG 80 shuf128_32 $0x0E, MSG, MSG
76 sha256rnds2 STATE1, STATE0 81 sha256rnds2 MSG, STATE1, STATE0
77 sha256msg1 MSGTMP1, MSGTMP0 82 sha256msg1 MSGTMP1, MSGTMP0
78 83
79 /* Rounds 8-11 */ 84 /* Rounds 8-11 */
@@ -81,9 +86,9 @@ sha256_process_block64_shaNI:
81 pshufb XMMTMP, MSG 86 pshufb XMMTMP, MSG
82 mova128 MSG, MSGTMP2 87 mova128 MSG, MSGTMP2
83 paddd 2*16-8*16(SHA256CONSTANTS), MSG 88 paddd 2*16-8*16(SHA256CONSTANTS), MSG
84 sha256rnds2 STATE0, STATE1 89 sha256rnds2 MSG, STATE0, STATE1
85 shuf128_32 $0x0E, MSG, MSG 90 shuf128_32 $0x0E, MSG, MSG
86 sha256rnds2 STATE1, STATE0 91 sha256rnds2 MSG, STATE1, STATE0
87 sha256msg1 MSGTMP2, MSGTMP1 92 sha256msg1 MSGTMP2, MSGTMP1
88 93
89 /* Rounds 12-15 */ 94 /* Rounds 12-15 */
@@ -92,164 +97,162 @@ sha256_process_block64_shaNI:
92/* ...to here */ 97/* ...to here */
93 mova128 MSG, MSGTMP3 98 mova128 MSG, MSGTMP3
94 paddd 3*16-8*16(SHA256CONSTANTS), MSG 99 paddd 3*16-8*16(SHA256CONSTANTS), MSG
95 sha256rnds2 STATE0, STATE1 100 sha256rnds2 MSG, STATE0, STATE1
96 mova128 MSGTMP3, XMMTMP 101 mova128 MSGTMP3, XMMTMP
97 palignr $4, MSGTMP2, XMMTMP 102 palignr $4, MSGTMP2, XMMTMP
98 paddd XMMTMP, MSGTMP0 103 paddd XMMTMP, MSGTMP0
99 sha256msg2 MSGTMP3, MSGTMP0 104 sha256msg2 MSGTMP3, MSGTMP0
100 shuf128_32 $0x0E, MSG, MSG 105 shuf128_32 $0x0E, MSG, MSG
101 sha256rnds2 STATE1, STATE0 106 sha256rnds2 MSG, STATE1, STATE0
102 sha256msg1 MSGTMP3, MSGTMP2 107 sha256msg1 MSGTMP3, MSGTMP2
103 108
104 /* Rounds 16-19 */ 109 /* Rounds 16-19 */
105 mova128 MSGTMP0, MSG 110 mova128 MSGTMP0, MSG
106 paddd 4*16-8*16(SHA256CONSTANTS), MSG 111 paddd 4*16-8*16(SHA256CONSTANTS), MSG
107 sha256rnds2 STATE0, STATE1 112 sha256rnds2 MSG, STATE0, STATE1
108 mova128 MSGTMP0, XMMTMP 113 mova128 MSGTMP0, XMMTMP
109 palignr $4, MSGTMP3, XMMTMP 114 palignr $4, MSGTMP3, XMMTMP
110 paddd XMMTMP, MSGTMP1 115 paddd XMMTMP, MSGTMP1
111 sha256msg2 MSGTMP0, MSGTMP1 116 sha256msg2 MSGTMP0, MSGTMP1
112 shuf128_32 $0x0E, MSG, MSG 117 shuf128_32 $0x0E, MSG, MSG
113 sha256rnds2 STATE1, STATE0 118 sha256rnds2 MSG, STATE1, STATE0
114 sha256msg1 MSGTMP0, MSGTMP3 119 sha256msg1 MSGTMP0, MSGTMP3
115 120
116 /* Rounds 20-23 */ 121 /* Rounds 20-23 */
117 mova128 MSGTMP1, MSG 122 mova128 MSGTMP1, MSG
118 paddd 5*16-8*16(SHA256CONSTANTS), MSG 123 paddd 5*16-8*16(SHA256CONSTANTS), MSG
119 sha256rnds2 STATE0, STATE1 124 sha256rnds2 MSG, STATE0, STATE1
120 mova128 MSGTMP1, XMMTMP 125 mova128 MSGTMP1, XMMTMP
121 palignr $4, MSGTMP0, XMMTMP 126 palignr $4, MSGTMP0, XMMTMP
122 paddd XMMTMP, MSGTMP2 127 paddd XMMTMP, MSGTMP2
123 sha256msg2 MSGTMP1, MSGTMP2 128 sha256msg2 MSGTMP1, MSGTMP2
124 shuf128_32 $0x0E, MSG, MSG 129 shuf128_32 $0x0E, MSG, MSG
125 sha256rnds2 STATE1, STATE0 130 sha256rnds2 MSG, STATE1, STATE0
126 sha256msg1 MSGTMP1, MSGTMP0 131 sha256msg1 MSGTMP1, MSGTMP0
127 132
128 /* Rounds 24-27 */ 133 /* Rounds 24-27 */
129 mova128 MSGTMP2, MSG 134 mova128 MSGTMP2, MSG
130 paddd 6*16-8*16(SHA256CONSTANTS), MSG 135 paddd 6*16-8*16(SHA256CONSTANTS), MSG
131 sha256rnds2 STATE0, STATE1 136 sha256rnds2 MSG, STATE0, STATE1
132 mova128 MSGTMP2, XMMTMP 137 mova128 MSGTMP2, XMMTMP
133 palignr $4, MSGTMP1, XMMTMP 138 palignr $4, MSGTMP1, XMMTMP
134 paddd XMMTMP, MSGTMP3 139 paddd XMMTMP, MSGTMP3
135 sha256msg2 MSGTMP2, MSGTMP3 140 sha256msg2 MSGTMP2, MSGTMP3
136 shuf128_32 $0x0E, MSG, MSG 141 shuf128_32 $0x0E, MSG, MSG
137 sha256rnds2 STATE1, STATE0 142 sha256rnds2 MSG, STATE1, STATE0
138 sha256msg1 MSGTMP2, MSGTMP1 143 sha256msg1 MSGTMP2, MSGTMP1
139 144
140 /* Rounds 28-31 */ 145 /* Rounds 28-31 */
141 mova128 MSGTMP3, MSG 146 mova128 MSGTMP3, MSG
142 paddd 7*16-8*16(SHA256CONSTANTS), MSG 147 paddd 7*16-8*16(SHA256CONSTANTS), MSG
143 sha256rnds2 STATE0, STATE1 148 sha256rnds2 MSG, STATE0, STATE1
144 mova128 MSGTMP3, XMMTMP 149 mova128 MSGTMP3, XMMTMP
145 palignr $4, MSGTMP2, XMMTMP 150 palignr $4, MSGTMP2, XMMTMP
146 paddd XMMTMP, MSGTMP0 151 paddd XMMTMP, MSGTMP0
147 sha256msg2 MSGTMP3, MSGTMP0 152 sha256msg2 MSGTMP3, MSGTMP0
148 shuf128_32 $0x0E, MSG, MSG 153 shuf128_32 $0x0E, MSG, MSG
149 sha256rnds2 STATE1, STATE0 154 sha256rnds2 MSG, STATE1, STATE0
150 sha256msg1 MSGTMP3, MSGTMP2 155 sha256msg1 MSGTMP3, MSGTMP2
151 156
152 /* Rounds 32-35 */ 157 /* Rounds 32-35 */
153 mova128 MSGTMP0, MSG 158 mova128 MSGTMP0, MSG
154 paddd 8*16-8*16(SHA256CONSTANTS), MSG 159 paddd 8*16-8*16(SHA256CONSTANTS), MSG
155 sha256rnds2 STATE0, STATE1 160 sha256rnds2 MSG, STATE0, STATE1
156 mova128 MSGTMP0, XMMTMP 161 mova128 MSGTMP0, XMMTMP
157 palignr $4, MSGTMP3, XMMTMP 162 palignr $4, MSGTMP3, XMMTMP
158 paddd XMMTMP, MSGTMP1 163 paddd XMMTMP, MSGTMP1
159 sha256msg2 MSGTMP0, MSGTMP1 164 sha256msg2 MSGTMP0, MSGTMP1
160 shuf128_32 $0x0E, MSG, MSG 165 shuf128_32 $0x0E, MSG, MSG
161 sha256rnds2 STATE1, STATE0 166 sha256rnds2 MSG, STATE1, STATE0
162 sha256msg1 MSGTMP0, MSGTMP3 167 sha256msg1 MSGTMP0, MSGTMP3
163 168
164 /* Rounds 36-39 */ 169 /* Rounds 36-39 */
165 mova128 MSGTMP1, MSG 170 mova128 MSGTMP1, MSG
166 paddd 9*16-8*16(SHA256CONSTANTS), MSG 171 paddd 9*16-8*16(SHA256CONSTANTS), MSG
167 sha256rnds2 STATE0, STATE1 172 sha256rnds2 MSG, STATE0, STATE1
168 mova128 MSGTMP1, XMMTMP 173 mova128 MSGTMP1, XMMTMP
169 palignr $4, MSGTMP0, XMMTMP 174 palignr $4, MSGTMP0, XMMTMP
170 paddd XMMTMP, MSGTMP2 175 paddd XMMTMP, MSGTMP2
171 sha256msg2 MSGTMP1, MSGTMP2 176 sha256msg2 MSGTMP1, MSGTMP2
172 shuf128_32 $0x0E, MSG, MSG 177 shuf128_32 $0x0E, MSG, MSG
173 sha256rnds2 STATE1, STATE0 178 sha256rnds2 MSG, STATE1, STATE0
174 sha256msg1 MSGTMP1, MSGTMP0 179 sha256msg1 MSGTMP1, MSGTMP0
175 180
176 /* Rounds 40-43 */ 181 /* Rounds 40-43 */
177 mova128 MSGTMP2, MSG 182 mova128 MSGTMP2, MSG
178 paddd 10*16-8*16(SHA256CONSTANTS), MSG 183 paddd 10*16-8*16(SHA256CONSTANTS), MSG
179 sha256rnds2 STATE0, STATE1 184 sha256rnds2 MSG, STATE0, STATE1
180 mova128 MSGTMP2, XMMTMP 185 mova128 MSGTMP2, XMMTMP
181 palignr $4, MSGTMP1, XMMTMP 186 palignr $4, MSGTMP1, XMMTMP
182 paddd XMMTMP, MSGTMP3 187 paddd XMMTMP, MSGTMP3
183 sha256msg2 MSGTMP2, MSGTMP3 188 sha256msg2 MSGTMP2, MSGTMP3
184 shuf128_32 $0x0E, MSG, MSG 189 shuf128_32 $0x0E, MSG, MSG
185 sha256rnds2 STATE1, STATE0 190 sha256rnds2 MSG, STATE1, STATE0
186 sha256msg1 MSGTMP2, MSGTMP1 191 sha256msg1 MSGTMP2, MSGTMP1
187 192
188 /* Rounds 44-47 */ 193 /* Rounds 44-47 */
189 mova128 MSGTMP3, MSG 194 mova128 MSGTMP3, MSG
190 paddd 11*16-8*16(SHA256CONSTANTS), MSG 195 paddd 11*16-8*16(SHA256CONSTANTS), MSG
191 sha256rnds2 STATE0, STATE1 196 sha256rnds2 MSG, STATE0, STATE1
192 mova128 MSGTMP3, XMMTMP 197 mova128 MSGTMP3, XMMTMP
193 palignr $4, MSGTMP2, XMMTMP 198 palignr $4, MSGTMP2, XMMTMP
194 paddd XMMTMP, MSGTMP0 199 paddd XMMTMP, MSGTMP0
195 sha256msg2 MSGTMP3, MSGTMP0 200 sha256msg2 MSGTMP3, MSGTMP0
196 shuf128_32 $0x0E, MSG, MSG 201 shuf128_32 $0x0E, MSG, MSG
197 sha256rnds2 STATE1, STATE0 202 sha256rnds2 MSG, STATE1, STATE0
198 sha256msg1 MSGTMP3, MSGTMP2 203 sha256msg1 MSGTMP3, MSGTMP2
199 204
200 /* Rounds 48-51 */ 205 /* Rounds 48-51 */
201 mova128 MSGTMP0, MSG 206 mova128 MSGTMP0, MSG
202 paddd 12*16-8*16(SHA256CONSTANTS), MSG 207 paddd 12*16-8*16(SHA256CONSTANTS), MSG
203 sha256rnds2 STATE0, STATE1 208 sha256rnds2 MSG, STATE0, STATE1
204 mova128 MSGTMP0, XMMTMP 209 mova128 MSGTMP0, XMMTMP
205 palignr $4, MSGTMP3, XMMTMP 210 palignr $4, MSGTMP3, XMMTMP
206 paddd XMMTMP, MSGTMP1 211 paddd XMMTMP, MSGTMP1
207 sha256msg2 MSGTMP0, MSGTMP1 212 sha256msg2 MSGTMP0, MSGTMP1
208 shuf128_32 $0x0E, MSG, MSG 213 shuf128_32 $0x0E, MSG, MSG
209 sha256rnds2 STATE1, STATE0 214 sha256rnds2 MSG, STATE1, STATE0
210 sha256msg1 MSGTMP0, MSGTMP3 215 sha256msg1 MSGTMP0, MSGTMP3
211 216
212 /* Rounds 52-55 */ 217 /* Rounds 52-55 */
213 mova128 MSGTMP1, MSG 218 mova128 MSGTMP1, MSG
214 paddd 13*16-8*16(SHA256CONSTANTS), MSG 219 paddd 13*16-8*16(SHA256CONSTANTS), MSG
215 sha256rnds2 STATE0, STATE1 220 sha256rnds2 MSG, STATE0, STATE1
216 mova128 MSGTMP1, XMMTMP 221 mova128 MSGTMP1, XMMTMP
217 palignr $4, MSGTMP0, XMMTMP 222 palignr $4, MSGTMP0, XMMTMP
218 paddd XMMTMP, MSGTMP2 223 paddd XMMTMP, MSGTMP2
219 sha256msg2 MSGTMP1, MSGTMP2 224 sha256msg2 MSGTMP1, MSGTMP2
220 shuf128_32 $0x0E, MSG, MSG 225 shuf128_32 $0x0E, MSG, MSG
221 sha256rnds2 STATE1, STATE0 226 sha256rnds2 MSG, STATE1, STATE0
222 227
223 /* Rounds 56-59 */ 228 /* Rounds 56-59 */
224 mova128 MSGTMP2, MSG 229 mova128 MSGTMP2, MSG
225 paddd 14*16-8*16(SHA256CONSTANTS), MSG 230 paddd 14*16-8*16(SHA256CONSTANTS), MSG
226 sha256rnds2 STATE0, STATE1 231 sha256rnds2 MSG, STATE0, STATE1
227 mova128 MSGTMP2, XMMTMP 232 mova128 MSGTMP2, XMMTMP
228 palignr $4, MSGTMP1, XMMTMP 233 palignr $4, MSGTMP1, XMMTMP
229 paddd XMMTMP, MSGTMP3 234 paddd XMMTMP, MSGTMP3
230 sha256msg2 MSGTMP2, MSGTMP3 235 sha256msg2 MSGTMP2, MSGTMP3
231 shuf128_32 $0x0E, MSG, MSG 236 shuf128_32 $0x0E, MSG, MSG
232 sha256rnds2 STATE1, STATE0 237 sha256rnds2 MSG, STATE1, STATE0
233 238
234 /* Rounds 60-63 */ 239 /* Rounds 60-63 */
235 mova128 MSGTMP3, MSG 240 mova128 MSGTMP3, MSG
236 paddd 15*16-8*16(SHA256CONSTANTS), MSG 241 paddd 15*16-8*16(SHA256CONSTANTS), MSG
237 sha256rnds2 STATE0, STATE1 242 sha256rnds2 MSG, STATE0, STATE1
238 shuf128_32 $0x0E, MSG, MSG 243 shuf128_32 $0x0E, MSG, MSG
239 sha256rnds2 STATE1, STATE0 244 sha256rnds2 MSG, STATE1, STATE0
240 245
241 /* Add current hash values with previously saved */ 246 /* Add current hash values with previously saved */
242 paddd ABEF_SAVE, STATE0 247 paddd SAVE0, STATE0
243 paddd CDGH_SAVE, STATE1 248 paddd SAVE1, STATE1
244 249
245 /* Write hash values back in the correct order */ 250 /* Write hash values back in the correct order */
246 /* STATE0: ABEF (msb-to-lsb: 3,2,1,0) */
247 /* STATE1: CDGH */
248 mova128 STATE0, XMMTMP 251 mova128 STATE0, XMMTMP
249/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ 252/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */
250 shufps SHUF(3,2,3,2), STATE1, STATE0 /* DCBA */ 253 /* --- -------------- HGDC -- FEBA */
251 shufps SHUF(1,0,1,0), STATE1, XMMTMP /* HGFE */ 254 shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */
252 255 shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */
253 movu128 STATE0, 80+0*16(%rdi) 256 movu128 STATE0, 80+0*16(%rdi)
254 movu128 XMMTMP, 80+1*16(%rdi) 257 movu128 XMMTMP, 80+1*16(%rdi)
255 258
@@ -257,7 +260,7 @@ sha256_process_block64_shaNI:
257 .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI 260 .size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI
258 261
259 .section .rodata.cst256.K256, "aM", @progbits, 256 262 .section .rodata.cst256.K256, "aM", @progbits, 256
260 .balign 16 263 .balign 16
261K256: 264K256:
262 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 265 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
263 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 266 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
@@ -277,8 +280,8 @@ K256:
277 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 280 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
278 281
279 .section .rodata.cst16.PSHUFFLE_BSWAP32_FLIP_MASK, "aM", @progbits, 16 282 .section .rodata.cst16.PSHUFFLE_BSWAP32_FLIP_MASK, "aM", @progbits, 16
280 .balign 16 283 .balign 16
281PSHUFFLE_BSWAP32_FLIP_MASK: 284PSHUFFLE_BSWAP32_FLIP_MASK:
282 .octa 0x0c0d0e0f08090a0b0405060700010203 285 .octa 0x0c0d0e0f08090a0b0405060700010203
283 286
284#endif 287#endif
diff --git a/libbb/hash_md5_sha_x86-32_shaNI.S b/libbb/hash_md5_sha_x86-32_shaNI.S
index a61b3cbed..2366b046a 100644
--- a/libbb/hash_md5_sha_x86-32_shaNI.S
+++ b/libbb/hash_md5_sha_x86-32_shaNI.S
@@ -4,7 +4,7 @@
4// We use shorter insns, even though they are for "wrong" 4// We use shorter insns, even though they are for "wrong"
5// data type (fp, not int). 5// data type (fp, not int).
6// For Intel, there is no penalty for doing it at all 6// For Intel, there is no penalty for doing it at all
7// (CPUs which do have such penalty do not support SHA1 insns). 7// (CPUs which do have such penalty do not support SHA insns).
8// For AMD, the penalty is one extra cycle 8// For AMD, the penalty is one extra cycle
9// (allegedly: I failed to find measurable difference). 9// (allegedly: I failed to find measurable difference).
10 10
@@ -20,6 +20,11 @@
20#define extr128_32 pextrd 20#define extr128_32 pextrd
21//#define extr128_32 extractps # not shorter 21//#define extr128_32 extractps # not shorter
22 22
23// pshufb is a SSSE3 insn.
24// pinsrd, pextrd, extractps are SSE4.1 insns.
25// We do not check SSSE3/SSE4.1 in cpuid,
26// all SHA-capable CPUs support them as well.
27
23 .section .text.sha1_process_block64_shaNI, "ax", @progbits 28 .section .text.sha1_process_block64_shaNI, "ax", @progbits
24 .globl sha1_process_block64_shaNI 29 .globl sha1_process_block64_shaNI
25 .hidden sha1_process_block64_shaNI 30 .hidden sha1_process_block64_shaNI
@@ -219,8 +224,8 @@ sha1_process_block64_shaNI:
219 .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI 224 .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI
220 225
221 .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 226 .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
222 .balign 16 227 .balign 16
223PSHUFFLE_BYTE_FLIP_MASK: 228PSHUFFLE_BYTE_FLIP_MASK:
224 .octa 0x000102030405060708090a0b0c0d0e0f 229 .octa 0x000102030405060708090a0b0c0d0e0f
225 230
226#endif 231#endif
diff --git a/libbb/hash_md5_sha_x86-64.S b/libbb/hash_md5_sha_x86-64.S
index 287cfe547..1d55b91f8 100644
--- a/libbb/hash_md5_sha_x86-64.S
+++ b/libbb/hash_md5_sha_x86-64.S
@@ -36,7 +36,7 @@ sha1_process_block64:
36 movaps sha1const(%rip), %xmm7 36 movaps sha1const(%rip), %xmm7
37 pshufd $0x00, %xmm7, %xmm6 37 pshufd $0x00, %xmm7, %xmm6
38 38
39 # Load W[] to xmm registers, byteswapping on the fly. 39 # Load W[] to xmm0..3, byteswapping on the fly.
40 # 40 #
41 # For iterations 0..15, we pass W[] in rsi,r8..r14 41 # For iterations 0..15, we pass W[] in rsi,r8..r14
42 # for use in RD1As instead of spilling them to stack. 42 # for use in RD1As instead of spilling them to stack.
@@ -71,8 +71,8 @@ sha1_process_block64:
71 movq 4*10(%rdi), %r12 71 movq 4*10(%rdi), %r12
72 bswapq %r11 72 bswapq %r11
73 bswapq %r12 73 bswapq %r12
74 rolq $32, %r11 # r11 = W[9]:W[8] 74 rolq $32, %r11 # r11 = W[9]:W[8]
75 rolq $32, %r12 # r12 = W[11]:W[10] 75 rolq $32, %r12 # r12 = W[11]:W[10]
76 movq %r11, %xmm2 76 movq %r11, %xmm2
77 movq %r12, %xmm4 77 movq %r12, %xmm4
78 punpcklqdq %xmm4, %xmm2 # xmm2 = r12:r11 = (W[8],W[9],W[10],W[11]) 78 punpcklqdq %xmm4, %xmm2 # xmm2 = r12:r11 = (W[8],W[9],W[10],W[11])
@@ -81,8 +81,8 @@ sha1_process_block64:
81 movq 4*14(%rdi), %r14 81 movq 4*14(%rdi), %r14
82 bswapq %r13 82 bswapq %r13
83 bswapq %r14 83 bswapq %r14
84 rolq $32, %r13 # r13 = W[13]:W[12] 84 rolq $32, %r13 # r13 = W[13]:W[12]
85 rolq $32, %r14 # r14 = W[15]:W[14] 85 rolq $32, %r14 # r14 = W[15]:W[14]
86 movq %r13, %xmm3 86 movq %r13, %xmm3
87 movq %r14, %xmm4 87 movq %r14, %xmm4
88 punpcklqdq %xmm4, %xmm3 # xmm3 = r14:r13 = (W[12],W[13],W[14],W[15]) 88 punpcklqdq %xmm4, %xmm3 # xmm3 = r14:r13 = (W[12],W[13],W[14],W[15])
diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh
index a10ac411d..40c979d35 100755
--- a/libbb/hash_md5_sha_x86-64.S.sh
+++ b/libbb/hash_md5_sha_x86-64.S.sh
@@ -99,6 +99,30 @@ INTERLEAVE() {
99 ) 99 )
100} 100}
101 101
102# movaps bswap32_mask(%rip), $xmmT1
103# Load W[] to xmm0..3, byteswapping on the fly.
104# For iterations 0..15, we pass RCONST+W[] in rsi,r8..r14
105# for use in RD1As instead of spilling them to stack.
106# (We use rsi instead of rN because this makes two
107# ADDs in two first RD1As shorter by one byte).
108# movups 16*0(%rdi), %xmm0
109# pshufb $xmmT1, %xmm0 #SSSE3 insn
110# movaps %xmm0, $xmmT2
111# paddd $xmmRCONST, $xmmT2
112# movq $xmmT2, %rsi
113# #pextrq \$1, $xmmT2, %r8 #SSE4.1 insn
114# #movhpd $xmmT2, %r8 #can only move to mem, not to reg
115# shufps \$0x0e, $xmmT2, $xmmT2 # have to use two-insn sequence
116# movq $xmmT2, %r8 # instead
117# ...
118# <repeat for xmm1,2,3>
119# ...
120#- leal $RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n]
121#+ addl %esi, %e$e # e += RCONST + W[n]
122# ^^^^^^^^^^^^^^^^^^^^^^^^
123# The above is -97 bytes of code...
124# ...but pshufb is a SSSE3 insn. Can't use it.
125
102echo \ 126echo \
103"### Generated by hash_md5_sha_x86-64.S.sh ### 127"### Generated by hash_md5_sha_x86-64.S.sh ###
104 128
@@ -138,7 +162,7 @@ sha1_process_block64:
138 movaps sha1const(%rip), $xmmALLRCONST 162 movaps sha1const(%rip), $xmmALLRCONST
139 pshufd \$0x00, $xmmALLRCONST, $xmmRCONST 163 pshufd \$0x00, $xmmALLRCONST, $xmmRCONST
140 164
141 # Load W[] to xmm registers, byteswapping on the fly. 165 # Load W[] to xmm0..3, byteswapping on the fly.
142 # 166 #
143 # For iterations 0..15, we pass W[] in rsi,r8..r14 167 # For iterations 0..15, we pass W[] in rsi,r8..r14
144 # for use in RD1As instead of spilling them to stack. 168 # for use in RD1As instead of spilling them to stack.
@@ -173,8 +197,8 @@ sha1_process_block64:
173 movq 4*10(%rdi), %r12 197 movq 4*10(%rdi), %r12
174 bswapq %r11 198 bswapq %r11
175 bswapq %r12 199 bswapq %r12
176 rolq \$32, %r11 # r11 = W[9]:W[8] 200 rolq \$32, %r11 # r11 = W[9]:W[8]
177 rolq \$32, %r12 # r12 = W[11]:W[10] 201 rolq \$32, %r12 # r12 = W[11]:W[10]
178 movq %r11, %xmm2 202 movq %r11, %xmm2
179 movq %r12, $xmmT1 203 movq %r12, $xmmT1
180 punpcklqdq $xmmT1, %xmm2 # xmm2 = r12:r11 = (W[8],W[9],W[10],W[11]) 204 punpcklqdq $xmmT1, %xmm2 # xmm2 = r12:r11 = (W[8],W[9],W[10],W[11])
@@ -183,8 +207,8 @@ sha1_process_block64:
183 movq 4*14(%rdi), %r14 207 movq 4*14(%rdi), %r14
184 bswapq %r13 208 bswapq %r13
185 bswapq %r14 209 bswapq %r14
186 rolq \$32, %r13 # r13 = W[13]:W[12] 210 rolq \$32, %r13 # r13 = W[13]:W[12]
187 rolq \$32, %r14 # r14 = W[15]:W[14] 211 rolq \$32, %r14 # r14 = W[15]:W[14]
188 movq %r13, %xmm3 212 movq %r13, %xmm3
189 movq %r14, $xmmT1 213 movq %r14, $xmmT1
190 punpcklqdq $xmmT1, %xmm3 # xmm3 = r14:r13 = (W[12],W[13],W[14],W[15]) 214 punpcklqdq $xmmT1, %xmm3 # xmm3 = r14:r13 = (W[12],W[13],W[14],W[15])
diff --git a/libbb/hash_md5_sha_x86-64_shaNI.S b/libbb/hash_md5_sha_x86-64_shaNI.S
index b32029360..794e97040 100644
--- a/libbb/hash_md5_sha_x86-64_shaNI.S
+++ b/libbb/hash_md5_sha_x86-64_shaNI.S
@@ -4,7 +4,7 @@
4// We use shorter insns, even though they are for "wrong" 4// We use shorter insns, even though they are for "wrong"
5// data type (fp, not int). 5// data type (fp, not int).
6// For Intel, there is no penalty for doing it at all 6// For Intel, there is no penalty for doing it at all
7// (CPUs which do have such penalty do not support SHA1 insns). 7// (CPUs which do have such penalty do not support SHA insns).
8// For AMD, the penalty is one extra cycle 8// For AMD, the penalty is one extra cycle
9// (allegedly: I failed to find measurable difference). 9// (allegedly: I failed to find measurable difference).
10 10
@@ -20,6 +20,11 @@
20#define extr128_32 pextrd 20#define extr128_32 pextrd
21//#define extr128_32 extractps # not shorter 21//#define extr128_32 extractps # not shorter
22 22
23// pshufb is a SSSE3 insn.
24// pinsrd, pextrd, extractps are SSE4.1 insns.
25// We do not check SSSE3/SSE4.1 in cpuid,
26// all SHA-capable CPUs support them as well.
27
23 .section .text.sha1_process_block64_shaNI, "ax", @progbits 28 .section .text.sha1_process_block64_shaNI, "ax", @progbits
24 .globl sha1_process_block64_shaNI 29 .globl sha1_process_block64_shaNI
25 .hidden sha1_process_block64_shaNI 30 .hidden sha1_process_block64_shaNI
@@ -217,8 +222,8 @@ sha1_process_block64_shaNI:
217 .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI 222 .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI
218 223
219 .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 224 .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
220 .balign 16 225 .balign 16
221PSHUFFLE_BYTE_FLIP_MASK: 226PSHUFFLE_BYTE_FLIP_MASK:
222 .octa 0x000102030405060708090a0b0c0d0e0f 227 .octa 0x000102030405060708090a0b0c0d0e0f
223 228
224#endif 229#endif
diff --git a/shell/ash.c b/shell/ash.c
index e8a1e853c..35aa3f6e6 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -7163,9 +7163,7 @@ exptilde(char *startp, int flag)
7163 home = lookupvar("HOME"); 7163 home = lookupvar("HOME");
7164 } else { 7164 } else {
7165 pw = getpwnam(name); 7165 pw = getpwnam(name);
7166 if (pw == NULL) 7166 home = pw ? pw->pw_dir : NULL;
7167 goto lose;
7168 home = pw->pw_dir;
7169 } 7167 }
7170 *p = c; 7168 *p = c;
7171 if (!home) 7169 if (!home)
@@ -7724,6 +7722,10 @@ subevalvar(char *start, char *str, int strloc,
7724 *repl = '\0'; 7722 *repl = '\0';
7725 break; 7723 break;
7726 } 7724 }
7725 if ((unsigned char)*repl == CTLENDVAR) { /* ${v/pattern} (no trailing /, no repl) */
7726 repl = NULL;
7727 break;
7728 }
7727 /* Handle escaped slashes, e.g. "${v/\//_}" (they are CTLESC'ed by this point) */ 7729 /* Handle escaped slashes, e.g. "${v/\//_}" (they are CTLESC'ed by this point) */
7728 if ((unsigned char)*repl == CTLESC && repl[1]) 7730 if ((unsigned char)*repl == CTLESC && repl[1])
7729 repl++; 7731 repl++;
@@ -7830,7 +7832,13 @@ subevalvar(char *start, char *str, int strloc,
7830 len = orig_len - pos; 7832 len = orig_len - pos;
7831 7833
7832 if (!quotes) { 7834 if (!quotes) {
7833 loc = mempcpy(startp, startp + pos, len); 7835 /* want: loc = mempcpy(startp, startp + pos, len)
7836 * but it does not allow overlapping arguments */
7837 loc = startp;
7838 while (--len >= 0) {
7839 *loc = loc[pos];
7840 loc++;
7841 }
7834 } else { 7842 } else {
7835 for (vstr = startp; pos != 0; pos--) { 7843 for (vstr = startp; pos != 0; pos--) {
7836 if ((unsigned char)*vstr == CTLESC) 7844 if ((unsigned char)*vstr == CTLESC)
diff --git a/shell/ash_test/ash-vars/var_bash_repl_unterminated.right b/shell/ash_test/ash-vars/var_bash_repl_unterminated.right
new file mode 100644
index 000000000..5bff3a6fa
--- /dev/null
+++ b/shell/ash_test/ash-vars/var_bash_repl_unterminated.right
@@ -0,0 +1 @@
b/d
diff --git a/shell/ash_test/ash-vars/var_bash_repl_unterminated.tests b/shell/ash_test/ash-vars/var_bash_repl_unterminated.tests
new file mode 100755
index 000000000..c9513343d
--- /dev/null
+++ b/shell/ash_test/ash-vars/var_bash_repl_unterminated.tests
@@ -0,0 +1,2 @@
1a=b-c
2echo ${a/-*}/d
diff --git a/shell/hush_test/hush-vars/var_bash_repl_unterminated.right b/shell/hush_test/hush-vars/var_bash_repl_unterminated.right
new file mode 100644
index 000000000..5bff3a6fa
--- /dev/null
+++ b/shell/hush_test/hush-vars/var_bash_repl_unterminated.right
@@ -0,0 +1 @@
b/d
diff --git a/shell/hush_test/hush-vars/var_bash_repl_unterminated.tests b/shell/hush_test/hush-vars/var_bash_repl_unterminated.tests
new file mode 100755
index 000000000..c9513343d
--- /dev/null
+++ b/shell/hush_test/hush-vars/var_bash_repl_unterminated.tests
@@ -0,0 +1,2 @@
1a=b-c
2echo ${a/-*}/d
diff --git a/util-linux/taskset.c b/util-linux/taskset.c
index d2ef9b98f..8b410f369 100644
--- a/util-linux/taskset.c
+++ b/util-linux/taskset.c
@@ -55,7 +55,6 @@
55 * Not yet implemented: 55 * Not yet implemented:
56 * -a/--all-tasks (affect all threads) 56 * -a/--all-tasks (affect all threads)
57 * needs to get TIDs from /proc/PID/task/ and use _them_ as "pid" in sched_setaffinity(pid) 57 * needs to get TIDs from /proc/PID/task/ and use _them_ as "pid" in sched_setaffinity(pid)
58 * -c/--cpu-list (specify CPUs via "1,3,5-7")
59 */ 58 */
60 59
61#include <sched.h> 60#include <sched.h>
@@ -91,7 +90,7 @@ static char *from_mask(const ul *mask, unsigned sz_in_bytes)
91} 90}
92#else 91#else
93#define TASKSET_PRINTF_MASK "%lx" 92#define TASKSET_PRINTF_MASK "%lx"
94static unsigned long long from_mask(ul *mask, unsigned sz_in_bytes UNUSED_PARAM) 93static unsigned long from_mask(ul *mask, unsigned sz_in_bytes UNUSED_PARAM)
95{ 94{
96 return *mask; 95 return *mask;
97} 96}