From 627821e42b06adfe6bbc6004d8eeb7c35f65120d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 25 Sep 2021 19:36:35 +0200
Subject: libbb/lineedit: tweak #if indenting, no code changes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 libbb/lineedit.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index e8d721e61..a7a3ee103 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -1762,7 +1762,7 @@ vi_back_motion(void)
 			input_backward(1);
 	}
 }
-#endif
+#endif /* ENABLE_FEATURE_EDITING_VI */
 
 /* Modelled after bash 4.0 behavior of Ctrl-<arrow> */
 static void ctrl_left(void)
@@ -1863,7 +1863,7 @@ static void ask_terminal(void)
 	}
 }
 #else
-#define ask_terminal() ((void)0)
+# define ask_terminal() ((void)0)
 #endif
 
 /* Note about multi-line PS1 (e.g. "\n\w \u@\h\n> ") and prompt redrawing:
@@ -2057,7 +2057,7 @@ static void parse_and_put_prompt(const char *prmt_ptr)
 			if (c == '\n')
 				cmdedit_prmt_len = 0;
 			else if (flg_not_length != ']') {
-#if ENABLE_UNICODE_SUPPORT
+# if ENABLE_UNICODE_SUPPORT
 				if (n == 1) {
 					/* Only count single-byte characters and the first of multi-byte characters */
 					if ((unsigned char)*pbuf < 0x80  /* single byte character */
@@ -2068,9 +2068,9 @@ static void parse_and_put_prompt(const char *prmt_ptr)
 				} else {
 					cmdedit_prmt_len += unicode_strwidth(pbuf);
 				}
-#else
+# else
 				cmdedit_prmt_len += n;
-#endif
+# endif
 			}
 		}
 		prmt_mem_ptr = strcat(xrealloc(prmt_mem_ptr, prmt_size+1), pbuf);
@@ -2297,7 +2297,7 @@ static int32_t reverse_i_search(int timeout)
 			}
 
 			/* Append this char */
-#if ENABLE_UNICODE_SUPPORT
+# if ENABLE_UNICODE_SUPPORT
 			if (unicode_status == UNICODE_ON) {
 				mbstate_t mbstate = { 0 };
 				char buf[MB_CUR_MAX + 1];
@@ -2308,7 +2308,7 @@ static int32_t reverse_i_search(int timeout)
 						strcpy(match_buf + match_buf_len, buf);
 				}
 			} else
-#endif
+# endif
 			if (match_buf_len < sizeof(match_buf) - 1) {
 				match_buf[match_buf_len] = ic;
 				match_buf[match_buf_len + 1] = '\0';
@@ -2360,7 +2360,7 @@ static int32_t reverse_i_search(int timeout)
 
 	return ic;
 }
-#endif
+#endif /* ENABLE_FEATURE_REVERSE_SEARCH */
 
 #if ENABLE_FEATURE_EDITING_WINCH
 static void sigaction2(int sig, struct sigaction *act)
-- 
cgit v1.2.3-55-g6feb


From d84a604830a7ee3f8fb5f3908ae0d54513393a20 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 25 Sep 2021 22:04:45 +0200
Subject: shell: fix arithmetic evaluation of "++7" and such (it is + + 7,
 i.e. 7)

function                                             old     new   delta
evaluate_string                                      945     988     +43

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 shell/ash_test/ash-arith/arith.right   | 20 +++++++++++++-------
 shell/ash_test/ash-arith/arith.tests   | 10 +++++-----
 shell/ash_test/ash-arith/arith1.sub    |  4 ++--
 shell/ash_test/ash-arith/arith2.sub    | 10 +++++-----
 shell/hush_test/hush-arith/arith.right | 20 +++++++++++++-------
 shell/hush_test/hush-arith/arith.tests | 10 +++++-----
 shell/hush_test/hush-arith/arith1.sub  |  4 ++--
 shell/hush_test/hush-arith/arith2.sub  | 10 +++++-----
 shell/math.c                           | 23 +++++++++++++++--------
 9 files changed, 65 insertions(+), 46 deletions(-)

diff --git a/shell/ash_test/ash-arith/arith.right b/shell/ash_test/ash-arith/arith.right
index 6936f1269..99ef825f5 100644
--- a/shell/ash_test/ash-arith/arith.right
+++ b/shell/ash_test/ash-arith/arith.right
@@ -97,6 +97,7 @@ ghi
 3 3
 4 4
 4 4
+7 7
 ./arith.tests: line 257: arithmetic syntax error
 ./arith.tests: line 259: arithmetic syntax error
 ./arith.tests: line 260: arithmetic syntax error
@@ -105,6 +106,8 @@ ghi
 4 4
 7 7
 -7 -7
+7
+7
 ./arith1.sub: line 2: arithmetic syntax error
 ./arith1.sub: line 3: arithmetic syntax error
 ./arith1.sub: line 4: arithmetic syntax error
@@ -119,11 +122,12 @@ ghi
 2 2
 -2 -2
 1 1
-./arith1.sub: line 37: arithmetic syntax error
-./arith2.sub: line 2: arithmetic syntax error
-./arith2.sub: line 3: arithmetic syntax error
-./arith2.sub: line 4: arithmetic syntax error
-./arith2.sub: line 5: arithmetic syntax error
+7
+7
+7
+7
+7
+7
 5 5
 1 1
 6 6
@@ -132,8 +136,10 @@ ghi
 1 1
 4 4
 0 0
-./arith2.sub: line 42: arithmetic syntax error
-./arith2.sub: line 47: arithmetic syntax error
+-7
+-7
+7
+7
 8 12
 ./arith.tests: line 290: arithmetic syntax error
 42
diff --git a/shell/ash_test/ash-arith/arith.tests b/shell/ash_test/ash-arith/arith.tests
index d65758e7d..746ccab71 100755
--- a/shell/ash_test/ash-arith/arith.tests
+++ b/shell/ash_test/ash-arith/arith.tests
@@ -252,8 +252,8 @@ echo 3 $x
 echo 4 $(( ++x ))
 echo 4 $x
 
-# bash 3.2 apparently thinks that ++7 is 7
-#ash# echo 7 $(( ++7 ))
+# ++ is not a inc operator on non-variable, it is the + + sequence
+echo 7 $(( ++7 ))
 (  echo $(( 7-- ))    )
 
 (  echo $(( --x=7 ))  )
@@ -267,9 +267,9 @@ echo 4 $x
 echo 7 $(( +7 ))
 echo -7 $(( -7 ))
 
-# bash 3.2 apparently thinks that ++7 is 7
-#ash# echo $(( ++7 ))
-#ash# echo $(( --7 ))
+# ++ is not a inc operator on non-variable, it is the + + sequence
+echo $(( ++7 ))
+echo $(( --7 ))
 
 ${THIS_SH} ./arith1.sub
 ${THIS_SH} ./arith2.sub
diff --git a/shell/ash_test/ash-arith/arith1.sub b/shell/ash_test/ash-arith/arith1.sub
index 80aa99922..a36785c67 100755
--- a/shell/ash_test/ash-arith/arith1.sub
+++ b/shell/ash_test/ash-arith/arith1.sub
@@ -35,6 +35,6 @@ echo 1 $a
 
 #ash# (( ++ ))
 (  echo $(( +++7 ))  )
-# bash 3.2 apparently thinks that ++ +7 is 7
-#ash# echo $(( ++ + 7 ))
+# ++ is not a inc operator on non-variable, it is the + + sequence
+echo $(( ++ + 7 ))
 #ash# (( -- ))
diff --git a/shell/ash_test/ash-arith/arith2.sub b/shell/ash_test/ash-arith/arith2.sub
index 9105059db..29f9471d6 100755
--- a/shell/ash_test/ash-arith/arith2.sub
+++ b/shell/ash_test/ash-arith/arith2.sub
@@ -1,4 +1,4 @@
-# bash 3.2 apparently thinks that ++7 is 7 etc
+# ++ and -- are not inc/dec operators on non-variables, they are + + and - - sequences
 (  echo $(( --7 ))   )
 (  echo $(( ++7 ))   )
 (  echo $(( -- 7 ))  )
@@ -37,13 +37,13 @@ echo 4 $(( 4 - -- a ))
 echo 0 $a
 
 #ash# (( -- ))
-# bash 3.2 apparently thinks that ---7 is -7
-#ash# echo $(( ---7 ))
+# -- is not a dec operator on non-variable, it is the - - sequence
+echo $(( ---7 ))
 (  echo $(( -- - 7 ))  )
 
 #ash# (( ++ ))
-# bash 3.2: 7
-#ash# echo 7 $(( ++7 ))
+# ++ is not a inc operator on non-variable, it is the + + sequence
+echo $(( ++7 ))
 (  echo $(( ++ + 7 ))  )
 
 # bash 3.2: -7
diff --git a/shell/hush_test/hush-arith/arith.right b/shell/hush_test/hush-arith/arith.right
index c48e468a5..2c389caea 100644
--- a/shell/hush_test/hush-arith/arith.right
+++ b/shell/hush_test/hush-arith/arith.right
@@ -106,6 +106,7 @@ hush: arithmetic syntax error
 3 3
 4 4
 4 4
+7 7
 hush: arithmetic syntax error
 hush: arithmetic syntax error
 hush: arithmetic syntax error
@@ -114,6 +115,8 @@ hush: arithmetic syntax error
 4 4
 7 7
 -7 -7
+7
+7
 hush: arithmetic syntax error
 hush: arithmetic syntax error
 hush: arithmetic syntax error
@@ -128,11 +131,12 @@ hush: arithmetic syntax error
 2 2
 -2 -2
 1 1
-hush: arithmetic syntax error
-hush: arithmetic syntax error
-hush: arithmetic syntax error
-hush: arithmetic syntax error
-hush: arithmetic syntax error
+7
+7
+7
+7
+7
+7
 5 5
 1 1
 6 6
@@ -141,8 +145,10 @@ hush: arithmetic syntax error
 1 1
 4 4
 0 0
-hush: arithmetic syntax error
-hush: arithmetic syntax error
+-7
+-7
+7
+7
 8 12
 hush: arithmetic syntax error
 42
diff --git a/shell/hush_test/hush-arith/arith.tests b/shell/hush_test/hush-arith/arith.tests
index bc6b341d1..a7aded17d 100755
--- a/shell/hush_test/hush-arith/arith.tests
+++ b/shell/hush_test/hush-arith/arith.tests
@@ -255,8 +255,8 @@ echo 3 $x
 echo 4 $(( ++x ))
 echo 4 $x
 
-# bash 3.2 apparently thinks that ++7 is 7
-#ash# echo 7 $(( ++7 ))
+# ++ is not a inc operator on non-variable, it is the + + sequence
+echo 7 $(( ++7 ))
 (  echo $(( 7-- ))    )
 
 (  echo $(( --x=7 ))  )
@@ -270,9 +270,9 @@ echo 4 $x
 echo 7 $(( +7 ))
 echo -7 $(( -7 ))
 
-# bash 3.2 apparently thinks that ++7 is 7
-#ash# echo $(( ++7 ))
-#ash# echo $(( --7 ))
+# ++ is not a inc operator on non-variable, it is the + + sequence
+echo $(( ++7 ))
+echo $(( --7 ))
 
 ${THIS_SH} ./arith1.sub
 ${THIS_SH} ./arith2.sub
diff --git a/shell/hush_test/hush-arith/arith1.sub b/shell/hush_test/hush-arith/arith1.sub
index 80aa99922..a36785c67 100755
--- a/shell/hush_test/hush-arith/arith1.sub
+++ b/shell/hush_test/hush-arith/arith1.sub
@@ -35,6 +35,6 @@ echo 1 $a
 
 #ash# (( ++ ))
 (  echo $(( +++7 ))  )
-# bash 3.2 apparently thinks that ++ +7 is 7
-#ash# echo $(( ++ + 7 ))
+# ++ is not a inc operator on non-variable, it is the + + sequence
+echo $(( ++ + 7 ))
 #ash# (( -- ))
diff --git a/shell/hush_test/hush-arith/arith2.sub b/shell/hush_test/hush-arith/arith2.sub
index 9105059db..29f9471d6 100755
--- a/shell/hush_test/hush-arith/arith2.sub
+++ b/shell/hush_test/hush-arith/arith2.sub
@@ -1,4 +1,4 @@
-# bash 3.2 apparently thinks that ++7 is 7 etc
+# ++ and -- are not inc/dec operators on non-variables, they are + + and - - sequences
 (  echo $(( --7 ))   )
 (  echo $(( ++7 ))   )
 (  echo $(( -- 7 ))  )
@@ -37,13 +37,13 @@ echo 4 $(( 4 - -- a ))
 echo 0 $a
 
 #ash# (( -- ))
-# bash 3.2 apparently thinks that ---7 is -7
-#ash# echo $(( ---7 ))
+# -- is not a dec operator on non-variable, it is the - - sequence
+echo $(( ---7 ))
 (  echo $(( -- - 7 ))  )
 
 #ash# (( ++ ))
-# bash 3.2: 7
-#ash# echo 7 $(( ++7 ))
+# ++ is not a inc operator on non-variable, it is the + + sequence
+echo $(( ++7 ))
 (  echo $(( ++ + 7 ))  )
 
 # bash 3.2: -7
diff --git a/shell/math.c b/shell/math.c
index 2942cdd26..049d5703b 100644
--- a/shell/math.c
+++ b/shell/math.c
@@ -668,19 +668,26 @@ evaluate_string(arith_state_t *math_state, const char *expr)
 
 		/* Should be an operator */
 
-		/* Special case: NUM-- and NUM++ are not recognized if NUM
-		 * is a literal number, not a variable. IOW:
+		/* Special case: XYZ--, XYZ++, --XYZ, ++XYZ are recognized
+		 * only if XYZ is a variable name, not a number or EXPR. IOW:
 		 * "a+++v" is a++ + v.
 		 * "7+++v" is 7 + ++v, not 7++ + v.
+		 * "--7" is - - 7, not --7.
+		 * "++++a" is + + ++a, not ++ ++ a.
+		 * (we still mishandle "(a)+++7", should be treated as (a) + + + 7, but we do increment a)
 		 */
-		if (lasttok == TOK_NUM && !numstackptr[-1].var /* number literal */
-		 && (expr[0] == '+' || expr[0] == '-')
+		if ((expr[0] == '+' || expr[0] == '-')
 		 && (expr[1] == expr[0])
 		) {
-			//bb_error_msg("special %c%c", expr[0], expr[0]);
-			op = (expr[0] == '+' ? TOK_ADD : TOK_SUB);
-			expr += 1;
-			goto tok_found1;
+			if (numstackptr == numstack || !numstackptr[-1].var) { /* not a VAR++ */
+				char next = skip_whitespace(expr + 2)[0];
+				if (!(isalpha(next) || next == '_')) { /* not a ++VAR */
+					//bb_error_msg("special %c%c", expr[0], expr[0]);
+					op = (expr[0] == '+' ? TOK_ADD : TOK_SUB);
+					expr++;
+					goto tok_found1;
+				}
+			}
 		}
 
 		p = op_tokens;
-- 
cgit v1.2.3-55-g6feb


From 62e433131b289ea90e465cf0c5f78c8c226fc692 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 25 Sep 2021 22:25:19 +0200
Subject: shell: enable more tests which are passing now

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 shell/ash_test/ash-arith/arith.right   | 58 ++++++++++++++++++++++++----------
 shell/ash_test/ash-arith/arith.tests   | 45 +++++++++++++-------------
 shell/ash_test/ash-arith/arith2.sub    | 14 +++-----
 shell/hush_test/hush-arith/arith.right | 17 ++++++++++
 shell/hush_test/hush-arith/arith.tests | 30 +++++++++---------
 shell/hush_test/hush-arith/arith2.sub  | 14 +++-----
 6 files changed, 105 insertions(+), 73 deletions(-)

diff --git a/shell/ash_test/ash-arith/arith.right b/shell/ash_test/ash-arith/arith.right
index 99ef825f5..61fcab55e 100644
--- a/shell/ash_test/ash-arith/arith.right
+++ b/shell/ash_test/ash-arith/arith.right
@@ -43,40 +43,60 @@ Format: 'expected actual'
 4 4
 29 29
 5 5
+unary plus, minus
 -4 -4
 4 4
+conditional expressions
 1 1
 32 32
 32 32
 1 1
 1 1
 32 32
+check that the unevaluated part of the ternary operator does not do evaluation or assignment
 20 20
 30 30
 20 20
 30 30
-./arith.tests: line 117: arithmetic syntax error
+check precedence of assignment vs. conditional operator
+./arith.tests: line 116: arithmetic syntax error
+check precedence of assignment vs. conditional operator
+associativity of assignment-operator operator
 6 6
 6,5,3 6,5,3
+octal, hex
 263 263
 255 255
 40 40
-./arith.tests: line 163: arithmetic syntax error
-./arith.tests: line 165: divide by zero
-./arith.tests: let: line 166: arithmetic syntax error
-./arith.tests: line 167: arithmetic syntax error
-./arith.tests: let: line 168: arithmetic syntax error
+other bases
+10 10
+10 10
+10 10
+10 10
+10 10
+10 10
+36 36
+36 36
+62 62
+63 63
+missing number after base
+0 0
+./arith.tests: line 162: arithmetic syntax error
+./arith.tests: line 164: divide by zero
+./arith.tests: let: line 165: arithmetic syntax error
+./arith.tests: line 166: arithmetic syntax error
+./arith.tests: let: line 167: arithmetic syntax error
 abc
 def
 ghi
-./arith.tests: line 191: arithmetic syntax error
+./arith.tests: line 190: arithmetic syntax error
 16 16
-./arith.tests: line 196: arithmetic syntax error
-./arith.tests: line 197: malformed ?: operator
-./arith.tests: line 198: arithmetic syntax error
+./arith.tests: line 195: arithmetic syntax error
+./arith.tests: line 196: malformed ?: operator
+./arith.tests: line 197: arithmetic syntax error
 9 9
-./arith.tests: line 205: arithmetic syntax error
-./arith.tests: line 208: arithmetic syntax error
+./arith.tests: line 204: arithmetic syntax error
+./arith.tests: line 207: arithmetic syntax error
 9 9
 9 9
 9 9
@@ -98,11 +118,11 @@ ghi
 4 4
 4 4
 7 7
-./arith.tests: line 257: arithmetic syntax error
+./arith.tests: line 256: arithmetic syntax error
+./arith.tests: line 258: arithmetic syntax error
 ./arith.tests: line 259: arithmetic syntax error
-./arith.tests: line 260: arithmetic syntax error
+./arith.tests: line 261: arithmetic syntax error
 ./arith.tests: line 262: arithmetic syntax error
-./arith.tests: line 263: arithmetic syntax error
 4 4
 7 7
 -7 -7
@@ -140,9 +160,13 @@ ghi
 -7
 7
 7
+-7 -7
+-7 -7
+7 7
+7 7
 8 12
-./arith.tests: line 290: arithmetic syntax error
+./arith.tests: line 289: arithmetic syntax error
 42
 42
 42
-./arith.tests: line 302: a[b[c]d]=e: not found
+./arith.tests: line 301: a[b[c]d]=e: not found
diff --git a/shell/ash_test/ash-arith/arith.tests b/shell/ash_test/ash-arith/arith.tests
index 746ccab71..b9cb8ba4c 100755
--- a/shell/ash_test/ash-arith/arith.tests
+++ b/shell/ash_test/ash-arith/arith.tests
@@ -75,11 +75,11 @@ echo 4 $(( iv &= 4 ))
 echo 29 $(( iv += (jv + 9)))
 echo 5 $(( (iv + 4) % 7 ))
 
-# unary plus, minus
+echo unary plus, minus
 echo -4 $(( +4 - 8 ))
 echo 4 $(( -4 + 8 ))
 
-# conditional expressions
+echo conditional expressions
 echo 1 $(( 4<5 ? 1 : 32))
 echo 32 $(( 4>5 ? 1 : 32))
 echo 32 $(( 4>(2+3) ? 1 : 32))
@@ -87,8 +87,7 @@ echo 1 $(( 4<(2+3) ? 1 : 32))
 echo 1 $(( (2+2)<(2+3) ? 1 : 32))
 echo 32 $(( (2+2)>(2+3) ? 1 : 32))
 
-# check that the unevaluated part of the ternary operator does not do
-# evaluation or assignment
+echo check that the unevaluated part of the ternary operator does not do evaluation or assignment
 x=i+=2
 y=j+=2
 #ash# declare -i i=1 j=1
@@ -109,20 +108,20 @@ echo 20 $((1 ? 20 : (x+=2)))
 echo 30 $((0 ? (y+=2) : 30))
 #ash# echo $i,$y             # ash mishandles this
 
-# check precedence of assignment vs. conditional operator
+echo check precedence of assignment vs. conditional operator
 # should be an error
 #ash# declare -i x=2
       x=2
 #ashnote# bash reports error but continues, ash aborts - using subshell to 'emulate' bash:
 (  y=$((1 ? 20 : x+=2))  )
 
-# check precedence of assignment vs. conditional operator
+echo check precedence of assignment vs. conditional operator
 #ash# declare -i x=2
       x=2
 # ash says "line NNN: syntax error: 0 ? x+=2 : 20"
 #ash# echo 20 $((0 ? x+=2 : 20))
 
-# associativity of assignment-operator operator
+echo associativity of assignment-operator operator
 #ash# declare -i i=1 j=2 k=3
 i=1
 j=2
@@ -130,7 +129,7 @@ k=3
 echo 6 $((i += j += k))
 echo 6,5,3 $i,$j,$k
 
-# octal, hex
+echo octal, hex
 echo 263 $(( 0x100 | 007 ))
 echo 255 $(( 0xff ))
 #ash# echo 255 $(( 16#ff ))
@@ -139,25 +138,25 @@ echo 255 $(( 0xff ))
 
 echo 40 $(( 8 ^ 32 ))
 
-#ash# # other bases
-#ash# echo 10 $(( 16#a ))
-#ash# echo 10 $(( 32#a ))
-#ash# echo 10 $(( 56#a ))
-#ash# echo 10 $(( 64#a ))
-#ash#
-#ash# echo 10 $(( 16#A ))
-#ash# echo 10 $(( 32#A ))
-#ash# echo 36 $(( 56#A ))
-#ash# echo 36 $(( 64#A ))
-#ash#
-#ash# echo 62 $(( 64#@ ))
-#ash# echo 63 $(( 64#_ ))
+echo other bases
+echo 10 $(( 16#a ))
+echo 10 $(( 32#a ))
+echo 10 $(( 56#a ))
+echo 10 $(( 64#a ))
+
+echo 10 $(( 16#A ))
+echo 10 $(( 32#A ))
+echo 36 $(( 56#A ))
+echo 36 $(( 64#A ))
+
+echo 62 $(( 64#@ ))
+echo 63 $(( 64#_ ))
 
 #ash# # weird bases (error)
 #ash# echo $(( 3425#56 ))
 
-#ash# # missing number after base
-#ash# echo 0 $(( 2# ))
+echo missing number after base
+echo 0 $(( 2# ))
 
 # these should generate errors
 (  echo $(( 7 = 43 ))      )
diff --git a/shell/ash_test/ash-arith/arith2.sub b/shell/ash_test/ash-arith/arith2.sub
index 29f9471d6..8d7918114 100755
--- a/shell/ash_test/ash-arith/arith2.sub
+++ b/shell/ash_test/ash-arith/arith2.sub
@@ -46,12 +46,8 @@ echo $(( ---7 ))
 echo $(( ++7 ))
 (  echo $(( ++ + 7 ))  )
 
-# bash 3.2: -7
-#ash# echo -7 $(( ++-7 ))
-# bash 3.2: -7
-#ash# echo -7 $(( ++ - 7 ))
-
-# bash 3.2: 7
-#ash# echo 7 $(( +--7 ))
-# bash 3.2: 7
-#ash# echo 7 $(( -- + 7 ))
+echo -7 $(( ++-7 ))
+echo -7 $(( ++ - 7 ))
+
+echo 7 $(( +--7 ))
+echo 7 $(( -- + 7 ))
diff --git a/shell/hush_test/hush-arith/arith.right b/shell/hush_test/hush-arith/arith.right
index 2c389caea..a8612295e 100644
--- a/shell/hush_test/hush-arith/arith.right
+++ b/shell/hush_test/hush-arith/arith.right
@@ -70,6 +70,19 @@ octal, hex
 263 263
 255 255
 40 40
+other bases
+10 10
+10 10
+10 10
+10 10
+10 10
+10 10
+36 36
+36 36
+62 62
+63 63
+missing number after base
+0 0
 hush: arithmetic syntax error
 hush: divide by zero
 hush: can't execute 'let': No such file or directory
@@ -149,6 +162,10 @@ hush: arithmetic syntax error
 -7
 7
 7
+-7 -7
+-7 -7
+7 7
+7 7
 8 12
 hush: arithmetic syntax error
 42
diff --git a/shell/hush_test/hush-arith/arith.tests b/shell/hush_test/hush-arith/arith.tests
index a7aded17d..6b707486c 100755
--- a/shell/hush_test/hush-arith/arith.tests
+++ b/shell/hush_test/hush-arith/arith.tests
@@ -142,25 +142,25 @@ echo 255 $(( 0xff ))
 
 echo 40 $(( 8 ^ 32 ))
 
-#ash# # other bases
-#ash# echo 10 $(( 16#a ))
-#ash# echo 10 $(( 32#a ))
-#ash# echo 10 $(( 56#a ))
-#ash# echo 10 $(( 64#a ))
-#ash#
-#ash# echo 10 $(( 16#A ))
-#ash# echo 10 $(( 32#A ))
-#ash# echo 36 $(( 56#A ))
-#ash# echo 36 $(( 64#A ))
-#ash#
-#ash# echo 62 $(( 64#@ ))
-#ash# echo 63 $(( 64#_ ))
+echo other bases
+echo 10 $(( 16#a ))
+echo 10 $(( 32#a ))
+echo 10 $(( 56#a ))
+echo 10 $(( 64#a ))
+
+echo 10 $(( 16#A ))
+echo 10 $(( 32#A ))
+echo 36 $(( 56#A ))
+echo 36 $(( 64#A ))
+
+echo 62 $(( 64#@ ))
+echo 63 $(( 64#_ ))
 
 #ash# # weird bases (error)
 #ash# echo $(( 3425#56 ))
 
-#ash# # missing number after base
-#ash# echo 0 $(( 2# ))
+echo missing number after base
+echo 0 $(( 2# ))
 
 # these should generate errors
 (  echo $(( 7 = 43 ))      )
diff --git a/shell/hush_test/hush-arith/arith2.sub b/shell/hush_test/hush-arith/arith2.sub
index 29f9471d6..8d7918114 100755
--- a/shell/hush_test/hush-arith/arith2.sub
+++ b/shell/hush_test/hush-arith/arith2.sub
@@ -46,12 +46,8 @@ echo $(( ---7 ))
 echo $(( ++7 ))
 (  echo $(( ++ + 7 ))  )
 
-# bash 3.2: -7
-#ash# echo -7 $(( ++-7 ))
-# bash 3.2: -7
-#ash# echo -7 $(( ++ - 7 ))
-
-# bash 3.2: 7
-#ash# echo 7 $(( +--7 ))
-# bash 3.2: 7
-#ash# echo 7 $(( -- + 7 ))
+echo -7 $(( ++-7 ))
+echo -7 $(( ++ - 7 ))
+
+echo 7 $(( +--7 ))
+echo 7 $(( -- + 7 ))
-- 
cgit v1.2.3-55-g6feb


From 1be73dd9ad6d2cf6747934374c1d58bd9bc211b4 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 26 Sep 2021 13:25:49 +0200
Subject: shell: fix parsing of $(( (v)++ + NUM ))

function                                             old     new   delta
evaluate_string                                      988    1011     +23

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 shell/ash_test/ash-arith/arith-postinc.right   |  3 ++
 shell/ash_test/ash-arith/arith-postinc.tests   |  4 ++
 shell/hush_test/hush-arith/arith-postinc.right |  3 ++
 shell/hush_test/hush-arith/arith-postinc.tests |  4 ++
 shell/math.c                                   | 52 +++++++++++++++-----------
 5 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/shell/ash_test/ash-arith/arith-postinc.right b/shell/ash_test/ash-arith/arith-postinc.right
index c95ce02bf..5cd4ba6b4 100644
--- a/shell/ash_test/ash-arith/arith-postinc.right
+++ b/shell/ash_test/ash-arith/arith-postinc.right
@@ -2,4 +2,7 @@
 1 1
 1 1
 1 1
+6 6
+7 7
+7 7
 Ok:0
diff --git a/shell/ash_test/ash-arith/arith-postinc.tests b/shell/ash_test/ash-arith/arith-postinc.tests
index 3fd9bfed5..f2ae778df 100755
--- a/shell/ash_test/ash-arith/arith-postinc.tests
+++ b/shell/ash_test/ash-arith/arith-postinc.tests
@@ -2,4 +2,8 @@ echo 1 $((0++1))
 echo 1 $((0--1))
 x=-1; echo 1 $((0-$x))
 x=+1; echo 1 $((0+$x))
+a=3
+echo 6 $((a+++3))   # a++ + 3
+echo 7 $(((a)+++3)) # a + + + 3
+echo 7 $(((a)+++3)) # a + + + 3
 echo Ok:$?
diff --git a/shell/hush_test/hush-arith/arith-postinc.right b/shell/hush_test/hush-arith/arith-postinc.right
index c95ce02bf..5cd4ba6b4 100644
--- a/shell/hush_test/hush-arith/arith-postinc.right
+++ b/shell/hush_test/hush-arith/arith-postinc.right
@@ -2,4 +2,7 @@
 1 1
 1 1
 1 1
+6 6
+7 7
+7 7
 Ok:0
diff --git a/shell/hush_test/hush-arith/arith-postinc.tests b/shell/hush_test/hush-arith/arith-postinc.tests
index 3fd9bfed5..f2ae778df 100755
--- a/shell/hush_test/hush-arith/arith-postinc.tests
+++ b/shell/hush_test/hush-arith/arith-postinc.tests
@@ -2,4 +2,8 @@ echo 1 $((0++1))
 echo 1 $((0--1))
 x=-1; echo 1 $((0-$x))
 x=+1; echo 1 $((0+$x))
+a=3
+echo 6 $((a+++3))   # a++ + 3
+echo 7 $(((a)+++3)) # a + + + 3
+echo 7 $(((a)+++3)) # a + + + 3
 echo Ok:$?
diff --git a/shell/math.c b/shell/math.c
index 049d5703b..76d22c9bd 100644
--- a/shell/math.c
+++ b/shell/math.c
@@ -116,10 +116,6 @@
 #include "libbb.h"
 #include "math.h"
 
-#define lookupvar (math_state->lookupvar)
-#define setvar    (math_state->setvar   )
-//#define endofname (math_state->endofname)
-
 typedef unsigned char operator;
 
 /* An operator's token id is a bit of a bitfield. The lower 5 bits are the
@@ -258,7 +254,7 @@ static const char*
 arith_lookup_val(arith_state_t *math_state, var_or_num_t *t)
 {
 	if (t->var) {
-		const char *p = lookupvar(t->var);
+		const char *p = math_state->lookupvar(t->var);
 		if (p) {
 			remembered_name *cur;
 			remembered_name cur_save;
@@ -445,16 +441,15 @@ arith_apply(arith_state_t *math_state, operator op, var_or_num_t *numstack, var_
 
 		if (top_of_stack->var == NULL) {
 			/* Hmm, 1=2 ? */
-//TODO: actually, bash allows ++7 but for some reason it evals to 7, not 8
 			goto err;
 		}
 		/* Save to shell variable */
 		sprintf(buf, ARITH_FMT, rez);
-		setvar(top_of_stack->var, buf);
+		math_state->setvar(top_of_stack->var, buf);
 		/* After saving, make previous value for v++ or v-- */
 		if (op == TOK_POST_INC)
 			rez--;
-		else if (op == TOK_POST_DEC)
+		if (op == TOK_POST_DEC)
 			rez++;
 	}
 
@@ -607,11 +602,9 @@ evaluate_string(arith_state_t *math_state, const char *expr)
 		const char *p;
 		operator op;
 		operator prec;
-		char arithval;
 
 		expr = skip_whitespace(expr);
-		arithval = *expr;
-		if (arithval == '\0') {
+		if (*expr == '\0') {
 			if (expr == start_expr) {
 				/* Null expression */
 				numstack->val = 0;
@@ -628,6 +621,7 @@ evaluate_string(arith_state_t *math_state, const char *expr)
 				 * append a closing right paren
 				 * and let the loop process it */
 				expr = ptr_to_rparen;
+//bb_error_msg("expr=')'");
 				continue;
 			}
 			/* At this point, we're done with the expression */
@@ -635,19 +629,16 @@ evaluate_string(arith_state_t *math_state, const char *expr)
 				/* ...but if there isn't, it's bad */
 				goto err;
 			}
-			if (numstack->var) {
-				/* expression is $((var)) only, lookup now */
-				errmsg = arith_lookup_val(math_state, numstack);
-			}
 			goto ret;
 		}
 
 		p = endofname(expr);
 		if (p != expr) {
 			/* Name */
-			size_t var_name_size = (p-expr) + 1;  /* +1 for NUL */
+			size_t var_name_size = (p - expr) + 1;  /* +1 for NUL */
 			numstackptr->var = alloca(var_name_size);
 			safe_strncpy(numstackptr->var, expr, var_name_size);
+//bb_error_msg("var:'%s'", numstackptr->var);
 			expr = p;
  num:
 			numstackptr->second_val_present = 0;
@@ -656,11 +647,12 @@ evaluate_string(arith_state_t *math_state, const char *expr)
 			continue;
 		}
 
-		if (isdigit(arithval)) {
+		if (isdigit(*expr)) {
 			/* Number */
 			numstackptr->var = NULL;
 			errno = 0;
 			numstackptr->val = strto_arith_t(expr, (char**) &expr);
+//bb_error_msg("val:%lld", numstackptr->val);
 			if (errno)
 				numstackptr->val = 0; /* bash compat */
 			goto num;
@@ -671,10 +663,10 @@ evaluate_string(arith_state_t *math_state, const char *expr)
 		/* Special case: XYZ--, XYZ++, --XYZ, ++XYZ are recognized
 		 * only if XYZ is a variable name, not a number or EXPR. IOW:
 		 * "a+++v" is a++ + v.
+		 * "(a)+++7" is ( a ) + + + 7.
 		 * "7+++v" is 7 + ++v, not 7++ + v.
 		 * "--7" is - - 7, not --7.
-		 * "++++a" is + + ++a, not ++ ++ a.
-		 * (we still mishandle "(a)+++7", should be treated as (a) + + + 7, but we do increment a)
+		 * "++++a" is + + ++a, not ++ ++a.
 		 */
 		if ((expr[0] == '+' || expr[0] == '-')
 		 && (expr[1] == expr[0])
@@ -756,26 +748,40 @@ evaluate_string(arith_state_t *math_state, const char *expr)
 		 * "applied" in this way.
 		 */
 		prec = PREC(op);
+//bb_error_msg("prec:%02x", prec);
 		if ((prec > 0 && prec < UNARYPREC) || prec == SPEC_PREC) {
 			/* not left paren or unary */
 			if (lasttok != TOK_NUM) {
 				/* binary op must be preceded by a num */
 				goto err;
 			}
+			/* The algorithm employed here is simple: while we don't
+			 * hit an open paren nor the bottom of the stack, pop
+			 * tokens and apply them */
 			while (stackptr != stack) {
 				operator prev_op = *--stackptr;
 				if (op == TOK_RPAREN) {
-					/* The algorithm employed here is simple: while we don't
-					 * hit an open paren nor the bottom of the stack, pop
-					 * tokens and apply them */
+//bb_error_msg("op == TOK_RPAREN");
 					if (prev_op == TOK_LPAREN) {
+//bb_error_msg("prev_op == TOK_LPAREN");
+//bb_error_msg("  %p %p numstackptr[-1].var:'%s'", numstack, numstackptr-1, numstackptr[-1].var);
+						if (numstackptr[-1].var) {
+							/* Expression is (var), lookup now */
+							errmsg = arith_lookup_val(math_state, &numstackptr[-1]);
+							if (errmsg)
+								goto err_with_custom_msg;
+							/* Erase var name: (var) is just a number, for example, (var) = 1 is not valid */
+							numstackptr[-1].var = NULL;
+						}
 						/* Any operator directly after a
 						 * close paren should consider itself binary */
 						lasttok = TOK_NUM;
 						goto next;
 					}
+//bb_error_msg("prev_op != TOK_LPAREN");
 				} else {
 					operator prev_prec = PREC(prev_op);
+//bb_error_msg("op != TOK_RPAREN");
 					fix_assignment_prec(prec);
 					fix_assignment_prec(prev_prec);
 					if (prev_prec < prec
@@ -785,6 +791,7 @@ evaluate_string(arith_state_t *math_state, const char *expr)
 						break;
 					}
 				}
+//bb_error_msg("arith_apply(prev_op:%02x)", prev_op);
 				errmsg = arith_apply(math_state, prev_op, numstack, &numstackptr);
 				if (errmsg)
 					goto err_with_custom_msg;
@@ -794,6 +801,7 @@ evaluate_string(arith_state_t *math_state, const char *expr)
 		}
 
 		/* Push this operator to the stack and remember it */
+//bb_error_msg("push op:%02x", op);
 		*stackptr++ = lasttok = op;
  next: ;
 	} /* while (1) */
-- 
cgit v1.2.3-55-g6feb


From 03ed86b39e887b2f4031961673fddd88fdeb493e Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 28 Sep 2021 17:40:59 +0200
Subject: libbb: code shrink in main() and scripted_main()

function                                             old     new   delta
main                                                  68      65      -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3)               Total: -3 bytes
   text	   data	    bss	    dec	    hex	filename
   1472	    560	     16	   2048	    800	busybox_old
   1398	    552	     16	   1966	    7ae	busybox_unstripped

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 libbb/appletlib.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index 14be33603..5c5d7eb95 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -726,9 +726,9 @@ int scripted_main(int argc UNUSED_PARAM, char **argv)
 	int script = find_script_by_name(applet_name);
 	if (script >= 0)
 #  if ENABLE_SHELL_ASH
-		exit(ash_main(-script - 1, argv));
+		return ash_main(-script - 1, argv);
 #  elif ENABLE_SHELL_HUSH
-		exit(hush_main(-script - 1, argv));
+		return hush_main(-script - 1, argv);
 #  else
 		return 1;
 #  endif
@@ -1005,10 +1005,10 @@ int scripted_main(int argc UNUSED_PARAM, char **argv)
 {
 #  if ENABLE_SHELL_ASH
 	int script = 0;
-	exit(ash_main(-script - 1, argv));
+	return ash_main(-script - 1, argv);
 #  elif ENABLE_SHELL_HUSH
 	int script = 0;
-	exit(hush_main(-script - 1, argv));
+	return hush_main(-script - 1, argv);
 #  else
 	return 1;
 #  endif
@@ -1094,7 +1094,7 @@ int main(int argc UNUSED_PARAM, char **argv)
 
 	full_write2_str(bb_basename(argv[0]));
 	full_write2_str(": no applets enabled\n");
-	exit(127);
+	return 127;
 
 #else
 
-- 
cgit v1.2.3-55-g6feb


From 5b026d1ecf2df93d31248153a7f5d0c45a5d12fa Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 28 Sep 2021 17:41:56 +0200
Subject: ash: fix compile breakage in !ENABLE_ASH_ALIAS config

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 shell/ash.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/shell/ash.c b/shell/ash.c
index 4bc4f55d0..4bf0615ea 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -10941,14 +10941,14 @@ static void freestrings(struct strpush *sp)
 	INT_OFF;
 	do {
 		struct strpush *psp;
-
+#if ENABLE_ASH_ALIAS
 		if (sp->ap) {
 			sp->ap->flag &= ~ALIASINUSE;
 			if (sp->ap->flag & ALIASDEAD) {
 				unalias(sp->ap->name);
 			}
 		}
-
+#endif
 		psp = sp;
 		sp = sp->spfree;
 
-- 
cgit v1.2.3-55-g6feb


From 35727f5741b0c06ab53769e400c2074d2e77a77b Mon Sep 17 00:00:00 2001
From: Ismael Luceno <ismael@iodev.co.uk>
Date: Tue, 28 Sep 2021 21:47:49 +0200
Subject: less: Ignore -X

The flag disables termcap init/deinit of the terminal, which the applet
doesn't do anyway.

Signed-off-by: Ismael Luceno <ismael@iodev.co.uk>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 miscutils/less.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/miscutils/less.c b/miscutils/less.c
index 223c2558d..26983f40d 100644
--- a/miscutils/less.c
+++ b/miscutils/less.c
@@ -1820,7 +1820,7 @@ int less_main(int argc, char **argv)
 	getopt32(argv, "EMmN~IF"
 		IF_FEATURE_LESS_TRUNCATE("S")
 		IF_FEATURE_LESS_RAW("R")
-		/*ignored:*/"s"
+		/*ignored:*/"sX"
 	);
 	argv += optind;
 	num_files = argc - optind;
-- 
cgit v1.2.3-55-g6feb


From ac36e7007480e2d2d68d9d333c026ba4527086df Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Thu, 30 Sep 2021 00:03:23 +0200
Subject: tls: remove unused define

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 5a84852a5..73dae6c7b 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -24,8 +24,6 @@ static void dump_hex(const char *fmt, const void *vp, int len)
 # define dump_hex(...) ((void)0)
 #endif
 
-#undef DIGIT_BIT
-#define DIGIT_BIT  32
 typedef int32_t sp_digit;
 
 /* The code below is taken from parts of
-- 
cgit v1.2.3-55-g6feb


From 7714518f1a97b6facd58a877afaafa130149192d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 1 Oct 2021 13:51:39 +0200
Subject: tls: code shrink P256 code

function                                             old     new   delta
sp_256_to_bin                                        148     120     -28

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 73dae6c7b..353dacdc4 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -70,6 +70,16 @@ static const sp_digit p256_mod[10] = {
 
 #define p256_mp_mod ((sp_digit)0x000001)
 
+/* Normalize the values in each word to 26 bits. */
+static void sp_256_norm_10(sp_digit* a)
+{
+	int i;
+	for (i = 0; i < 9; i++) {
+		a[i+1] += a[i] >> 26;
+		a[i] &= 0x3ffffff;
+	}
+}
+
 /* Write r as big endian to byte aray.
  * Fixed length number of bytes written: 32
  *
@@ -80,10 +90,8 @@ static void sp_256_to_bin(sp_digit* r, uint8_t* a)
 {
 	int i, j, s = 0, b;
 
-	for (i = 0; i < 9; i++) {
-		r[i+1] += r[i] >> 26;
-		r[i] &= 0x3ffffff;
-	}
+	sp_256_norm_10(r);
+
 	j = 256 / 8 - 1;
 	a[j] = 0;
 	for (i = 0; i < 10 && j >= 0; i++) {
@@ -171,16 +179,6 @@ static int sp_256_cmp_equal_10(const sp_digit* a, const sp_digit* b)
 	return sp_256_cmp_10(a, b) == 0;
 }
 
-/* Normalize the values in each word to 26 bits. */
-static void sp_256_norm_10(sp_digit* a)
-{
-	int i;
-	for (i = 0; i < 9; i++) {
-		a[i+1] += a[i] >> 26;
-		a[i] &= 0x3ffffff;
-	}
-}
-
 /* Add b to a into r. (r = a + b) */
 static void sp_256_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
-- 
cgit v1.2.3-55-g6feb


From 1f5a44d20c7963a347165a033aea6da3bbfa9d2c Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 1 Oct 2021 14:27:10 +0200
Subject: tls: add scaffolding to selectively disable ciphers. no code changes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls.c | 106 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 51 insertions(+), 55 deletions(-)

diff --git a/networking/tls.c b/networking/tls.c
index 5566d7911..d0b976c33 100644
--- a/networking/tls.c
+++ b/networking/tls.c
@@ -22,47 +22,23 @@
 
 #include "tls.h"
 
+// Usually enabled. You can disable some of them to force only
+// specific ciphers to be advertized to server.
+// (this would not exclude code to handle disabled ciphers, no code size win)
+#define ALLOW_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256       1
+#define ALLOW_ECDHE_RSA_WITH_AES_128_CBC_SHA256         1
+#define ALLOW_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256       1
+#define ALLOW_ECDHE_RSA_WITH_AES_128_GCM_SHA256         1
+#define ALLOW_RSA_WITH_AES_128_CBC_SHA256       1
+#define ALLOW_RSA_WITH_AES_256_CBC_SHA256       1
+#define ALLOW_RSA_WITH_AES_128_GCM_SHA256       1
+#define ALLOW_CURVE_P256        1
+#define ALLOW_CURVE_X25519      1
+
+// For testing (does everything except encrypting).
 // works against "openssl s_server -cipher NULL"
 // and against wolfssl-3.9.10-stable/examples/server/server.c:
-#define ALLOW_RSA_NULL_SHA256  0  // for testing (does everything except encrypting)
-
-//Tested against kernel.org:
-//#define CIPHER_ID TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA // ok, recvs SERVER_KEY_EXCHANGE *** matrixssl uses this on my box
-//#define CIPHER_ID TLS_RSA_WITH_AES_256_CBC_SHA256 // ok, no SERVER_KEY_EXCHANGE
-//#define CIPHER_ID TLS_DH_anon_WITH_AES_256_CBC_SHA // SSL_ALERT_HANDSHAKE_FAILURE
-//^^^^^^^^^^^^^^^^^^^^^^^ (tested b/c this one doesn't req server certs... no luck, server refuses it)
-//#define CIPHER_ID TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 // SSL_ALERT_HANDSHAKE_FAILURE
-//#define CIPHER_ID TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 // SSL_ALERT_HANDSHAKE_FAILURE
-//#define CIPHER_ID TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 // ok, recvs SERVER_KEY_EXCHANGE
-//#define CIPHER_ID TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
-//#define CIPHER_ID TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384
-//#define CIPHER_ID TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256 // SSL_ALERT_HANDSHAKE_FAILURE
-//#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384
-//#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256 // SSL_ALERT_HANDSHAKE_FAILURE
-//#define CIPHER_ID TLS_RSA_WITH_AES_256_GCM_SHA384 // ok, no SERVER_KEY_EXCHANGE
-//#define CIPHER_ID TLS_RSA_WITH_AES_128_GCM_SHA256 // ok, no SERVER_KEY_EXCHANGE
-
-// works against wolfssl-3.9.10-stable/examples/server/server.c
-// works for kernel.org
-// does not work for cdn.kernel.org (e.g. downloading an actual tarball, not a web page)
-//  getting alert 40 "handshake failure" at once
-//  with GNU Wget 1.18, they agree on TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 (0xC02F) cipher
-//  fail: openssl s_client -connect cdn.kernel.org:443 -debug -tls1_2 -cipher AES256-SHA256
-//  fail: openssl s_client -connect cdn.kernel.org:443 -debug -tls1_2 -cipher AES256-GCM-SHA384
-//  fail: openssl s_client -connect cdn.kernel.org:443 -debug -tls1_2 -cipher AES128-SHA256
-//  ok:   openssl s_client -connect cdn.kernel.org:443 -debug -tls1_2 -cipher AES128-GCM-SHA256
-//  ok:   openssl s_client -connect cdn.kernel.org:443 -debug -tls1_2 -cipher AES128-SHA
-//        (TLS_RSA_WITH_AES_128_CBC_SHA - in TLS 1.2 it's mandated to be always supported)
-//#define CIPHER_ID1  TLS_RSA_WITH_AES_256_CBC_SHA256 //0x003D
-// Works with "wget https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.9.5.tar.xz"
-//#define CIPHER_ID2  TLS_RSA_WITH_AES_128_CBC_SHA    //0x002F
-
-// bug #11456:
-// ftp.openbsd.org only supports ECDHE-RSA-AESnnn-GCM-SHAnnn or ECDHE-RSA-CHACHA20-POLY1305
-//#define CIPHER_ID3  TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 //0xC02F
-// host is.gd accepts only ECDHE-ECDSA-foo (the simplest which works: ECDHE-ECDSA-AES128-SHA 0xC009)
-//#define CIPHER_ID4  TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA  //0xC009
-
+#define ALLOW_RSA_NULL_SHA256                   0
 
 #define TLS_DEBUG      0
 #define TLS_DEBUG_HASH 0
@@ -1488,9 +1464,20 @@ static ALWAYS_INLINE void fill_handshake_record_hdr(void *buf, unsigned type, un
 
 static void send_client_hello_and_alloc_hsd(tls_state_t *tls, const char *sni)
 {
-#define NUM_CIPHERS (7 + 6 * ENABLE_FEATURE_TLS_SHA1 + ALLOW_RSA_NULL_SHA256)
+#define NUM_CIPHERS (0 \
+	+ 4 * ENABLE_FEATURE_TLS_SHA1 \
+	+ ALLOW_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 \
+	+ ALLOW_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \
+	+ ALLOW_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 \
+	+ ALLOW_ECDHE_RSA_WITH_AES_128_GCM_SHA256 \
+	+ 2 * ENABLE_FEATURE_TLS_SHA1 \
+	+ ALLOW_RSA_WITH_AES_128_CBC_SHA256 \
+	+ ALLOW_RSA_WITH_AES_256_CBC_SHA256 \
+	+ ALLOW_RSA_WITH_AES_128_GCM_SHA256 \
+	+ ALLOW_RSA_NULL_SHA256 \
+	)
 	static const uint8_t ciphers[] = {
-		0x00,2 + NUM_CIPHERS*2, //len16_be
+		0x00,2 * (1 + NUM_CIPHERS), //len16_be
 		0x00,0xFF, //not a cipher - TLS_EMPTY_RENEGOTIATION_INFO_SCSV
 		/* ^^^^^^ RFC 5746 Renegotiation Indication Extension - some servers will refuse to work with us otherwise */
 #if ENABLE_FEATURE_TLS_SHA1
@@ -1501,14 +1488,22 @@ static void send_client_hello_and_alloc_hsd(tls_state_t *tls, const char *sni)
 	//	0xC0,0x18, //   TLS_ECDH_anon_WITH_AES_128_CBC_SHA
 	//	0xC0,0x19, //   TLS_ECDH_anon_WITH_AES_256_CBC_SHA
 #endif
+#if ALLOW_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256
 		0xC0,0x23, // 5 TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 - ok: wget https://is.gd/
+#endif
 	//	0xC0,0x24, //   TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 - can't do SHA384 yet
+#if ALLOW_ECDHE_RSA_WITH_AES_128_CBC_SHA256
 		0xC0,0x27, // 6 TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 - ok: openssl s_server ... -cipher ECDHE-RSA-AES128-SHA256
+#endif
 	//	0xC0,0x28, //   TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 - can't do SHA384 yet
+#if ALLOW_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256
 		0xC0,0x2B, // 7 TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 - ok: wget https://is.gd/
+#endif
 	//	0xC0,0x2C, //   TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 - wget https://is.gd/: "TLS error from peer (alert code 20): bad MAC"
 //TODO: GCM_SHA384 ciphers can be supported, only need sha384-based PRF?
+#if ALLOW_ECDHE_RSA_WITH_AES_128_GCM_SHA256
 		0xC0,0x2F, // 8 TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 - ok: openssl s_server ... -cipher ECDHE-RSA-AES128-GCM-SHA256
+#endif
 	//	0xC0,0x30, //   TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 - openssl s_server ... -cipher ECDHE-RSA-AES256-GCM-SHA384: "decryption failed or bad record mac"
 	//possibly these too:
 #if ENABLE_FEATURE_TLS_SHA1
@@ -1521,9 +1516,15 @@ static void send_client_hello_and_alloc_hsd(tls_state_t *tls, const char *sni)
 		0x00,0x2F, // 9 TLS_RSA_WITH_AES_128_CBC_SHA - ok: openssl s_server ... -cipher AES128-SHA
 		0x00,0x35, //10 TLS_RSA_WITH_AES_256_CBC_SHA - ok: openssl s_server ... -cipher AES256-SHA
 #endif
+#if ALLOW_RSA_WITH_AES_128_CBC_SHA256
 		0x00,0x3C, //11 TLS_RSA_WITH_AES_128_CBC_SHA256 - ok: openssl s_server ... -cipher AES128-SHA256
+#endif
+#if ALLOW_RSA_WITH_AES_256_CBC_SHA256
 		0x00,0x3D, //12 TLS_RSA_WITH_AES_256_CBC_SHA256 - ok: openssl s_server ... -cipher AES256-SHA256
+#endif
+#if ALLOW_RSA_WITH_AES_128_GCM_SHA256
 		0x00,0x9C, //13 TLS_RSA_WITH_AES_128_GCM_SHA256 - ok: openssl s_server ... -cipher AES128-GCM-SHA256
+#endif
 	//	0x00,0x9D, //   TLS_RSA_WITH_AES_256_GCM_SHA384 - openssl s_server ... -cipher AES256-GCM-SHA384: "decryption failed or bad record mac"
 #if ALLOW_RSA_NULL_SHA256
 		0x00,0x3B, //   TLS_RSA_WITH_NULL_SHA256
@@ -1532,12 +1533,16 @@ static void send_client_hello_and_alloc_hsd(tls_state_t *tls, const char *sni)
 	};
 	static const uint8_t supported_groups[] = {
 		0x00,0x0a, //extension_type: "supported_groups"
-		0x00,0x06, //ext len
-		0x00,0x04, //list len
-		0x00,0x17, //curve_secp256r1 (aka P256)
+		0x00,2 * (1 + ALLOW_CURVE_P256 + ALLOW_CURVE_X25519), //ext len
+		0x00,2 * (0 + ALLOW_CURVE_P256 + ALLOW_CURVE_X25519), //list len
+#if ALLOW_CURVE_P256
+		0x00,0x17, //curve_secp256r1 (aka P256, aka prime256v1)
+#endif
 		//0x00,0x18, //curve_secp384r1
 		//0x00,0x19, //curve_secp521r1
+#if ALLOW_CURVE_X25519
 		0x00,0x1d, //curve_x25519 (RFC 7748)
+#endif
 		//0x00,0x1e, //curve_x448 (RFC 7748)
 	};
 	//static const uint8_t signature_algorithms[] = {
@@ -1555,7 +1560,7 @@ static void send_client_hello_and_alloc_hsd(tls_state_t *tls, const char *sni)
 		uint8_t session_id_len;
 		/* uint8_t session_id[]; */
 		uint8_t cipherid_len16_hi, cipherid_len16_lo;
-		uint8_t cipherid[2 + NUM_CIPHERS*2]; /* actually variable */
+		uint8_t cipherid[2 * (1 + NUM_CIPHERS)]; /* actually variable */
 		uint8_t comprtypes_len;
 		uint8_t comprtypes[1]; /* actually variable */
 		/* Extensions (SNI shown):
@@ -1603,7 +1608,7 @@ static void send_client_hello_and_alloc_hsd(tls_state_t *tls, const char *sni)
 		memset(record->rand32, 0x11, sizeof(record->rand32));
 	/* record->session_id_len = 0; - already is */
 
-	BUILD_BUG_ON(sizeof(ciphers) != 2 + 2 + NUM_CIPHERS*2 + 2);
+	BUILD_BUG_ON(sizeof(ciphers) != 2 * (1 + 1 + NUM_CIPHERS + 1));
 	memcpy(&record->cipherid_len16_hi, ciphers, sizeof(ciphers));
 
 	ptr = (void*)(record + 1);
@@ -1700,41 +1705,32 @@ static void get_server_hello(tls_state_t *tls)
 
 	/* Set up encryption params based on selected cipher */
 #if 0
-#if ENABLE_FEATURE_TLS_SHA1
 		0xC0,0x09, // 1 TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA - ok: wget https://is.gd/
 		0xC0,0x0A, // 2 TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA - ok: wget https://is.gd/
 		0xC0,0x13, // 3 TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA - ok: openssl s_server ... -cipher ECDHE-RSA-AES128-SHA
 		0xC0,0x14, // 4 TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA - ok: openssl s_server ... -cipher ECDHE-RSA-AES256-SHA (might fail with older openssl)
 	//	0xC0,0x18, //   TLS_ECDH_anon_WITH_AES_128_CBC_SHA
 	//	0xC0,0x19, //   TLS_ECDH_anon_WITH_AES_256_CBC_SHA
-#endif
 		0xC0,0x23, // 5 TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 - ok: wget https://is.gd/
 	//	0xC0,0x24, //   TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 - can't do SHA384 yet
 		0xC0,0x27, // 6 TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 - ok: openssl s_server ... -cipher ECDHE-RSA-AES128-SHA256
 	//	0xC0,0x28, //   TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 - can't do SHA384 yet
 		0xC0,0x2B, // 7 TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 - ok: wget https://is.gd/
 	//	0xC0,0x2C, //   TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 - wget https://is.gd/: "TLS error from peer (alert code 20): bad MAC"
-//TODO: GCM_SHA384 ciphers can be supported, only need sha384-based PRF?
 		0xC0,0x2F, // 8 TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 - ok: openssl s_server ... -cipher ECDHE-RSA-AES128-GCM-SHA256
 	//	0xC0,0x30, //   TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 - openssl s_server ... -cipher ECDHE-RSA-AES256-GCM-SHA384: "decryption failed or bad record mac"
 	//possibly these too:
-#if ENABLE_FEATURE_TLS_SHA1
 	//	0xC0,0x35, //   TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA
 	//	0xC0,0x36, //   TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA
-#endif
 	//	0xC0,0x37, //   TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256
 	//	0xC0,0x38, //   TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA384 - can't do SHA384 yet
-#if ENABLE_FEATURE_TLS_SHA1
 		0x00,0x2F, // 9 TLS_RSA_WITH_AES_128_CBC_SHA - ok: openssl s_server ... -cipher AES128-SHA
 		0x00,0x35, //10 TLS_RSA_WITH_AES_256_CBC_SHA - ok: openssl s_server ... -cipher AES256-SHA
-#endif
 		0x00,0x3C, //11 TLS_RSA_WITH_AES_128_CBC_SHA256 - ok: openssl s_server ... -cipher AES128-SHA256
 		0x00,0x3D, //12 TLS_RSA_WITH_AES_256_CBC_SHA256 - ok: openssl s_server ... -cipher AES256-SHA256
 		0x00,0x9C, //13 TLS_RSA_WITH_AES_128_GCM_SHA256 - ok: openssl s_server ... -cipher AES128-GCM-SHA256
 	//	0x00,0x9D, //   TLS_RSA_WITH_AES_256_GCM_SHA384 - openssl s_server ... -cipher AES256-GCM-SHA384: "decryption failed or bad record mac"
-#if ALLOW_RSA_NULL_SHA256
 		0x00,0x3B, //   TLS_RSA_WITH_NULL_SHA256
-#endif
 #endif
 	cipherid1 = cipherid[1];
 	tls->cipher_id = 0x100 * cipherid[0] + cipherid1;
-- 
cgit v1.2.3-55-g6feb


From ed9aa89269569c58bd47d085f25eece0a6221973 Mon Sep 17 00:00:00 2001
From: Ron Yorston <rmy@pobox.com>
Date: Tue, 28 Sep 2021 09:27:40 +0100
Subject: wget: implement --post-file

Add the --post-file option to send form data from a file.  As with
--post-data it's up to the user to ensure that the data is encoded
as appropriate:  all wget does is stuff the provided data into
the request.

The --post-data and --post-file options are mutually exclusive and
only one instance of either may be given.

Additionally:

- update the usage message to include missing details of the --post-data
  and --header options;

- free POST data if FEATURE_CLEAN_UP is enabled.

function                                             old     new   delta
packed_usage                                       34158   34214     +56
wget_main                                           2762    2805     +43
.rodata                                            99225   99240     +15
static.wget_longopts                                 266     278     +12
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/0 up/down: 126/0)             Total: 126 bytes

Signed-off-by: Ron Yorston <rmy@pobox.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/wget.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/networking/wget.c b/networking/wget.c
index 6a9604421..91ef99eab 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -135,7 +135,8 @@
 
 //usage:#define wget_trivial_usage
 //usage:	IF_FEATURE_WGET_LONG_OPTIONS(
-//usage:       "[-cqS] [--spider] [-O FILE] [-o LOGFILE] [--header 'HEADER: VALUE'] [-Y on/off]\n"
+//usage:       "[-cqS] [--spider] [-O FILE] [-o LOGFILE] [--header STR]\n"
+//usage:       "	[--post-data STR | --post-file FILE] [-Y on/off]\n"
 /* Since we ignore these opts, we don't show them in --help */
 /* //usage:    "	[--no-cache] [--passive-ftp] [-t TRIES]" */
 /* //usage:    "	[-nv] [-nc] [-nH] [-np]" */
@@ -148,6 +149,9 @@
 //usage:       "Retrieve files via HTTP or FTP\n"
 //usage:	IF_FEATURE_WGET_LONG_OPTIONS(
 //usage:     "\n	--spider	Only check URL existence: $? is 0 if exists"
+//usage:     "\n	--header STR	Add STR (of form 'header: value') to headers"
+//usage:     "\n	--post-data STR	Send STR using POST method"
+//usage:     "\n	--post-file FILE	Send FILE using POST method"
 //usage:	IF_FEATURE_WGET_OPENSSL(
 //usage:     "\n	--no-check-certificate	Don't validate the server's certificate"
 //usage:	)
@@ -244,6 +248,7 @@ struct globals {
 	char *dir_prefix;
 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
 	char *post_data;
+	char *post_file;
 	char *extra_headers;
 	unsigned char user_headers; /* Headers mentioned by the user */
 #endif
@@ -292,10 +297,13 @@ enum {
 	WGET_OPT_POST_DATA  = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 	WGET_OPT_SPIDER     = (1 << 13) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 	WGET_OPT_NO_CHECK_CERT = (1 << 14) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
+	WGET_OPT_POST_FILE  = (1 << 15) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
 	/* hijack this bit for other than opts purposes: */
 	WGET_NO_FTRUNCATE   = (1 << 31)
 };
 
+#define WGET_OPT_POST (WGET_OPT_POST_DATA | WGET_OPT_POST_FILE)
+
 enum {
 	PROGRESS_START = -1,
 	PROGRESS_END   = 0,
@@ -1213,7 +1221,7 @@ static void download_one_url(const char *url)
 				target.path);
 		} else {
 			SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
-				(option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
+				(option_mask32 & WGET_OPT_POST) ? "POST" : "GET",
 				target.path);
 		}
 		if (!USR_HEADER_HOST)
@@ -1246,7 +1254,13 @@ static void download_one_url(const char *url)
 			fputs(G.extra_headers, sfp);
 		}
 
-		if (option_mask32 & WGET_OPT_POST_DATA) {
+		if (option_mask32 & WGET_OPT_POST_FILE) {
+			int fd = xopen_stdin(G.post_file);
+			G.post_data = xmalloc_read(fd, NULL);
+			close(fd);
+		}
+
+		if (G.post_data) {
 			SENDFMT(sfp,
 				"Content-Type: application/x-www-form-urlencoded\r\n"
 				"Content-Length: %u\r\n"
@@ -1489,6 +1503,7 @@ IF_DESKTOP(	"tries\0"            Required_argument "t")
 		"post-data\0"        Required_argument "\xfe"
 		"spider\0"           No_argument       "\xfd"
 		"no-check-certificate\0" No_argument   "\xfc"
+		"post-file\0"        Required_argument "\xfb"
 		/* Ignored (we always use PASV): */
 IF_DESKTOP(	"passive-ftp\0"      No_argument       "\xf0")
 		/* Ignored (we don't support caching) */
@@ -1532,6 +1547,9 @@ IF_DESKTOP(	"no-parent\0"        No_argument       "\xf0")
 		 */
 		"\0"
 		"-1" /* at least one URL */
+		IF_FEATURE_WGET_LONG_OPTIONS(":\xfe--\xfb")
+		IF_FEATURE_WGET_LONG_OPTIONS(":\xfe--\xfe")
+		IF_FEATURE_WGET_LONG_OPTIONS(":\xfb--\xfb")
 		IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
 		LONGOPTS
 		, &G.fname_out, &G.fname_log, &G.dir_prefix,
@@ -1541,6 +1559,7 @@ IF_DESKTOP(	"no-parent\0"        No_argument       "\xf0")
 		NULL  /* -n[ARG] */
 		IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
 		IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
+		IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_file)
 	);
 #if 0 /* option bits debug */
 	if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
@@ -1549,6 +1568,7 @@ IF_DESKTOP(	"no-parent\0"        No_argument       "\xf0")
 	if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
 	if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
 	if (option_mask32 & WGET_OPT_NO_CHECK_CERT) bb_error_msg("--no-check-certificate");
+	if (option_mask32 & WGET_OPT_POST_FILE) bb_error_msg("--post-file");
 	exit(0);
 #endif
 	argv += optind;
-- 
cgit v1.2.3-55-g6feb


From 934bb01d518f28fcefed81345ed6ec89510925be Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 1 Oct 2021 22:03:09 +0200
Subject: tls: "server cert is not RSA" is a fatal error

function                                             old     new   delta
tls_handshake                                       2022    2019      -3

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/networking/tls.c b/networking/tls.c
index d0b976c33..7ae9e5a1f 100644
--- a/networking/tls.c
+++ b/networking/tls.c
@@ -1940,7 +1940,7 @@ static void send_client_key_exchange(tls_state_t *tls)
 	if (!(tls->flags & NEED_EC_KEY)) {
 		/* RSA */
 		if (!(tls->flags & GOT_CERT_RSA_KEY_ALG))
-			bb_simple_error_msg("server cert is not RSA");
+			bb_simple_error_msg_and_die("server cert is not RSA");
 
 		tls_get_random(premaster, RSA_PREMASTER_SIZE);
 		if (TLS_DEBUG_FIXED_SECRETS)
-- 
cgit v1.2.3-55-g6feb


From e730505034e9964176111c9e28b06c68cec1f4c0 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 5 Oct 2021 13:30:48 +0200
Subject: tls: P256 code shrink

function                                             old     new   delta
sp_256_to_bin_10                                       -     120    +120
sp_256_from_bin_10                                     -     119    +119
sp_256_proj_point_dbl_10                             446     443      -3
curve_P256_compute_pubkey_and_premaster              191     186      -5
sp_256_point_from_bin2x32                             62      50     -12
sp_256_to_bin                                        120       -    -120
static.sp_256_from_bin                               149       -    -149
------------------------------------------------------------------------------
(add/remove: 2/2 grow/shrink: 0/3 up/down: 239/-289)          Total: -50 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 94 ++++++++++++++++++++++---------------------------
 1 file changed, 42 insertions(+), 52 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 353dacdc4..9c92d0a14 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -80,13 +80,13 @@ static void sp_256_norm_10(sp_digit* a)
 	}
 }
 
-/* Write r as big endian to byte aray.
+/* Write r as big endian to byte array.
  * Fixed length number of bytes written: 32
  *
  * r  A single precision integer.
  * a  Byte array.
  */
-static void sp_256_to_bin(sp_digit* r, uint8_t* a)
+static void sp_256_to_bin_10(sp_digit* r, uint8_t* a)
 {
 	int i, j, s = 0, b;
 
@@ -112,33 +112,28 @@ static void sp_256_to_bin(sp_digit* r, uint8_t* a)
 	}
 }
 
-/* Read big endian unsigned byte aray into r.
+/* Read big endian unsigned byte array into r.
  *
  * r  A single precision integer.
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_256_from_bin(sp_digit* r, int max, const uint8_t* a, int n)
+static void sp_256_from_bin_10(sp_digit* r, const uint8_t* a)
 {
 	int i, j = 0, s = 0;
 
 	r[0] = 0;
-	for (i = n-1; i >= 0; i--) {
+	for (i = 32 - 1; i >= 0; i--) {
 		r[j] |= ((sp_digit)a[i]) << s;
 		if (s >= 18) {
 			r[j] &= 0x3ffffff;
 			s = 26 - s;
-			if (j + 1 >= max)
-				break;
 			r[++j] = a[i] >> s;
 			s = 8 - s;
 		}
 		else
 			s += 8;
 	}
-
-	for (j++; j < max; j++)
-		r[j] = 0;
 }
 
 /* Convert a point of big-endian 32-byte x,y pair to type sp_point. */
@@ -146,11 +141,9 @@ static void sp_256_point_from_bin2x32(sp_point* p, const uint8_t *bin2x32)
 {
 	memset(p, 0, sizeof(*p));
 	/*p->infinity = 0;*/
-	sp_256_from_bin(p->x, 2 * 10, bin2x32, 32);
-	sp_256_from_bin(p->y, 2 * 10, bin2x32 + 32, 32);
-	//static const uint8_t one[1] = { 1 };
-	//sp_256_from_bin(p->z, 2 * 10, one, 1);
-	p->z[0] = 1;
+	sp_256_from_bin_10(p->x, bin2x32);
+	sp_256_from_bin_10(p->y, bin2x32 + 32);
+	p->z[0] = 1; /* p->z = 1 */
 }
 
 /* Compare a with b.
@@ -195,30 +188,6 @@ static void sp_256_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
 		r[i] = a[i] - b[i];
 }
 
-/* Shift number left one bit. Bottom bit is lost. */
-static void sp_256_rshift1_10(sp_digit* r, sp_digit* a)
-{
-	int i;
-	for (i = 0; i < 9; i++)
-		r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
-	r[9] = a[9] >> 1;
-}
-
-/* Mul a by scalar b and add into r. (r += a * b) */
-static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a, sp_digit b)
-{
-	int64_t tb = b;
-	int64_t t = 0;
-	int i;
-
-	for (i = 0; i < 10; i++) {
-		t += (tb * a[i]) + r[i];
-		r[i] = t & 0x3ffffff;
-		t >>= 26;
-	}
-	r[10] += t;
-}
-
 /* Multiply a and b into r. (r = a * b) */
 static void sp_256_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
@@ -271,6 +240,15 @@ static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
 	r[0] = (sp_digit)(c >> 26);
 }
 
+/* Shift number left one bit. Bottom bit is lost. */
+static void sp_256_rshift1_10(sp_digit* r, sp_digit* a)
+{
+	int i;
+	for (i = 0; i < 9; i++)
+		r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
+	r[9] = a[9] >> 1;
+}
+
 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) */
 static void sp_256_div2_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
@@ -345,6 +323,20 @@ static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a)
 	memset(&r[10], 0, sizeof(*r) * 10);
 }
 
+/* Mul a by scalar b and add into r. (r += a * b) */
+static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a, sp_digit b)
+{
+	int64_t t = 0;
+	int i;
+
+	for (i = 0; i < 10; i++) {
+		t += ((int64_t)b * a[i]) + r[i];
+		r[i] = t & 0x3ffffff;
+		t >>= 26;
+	}
+	r[10] += t;
+}
+
 /* Reduce the number back to 256 bits using Montgomery reduction.
  *
  * a   A single precision number to reduce in place.
@@ -362,7 +354,7 @@ static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
 			sp_256_mul_add_10(a+i, m, mu);
 			a[i+1] += a[i] >> 26;
 		}
-		mu = (a[i] * mp) & 0x3fffffl;
+		mu = (a[i] * mp) & 0x03fffff;
 		sp_256_mul_add_10(a+i, m, mu);
 		a[i+1] += a[i] >> 26;
 		a[i] &= 0x3ffffff;
@@ -370,11 +362,11 @@ static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
 	else {
 		for (i = 0; i < 9; i++) {
 			mu = a[i] & 0x3ffffff;
-			sp_256_mul_add_10(a+i, p256_mod, mu);
+			sp_256_mul_add_10(a+i, m, mu);
 			a[i+1] += a[i] >> 26;
 		}
-		mu = a[i] & 0x3fffffl;
-		sp_256_mul_add_10(a+i, p256_mod, mu);
+		mu = a[i] & 0x03fffff;
+		sp_256_mul_add_10(a+i, m, mu);
 		a[i+1] += a[i] >> 26;
 		a[i] &= 0x3ffffff;
 	}
@@ -617,7 +609,6 @@ static void sp_256_map_10(sp_point* r, sp_point* p)
  */
 static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
 {
-	sp_point tp;
 	sp_digit t1[2*10];
 	sp_digit t2[2*10];
 
@@ -625,10 +616,9 @@ static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
 	if (r != p)
 		*r = *p; /* struct copy */
 
-	if (r->infinity) {
-		/* If infinity, don't double (work on dummy value) */
-		r = &tp;
-	}
+	if (r->infinity) /* If infinity, don't double */
+		return;
+
 	/* T1 = Z * Z */
 	sp_256_mont_sqr_10(t1, r->z, p256_mod, p256_mp_mod);
 	/* Z = Y * Z */
@@ -848,7 +838,7 @@ static void sp_ecc_secret_gen_256(const sp_digit priv[10], const uint8_t *pub2x3
 
 	sp_256_ecc_mulmod_10(point, point, priv);
 
-	sp_256_to_bin(point->x, out32);
+	sp_256_to_bin_10(point->x, out32);
 	dump_hex("out32: %s\n", out32, 32);
 }
 
@@ -876,7 +866,7 @@ static void sp_256_ecc_gen_k_10(sp_digit k[10])
 #if FIXED_SECRET
 		memset(buf, 0x77, sizeof(buf));
 #endif
-		sp_256_from_bin(k, 10, buf, sizeof(buf));
+		sp_256_from_bin_10(k, buf);
 #if !SIMPLIFY
 		if (sp_256_cmp_10(k, p256_order2) < 0)
 			break;
@@ -901,8 +891,8 @@ static void sp_ecc_make_key_256(sp_digit privkey[10], uint8_t *pubkey)
 
 	sp_256_ecc_gen_k_10(privkey);
 	sp_256_ecc_mulmod_base_10(point, privkey);
-	sp_256_to_bin(point->x, pubkey);
-	sp_256_to_bin(point->y, pubkey + 32);
+	sp_256_to_bin_10(point->x, pubkey);
+	sp_256_to_bin_10(point->y, pubkey + 32);
 
 	memset(point, 0, sizeof(point)); //paranoia
 }
-- 
cgit v1.2.3-55-g6feb


From 389329efbed15122bb3fba59e9919d870301eb93 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 5 Oct 2021 13:39:33 +0200
Subject: tls: another P256 code shrink

Propagate constant arrays and scalars deeper down call chain.
Use sp_256_mont_mul_10 to implement sp_256_mont_sqr_10.

function                                             old     new   delta
sp_256_mont_mul_10                                     -     214    +214
sp_256_mont_reduce_10                                  -     178    +178
sp_256_mont_sqr_10                                     -       7      +7
static.sp_256_mont_reduce_10                         178       -    -178
static.sp_256_mont_mul_10                            214       -    -214
static.sp_256_mont_sqr_10                            234       -    -234
------------------------------------------------------------------------------
(add/remove: 3/3 grow/shrink: 0/0 up/down: 399/-626)         Total: -227 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 123 +++++++++++++++++++++---------------------------
 1 file changed, 54 insertions(+), 69 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 9c92d0a14..f4902f7f3 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -213,34 +213,7 @@ static void sp_256_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
 	r[0] = (sp_digit)(c >> 26);
 }
 
-/* Square a and put result in r. (r = a * a) */
-static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
-{
-	int i, j, k;
-	int64_t c;
-
-	c = ((int64_t)a[9]) * a[9];
-	r[19] = (sp_digit)(c >> 26);
-	c = (c & 0x3ffffff) << 26;
-	for (k = 17; k >= 0; k--) {
-		for (i = 9; i >= 0; i--) {
-			j = k - i;
-			if (j >= 10 || i <= j)
-				break;
-			if (j < 0)
-				continue;
-			c += ((int64_t)a[i]) * a[j] * 2;
-		}
-		if (i == j)
-			c += ((int64_t)a[i]) * a[i];
-		r[k + 2] += c >> 52;
-		r[k + 1] = (c >> 26) & 0x3ffffff;
-		c = (c & 0x3ffffff) << 26;
-	}
-	r[0] = (sp_digit)(c >> 26);
-}
-
-/* Shift number left one bit. Bottom bit is lost. */
+/* Shift number right one bit. Bottom bit is lost. */
 static void sp_256_rshift1_10(sp_digit* r, sp_digit* a)
 {
 	int i;
@@ -343,8 +316,11 @@ static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a, sp_digit b)
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
+static void sp_256_mont_reduce_10(sp_digit* a /*, const sp_digit* m, sp_digit mp*/)
 {
+	const sp_digit* m = p256_mod;
+	sp_digit mp = p256_mp_mod;
+
 	int i;
 	sp_digit mu;
 
@@ -359,7 +335,7 @@ static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
 		a[i+1] += a[i] >> 26;
 		a[i] &= 0x3ffffff;
 	}
-	else {
+	else { /* Same code for explicit mp == 1 (which is always the case for P256) */
 		for (i = 0; i < 9; i++) {
 			mu = a[i] & 0x3ffffff;
 			sp_256_mul_add_10(a+i, m, mu);
@@ -372,8 +348,12 @@ static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
 	}
 
 	sp_256_mont_shift_10(a, a);
+//TODO: can below condition ever be true? Doesn't it require 512+th bit(s) in a to be set?
 	if ((a[9] >> 22) > 0)
+{
+dbg("THIS HAPPENS\n");
 		sp_256_sub_10(a, a, m);
+}
 	sp_256_norm_10(a);
 }
 
@@ -386,11 +366,14 @@ static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
-		const sp_digit* m, sp_digit mp)
+static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b
+		/*, const sp_digit* m, sp_digit mp*/)
 {
+	//const sp_digit* m = p256_mod;
+	//sp_digit mp = p256_mp_mod;
+
 	sp_256_mul_10(r, a, b);
-	sp_256_mont_reduce_10(r, m, mp);
+	sp_256_mont_reduce_10(r /*, m, mp*/);
 }
 
 /* Square the Montgomery form number. (r = a * a mod m)
@@ -400,11 +383,13 @@ static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a, const sp_digit* m,
-		sp_digit mp)
+static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a
+		/*, const sp_digit* m, sp_digit mp*/)
 {
-	sp_256_sqr_10(r, a);
-	sp_256_mont_reduce_10(r, m, mp);
+	//const sp_digit* m = p256_mod;
+	//sp_digit mp = p256_mp_mod;
+
+	sp_256_mont_mul_10(r, a, a /*, m, mp*/);
 }
 
 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
@@ -432,10 +417,10 @@ static void sp_256_mont_inv_10(sp_digit* r, sp_digit* a)
 
 	memcpy(t, a, sizeof(sp_digit) * 10);
 	for (i = 254; i >= 0; i--) {
-		sp_256_mont_sqr_10(t, t, p256_mod, p256_mp_mod);
+		sp_256_mont_sqr_10(t, t /*, p256_mod, p256_mp_mod*/);
 		/*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/
 		if (i >= 224 || i == 192 || (i <= 95 && i != 1))
-			sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
+			sp_256_mont_mul_10(t, t, a /*, p256_mod, p256_mp_mod*/);
 	}
 	memcpy(r, t, sizeof(sp_digit) * 10);
 }
@@ -577,22 +562,22 @@ static void sp_256_map_10(sp_point* r, sp_point* p)
 
 	sp_256_mont_inv_10(t1, p->z);
 
-	sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
-	sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
+	sp_256_mont_sqr_10(t2, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_10(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
 
 	/* x /= z^2 */
-	sp_256_mont_mul_10(r->x, p->x, t2, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/);
 	memset(r->x + 10, 0, sizeof(r->x) / 2);
-	sp_256_mont_reduce_10(r->x, p256_mod, p256_mp_mod);
+	sp_256_mont_reduce_10(r->x /*, p256_mod, p256_mp_mod*/);
 	/* Reduce x to less than modulus */
 	if (sp_256_cmp_10(r->x, p256_mod) >= 0)
 		sp_256_sub_10(r->x, r->x, p256_mod);
 	sp_256_norm_10(r->x);
 
 	/* y /= z^3 */
-	sp_256_mont_mul_10(r->y, p->y, t1, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
 	memset(r->y + 10, 0, sizeof(r->y) / 2);
-	sp_256_mont_reduce_10(r->y, p256_mod, p256_mp_mod);
+	sp_256_mont_reduce_10(r->y /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
 	if (sp_256_cmp_10(r->y, p256_mod) >= 0)
 		sp_256_sub_10(r->y, r->y, p256_mod);
@@ -620,9 +605,9 @@ static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
 		return;
 
 	/* T1 = Z * Z */
-	sp_256_mont_sqr_10(t1, r->z, p256_mod, p256_mp_mod);
+	sp_256_mont_sqr_10(t1, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = Y * Z */
-	sp_256_mont_mul_10(r->z, r->y, r->z, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = 2Z */
 	sp_256_mont_dbl_10(r->z, r->z, p256_mod);
 	/* T2 = X - T1 */
@@ -630,21 +615,21 @@ static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
 	/* T1 = X + T1 */
 	sp_256_mont_add_10(t1, r->x, t1, p256_mod);
 	/* T2 = T1 * T2 */
-	sp_256_mont_mul_10(t2, t1, t2, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
 	/* T1 = 3T2 */
 	sp_256_mont_tpl_10(t1, t2, p256_mod);
 	/* Y = 2Y */
 	sp_256_mont_dbl_10(r->y, r->y, p256_mod);
 	/* Y = Y * Y */
-	sp_256_mont_sqr_10(r->y, r->y, p256_mod, p256_mp_mod);
+	sp_256_mont_sqr_10(r->y, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = Y * Y */
-	sp_256_mont_sqr_10(t2, r->y, p256_mod, p256_mp_mod);
+	sp_256_mont_sqr_10(t2, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = T2/2 */
 	sp_256_div2_10(t2, t2, p256_mod);
 	/* Y = Y * X */
-	sp_256_mont_mul_10(r->y, r->y, r->x, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
 	/* X = T1 * T1 */
-	sp_256_mont_mul_10(r->x, t1, t1, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
 	/* X = X - Y */
 	sp_256_mont_sub_10(r->x, r->x, r->y, p256_mod);
 	/* X = X - Y */
@@ -652,7 +637,7 @@ static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
 	/* Y = Y - X */
 	sp_256_mont_sub_10(r->y, r->y, r->x, p256_mod);
 	/* Y = Y * T1 */
-	sp_256_mont_mul_10(r->y, r->y, t1, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
 	/* Y = Y - T2 */
 	sp_256_mont_sub_10(r->y, r->y, t2, p256_mod);
 }
@@ -700,36 +685,36 @@ static void sp_256_proj_point_add_10(sp_point* r, sp_point* p, sp_point* q)
 		*r = p->infinity ? *q : *p; /* struct copy */
 
 		/* U1 = X1*Z2^2 */
-		sp_256_mont_sqr_10(t1, q->z, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t3, t1, q->z, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t1, t1, v->x, p256_mod, p256_mp_mod);
+		sp_256_mont_sqr_10(t1, q->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t1, t1, v->x /*, p256_mod, p256_mp_mod*/);
 		/* U2 = X2*Z1^2 */
-		sp_256_mont_sqr_10(t2, v->z, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t4, t2, v->z, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
+		sp_256_mont_sqr_10(t2, v->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t4, t2, v->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
 		/* S1 = Y1*Z2^3 */
-		sp_256_mont_mul_10(t3, t3, v->y, p256_mod, p256_mp_mod);
+		sp_256_mont_mul_10(t3, t3, v->y /*, p256_mod, p256_mp_mod*/);
 		/* S2 = Y2*Z1^3 */
-		sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
+		sp_256_mont_mul_10(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
 		/* H = U2 - U1 */
 		sp_256_mont_sub_10(t2, t2, t1, p256_mod);
 		/* R = S2 - S1 */
 		sp_256_mont_sub_10(t4, t4, t3, p256_mod);
 		/* Z3 = H*Z1*Z2 */
-		sp_256_mont_mul_10(v->z, v->z, q->z, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(v->z, v->z, t2, p256_mod, p256_mp_mod);
+		sp_256_mont_mul_10(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/);
 		/* X3 = R^2 - H^3 - 2*U1*H^2 */
-		sp_256_mont_sqr_10(v->x, t4, p256_mod, p256_mp_mod);
-		sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(v->y, t1, t5, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
+		sp_256_mont_sqr_10(v->x, t4 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_sqr_10(t5, t2 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
 		sp_256_mont_sub_10(v->x, v->x, t5, p256_mod);
 		sp_256_mont_dbl_10(t1, v->y, p256_mod);
 		sp_256_mont_sub_10(v->x, v->x, t1, p256_mod);
 		/* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
 		sp_256_mont_sub_10(v->y, v->y, v->x, p256_mod);
-		sp_256_mont_mul_10(v->y, v->y, t4, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t5, t5, t3, p256_mod, p256_mp_mod);
+		sp_256_mont_mul_10(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
 		sp_256_mont_sub_10(v->y, v->y, t5, p256_mod);
 	}
 }
-- 
cgit v1.2.3-55-g6feb


From 137864f559e7eff1f929958d3999359c7070ed91 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 5 Oct 2021 13:47:42 +0200
Subject: tls: add debugging scaffolding to P256 code

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index f4902f7f3..5b4c7e97c 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -136,6 +136,53 @@ static void sp_256_from_bin_10(sp_digit* r, const uint8_t* a)
 	}
 }
 
+#if SP_DEBUG
+static void dump_256(const char *fmt, const sp_digit* cr)
+{
+	sp_digit* r = (sp_digit*)cr;
+	uint8_t b32[32];
+	sp_256_to_bin_10(r, b32);
+	dump_hex(fmt, b32, 32);
+}
+static void dump_512(const char *fmt, const sp_digit* cr)
+{
+	sp_digit* r = (sp_digit*)cr;
+	uint8_t a[64];
+	int i, j, s, b;
+
+	/* sp_512_norm_10: */
+	for (i = 0; i < 19; i++) {
+		r[i+1] += r[i] >> 26;
+		r[i] &= 0x3ffffff;
+	}
+	/* sp_512_to_bin_10: */
+	s = 0;
+	j = 512 / 8 - 1;
+	a[j] = 0;
+	for (i = 0; i < 20 && j >= 0; i++) {
+		b = 0;
+		a[j--] |= r[i] << s; b += 8 - s;
+		if (j < 0)
+			break;
+		while (b < 26) {
+			a[j--] = r[i] >> b; b += 8;
+			if (j < 0)
+				break;
+		}
+		s = 8 - (b - 26);
+		if (j >= 0)
+			a[j] = 0;
+		if (s != 0)
+			j++;
+	}
+
+	dump_hex(fmt, a, 64);
+}
+#else
+# define dump_256(...) ((void)0)
+# define dump_512(...) ((void)0)
+#endif
+
 /* Convert a point of big-endian 32-byte x,y pair to type sp_point. */
 static void sp_256_point_from_bin2x32(sp_point* p, const uint8_t *bin2x32)
 {
@@ -743,6 +790,9 @@ static void sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit*
 	sp_256_mod_mul_norm_10(t[1].x, g->x);
 	sp_256_mod_mul_norm_10(t[1].y, g->y);
 	sp_256_mod_mul_norm_10(t[1].z, g->z);
+	dump_512("t[1].x %s\n", t[1].x);
+	dump_512("t[1].y %s\n", t[1].y);
+	dump_512("t[1].z %s\n", t[1].z);
 
 	i = 9;
 	c = 22;
@@ -875,7 +925,10 @@ static void sp_ecc_make_key_256(sp_digit privkey[10], uint8_t *pubkey)
 	sp_point point[1];
 
 	sp_256_ecc_gen_k_10(privkey);
+	dump_256("privkey %s\n", privkey);
 	sp_256_ecc_mulmod_base_10(point, privkey);
+	dump_512("point->x %s\n", point->x);
+	dump_512("point->y %s\n", point->y);
 	sp_256_to_bin_10(point->x, pubkey);
 	sp_256_to_bin_10(point->y, pubkey + 32);
 
-- 
cgit v1.2.3-55-g6feb


From 92402d5e0a2b8e0692b2eebd15b893bdbe4c3a34 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 5 Oct 2021 14:01:17 +0200
Subject: tls: remove one overzealous debugging statement

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 5b4c7e97c..7c6229ffd 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -395,12 +395,8 @@ static void sp_256_mont_reduce_10(sp_digit* a /*, const sp_digit* m, sp_digit mp
 	}
 
 	sp_256_mont_shift_10(a, a);
-//TODO: can below condition ever be true? Doesn't it require 512+th bit(s) in a to be set?
 	if ((a[9] >> 22) > 0)
-{
-dbg("THIS HAPPENS\n");
 		sp_256_sub_10(a, a, m);
-}
 	sp_256_norm_10(a);
 }
 
-- 
cgit v1.2.3-55-g6feb


From 81d8af1970e70f2bffa9e67acb10e732cba555a6 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 5 Oct 2021 17:31:33 +0200
Subject: tls: fix (what looks like) a rare corner case bug in P256

function                                             old     new   delta
static.sp_256_mont_sub_10                             30      46     +16

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 7c6229ffd..99f9c6839 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -96,11 +96,13 @@ static void sp_256_to_bin_10(sp_digit* r, uint8_t* a)
 	a[j] = 0;
 	for (i = 0; i < 10 && j >= 0; i++) {
 		b = 0;
-		a[j--] |= r[i] << s; b += 8 - s;
+		a[j--] |= r[i] << s;
+		b += 8 - s;
 		if (j < 0)
 			break;
 		while (b < 26) {
-			a[j--] = r[i] >> b; b += 8;
+			a[j--] = r[i] >> b;
+			b += 8;
 			if (j < 0)
 				break;
 		}
@@ -297,6 +299,7 @@ static void sp_256_mont_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b
 	if (r[9] >> 22)
 		sp_256_add_10(r, r, m);
 	sp_256_norm_10(r);
+	r[9] &= 0x03fffff; /* truncate to 22 bits */
 }
 
 /* Double a Montgomery form number (r = a + a % m) */
@@ -864,8 +867,8 @@ static void sp_ecc_secret_gen_256(const sp_digit priv[10], const uint8_t *pub2x3
 	dump_hex("        %s\n", pub2x32 + 32, 32);
 
 	sp_256_point_from_bin2x32(point, pub2x32);
-	dump_hex("point->x %s\n", point->x, sizeof(point->x));
-	dump_hex("point->y %s\n", point->y, sizeof(point->y));
+	dump_512("point->x %s\n", point->x);
+	dump_512("point->y %s\n", point->y);
 
 	sp_256_ecc_mulmod_10(point, point, priv);
 
-- 
cgit v1.2.3-55-g6feb


From 55578f2fb7c05357fb0b1ce84b616ba8ffd6d907 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 5 Oct 2021 19:45:56 +0200
Subject: tls: fix the case of sp_256_mont_tpl_10() leaving stray high bits

It has no effect on correctness, but interferes with comparing internal state
of different implementations.

function                                             old     new   delta
sp_256_proj_point_dbl_10                             443     451      +8
static.sp_256_mont_sub_10                             46      49      +3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 11/0)               Total: 11 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls.c        | 42 ++++++++++++++++++++++++++++++++++++++++++
 networking/tls.h        |  4 ++++
 networking/tls_sp_c32.c | 37 ++++++++++++++++++++++++++-----------
 3 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/networking/tls.c b/networking/tls.c
index 7ae9e5a1f..4f0e2b6eb 100644
--- a/networking/tls.c
+++ b/networking/tls.c
@@ -2326,6 +2326,48 @@ void FAST_FUNC tls_run_copy_loop(tls_state_t *tls, unsigned flags)
 	const int INBUF_STEP = 4 * 1024;
 	struct pollfd pfds[2];
 
+#if 0
+// Debug aid for comparing P256 implementations.
+// Enable this, set SP_DEBUG and FIXED_SECRET to 1,
+// and add
+//	tls_run_copy_loop(NULL, 0);
+// e.g. at the very beginning of wget_main()
+//
+{
+//kbuild:lib-$(CONFIG_TLS) += tls_sp_c32_new.o
+	uint8_t ecc_pub_key32[2 * 32];
+	uint8_t pubkey2x32[2 * 32];
+	uint8_t premaster32[32];
+
+//Fixed input key:
+//	memset(ecc_pub_key32, 0xee, sizeof(ecc_pub_key32));
+//Fixed 000000000000000000000000000000000000ab000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
+//	memset(ecc_pub_key32, 0x00, sizeof(ecc_pub_key32));
+//	ecc_pub_key32[18] = 0xab;
+//Random key:
+	tls_get_random(ecc_pub_key32, sizeof(ecc_pub_key32));
+//Biased random (almost all zeros or almost all ones):
+//	srand(time(NULL) ^ getpid());
+//	if (rand() & 1)
+//		memset(ecc_pub_key32, 0x00, sizeof(ecc_pub_key32));
+//	else
+//		memset(ecc_pub_key32, 0xff, sizeof(ecc_pub_key32));
+//	ecc_pub_key32[rand() & 0x3f] = rand();
+
+	xmove_fd(xopen("p256.OLD", O_WRONLY | O_CREAT | O_TRUNC), 2);
+	curve_P256_compute_pubkey_and_premaster(
+			pubkey2x32, premaster32,
+			/*point:*/ ecc_pub_key32
+	);
+	xmove_fd(xopen("p256.NEW", O_WRONLY | O_CREAT | O_TRUNC), 2);
+	curve_P256_compute_pubkey_and_premaster_NEW(
+			pubkey2x32, premaster32,
+			/*point:*/ ecc_pub_key32
+	);
+	exit(1);
+}
+#endif
+
 	pfds[0].fd = STDIN_FILENO;
 	pfds[0].events = POLLIN;
 	pfds[1].fd = tls->ifd;
diff --git a/networking/tls.h b/networking/tls.h
index 215e92b02..eb0fdd4cf 100644
--- a/networking/tls.h
+++ b/networking/tls.h
@@ -117,3 +117,7 @@ void curve_x25519_compute_pubkey_and_premaster(
 void curve_P256_compute_pubkey_and_premaster(
 		uint8_t *pubkey2x32, uint8_t *premaster32,
 		const uint8_t *peerkey2x32) FAST_FUNC;
+
+void curve_P256_compute_pubkey_and_premaster_NEW(
+		uint8_t *pubkey2x32, uint8_t *premaster32,
+		const uint8_t *peerkey2x32) FAST_FUNC;
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 99f9c6839..bba22dee3 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -163,11 +163,13 @@ static void dump_512(const char *fmt, const sp_digit* cr)
 	a[j] = 0;
 	for (i = 0; i < 20 && j >= 0; i++) {
 		b = 0;
-		a[j--] |= r[i] << s; b += 8 - s;
+		a[j--] |= r[i] << s;
+		b += 8 - s;
 		if (j < 0)
 			break;
 		while (b < 26) {
-			a[j--] = r[i] >> b; b += 8;
+			a[j--] = r[i] >> b;
+			b += 8;
 			if (j < 0)
 				break;
 		}
@@ -286,9 +288,10 @@ static void sp_256_mont_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b
 {
 	sp_256_add_10(r, a, b);
 	sp_256_norm_10(r);
-	if ((r[9] >> 22) > 0)
+	if ((r[9] >> 22) > 0) {
 		sp_256_sub_10(r, r, m);
-	sp_256_norm_10(r);
+		sp_256_norm_10(r);
+	}
 }
 
 /* Subtract two Montgomery form numbers (r = a - b % m) */
@@ -296,10 +299,12 @@ static void sp_256_mont_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b
 		const sp_digit* m)
 {
 	sp_256_sub_10(r, a, b);
-	if (r[9] >> 22)
-		sp_256_add_10(r, r, m);
 	sp_256_norm_10(r);
-	r[9] &= 0x03fffff; /* truncate to 22 bits */
+	if (r[9] >> 22) {
+		sp_256_add_10(r, r, m);
+		sp_256_norm_10(r);
+		r[9] &= 0x03fffff; /* truncate to 22 bits */
+	}
 }
 
 /* Double a Montgomery form number (r = a + a % m) */
@@ -317,14 +322,17 @@ static void sp_256_mont_tpl_10(sp_digit* r, const sp_digit* a, const sp_digit* m
 {
 	sp_256_add_10(r, a, a);
 	sp_256_norm_10(r);
-	if ((r[9] >> 22) > 0)
+	if ((r[9] >> 22) > 0) {
 		sp_256_sub_10(r, r, m);
-	sp_256_norm_10(r);
+		sp_256_norm_10(r);
+	}
 	sp_256_add_10(r, r, a);
 	sp_256_norm_10(r);
-	if ((r[9] >> 22) > 0)
+	if ((r[9] >> 22) > 0) {
 		sp_256_sub_10(r, r, m);
-	sp_256_norm_10(r);
+		sp_256_norm_10(r);
+	}
+	r[9] &= 0x03fffff; /* truncate to 22 bits */
 }
 
 /* Shift the result in the high 256 bits down to the bottom. */
@@ -650,6 +658,13 @@ static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
 	if (r->infinity) /* If infinity, don't double */
 		return;
 
+	if (SP_DEBUG) {
+		/* unused part of t2, may result in spurious
+		 * differences in debug output. Clear it.
+		 */
+		memset(t2, 0, sizeof(t2));
+	}
+
 	/* T1 = Z * Z */
 	sp_256_mont_sqr_10(t1, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = Y * Z */
-- 
cgit v1.2.3-55-g6feb


From 3b411ebbfc749f9f12b0eb739cb5ba3ec052197e Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 5 Oct 2021 20:00:50 +0200
Subject: tls: replace "26-bit" P256 code with 32-bit one.

function                                             old     new   delta
sp_256_ecc_mulmod_8                                    -    1171   +1171
sp_256_mod_mul_norm_8                                  -     834    +834
sp_256_proj_point_dbl_8                                -     374    +374
sp_256_mont_reduce_8                                   -     268    +268
sp_256_mont_mul_8                                      -     151    +151
sp_256_sub_8                                           -      76     +76
sp_256_add_8                                           -      76     +76
sp_256_cmp_8                                           -      38     +38
static.sp_256_mont_dbl_8                               -      31     +31
static.sp_256_mont_sub_8                               -      29     +29
sp_256_to_bin_8                                        -      28     +28
sp_256_point_from_bin2x32                             50      73     +23
sp_256_mont_sqr_8                                      -       7      +7
sp_256_mont_sqr_10                                     7       -      -7
p256_mod                                              40      32      -8
curve_P256_compute_pubkey_and_premaster              186     167     -19
sp_256_sub_10                                         22       -     -22
sp_256_add_10                                         22       -     -22
sp_256_cmp_10                                         24       -     -24
sp_256_norm_10                                        31       -     -31
static.sp_256_mont_sub_10                             49       -     -49
static.sp_256_mont_dbl_10                             52       -     -52
static.sp_256_mul_add_10                              82       -     -82
sp_256_from_bin_10                                   119       -    -119
sp_256_to_bin_10                                     120       -    -120
sp_256_mont_reduce_10                                178       -    -178
sp_256_mont_mul_10                                   214       -    -214
sp_256_proj_point_dbl_10                             451       -    -451
sp_256_ecc_mulmod_10                                1216       -   -1216
sp_256_mod_mul_norm_10                              1305       -   -1305
------------------------------------------------------------------------------
(add/remove: 12/15 grow/shrink: 1/2 up/down: 3106/-3919)     Total: -813 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls.c        |   15 +-
 networking/tls_sp_c32.c | 1071 +++++++++++++++++++++++++----------------------
 2 files changed, 588 insertions(+), 498 deletions(-)

diff --git a/networking/tls.c b/networking/tls.c
index 4f0e2b6eb..675ef4b3a 100644
--- a/networking/tls.c
+++ b/networking/tls.c
@@ -2334,7 +2334,6 @@ void FAST_FUNC tls_run_copy_loop(tls_state_t *tls, unsigned flags)
 // e.g. at the very beginning of wget_main()
 //
 {
-//kbuild:lib-$(CONFIG_TLS) += tls_sp_c32_new.o
 	uint8_t ecc_pub_key32[2 * 32];
 	uint8_t pubkey2x32[2 * 32];
 	uint8_t premaster32[32];
@@ -2345,14 +2344,14 @@ void FAST_FUNC tls_run_copy_loop(tls_state_t *tls, unsigned flags)
 //	memset(ecc_pub_key32, 0x00, sizeof(ecc_pub_key32));
 //	ecc_pub_key32[18] = 0xab;
 //Random key:
-	tls_get_random(ecc_pub_key32, sizeof(ecc_pub_key32));
+//	tls_get_random(ecc_pub_key32, sizeof(ecc_pub_key32));
 //Biased random (almost all zeros or almost all ones):
-//	srand(time(NULL) ^ getpid());
-//	if (rand() & 1)
-//		memset(ecc_pub_key32, 0x00, sizeof(ecc_pub_key32));
-//	else
-//		memset(ecc_pub_key32, 0xff, sizeof(ecc_pub_key32));
-//	ecc_pub_key32[rand() & 0x3f] = rand();
+	srand(time(NULL) ^ getpid());
+	if (rand() & 1)
+		memset(ecc_pub_key32, 0x00, sizeof(ecc_pub_key32));
+	else
+		memset(ecc_pub_key32, 0xff, sizeof(ecc_pub_key32));
+	ecc_pub_key32[rand() & 0x3f] = rand();
 
 	xmove_fd(xopen("p256.OLD", O_WRONLY | O_CREAT | O_TRUNC), 2);
 	curve_P256_compute_pubkey_and_premaster(
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index bba22dee3..b99951890 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -9,6 +9,8 @@
 #define FIXED_SECRET      0
 #define FIXED_PEER_PUBKEY 0
 
+#define ALLOW_ASM         1
+
 #if SP_DEBUG
 # define dbg(...) fprintf(stderr, __VA_ARGS__)
 static void dump_hex(const char *fmt, const void *vp, int len)
@@ -24,7 +26,8 @@ static void dump_hex(const char *fmt, const void *vp, int len)
 # define dump_hex(...) ((void)0)
 #endif
 
-typedef int32_t sp_digit;
+typedef uint32_t sp_digit;
+typedef int32_t signed_sp_digit;
 
 /* The code below is taken from parts of
  *  wolfssl-3.15.3/wolfcrypt/src/sp_c32.c
@@ -32,53 +35,23 @@ typedef int32_t sp_digit;
  * Header comment is kept intact:
  */
 
-/* sp.c
- *
- * Copyright (C) 2006-2018 wolfSSL Inc.
- *
- * This file is part of wolfSSL.
- *
- * wolfSSL is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * wolfSSL is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
- */
-
-/* Implementation by Sean Parkinson. */
-
 typedef struct sp_point {
-	sp_digit x[2 * 10];
-	sp_digit y[2 * 10];
-	sp_digit z[2 * 10];
+	sp_digit x[2 * 8];
+	sp_digit y[2 * 8];
+	sp_digit z[2 * 8];
 	int infinity;
 } sp_point;
 
 /* The modulus (prime) of the curve P256. */
-static const sp_digit p256_mod[10] = {
-	0x3ffffff,0x3ffffff,0x3ffffff,0x003ffff,0x0000000,
-	0x0000000,0x0000000,0x0000400,0x3ff0000,0x03fffff,
+static const sp_digit p256_mod[8] = {
+	0xffffffff,0xffffffff,0xffffffff,0x00000000,
+	0x00000000,0x00000000,0x00000001,0xffffffff,
 };
 
 #define p256_mp_mod ((sp_digit)0x000001)
 
-/* Normalize the values in each word to 26 bits. */
-static void sp_256_norm_10(sp_digit* a)
-{
-	int i;
-	for (i = 0; i < 9; i++) {
-		a[i+1] += a[i] >> 26;
-		a[i] &= 0x3ffffff;
-	}
-}
+/* Normalize the values in each word to 32 bits - NOP */
+#define sp_256_norm_8(a) ((void)0)
 
 /* Write r as big endian to byte array.
  * Fixed length number of bytes written: 32
@@ -86,31 +59,17 @@ static void sp_256_norm_10(sp_digit* a)
  * r  A single precision integer.
  * a  Byte array.
  */
-static void sp_256_to_bin_10(sp_digit* r, uint8_t* a)
-{
-	int i, j, s = 0, b;
-
-	sp_256_norm_10(r);
-
-	j = 256 / 8 - 1;
-	a[j] = 0;
-	for (i = 0; i < 10 && j >= 0; i++) {
-		b = 0;
-		a[j--] |= r[i] << s;
-		b += 8 - s;
-		if (j < 0)
-			break;
-		while (b < 26) {
-			a[j--] = r[i] >> b;
-			b += 8;
-			if (j < 0)
-				break;
-		}
-		s = 8 - (b - 26);
-		if (j >= 0)
-			a[j] = 0;
-		if (s != 0)
-			j++;
+static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a)
+{
+	int i;
+
+	sp_256_norm_8(r);
+
+	r += 8;
+	for (i = 0; i < 8; i++) {
+		r--;
+		move_to_unaligned32(a, SWAP_BE32(*r));
+		a += 4;
 	}
 }
 
@@ -120,67 +79,32 @@ static void sp_256_to_bin_10(sp_digit* r, uint8_t* a)
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
-static void sp_256_from_bin_10(sp_digit* r, const uint8_t* a)
-{
-	int i, j = 0, s = 0;
-
-	r[0] = 0;
-	for (i = 32 - 1; i >= 0; i--) {
-		r[j] |= ((sp_digit)a[i]) << s;
-		if (s >= 18) {
-			r[j] &= 0x3ffffff;
-			s = 26 - s;
-			r[++j] = a[i] >> s;
-			s = 8 - s;
-		}
-		else
-			s += 8;
+static void sp_256_from_bin_8(sp_digit* r, const uint8_t* a)
+{
+	int i;
+
+	r += 8;
+	for (i = 0; i < 8; i++) {
+		sp_digit v;
+		move_from_unaligned32(v, a);
+		*--r = SWAP_BE32(v);
+		a += 4;
 	}
 }
 
 #if SP_DEBUG
-static void dump_256(const char *fmt, const sp_digit* cr)
+static void dump_256(const char *fmt, const sp_digit* r)
 {
-	sp_digit* r = (sp_digit*)cr;
 	uint8_t b32[32];
-	sp_256_to_bin_10(r, b32);
+	sp_256_to_bin_8(r, b32);
 	dump_hex(fmt, b32, 32);
 }
-static void dump_512(const char *fmt, const sp_digit* cr)
+static void dump_512(const char *fmt, const sp_digit* r)
 {
-	sp_digit* r = (sp_digit*)cr;
-	uint8_t a[64];
-	int i, j, s, b;
-
-	/* sp_512_norm_10: */
-	for (i = 0; i < 19; i++) {
-		r[i+1] += r[i] >> 26;
-		r[i] &= 0x3ffffff;
-	}
-	/* sp_512_to_bin_10: */
-	s = 0;
-	j = 512 / 8 - 1;
-	a[j] = 0;
-	for (i = 0; i < 20 && j >= 0; i++) {
-		b = 0;
-		a[j--] |= r[i] << s;
-		b += 8 - s;
-		if (j < 0)
-			break;
-		while (b < 26) {
-			a[j--] = r[i] >> b;
-			b += 8;
-			if (j < 0)
-				break;
-		}
-		s = 8 - (b - 26);
-		if (j >= 0)
-			a[j] = 0;
-		if (s != 0)
-			j++;
-	}
-
-	dump_hex(fmt, a, 64);
+	uint8_t b64[64];
+	sp_256_to_bin_8(r, b64 + 32);
+	sp_256_to_bin_8(r+8, b64);
+	dump_hex(fmt, b64, 64);
 }
 #else
 # define dump_256(...) ((void)0)
@@ -192,8 +116,8 @@ static void sp_256_point_from_bin2x32(sp_point* p, const uint8_t *bin2x32)
 {
 	memset(p, 0, sizeof(*p));
 	/*p->infinity = 0;*/
-	sp_256_from_bin_10(p->x, bin2x32);
-	sp_256_from_bin_10(p->y, bin2x32 + 32);
+	sp_256_from_bin_8(p->x, bin2x32);
+	sp_256_from_bin_8(p->y, bin2x32 + 32);
 	p->z[0] = 1; /* p->z = 1 */
 }
 
@@ -202,170 +126,303 @@ static void sp_256_point_from_bin2x32(sp_point* p, const uint8_t *bin2x32)
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
-static sp_digit sp_256_cmp_10(const sp_digit* a, const sp_digit* b)
+static signed_sp_digit sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
 {
-	sp_digit r;
 	int i;
-	for (i = 9; i >= 0; i--) {
-		r = a[i] - b[i];
-		if (r != 0)
-			break;
+	for (i = 7; i >= 0; i--) {
+/*		signed_sp_digit r = a[i] - b[i];
+ *		if (r != 0)
+ *			return r;
+ * does not work: think about a[i]=0, b[i]=0xffffffff
+ */
+		if (a[i] == b[i])
+			continue;
+		return (a[i] > b[i]) * 2 - 1;
 	}
-	return r;
+	return 0;
 }
 
 /* Compare two numbers to determine if they are equal.
  *
  * return 1 when equal and 0 otherwise.
  */
-static int sp_256_cmp_equal_10(const sp_digit* a, const sp_digit* b)
+static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
 {
-	return sp_256_cmp_10(a, b) == 0;
+	return sp_256_cmp_8(a, b) == 0;
 }
 
-/* Add b to a into r. (r = a + b) */
-static void sp_256_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
+/* Add b to a into r. (r = a + b). Return !0 on overflow */
+static int sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
+#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
+	sp_digit reg;
+	asm volatile (
+"\n		movl	(%0), %3"
+"\n		addl	(%1), %3"
+"\n		movl	%3, (%2)"
+"\n"
+"\n		movl	1*4(%0), %3"
+"\n		adcl	1*4(%1), %3"
+"\n		movl	%3, 1*4(%2)"
+"\n"
+"\n		movl	2*4(%0), %3"
+"\n		adcl	2*4(%1), %3"
+"\n		movl	%3, 2*4(%2)"
+"\n"
+"\n		movl	3*4(%0), %3"
+"\n		adcl	3*4(%1), %3"
+"\n		movl	%3, 3*4(%2)"
+"\n"
+"\n		movl	4*4(%0), %3"
+"\n		adcl	4*4(%1), %3"
+"\n		movl	%3, 4*4(%2)"
+"\n"
+"\n		movl	5*4(%0), %3"
+"\n		adcl	5*4(%1), %3"
+"\n		movl	%3, 5*4(%2)"
+"\n"
+"\n		movl	6*4(%0), %3"
+"\n		adcl	6*4(%1), %3"
+"\n		movl	%3, 6*4(%2)"
+"\n"
+"\n		movl	7*4(%0), %3"
+"\n		adcl	7*4(%1), %3"
+"\n		movl	%3, 7*4(%2)"
+"\n"
+"\n		sbbl	%3, %3"
+"\n"
+		: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
+		: "0" (a), "1" (b), "2" (r)
+		: "memory"
+	);
+	return reg;
+#else
 	int i;
-	for (i = 0; i < 10; i++)
-		r[i] = a[i] + b[i];
+	sp_digit carry;
+
+	carry = 0;
+	for (i = 0; i < 8; i++) {
+		sp_digit w, v;
+		w = b[i] + carry;
+		v = a[i];
+		if (w != 0) {
+			v = a[i] + w;
+			carry = (v < a[i]);
+			/* hope compiler detects above as "carry flag set" */
+		}
+		/* else: b + carry == 0, two cases:
+		 * b:ffffffff, carry:1
+		 * b:00000000, carry:0
+		 * in either case, r[i] = a[i] and carry remains unchanged
+		 */
+		r[i] = v;
+	}
+	return carry;
+#endif
 }
 
-/* Sub b from a into r. (r = a - b) */
-static void sp_256_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
+/* Sub b from a into r. (r = a - b). Return !0 on underflow */
+static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
+#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
+	sp_digit reg;
+	asm volatile (
+"\n		movl	(%0), %3"
+"\n		subl	(%1), %3"
+"\n		movl	%3, (%2)"
+"\n"
+"\n		movl	1*4(%0), %3"
+"\n		sbbl	1*4(%1), %3"
+"\n		movl	%3, 1*4(%2)"
+"\n"
+"\n		movl	2*4(%0), %3"
+"\n		sbbl	2*4(%1), %3"
+"\n		movl	%3, 2*4(%2)"
+"\n"
+"\n		movl	3*4(%0), %3"
+"\n		sbbl	3*4(%1), %3"
+"\n		movl	%3, 3*4(%2)"
+"\n"
+"\n		movl	4*4(%0), %3"
+"\n		sbbl	4*4(%1), %3"
+"\n		movl	%3, 4*4(%2)"
+"\n"
+"\n		movl	5*4(%0), %3"
+"\n		sbbl	5*4(%1), %3"
+"\n		movl	%3, 5*4(%2)"
+"\n"
+"\n		movl	6*4(%0), %3"
+"\n		sbbl	6*4(%1), %3"
+"\n		movl	%3, 6*4(%2)"
+"\n"
+"\n		movl	7*4(%0), %3"
+"\n		sbbl	7*4(%1), %3"
+"\n		movl	%3, 7*4(%2)"
+"\n"
+"\n		sbbl	%3, %3"
+"\n"
+		: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
+		: "0" (a), "1" (b), "2" (r)
+		: "memory"
+	);
+	return reg;
+#else
 	int i;
-	for (i = 0; i < 10; i++)
-		r[i] = a[i] - b[i];
+	sp_digit borrow;
+
+	borrow = 0;
+	for (i = 0; i < 8; i++) {
+		sp_digit w, v;
+		w = b[i] + borrow;
+		v = a[i];
+		if (w != 0) {
+			v = a[i] - w;
+			borrow = (v > a[i]);
+			/* hope compiler detects above as "carry flag set" */
+		}
+		/* else: b + borrow == 0, two cases:
+		 * b:ffffffff, borrow:1
+		 * b:00000000, borrow:0
+		 * in either case, r[i] = a[i] and borrow remains unchanged
+		 */
+		r[i] = v;
+	}
+	return borrow;
+#endif
 }
 
 /* Multiply a and b into r. (r = a * b) */
-static void sp_256_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
+static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
+	sp_digit rr[15]; /* in case r coincides with a or b */
 	int i, j, k;
-	int64_t c;
-
-	c = ((int64_t)a[9]) * b[9];
-	r[19] = (sp_digit)(c >> 26);
-	c = (c & 0x3ffffff) << 26;
-	for (k = 17; k >= 0; k--) {
-		for (i = 9; i >= 0; i--) {
-			j = k - i;
-			if (j >= 10)
-				break;
-			if (j < 0)
-				continue;
-			c += ((int64_t)a[i]) * b[j];
+	uint64_t acc;
+
+	acc = 0;
+	for (k = 0; k < 15; k++) {
+		uint32_t acc_hi;
+		i = k - 7;
+		if (i < 0)
+			i = 0;
+		j = k - i;
+		acc_hi = 0;
+		while (i != 8 && i <= k) {
+			uint64_t m = ((uint64_t)a[i]) * b[j];
+			acc += m;
+			if (acc < m)
+				acc_hi++;
+		        j--;
+			i++;
 		}
-		r[k + 2] += c >> 52;
-		r[k + 1] = (c >> 26) & 0x3ffffff;
-		c = (c & 0x3ffffff) << 26;
+		rr[k] = acc;
+		acc = (acc >> 32) | ((uint64_t)acc_hi << 32);
 	}
-	r[0] = (sp_digit)(c >> 26);
+	r[15] = acc;
+	memcpy(r, rr, sizeof(rr));
 }
 
 /* Shift number right one bit. Bottom bit is lost. */
-static void sp_256_rshift1_10(sp_digit* r, sp_digit* a)
+static void sp_256_rshift1_8(sp_digit* r, sp_digit* a, sp_digit carry)
 {
 	int i;
-	for (i = 0; i < 9; i++)
-		r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
-	r[9] = a[9] >> 1;
+
+	carry = (!!carry << 31);
+	for (i = 7; i >= 0; i--) {
+		sp_digit c = a[i] << 31;
+		r[i] = (a[i] >> 1) | carry;
+		carry = c;
+	}
 }
 
 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) */
-static void sp_256_div2_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
+	int carry = 0;
 	if (a[0] & 1)
-		sp_256_add_10(r, a, m);
-	sp_256_norm_10(r);
-	sp_256_rshift1_10(r, r);
+		carry = sp_256_add_8(r, a, m);
+	sp_256_norm_8(r);
+	sp_256_rshift1_8(r, r, carry);
 }
 
 /* Add two Montgomery form numbers (r = a + b % m) */
-static void sp_256_mont_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
 		const sp_digit* m)
 {
-	sp_256_add_10(r, a, b);
-	sp_256_norm_10(r);
-	if ((r[9] >> 22) > 0) {
-		sp_256_sub_10(r, r, m);
-		sp_256_norm_10(r);
+	int carry = sp_256_add_8(r, a, b);
+	sp_256_norm_8(r);
+	if (carry) {
+		sp_256_sub_8(r, r, m);
+		sp_256_norm_8(r);
 	}
 }
 
 /* Subtract two Montgomery form numbers (r = a - b % m) */
-static void sp_256_mont_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
 		const sp_digit* m)
 {
-	sp_256_sub_10(r, a, b);
-	sp_256_norm_10(r);
-	if (r[9] >> 22) {
-		sp_256_add_10(r, r, m);
-		sp_256_norm_10(r);
-		r[9] &= 0x03fffff; /* truncate to 22 bits */
+	int borrow;
+	borrow = sp_256_sub_8(r, a, b);
+	sp_256_norm_8(r);
+	if (borrow) {
+		sp_256_add_8(r, r, m);
+		sp_256_norm_8(r);
 	}
 }
 
 /* Double a Montgomery form number (r = a + a % m) */
-static void sp_256_mont_dbl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
-	sp_256_add_10(r, a, a);
-	sp_256_norm_10(r);
-	if ((r[9] >> 22) > 0)
-		sp_256_sub_10(r, r, m);
-	sp_256_norm_10(r);
+	int carry = sp_256_add_8(r, a, a);
+	sp_256_norm_8(r);
+	if (carry)
+		sp_256_sub_8(r, r, m);
+	sp_256_norm_8(r);
 }
 
 /* Triple a Montgomery form number (r = a + a + a % m) */
-static void sp_256_mont_tpl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
-	sp_256_add_10(r, a, a);
-	sp_256_norm_10(r);
-	if ((r[9] >> 22) > 0) {
-		sp_256_sub_10(r, r, m);
-		sp_256_norm_10(r);
+	int carry = sp_256_add_8(r, a, a);
+	sp_256_norm_8(r);
+	if (carry) {
+		sp_256_sub_8(r, r, m);
+		sp_256_norm_8(r);
 	}
-	sp_256_add_10(r, r, a);
-	sp_256_norm_10(r);
-	if ((r[9] >> 22) > 0) {
-		sp_256_sub_10(r, r, m);
-		sp_256_norm_10(r);
+	carry = sp_256_add_8(r, r, a);
+	sp_256_norm_8(r);
+	if (carry) {
+		sp_256_sub_8(r, r, m);
+		sp_256_norm_8(r);
 	}
-	r[9] &= 0x03fffff; /* truncate to 22 bits */
 }
 
 /* Shift the result in the high 256 bits down to the bottom. */
-static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a)
+static void sp_256_mont_shift_8(sp_digit* r, const sp_digit* a)
 {
 	int i;
-	sp_digit n, s;
-
-	s = a[10];
-	n = a[9] >> 22;
-	for (i = 0; i < 9; i++) {
-		n += (s & 0x3ffffff) << 4;
-		r[i] = n & 0x3ffffff;
-		n >>= 26;
-		s = a[11 + i] + (s >> 26);
+
+	for (i = 0; i < 8; i++) {
+		r[i] = a[i+8];
+		r[i+8] = 0;
 	}
-	n += s << 4;
-	r[9] = n;
-	memset(&r[10], 0, sizeof(*r) * 10);
 }
 
 /* Mul a by scalar b and add into r. (r += a * b) */
-static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a, sp_digit b)
+static int sp_256_mul_add_8(sp_digit* r, const sp_digit* a, sp_digit b)
 {
-	int64_t t = 0;
+	uint64_t t = 0;
 	int i;
 
-	for (i = 0; i < 10; i++) {
-		t += ((int64_t)b * a[i]) + r[i];
-		r[i] = t & 0x3ffffff;
-		t >>= 26;
+	for (i = 0; i < 8; i++) {
+		uint32_t t_hi;
+		uint64_t m = ((uint64_t)b * a[i]) + r[i];
+		t += m;
+		t_hi = (t < m);
+		r[i] = (sp_digit)t;
+		t = (t >> 32) | ((uint64_t)t_hi << 32);
 	}
-	r[10] += t;
+	r[8] += (sp_digit)t;
+	return (r[8] < (sp_digit)t); /* 1 if addition overflowed */
 }
 
 /* Reduce the number back to 256 bits using Montgomery reduction.
@@ -374,7 +431,7 @@ static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a, sp_digit b)
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_256_mont_reduce_10(sp_digit* a /*, const sp_digit* m, sp_digit mp*/)
+static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
 {
 	const sp_digit* m = p256_mod;
 	sp_digit mp = p256_mp_mod;
@@ -383,33 +440,144 @@ static void sp_256_mont_reduce_10(sp_digit* a /*, const sp_digit* m, sp_digit mp
 	sp_digit mu;
 
 	if (mp != 1) {
-		for (i = 0; i < 9; i++) {
-			mu = (a[i] * mp) & 0x3ffffff;
-			sp_256_mul_add_10(a+i, m, mu);
-			a[i+1] += a[i] >> 26;
+		int too_wide;
+		for (i = 0; i < 7; i++) {
+			mu = (sp_digit)(a[i] * mp);
+			if (sp_256_mul_add_8(a+i, m, mu))
+				(a+i)[9]++;
 		}
-		mu = (a[i] * mp) & 0x03fffff;
-		sp_256_mul_add_10(a+i, m, mu);
-		a[i+1] += a[i] >> 26;
-		a[i] &= 0x3ffffff;
+		mu = (sp_digit)(a[7] * mp);
+		too_wide = sp_256_mul_add_8(a+7, m, mu);
+		sp_256_mont_shift_8(a, a);
+		if (too_wide)
+			sp_256_sub_8(a, a, m);
+		sp_256_norm_8(a);
 	}
 	else { /* Same code for explicit mp == 1 (which is always the case for P256) */
-		for (i = 0; i < 9; i++) {
-			mu = a[i] & 0x3ffffff;
-			sp_256_mul_add_10(a+i, m, mu);
-			a[i+1] += a[i] >> 26;
+		sp_digit word16th = 0;
+		for (i = 0; i < 8; i++) {
+			mu = a[i];
+//m = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
+			if (sp_256_mul_add_8(a+i, m, mu)) {
+				int j = i + 8;
+ inc_next_word:
+				if (++j > 15) { /* a[16] array has no more words? */
+					word16th++;
+					continue;
+				}
+				if (++a[j] == 0) /* did this overflow too? */
+					goto inc_next_word;
+			}
 		}
-		mu = a[i] & 0x03fffff;
-		sp_256_mul_add_10(a+i, m, mu);
-		a[i+1] += a[i] >> 26;
-		a[i] &= 0x3ffffff;
+		sp_256_mont_shift_8(a, a);
+		if (word16th != 0)
+			sp_256_sub_8(a, a, m);
+		sp_256_norm_8(a);
 	}
-
-	sp_256_mont_shift_10(a, a);
-	if ((a[9] >> 22) > 0)
-		sp_256_sub_10(a, a, m);
-	sp_256_norm_10(a);
 }
+#if 0
+//TODO: arm32 asm (also adapt for x86?)
+static void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m, sp_digit mp)
+{
+	sp_digit ca = 0;
+
+	asm volatile (
+	# i = 0
+	mov	r12, #0                 #  i = 0
+	ldr	r10, [%[a], #0]         #  r10 = a[0]
+	ldr	r14, [%[a], #4]         #  r14 = a[1]
+1:
+	# mu = a[i] * mp                #
+	mul	r8, %[mp], r10          # mu = a[i] * mp
+	# a[i+0] += m[0] * mu           #
+	ldr	r7, [%[m], #0]          # a[i+0] += m[0] * mu
+	ldr	r9, [%[a], #0]          #
+	umull	r6, r7, r8, r7          #  r7:r6 = mu * m[0]
+	adds	r10, r10, r6            #  r5:r10 += r7:r6
+	adc	r5, r7, #0              #
+	# a[i+1] += m[1] * mu           #
+	ldr	r7, [%[m], #4]          # a[i+1] += m[1] * mu
+	ldr	r9, [%[a], #4]          #
+	umull	r6, r7, r8, r7          #  r7:r6 = mu * m[1]
+	adds	r10, r14, r6            #  r4:r10 = r7:r14 + r7:r6
+	adc	r4, r7, #0              #
+	adds	r10, r10, r5            #  r4:r10 += r5
+	adc	r4, r4, #0              #
+	# a[i+2] += m[2] * mu           #
+	ldr	r7, [%[m], #8]          # a[i+2] += m[2] * mu
+	ldr	r14, [%[a], #8]         #
+	umull	r6, r7, r8, r7          #
+	adds	r14, r14, r6            #
+	adc	r5, r7, #0              #
+	adds	r14, r14, r4            #
+	adc	r5, r5, #0              #
+	# a[i+3] += m[3] * mu           #
+	ldr	r7, [%[m], #12]         # a[i+3] += m[3] * mu
+	ldr	r9, [%[a], #12]         #
+	umull	r6, r7, r8, r7          #
+	adds	r9, r9, r6              #
+	adc	r4, r7, #0              #
+	adds	r9, r9, r5              #
+	str	r9, [%[a], #12]         #  a[3] = r9
+	adc	r4, r4, #0              #
+	# a[i+4] += m[4] * mu           #
+	ldr	r7, [%[m], #16]         # a[i+4] += m[4] * mu
+	ldr	r9, [%[a], #16]         #
+	umull	r6, r7, r8, r7          #
+	adds	r9, r9, r6              #
+	adc	r5, r7, #0              #
+	adds	r9, r9, r4              #
+	str	r9, [%[a], #16]         #  a[4] = r9
+	adc	r5, r5, #0              #
+	# a[i+5] += m[5] * mu           #
+	ldr	r7, [%[m], #20]         # a[i+5] += m[5] * mu
+	ldr	r9, [%[a], #20]         #
+	umull	r6, r7, r8, r7          #
+	adds	r9, r9, r6              #
+	adc	r4, r7, #0              #
+	adds	r9, r9, r5              #
+	str	r9, [%[a], #20]         #  a[5] = r9
+	adc	r4, r4, #0              #
+	# a[i+6] += m[6] * mu           #
+	ldr	r7, [%[m], #24]         # a[i+6] += m[6] * mu
+	ldr	r9, [%[a], #24]         #
+	umull	r6, r7, r8, r7          #
+	adds	r9, r9, r6              #
+	adc	r5, r7, #0              #
+	adds	r9, r9, r4              #
+	str	r9, [%[a], #24]         #  a[6] = r9
+	adc	r5, r5, #0              #
+	# a[i+7] += m[7] * mu           #
+	ldr	r7, [%[m], #28]         # a[i+7] += m[7] * mu
+	ldr	r9, [%[a], #28]         #
+	umull	r6, r7, r8, r7          #
+	adds	r5, r5, r6              #
+	adcs	r7, r7, %[ca]           #
+	mov	%[ca], #0               #
+	adc	%[ca], %[ca], %[ca]     #  ca = CF
+	adds	r9, r9, r5              #
+	str	r9, [%[a], #28]         #  a[7] = r9
+	ldr	r9, [%[a], #32]         #  r9 = a[8]
+	adcs	r9, r9, r7              #
+	str	r9, [%[a], #32]         #  a[8] = r9
+	adc	%[ca], %[ca], #0        #  ca += CF
+	# i += 1                        # i++
+	add	%[a], %[a], #4          #  a++
+	add	r12, r12, #4            #  i += 4
+	cmp	r12, #32                #  if (i < 32)
+	blt	1b                      #   goto 1
+
+	str	r10, [%[a], #0]         #  a[0] = r10
+	str	r14, [%[a], #4]         #  a[1] = r14
+	: [ca] "+r" (ca), [a] "+r" (a)
+	: [m] "r" (m), [mp] "r" (mp)
+	: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+	);
+
+	if (ca)
+		a -= m;
+}
+#endif
 
 /* Multiply two Montogmery form numbers mod the modulus (prime).
  * (r = a * b mod m)
@@ -420,14 +588,13 @@ static void sp_256_mont_reduce_10(sp_digit* a /*, const sp_digit* m, sp_digit mp
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b
+static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b
 		/*, const sp_digit* m, sp_digit mp*/)
 {
 	//const sp_digit* m = p256_mod;
 	//sp_digit mp = p256_mp_mod;
-
-	sp_256_mul_10(r, a, b);
-	sp_256_mont_reduce_10(r /*, m, mp*/);
+	sp_256_mul_8(r, a, b);
+	sp_256_mont_reduce_8(r /*, m, mp*/);
 }
 
 /* Square the Montgomery form number. (r = a * a mod m)
@@ -437,13 +604,12 @@ static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a
+static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a
 		/*, const sp_digit* m, sp_digit mp*/)
 {
 	//const sp_digit* m = p256_mod;
 	//sp_digit mp = p256_mp_mod;
-
-	sp_256_mont_mul_10(r, a, a /*, m, mp*/);
+	sp_256_mont_mul_8(r, a, a /*, m, mp*/);
 }
 
 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
@@ -464,19 +630,19 @@ static const uint32_t p256_mod_2[8] = {
 //543210987654321098765432109876543210987654321098765432109876543210...09876543210...09876543210
 //111111111111111111111111111111110000000000000000000000000000000100...00000111111...11111111101
 #endif
-static void sp_256_mont_inv_10(sp_digit* r, sp_digit* a)
+static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a)
 {
-	sp_digit t[2*10]; //can be just [10]?
+	sp_digit t[2*8]; //can be just [8]?
 	int i;
 
-	memcpy(t, a, sizeof(sp_digit) * 10);
+	memcpy(t, a, sizeof(sp_digit) * 8);
 	for (i = 254; i >= 0; i--) {
-		sp_256_mont_sqr_10(t, t /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/);
 		/*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/
 		if (i >= 224 || i == 192 || (i <= 95 && i != 1))
-			sp_256_mont_mul_10(t, t, a /*, p256_mod, p256_mp_mod*/);
+			sp_256_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/);
 	}
-	memcpy(r, t, sizeof(sp_digit) * 10);
+	memcpy(r, t, sizeof(sp_digit) * 8);
 }
 
 /* Multiply a number by Montogmery normalizer mod modulus (prime).
@@ -484,93 +650,29 @@ static void sp_256_mont_inv_10(sp_digit* r, sp_digit* a)
  * r  The resulting Montgomery form number.
  * a  The number to convert.
  */
-static void sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a)
+static void sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a)
 {
 	int64_t t[8];
-	int64_t o;
-	uint32_t a32;
+	int32_t o;
 
+#define A(n) ((uint64_t)a[n])
 	/*  1  1  0 -1 -1 -1 -1  0 */
+	t[0] = 0 + A(0) + A(1) - A(3) - A(4) - A(5) - A(6);
 	/*  0  1  1  0 -1 -1 -1 -1 */
+	t[1] = 0 + A(1) + A(2) - A(4) - A(5) - A(6) - A(7);
 	/*  0  0  1  1  0 -1 -1 -1 */
+	t[2] = 0 + A(2) + A(3) - A(5) - A(6) - A(7);
 	/* -1 -1  0  2  2  1  0 -1 */
+	t[3] = 0 - A(0) - A(1) + 2 * A(3) + 2 * A(4) + A(5) - A(7);
 	/*  0 -1 -1  0  2  2  1  0 */
+	t[4] = 0 - A(1) - A(2) + 2 * A(4) + 2 * A(5) + A(6);
 	/*  0  0 -1 -1  0  2  2  1 */
+	t[5] = 0 - A(2) - A(3) + 2 * A(5) + 2 * A(6) + A(7);
 	/* -1 -1  0  0  0  1  3  2 */
+	t[6] = 0 - A(0) - A(1) + A(5) + 3 * A(6) + 2 * A(7);
 	/*  1  0 -1 -1 -1 -1  0  3 */
-	// t[] should be calculated from "a" (converted from 26-bit to 32-bit vector a32[8])
-	// according to the above matrix:
-	//t[0] = 0 + a32[0] + a32[1]            - a32[3]   - a32[4]   - a32[5]   - a32[6]             ;
-	//t[1] = 0          + a32[1] + a32[2]              - a32[4]   - a32[5]   - a32[6]   - a32[7]  ;
-	//t[2] = 0                   + a32[2]   + a32[3]              - a32[5]   - a32[6]   - a32[7]  ;
-	//t[3] = 0 - a32[0] - a32[1]            + 2*a32[3] + 2*a32[4] + a32[5]              - a32[7]  ;
-	//t[4] = 0          - a32[1] - a32[2]              + 2*a32[4] + 2*a32[5] + a32[6]             ;
-	//t[5] = 0                   - a32[2]   - a32[3]              + 2*a32[5] + 2*a32[6] + a32[7]  ;
-	//t[6] = 0 - a32[0] - a32[1]                                  + a32[5]   + 3*a32[6] + 2*a32[7];
-	//t[7] = 0 + a32[0]          - a32[2]   - a32[3]   - a32[4]   - a32[5]              + 3*a32[7];
-	// We can do it "piecemeal" after each a32[i] is known, no need to store entire a32[8] vector:
-
-#define A32 (int64_t)a32
-	a32 = a[0] | (a[1] << 26);
-	t[0] = 0 + A32;
-	t[3] = 0 - A32;
-	t[6] = 0 - A32;
-	t[7] = 0 + A32;
-
-	a32 = (a[1] >> 6) | (a[2] << 20);
-	t[0] += A32    ;
-	t[1]  = 0 + A32;
-	t[3] -= A32    ;
-	t[4]  = 0 - A32;
-	t[6] -= A32    ;
-
-	a32 = (a[2] >> 12) | (a[3] << 14);
-	t[1] += A32    ;
-	t[2]  = 0 + A32;
-	t[4] -= A32    ;
-	t[5]  = 0 - A32;
-	t[7] -= A32    ;
-
-	a32 = (a[3] >> 18) | (a[4] << 8);
-	t[0] -= A32  ;
-	t[2] += A32  ;
-	t[3] += 2*A32;
-	t[5] -= A32  ;
-	t[7] -= A32  ;
-
-	a32 = (a[4] >> 24) | (a[5] << 2) | (a[6] << 28);
-	t[0] -= A32  ;
-	t[1] -= A32  ;
-	t[3] += 2*A32;
-	t[4] += 2*A32;
-	t[7] -= A32  ;
-
-	a32 = (a[6] >> 4) | (a[7] << 22);
-	t[0] -= A32  ;
-	t[1] -= A32  ;
-	t[2] -= A32  ;
-	t[3] += A32  ;
-	t[4] += 2*A32;
-	t[5] += 2*A32;
-	t[6] += A32  ;
-	t[7] -= A32  ;
-
-	a32 = (a[7] >> 10) | (a[8] << 16);
-	t[0] -= A32  ;
-	t[1] -= A32  ;
-	t[2] -= A32  ;
-	t[4] += A32  ;
-	t[5] += 2*A32;
-	t[6] += 3*A32;
-
-	a32 = (a[8] >> 16) | (a[9] << 10);
-	t[1] -= A32  ;
-	t[2] -= A32  ;
-	t[3] -= A32  ;
-	t[5] += A32  ;
-	t[6] += 2*A32;
-	t[7] += 3*A32;
-#undef A32
+	t[7] = 0 + A(0) - A(2) - A(3) - A(4) - A(5) + 3 * A(7);
+#undef A
 
 	t[1] += t[0] >> 32; t[0] &= 0xffffffff;
 	t[2] += t[1] >> 32; t[1] &= 0xffffffff;
@@ -579,29 +681,27 @@ static void sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a)
 	t[5] += t[4] >> 32; t[4] &= 0xffffffff;
 	t[6] += t[5] >> 32; t[5] &= 0xffffffff;
 	t[7] += t[6] >> 32; t[6] &= 0xffffffff;
-	o     = t[7] >> 32; t[7] &= 0xffffffff;
+	o     = t[7] >> 32; //t[7] &= 0xffffffff;
 	t[0] += o;
 	t[3] -= o;
 	t[6] -= o;
 	t[7] += o;
-	t[1] += t[0] >> 32; //t[0] &= 0xffffffff;
-	t[2] += t[1] >> 32; //t[1] &= 0xffffffff;
-	t[3] += t[2] >> 32; //t[2] &= 0xffffffff;
-	t[4] += t[3] >> 32; //t[3] &= 0xffffffff;
-	t[5] += t[4] >> 32; //t[4] &= 0xffffffff;
-	t[6] += t[5] >> 32; //t[5] &= 0xffffffff;
-	t[7] += t[6] >> 32; //t[6] &= 0xffffffff; - (uint32_t)t[i] casts below accomplish masking
-
-	r[0] = 0x3ffffff & ((sp_digit)((uint32_t)t[0]));
-	r[1] = 0x3ffffff & ((sp_digit)((uint32_t)t[0] >> 26) | ((sp_digit)t[1] <<  6));
-	r[2] = 0x3ffffff & ((sp_digit)((uint32_t)t[1] >> 20) | ((sp_digit)t[2] << 12));
-	r[3] = 0x3ffffff & ((sp_digit)((uint32_t)t[2] >> 14) | ((sp_digit)t[3] << 18));
-	r[4] = 0x3ffffff & ((sp_digit)((uint32_t)t[3] >>  8) | ((sp_digit)t[4] << 24));
-	r[5] = 0x3ffffff & ((sp_digit)((uint32_t)t[4] >>  2));
-	r[6] = 0x3ffffff & ((sp_digit)((uint32_t)t[4] >> 28) | ((sp_digit)t[5] <<  4));
-	r[7] = 0x3ffffff & ((sp_digit)((uint32_t)t[5] >> 22) | ((sp_digit)t[6] << 10));
-	r[8] = 0x3ffffff & ((sp_digit)((uint32_t)t[6] >> 16) | ((sp_digit)t[7] << 16));
-	r[9] =             ((sp_digit)((uint32_t)t[7] >> 10));
+	r[0] = (sp_digit)t[0];
+	t[1] += t[0] >> 32;
+	r[1] = (sp_digit)t[1];
+	t[2] += t[1] >> 32;
+	r[2] = (sp_digit)t[2];
+	t[3] += t[2] >> 32;
+	r[3] = (sp_digit)t[3];
+	t[4] += t[3] >> 32;
+	r[4] = (sp_digit)t[4];
+	t[5] += t[4] >> 32;
+	r[5] = (sp_digit)t[5];
+	t[6] += t[5] >> 32;
+	r[6] = (sp_digit)t[6];
+//	t[7] += t[6] >> 32;
+//	r[7] = (sp_digit)t[7];
+	r[7] = (sp_digit)t[7] + (sp_digit)(t[6] >> 32);
 }
 
 /* Map the Montgomery form projective co-ordinate point to an affine point.
@@ -609,33 +709,33 @@ static void sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a)
  * r  Resulting affine co-ordinate point.
  * p  Montgomery form projective co-ordinate point.
  */
-static void sp_256_map_10(sp_point* r, sp_point* p)
+static void sp_256_map_8(sp_point* r, sp_point* p)
 {
-	sp_digit t1[2*10];
-	sp_digit t2[2*10];
+	sp_digit t1[2*8];
+	sp_digit t2[2*8];
 
-	sp_256_mont_inv_10(t1, p->z);
+	sp_256_mont_inv_8(t1, p->z);
 
-	sp_256_mont_sqr_10(t2, t1 /*, p256_mod, p256_mp_mod*/);
-	sp_256_mont_mul_10(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
 
 	/* x /= z^2 */
-	sp_256_mont_mul_10(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/);
-	memset(r->x + 10, 0, sizeof(r->x) / 2);
-	sp_256_mont_reduce_10(r->x /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/);
+	memset(r->x + 8, 0, sizeof(r->x) / 2);
+	sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/);
 	/* Reduce x to less than modulus */
-	if (sp_256_cmp_10(r->x, p256_mod) >= 0)
-		sp_256_sub_10(r->x, r->x, p256_mod);
-	sp_256_norm_10(r->x);
+	if (sp_256_cmp_8(r->x, p256_mod) >= 0)
+		sp_256_sub_8(r->x, r->x, p256_mod);
+	sp_256_norm_8(r->x);
 
 	/* y /= z^3 */
-	sp_256_mont_mul_10(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
-	memset(r->y + 10, 0, sizeof(r->y) / 2);
-	sp_256_mont_reduce_10(r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
+	memset(r->y + 8, 0, sizeof(r->y) / 2);
+	sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
-	if (sp_256_cmp_10(r->y, p256_mod) >= 0)
-		sp_256_sub_10(r->y, r->y, p256_mod);
-	sp_256_norm_10(r->y);
+	if (sp_256_cmp_8(r->y, p256_mod) >= 0)
+		sp_256_sub_8(r->y, r->y, p256_mod);
+	sp_256_norm_8(r->y);
 
 	memset(r->z, 0, sizeof(r->z));
 	r->z[0] = 1;
@@ -646,16 +746,16 @@ static void sp_256_map_10(sp_point* r, sp_point* p)
  * r  Result of doubling point.
  * p  Point to double.
  */
-static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
+static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 {
-	sp_digit t1[2*10];
-	sp_digit t2[2*10];
+	sp_digit t1[2*8];
+	sp_digit t2[2*8];
 
 	/* Put point to double into result */
 	if (r != p)
 		*r = *p; /* struct copy */
 
-	if (r->infinity) /* If infinity, don't double */
+	if (r->infinity)
 		return;
 
 	if (SP_DEBUG) {
@@ -666,41 +766,42 @@ static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
 	}
 
 	/* T1 = Z * Z */
-	sp_256_mont_sqr_10(t1, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = Y * Z */
-	sp_256_mont_mul_10(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = 2Z */
-	sp_256_mont_dbl_10(r->z, r->z, p256_mod);
+	sp_256_mont_dbl_8(r->z, r->z, p256_mod);
 	/* T2 = X - T1 */
-	sp_256_mont_sub_10(t2, r->x, t1, p256_mod);
+	sp_256_mont_sub_8(t2, r->x, t1, p256_mod);
 	/* T1 = X + T1 */
-	sp_256_mont_add_10(t1, r->x, t1, p256_mod);
+	sp_256_mont_add_8(t1, r->x, t1, p256_mod);
 	/* T2 = T1 * T2 */
-	sp_256_mont_mul_10(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
 	/* T1 = 3T2 */
-	sp_256_mont_tpl_10(t1, t2, p256_mod);
+	sp_256_mont_tpl_8(t1, t2, p256_mod);
 	/* Y = 2Y */
-	sp_256_mont_dbl_10(r->y, r->y, p256_mod);
+	sp_256_mont_dbl_8(r->y, r->y, p256_mod);
 	/* Y = Y * Y */
-	sp_256_mont_sqr_10(r->y, r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = Y * Y */
-	sp_256_mont_sqr_10(t2, r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = T2/2 */
-	sp_256_div2_10(t2, t2, p256_mod);
+	sp_256_div2_8(t2, t2, p256_mod);
 	/* Y = Y * X */
-	sp_256_mont_mul_10(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
 	/* X = T1 * T1 */
-	sp_256_mont_mul_10(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
 	/* X = X - Y */
-	sp_256_mont_sub_10(r->x, r->x, r->y, p256_mod);
+	sp_256_mont_sub_8(r->x, r->x, r->y, p256_mod);
 	/* X = X - Y */
-	sp_256_mont_sub_10(r->x, r->x, r->y, p256_mod);
+	sp_256_mont_sub_8(r->x, r->x, r->y, p256_mod);
 	/* Y = Y - X */
-	sp_256_mont_sub_10(r->y, r->y, r->x, p256_mod);
+	sp_256_mont_sub_8(r->y, r->y, r->x, p256_mod);
 	/* Y = Y * T1 */
-	sp_256_mont_mul_10(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
 	/* Y = Y - T2 */
-	sp_256_mont_sub_10(r->y, r->y, t2, p256_mod);
+	sp_256_mont_sub_8(r->y, r->y, t2, p256_mod);
+	dump_512("y2 %s\n", r->y);
 }
 
 /* Add two Montgomery form projective points.
@@ -709,13 +810,13 @@ static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
  * p  Frist point to add.
  * q  Second point to add.
  */
-static void sp_256_proj_point_add_10(sp_point* r, sp_point* p, sp_point* q)
+static void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q)
 {
-	sp_digit t1[2*10];
-	sp_digit t2[2*10];
-	sp_digit t3[2*10];
-	sp_digit t4[2*10];
-	sp_digit t5[2*10];
+	sp_digit t1[2*8];
+	sp_digit t2[2*8];
+	sp_digit t3[2*8];
+	sp_digit t4[2*8];
+	sp_digit t5[2*8];
 
 	/* Ensure only the first point is the same as the result. */
 	if (q == r) {
@@ -725,13 +826,13 @@ static void sp_256_proj_point_add_10(sp_point* r, sp_point* p, sp_point* q)
 	}
 
 	/* Check double */
-	sp_256_sub_10(t1, p256_mod, q->y);
-	sp_256_norm_10(t1);
-	if (sp_256_cmp_equal_10(p->x, q->x)
-	 && sp_256_cmp_equal_10(p->z, q->z)
-	 && (sp_256_cmp_equal_10(p->y, q->y) || sp_256_cmp_equal_10(p->y, t1))
+	sp_256_sub_8(t1, p256_mod, q->y);
+	sp_256_norm_8(t1);
+	if (sp_256_cmp_equal_8(p->x, q->x)
+	 && sp_256_cmp_equal_8(p->z, q->z)
+	 && (sp_256_cmp_equal_8(p->y, q->y) || sp_256_cmp_equal_8(p->y, t1))
 	) {
-		sp_256_proj_point_dbl_10(r, p);
+		sp_256_proj_point_dbl_8(r, p);
 	}
 	else {
 		sp_point tp;
@@ -746,37 +847,37 @@ static void sp_256_proj_point_add_10(sp_point* r, sp_point* p, sp_point* q)
 		*r = p->infinity ? *q : *p; /* struct copy */
 
 		/* U1 = X1*Z2^2 */
-		sp_256_mont_sqr_10(t1, q->z /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_10(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_10(t1, t1, v->x /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_8(t1, t1, v->x /*, p256_mod, p256_mp_mod*/);
 		/* U2 = X2*Z1^2 */
-		sp_256_mont_sqr_10(t2, v->z /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_10(t4, t2, v->z /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_10(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_sqr_8(t2, v->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_8(t4, t2, v->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
 		/* S1 = Y1*Z2^3 */
-		sp_256_mont_mul_10(t3, t3, v->y /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_8(t3, t3, v->y /*, p256_mod, p256_mp_mod*/);
 		/* S2 = Y2*Z1^3 */
-		sp_256_mont_mul_10(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
 		/* H = U2 - U1 */
-		sp_256_mont_sub_10(t2, t2, t1, p256_mod);
+		sp_256_mont_sub_8(t2, t2, t1, p256_mod);
 		/* R = S2 - S1 */
-		sp_256_mont_sub_10(t4, t4, t3, p256_mod);
+		sp_256_mont_sub_8(t4, t4, t3, p256_mod);
 		/* Z3 = H*Z1*Z2 */
-		sp_256_mont_mul_10(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_10(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_8(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_8(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/);
 		/* X3 = R^2 - H^3 - 2*U1*H^2 */
-		sp_256_mont_sqr_10(v->x, t4 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_sqr_10(t5, t2 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_10(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_10(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_sub_10(v->x, v->x, t5, p256_mod);
-		sp_256_mont_dbl_10(t1, v->y, p256_mod);
-		sp_256_mont_sub_10(v->x, v->x, t1, p256_mod);
+		sp_256_mont_sqr_8(v->x, t4 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_8(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_sub_8(v->x, v->x, t5, p256_mod);
+		sp_256_mont_dbl_8(t1, v->y, p256_mod);
+		sp_256_mont_sub_8(v->x, v->x, t1, p256_mod);
 		/* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
-		sp_256_mont_sub_10(v->y, v->y, v->x, p256_mod);
-		sp_256_mont_mul_10(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_10(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_sub_10(v->y, v->y, t5, p256_mod);
+		sp_256_mont_sub_8(v->y, v->y, v->x, p256_mod);
+		sp_256_mont_mul_8(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_sub_8(v->y, v->y, t5, p256_mod);
 	}
 }
 
@@ -788,12 +889,11 @@ static void sp_256_proj_point_add_10(sp_point* r, sp_point* p, sp_point* q)
  * k     Scalar to multiply by.
  * map   Indicates whether to convert result to affine.
  */
-static void sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k /*, int map*/)
+static void sp_256_ecc_mulmod_8(sp_point* r, const sp_point* g, const sp_digit* k /*, int map*/)
 {
 	enum { map = 1 }; /* we always convert result to affine coordinates */
 	sp_point t[3];
-	sp_digit n;
-	int i;
+	sp_digit n = n; /* for compiler */
 	int c, y;
 
 	memset(t, 0, sizeof(t));
@@ -801,36 +901,44 @@ static void sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit*
 	/* t[0] = {0, 0, 1} * norm */
 	t[0].infinity = 1;
 	/* t[1] = {g->x, g->y, g->z} * norm */
-	sp_256_mod_mul_norm_10(t[1].x, g->x);
-	sp_256_mod_mul_norm_10(t[1].y, g->y);
-	sp_256_mod_mul_norm_10(t[1].z, g->z);
-	dump_512("t[1].x %s\n", t[1].x);
-	dump_512("t[1].y %s\n", t[1].y);
-	dump_512("t[1].z %s\n", t[1].z);
-
-	i = 9;
-	c = 22;
-	n = k[i--] << (26 - c);
-	for (; ; c--) {
-		if (c == 0) {
-			if (i == -1)
-				break;
+	sp_256_mod_mul_norm_8(t[1].x, g->x);
+	sp_256_mod_mul_norm_8(t[1].y, g->y);
+	sp_256_mod_mul_norm_8(t[1].z, g->z);
 
-			n = k[i--];
-			c = 26;
+	/* For every bit, starting from most significant... */
+	k += 7;
+	c = 256;
+	for (;;) {
+		if ((c & 0x1f) == 0) {
+			if (c == 0)
+				break;
+			n = *k--;
 		}
 
-		y = (n >> 25) & 1;
-		n <<= 1;
-
-		sp_256_proj_point_add_10(&t[y^1], &t[0], &t[1]);
+		y = (n >> 31);
+		dbg("y:%d t[%d] = t[0]+t[1]\n", y, y^1);
+		sp_256_proj_point_add_8(&t[y^1], &t[0], &t[1]);
+		dump_512("t[0].x %s\n", t[0].x);
+		dump_512("t[0].y %s\n", t[0].y);
+		dump_512("t[0].z %s\n", t[0].z);
+		dump_512("t[1].x %s\n", t[1].x);
+		dump_512("t[1].y %s\n", t[1].y);
+		dump_512("t[1].z %s\n", t[1].z);
+		dbg("t[2] = t[%d]\n", y);
 		memcpy(&t[2], &t[y], sizeof(sp_point));
-		sp_256_proj_point_dbl_10(&t[2], &t[2]);
+		dbg("t[2] *= 2\n");
+		sp_256_proj_point_dbl_8(&t[2], &t[2]);
+		dump_512("t[2].x %s\n", t[2].x);
+		dump_512("t[2].y %s\n", t[2].y);
+		dump_512("t[2].z %s\n", t[2].z);
 		memcpy(&t[y], &t[2], sizeof(sp_point));
+
+		n <<= 1;
+		c--;
 	}
 
 	if (map)
-		sp_256_map_10(r, &t[0]);
+		sp_256_map_8(r, &t[0]);
 	else
 		memcpy(r, &t[0], sizeof(sp_point));
 
@@ -844,7 +952,7 @@ static void sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit*
  * k     Scalar to multiply by.
  * map   Indicates whether to convert result to affine.
  */
-static void sp_256_ecc_mulmod_base_10(sp_point* r, sp_digit* k /*, int map*/)
+static void sp_256_ecc_mulmod_base_8(sp_point* r, sp_digit* k /*, int map*/)
 {
 	/* Since this function is called only once, save space:
 	 * don't have "static const sp_point p256_base = {...}",
@@ -861,7 +969,7 @@ static void sp_256_ecc_mulmod_base_10(sp_point* r, sp_digit* k /*, int map*/)
 
 	sp_256_point_from_bin2x32(&p256_base, p256_base_bin);
 
-	sp_256_ecc_mulmod_10(r, &p256_base, k /*, map*/);
+	sp_256_ecc_mulmod_8(r, &p256_base, k /*, map*/);
 }
 
 /* Multiply the point by the scalar and serialize the X ordinate.
@@ -871,7 +979,7 @@ static void sp_256_ecc_mulmod_base_10(sp_point* r, sp_digit* k /*, int map*/)
  * pub2x32 Point to multiply.
  * out32   Buffer to hold X ordinate.
  */
-static void sp_ecc_secret_gen_256(const sp_digit priv[10], const uint8_t *pub2x32, uint8_t* out32)
+static void sp_ecc_secret_gen_256(const sp_digit priv[8], const uint8_t *pub2x32, uint8_t* out32)
 {
 	sp_point point[1];
 
@@ -885,66 +993,48 @@ static void sp_ecc_secret_gen_256(const sp_digit priv[10], const uint8_t *pub2x3
 	dump_512("point->x %s\n", point->x);
 	dump_512("point->y %s\n", point->y);
 
-	sp_256_ecc_mulmod_10(point, point, priv);
+	sp_256_ecc_mulmod_8(point, point, priv);
 
-	sp_256_to_bin_10(point->x, out32);
+	sp_256_to_bin_8(point->x, out32);
 	dump_hex("out32: %s\n", out32, 32);
 }
 
-/* Generates a scalar that is in the range 1..order-1. */
-#define SIMPLIFY 1
-/* Add 1 to a. (a = a + 1) */
-static void sp_256_add_one_10(sp_digit* a)
-{
-	a[0]++;
-	sp_256_norm_10(a);
-}
-static void sp_256_ecc_gen_k_10(sp_digit k[10])
+/* Generates a random scalar in [1..order-1] range. */
+static void sp_256_ecc_gen_k_8(sp_digit k[8])
 {
-#if !SIMPLIFY
-	/* The order of the curve P256 minus 2. */
-	static const sp_digit p256_order2[10] = {
-		0x063254f,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,
-		0x3ffffff,0x3ffffff,0x00003ff,0x3ff0000,0x03fffff,
-	};
-#endif
-	uint8_t buf[32];
-
-	for (;;) {
-		tls_get_random(buf, sizeof(buf));
+	/* Since 32-bit words are "dense", no need to use
+	 * sp_256_from_bin_8(k, buf) to convert random stream
+	 * to sp_digit array - just store random bits there directly.
+	 */
+	tls_get_random(k, 8 * sizeof(k[0]));
 #if FIXED_SECRET
-		memset(buf, 0x77, sizeof(buf));
+	memset(k, 0x77, 8 * sizeof(k[0]));
 #endif
-		sp_256_from_bin_10(k, buf);
-#if !SIMPLIFY
-		if (sp_256_cmp_10(k, p256_order2) < 0)
-			break;
-#else
-		/* non-loopy version (and not needing p256_order2[]):
-		 * if most-significant word seems that k can be larger
-		 * than p256_order2, fix it up:
-		 */
-		if (k[9] >= 0x03fffff)
-			k[9] = 0x03ffffe;
-		break;
-#endif
-	}
-	sp_256_add_one_10(k);
-#undef SIMPLIFY
+
+// If scalar is too large, try again (pseudo-code)
+//	if (k >= 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551 - 1) // order of P256
+//		goto pick_another_random;
+//	k++; // ensure non-zero
+	/* Simpler alternative, at the cost of not choosing some valid
+	 * random values, and slightly non-uniform distribution */
+	if (k[0] == 0)
+		k[0] = 1;
+	if (k[7] >= 0xffffffff)
+		k[7] = 0xfffffffe;
 }
 
 /* Makes a random EC key pair. */
-static void sp_ecc_make_key_256(sp_digit privkey[10], uint8_t *pubkey)
+static void sp_ecc_make_key_256(sp_digit privkey[8], uint8_t *pubkey)
 {
 	sp_point point[1];
 
-	sp_256_ecc_gen_k_10(privkey);
+	sp_256_ecc_gen_k_8(privkey);
 	dump_256("privkey %s\n", privkey);
-	sp_256_ecc_mulmod_base_10(point, privkey);
+	sp_256_ecc_mulmod_base_8(point, privkey);
 	dump_512("point->x %s\n", point->x);
 	dump_512("point->y %s\n", point->y);
-	sp_256_to_bin_10(point->x, pubkey);
-	sp_256_to_bin_10(point->y, pubkey + 32);
+	sp_256_to_bin_8(point->x, pubkey);
+	sp_256_to_bin_8(point->y, pubkey + 32);
 
 	memset(point, 0, sizeof(point)); //paranoia
 }
@@ -953,8 +1043,9 @@ void FAST_FUNC curve_P256_compute_pubkey_and_premaster(
 		uint8_t *pubkey2x32, uint8_t *premaster32,
 		const uint8_t *peerkey2x32)
 {
-	sp_digit privkey[10];
+	sp_digit privkey[8];
 
+	dump_hex("peerkey2x32: %s\n", peerkey2x32, 64);
 	sp_ecc_make_key_256(privkey, pubkey2x32);
 	dump_hex("pubkey: %s\n", pubkey2x32, 32);
 	dump_hex("        %s\n", pubkey2x32 + 32, 32);
-- 
cgit v1.2.3-55-g6feb


From bbd723ebec33aa14746dde88b982b160977938b6 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 5 Oct 2021 23:19:18 +0200
Subject: tls: optimize sp_256_mul_8 in P256

function                                             old     new   delta
sp_256_mont_mul_8                                    151     150      -1

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 82 insertions(+), 2 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index b99951890..e1c4cdd54 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -294,6 +294,85 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 /* Multiply a and b into r. (r = a * b) */
 static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
+#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
+	sp_digit rr[15]; /* in case r coincides with a or b */
+	int k;
+	uint32_t accl;
+	uint32_t acch;
+
+	acch = accl = 0;
+	for (k = 0; k < 15; k++) {
+		int i, j;
+		uint32_t acc_hi;
+		i = k - 7;
+		if (i < 0)
+			i = 0;
+		j = k - i;
+		acc_hi = 0;
+		do {
+////////////////////////
+//			uint64_t m = ((uint64_t)a[i]) * b[j];
+//			acc_hi:acch:accl += m;
+			asm volatile (
+			// a[i] is already loaded in %%eax
+"\n			mull	%7"
+"\n			addl	%%eax, %0"
+"\n			adcl	%%edx, %1"
+"\n			adcl	$0, %2"
+			: "=rm" (accl), "=rm" (acch), "=rm" (acc_hi)
+			: "0" (accl), "1" (acch), "2" (acc_hi), "a" (a[i]), "m" (b[j])
+			: "cc", "dx"
+			);
+////////////////////////
+		        j--;
+			i++;
+		} while (i != 8 && i <= k);
+		rr[k] = accl;
+		accl = acch;
+		acch = acc_hi;
+	}
+	r[15] = accl;
+	memcpy(r, rr, sizeof(rr));
+#elif 0
+	//TODO: arm assembly (untested)
+	sp_digit tmp[16];
+
+	asm volatile (
+"\n		mov	r5, #0"
+"\n		mov	r6, #0"
+"\n		mov	r7, #0"
+"\n		mov	r8, #0"
+"\n	1:"
+"\n		subs	r3, r5, #28"
+"\n		movcc	r3, #0"
+"\n		sub	r4, r5, r3"
+"\n		2:"
+"\n		ldr	r14, [%[a], r3]"
+"\n		ldr	r12, [%[b], r4]"
+"\n		umull	r9, r10, r14, r12"
+"\n		adds	r6, r6, r9"
+"\n		adcs	r7, r7, r10"
+"\n		adc	r8, r8, #0"
+"\n		add	r3, r3, #4"
+"\n		sub	r4, r4, #4"
+"\n		cmp	r3, #32"
+"\n		beq	3f"
+"\n		cmp	r3, r5"
+"\n		ble	2b"
+"\n	3:"
+"\n		str	r6, [%[r], r5]"
+"\n		mov	r6, r7"
+"\n		mov	r7, r8"
+"\n		mov	r8, #0"
+"\n		add	r5, r5, #4"
+"\n		cmp	r5, #56"
+"\n		ble	1b"
+"\n		str	r6, [%[r], r5]"
+		: [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+		: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+	);
+	memcpy(r, tmp, sizeof(tmp));
+#else
 	sp_digit rr[15]; /* in case r coincides with a or b */
 	int i, j, k;
 	uint64_t acc;
@@ -306,19 +385,20 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 			i = 0;
 		j = k - i;
 		acc_hi = 0;
-		while (i != 8 && i <= k) {
+		do {
 			uint64_t m = ((uint64_t)a[i]) * b[j];
 			acc += m;
 			if (acc < m)
 				acc_hi++;
 		        j--;
 			i++;
-		}
+		} while (i != 8 && i <= k);
 		rr[k] = acc;
 		acc = (acc >> 32) | ((uint64_t)acc_hi << 32);
 	}
 	r[15] = acc;
 	memcpy(r, rr, sizeof(rr));
+#endif
 }
 
 /* Shift number right one bit. Bottom bit is lost. */
-- 
cgit v1.2.3-55-g6feb


From 2430fcfd8de47f786aca1185ae0500fa36c6a548 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 6 Oct 2021 00:19:30 +0200
Subject: tls: optimize sp_256_mont_reduce_8 in P256

The code size decrease is small, but we eliminate ALL multiplies!

function                                             old     new   delta
sp_256_mont_reduce_8                                 268     262      -6

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 146 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 125 insertions(+), 21 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index e1c4cdd54..0773a2d47 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -488,19 +488,118 @@ static void sp_256_mont_shift_8(sp_digit* r, const sp_digit* a)
 }
 
 /* Mul a by scalar b and add into r. (r += a * b) */
-static int sp_256_mul_add_8(sp_digit* r, const sp_digit* a, sp_digit b)
+static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
 {
+//	const sp_digit* a = p256_mod;
+//a[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
+	sp_digit b = r[0];
 	uint64_t t = 0;
-	int i;
 
-	for (i = 0; i < 8; i++) {
-		uint32_t t_hi;
-		uint64_t m = ((uint64_t)b * a[i]) + r[i];
+//	for (i = 0; i < 8; i++) {
+//		uint32_t t_hi;
+//		uint64_t m = ((uint64_t)b * a[i]) + r[i];
+//		t += m;
+//		t_hi = (t < m);
+//		r[i] = (sp_digit)t;
+//		t = (t >> 32) | ((uint64_t)t_hi << 32);
+//	}
+//	r[8] += (sp_digit)t;
+
+	// Unroll, then optimize the above loop:
+		//uint32_t t_hi;
+		uint64_t m;
+
+		//m = ((uint64_t)b * a[0]) + r[0];
+		//  Since b is r[0] and a[0] is ffffffff, the above optimizes to:
+		//  m = r[0] * ffffffff + r[0] = (r[0] * 100000000 - r[0]) + r[0] = r[0] << 32;
+		//t += m;
+		//  t = (uint64_t)r[0] << 32;
+		//t_hi = (t < m);
+		//  t_hi = 0;
+		//r[0] = (sp_digit)t;
+		r[0] = 0;
+		//t = (t >> 32) | ((uint64_t)t_hi << 32);
+		//  t = b;
+
+		//m = ((uint64_t)b * a[1]) + r[1];
+		//  Since a[1] is ffffffff, the above optimizes to:
+		//  m = b * ffffffff + r[1] = (b * 100000000 - b) + r[1] = (b << 32) - b + r[1];
+		//t += m;
+		//  t = b + (b << 32) - b + r[1] = (b << 32) + r[1];
+		//t_hi = (t < m);
+		//  t_hi = 0;
+		//r[1] = (sp_digit)t;
+		//  r[1] = r[1];
+		//t = (t >> 32) | ((uint64_t)t_hi << 32);
+		//  t = b;
+
+		//m = ((uint64_t)b * a[2]) + r[2];
+		//  Since a[2] is ffffffff, the above optimizes to:
+		//  m = b * ffffffff + r[2] = (b * 100000000 - b) + r[2] = (b << 32) - b + r[2];
+		//t += m;
+		//  t = b + (b << 32) - b + r[2] = (b << 32) + r[2]
+		//t_hi = (t < m);
+		//  t_hi = 0;
+		//r[2] = (sp_digit)t;
+		//  r[2] = r[2];
+		//t = (t >> 32) | ((uint64_t)t_hi << 32);
+		//  t = b;
+
+		//m = ((uint64_t)b * a[3]) + r[3];
+		//  Since a[3] is 00000000, the above optimizes to:
+		//  m = b * 0 + r[3] = r[3];
+		//t += m;
+		//  t += r[3];
+		//t_hi = (t < m);
+		//  t_hi = 0;
+		//r[3] = (sp_digit)t;
+		r[3] = r[3] + b;
+		//t = (t >> 32) | ((uint64_t)t_hi << 32);
+		t = (r[3] < b);
+
+		//m = ((uint64_t)b * a[4]) + r[4];
+		//  Since a[4] is 00000000, the above optimizes to:
+		//  m = b * 0 + r[4] = r[4];
+		//t += m;
+		t += r[4];
+		//t_hi = (t < m);
+		//  t_hi = 0;
+		r[4] = (sp_digit)t;
+		//t = (t >> 32) | ((uint64_t)t_hi << 32);
+		t = (t >> 32);
+
+		//m = ((uint64_t)b * a[5]) + r[5];
+		//  Since a[5] is 00000000, the above optimizes to:
+		//  m = b * 0 + r[5] = r[5];
+		//t += m;
+		t += r[5];
+		//t_hi = (t < m);
+		//  t_hi = 0;
+		r[5] = (sp_digit)t;
+		//t = (t >> 32) | ((uint64_t)t_hi << 32);
+		t = (t >> 32);
+
+		//m = ((uint64_t)b * a[6]) + r[6];
+		//  Since a[6] is 00000001, the above optimizes to:
+		m = (uint64_t)b + r[6]; // 33 bits at most
 		t += m;
-		t_hi = (t < m);
-		r[i] = (sp_digit)t;
-		t = (t >> 32) | ((uint64_t)t_hi << 32);
-	}
+		//t_hi = (t < m);
+		//  t_hi = 0; //32bit_value + 33bit_value can't overflow 64 bits
+		r[6] = (sp_digit)t;
+		//t = (t >> 32) | ((uint64_t)t_hi << 32);
+		t = (t >> 32);
+
+		//m = ((uint64_t)b * a[7]) + r[7];
+		//  Since a[7] is ffffffff, the above optimizes to:
+		//  m = b * ffffffff + r[7] = (b * 100000000 - b) + r[7]
+		m = ((uint64_t)b << 32) - b + r[7];
+		t += m;
+		//t_hi = (t < m);
+		//  t_hi in fact is always 0 here
+		r[7] = (sp_digit)t;
+		//t = (t >> 32) | ((uint64_t)t_hi << 32);
+		t = (t >> 32);
+
 	r[8] += (sp_digit)t;
 	return (r[8] < (sp_digit)t); /* 1 if addition overflowed */
 }
@@ -517,28 +616,33 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 	sp_digit mp = p256_mp_mod;
 
 	int i;
-	sp_digit mu;
+//	sp_digit mu;
 
 	if (mp != 1) {
-		int too_wide;
-		for (i = 0; i < 7; i++) {
-			mu = (sp_digit)(a[i] * mp);
-			if (sp_256_mul_add_8(a+i, m, mu))
-				(a+i)[9]++;
+		sp_digit word16th = 0;
+		for (i = 0; i < 8; i++) {
+//			mu = (sp_digit)(a[i] * mp);
+			if (sp_256_mul_add_8(a+i /*, m, mu*/)) {
+				int j = i + 8;
+ inc_next_word0:
+				if (++j > 15) { /* a[16] array has no more words? */
+					word16th++;
+					continue;
+				}
+				if (++a[j] == 0) /* did this overflow too? */
+					goto inc_next_word0;
+			}
 		}
-		mu = (sp_digit)(a[7] * mp);
-		too_wide = sp_256_mul_add_8(a+7, m, mu);
 		sp_256_mont_shift_8(a, a);
-		if (too_wide)
+		if (word16th != 0)
 			sp_256_sub_8(a, a, m);
 		sp_256_norm_8(a);
 	}
 	else { /* Same code for explicit mp == 1 (which is always the case for P256) */
 		sp_digit word16th = 0;
 		for (i = 0; i < 8; i++) {
-			mu = a[i];
-//m = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
-			if (sp_256_mul_add_8(a+i, m, mu)) {
+//			mu = a[i];
+			if (sp_256_mul_add_8(a+i /*, m, mu*/)) {
 				int j = i + 8;
  inc_next_word:
 				if (++j > 15) { /* a[16] array has no more words? */
-- 
cgit v1.2.3-55-g6feb


From c78428461513afed5e3bf272bcbf17964cbd61a3 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 6 Oct 2021 01:09:37 +0200
Subject: tls: P256: propagate constants, create dedicated "subtract p256_mod"
 function

8 instances of this subtraction probably warrant a few bytes more of code.

function                                             old     new   delta
sp_256_sub_8_p256_mod                                  -      71     +71
sp_256_mont_sub_8                                      -      29     +29
sp_256_mont_dbl_8                                      -      26     +26
sp_256_mont_reduce_8                                 262     257      -5
sp_256_ecc_mulmod_8                                 1171    1161     -10
sp_256_proj_point_dbl_8                              374     359     -15
static.sp_256_mont_sub_8                              29       -     -29
static.sp_256_mont_dbl_8                              31       -     -31
------------------------------------------------------------------------------
(add/remove: 3/2 grow/shrink: 0/3 up/down: 126/-90)            Total: 36 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 140 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 108 insertions(+), 32 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 0773a2d47..1ab6106a7 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -291,6 +291,74 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 #endif
 }
 
+/* Sub p256_mod from a into r. (r = a - p256_mod). */
+static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
+{
+#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
+	sp_digit reg;
+//p256_mod[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
+	asm volatile (
+"\n		movl	(%0), %2"
+"\n		subl	$0xffffffff, %2"
+"\n		movl	%2, (%1)"
+"\n"
+"\n		movl	1*4(%0), %2"
+"\n		sbbl	$0xffffffff, %2"
+"\n		movl	%2, 1*4(%1)"
+"\n"
+"\n		movl	2*4(%0), %2"
+"\n		sbbl	$0xffffffff, %2"
+"\n		movl	%2, 2*4(%1)"
+"\n"
+"\n		movl	3*4(%0), %2"
+"\n		sbbl	$0, %2"
+"\n		movl	%2, 3*4(%1)"
+"\n"
+"\n		movl	4*4(%0), %2"
+"\n		sbbl	$0, %2"
+"\n		movl	%2, 4*4(%1)"
+"\n"
+"\n		movl	5*4(%0), %2"
+"\n		sbbl	$0, %2"
+"\n		movl	%2, 5*4(%1)"
+"\n"
+"\n		movl	6*4(%0), %2"
+"\n		sbbl	$1, %2"
+"\n		movl	%2, 6*4(%1)"
+"\n"
+"\n		movl	7*4(%0), %2"
+"\n		sbbl	$0xffffffff, %2"
+"\n		movl	%2, 7*4(%1)"
+"\n"
+		: "=r" (a), "=r" (r), "=r" (reg)
+		: "0" (a), "1" (r)
+		: "memory"
+	);
+#else
+	const sp_digit* b = p256_mod;
+	int i;
+	sp_digit borrow;
+
+	borrow = 0;
+	for (i = 0; i < 8; i++) {
+		sp_digit w, v;
+		w = b[i] + borrow;
+		v = a[i];
+		if (w != 0) {
+			v = a[i] - w;
+			borrow = (v > a[i]);
+			/* hope compiler detects above as "carry flag set" */
+		}
+		/* else: b + borrow == 0, two cases:
+		 * b:ffffffff, borrow:1
+		 * b:00000000, borrow:0
+		 * in either case, r[i] = a[i] and borrow remains unchanged
+		 */
+		r[i] = v;
+	}
+#endif
+}
+
 /* Multiply a and b into r. (r = a * b) */
 static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
@@ -425,21 +493,25 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
 }
 
 /* Add two Montgomery form numbers (r = a + b % m) */
-static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
-		const sp_digit* m)
+static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b
+		/*, const sp_digit* m*/)
 {
+//	const sp_digit* m = p256_mod;
+
 	int carry = sp_256_add_8(r, a, b);
 	sp_256_norm_8(r);
 	if (carry) {
-		sp_256_sub_8(r, r, m);
+		sp_256_sub_8_p256_mod(r, r /*, m*/);
 		sp_256_norm_8(r);
 	}
 }
 
 /* Subtract two Montgomery form numbers (r = a - b % m) */
-static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
-		const sp_digit* m)
+static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b
+		/*, const sp_digit* m*/)
 {
+	const sp_digit* m = p256_mod;
+
 	int borrow;
 	borrow = sp_256_sub_8(r, a, b);
 	sp_256_norm_8(r);
@@ -450,28 +522,32 @@ static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
 }
 
 /* Double a Montgomery form number (r = a + a % m) */
-static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m*/)
 {
+//	const sp_digit* m = p256_mod;
+
 	int carry = sp_256_add_8(r, a, a);
 	sp_256_norm_8(r);
 	if (carry)
-		sp_256_sub_8(r, r, m);
+		sp_256_sub_8_p256_mod(r, r /*, m*/);
 	sp_256_norm_8(r);
 }
 
 /* Triple a Montgomery form number (r = a + a + a % m) */
-static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m*/)
 {
+//	const sp_digit* m = p256_mod;
+
 	int carry = sp_256_add_8(r, a, a);
 	sp_256_norm_8(r);
 	if (carry) {
-		sp_256_sub_8(r, r, m);
+		sp_256_sub_8_p256_mod(r, r /*, m*/);
 		sp_256_norm_8(r);
 	}
 	carry = sp_256_add_8(r, r, a);
 	sp_256_norm_8(r);
 	if (carry) {
-		sp_256_sub_8(r, r, m);
+		sp_256_sub_8_p256_mod(r, r /*, m*/);
 		sp_256_norm_8(r);
 	}
 }
@@ -612,7 +688,7 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
  */
 static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
 {
-	const sp_digit* m = p256_mod;
+//	const sp_digit* m = p256_mod;
 	sp_digit mp = p256_mp_mod;
 
 	int i;
@@ -635,13 +711,13 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 		}
 		sp_256_mont_shift_8(a, a);
 		if (word16th != 0)
-			sp_256_sub_8(a, a, m);
+			sp_256_sub_8_p256_mod(a, a /*, m*/);
 		sp_256_norm_8(a);
 	}
 	else { /* Same code for explicit mp == 1 (which is always the case for P256) */
 		sp_digit word16th = 0;
 		for (i = 0; i < 8; i++) {
-//			mu = a[i];
+			/*mu = a[i];*/
 			if (sp_256_mul_add_8(a+i /*, m, mu*/)) {
 				int j = i + 8;
  inc_next_word:
@@ -655,7 +731,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 		}
 		sp_256_mont_shift_8(a, a);
 		if (word16th != 0)
-			sp_256_sub_8(a, a, m);
+			sp_256_sub_8_p256_mod(a, a /*, m*/);
 		sp_256_norm_8(a);
 	}
 }
@@ -909,7 +985,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 	sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/);
 	/* Reduce x to less than modulus */
 	if (sp_256_cmp_8(r->x, p256_mod) >= 0)
-		sp_256_sub_8(r->x, r->x, p256_mod);
+		sp_256_sub_8_p256_mod(r->x, r->x /*, p256_mod*/);
 	sp_256_norm_8(r->x);
 
 	/* y /= z^3 */
@@ -918,7 +994,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 	sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
 	if (sp_256_cmp_8(r->y, p256_mod) >= 0)
-		sp_256_sub_8(r->y, r->y, p256_mod);
+		sp_256_sub_8_p256_mod(r->y, r->y /*, p256_mod*/);
 	sp_256_norm_8(r->y);
 
 	memset(r->z, 0, sizeof(r->z));
@@ -954,17 +1030,17 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	/* Z = Y * Z */
 	sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = 2Z */
-	sp_256_mont_dbl_8(r->z, r->z, p256_mod);
+	sp_256_mont_dbl_8(r->z, r->z /*, p256_mod*/);
 	/* T2 = X - T1 */
-	sp_256_mont_sub_8(t2, r->x, t1, p256_mod);
+	sp_256_mont_sub_8(t2, r->x, t1 /*, p256_mod*/);
 	/* T1 = X + T1 */
-	sp_256_mont_add_8(t1, r->x, t1, p256_mod);
+	sp_256_mont_add_8(t1, r->x, t1 /*, p256_mod*/);
 	/* T2 = T1 * T2 */
 	sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
 	/* T1 = 3T2 */
-	sp_256_mont_tpl_8(t1, t2, p256_mod);
+	sp_256_mont_tpl_8(t1, t2 /*, p256_mod*/);
 	/* Y = 2Y */
-	sp_256_mont_dbl_8(r->y, r->y, p256_mod);
+	sp_256_mont_dbl_8(r->y, r->y /*, p256_mod*/);
 	/* Y = Y * Y */
 	sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = Y * Y */
@@ -976,15 +1052,15 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	/* X = T1 * T1 */
 	sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
 	/* X = X - Y */
-	sp_256_mont_sub_8(r->x, r->x, r->y, p256_mod);
+	sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/);
 	/* X = X - Y */
-	sp_256_mont_sub_8(r->x, r->x, r->y, p256_mod);
+	sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/);
 	/* Y = Y - X */
-	sp_256_mont_sub_8(r->y, r->y, r->x, p256_mod);
+	sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/);
 	/* Y = Y * T1 */
 	sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
 	/* Y = Y - T2 */
-	sp_256_mont_sub_8(r->y, r->y, t2, p256_mod);
+	sp_256_mont_sub_8(r->y, r->y, t2 /*, p256_mod*/);
 	dump_512("y2 %s\n", r->y);
 }
 
@@ -1043,9 +1119,9 @@ static void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q)
 		/* S2 = Y2*Z1^3 */
 		sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
 		/* H = U2 - U1 */
-		sp_256_mont_sub_8(t2, t2, t1, p256_mod);
+		sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/);
 		/* R = S2 - S1 */
-		sp_256_mont_sub_8(t4, t4, t3, p256_mod);
+		sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/);
 		/* Z3 = H*Z1*Z2 */
 		sp_256_mont_mul_8(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/);
 		sp_256_mont_mul_8(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/);
@@ -1054,14 +1130,14 @@ static void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q)
 		sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
 		sp_256_mont_mul_8(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
 		sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_sub_8(v->x, v->x, t5, p256_mod);
-		sp_256_mont_dbl_8(t1, v->y, p256_mod);
-		sp_256_mont_sub_8(v->x, v->x, t1, p256_mod);
+		sp_256_mont_sub_8(v->x, v->x, t5 /*, p256_mod*/);
+		sp_256_mont_dbl_8(t1, v->y /*, p256_mod*/);
+		sp_256_mont_sub_8(v->x, v->x, t1 /*, p256_mod*/);
 		/* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
-		sp_256_mont_sub_8(v->y, v->y, v->x, p256_mod);
+		sp_256_mont_sub_8(v->y, v->y, v->x /*, p256_mod*/);
 		sp_256_mont_mul_8(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/);
 		sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_sub_8(v->y, v->y, t5, p256_mod);
+		sp_256_mont_sub_8(v->y, v->y, t5 /*, p256_mod*/);
 	}
 }
 
-- 
cgit v1.2.3-55-g6feb


From 00f2cceb6aa194aadcbe70675a0f0a0660aea233 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 6 Oct 2021 10:15:29 +0200
Subject: tls: P256: shrink sp_256_mul_add_8 a bit more

function                                             old     new   delta
sp_256_mont_reduce_8                                 257     245     -12

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 1ab6106a7..6fca2aad8 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -569,8 +569,10 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
 //	const sp_digit* a = p256_mod;
 //a[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
 	sp_digit b = r[0];
-	uint64_t t = 0;
 
+	uint64_t t;
+
+//	t = 0;
 //	for (i = 0; i < 8; i++) {
 //		uint32_t t_hi;
 //		uint64_t m = ((uint64_t)b * a[i]) + r[i];
@@ -584,12 +586,13 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
 	// Unroll, then optimize the above loop:
 		//uint32_t t_hi;
 		uint64_t m;
+		uint32_t t32;
 
 		//m = ((uint64_t)b * a[0]) + r[0];
 		//  Since b is r[0] and a[0] is ffffffff, the above optimizes to:
 		//  m = r[0] * ffffffff + r[0] = (r[0] * 100000000 - r[0]) + r[0] = r[0] << 32;
 		//t += m;
-		//  t = (uint64_t)r[0] << 32;
+		//  t = r[0] << 32 = b << 32;
 		//t_hi = (t < m);
 		//  t_hi = 0;
 		//r[0] = (sp_digit)t;
@@ -625,42 +628,49 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
 		//  Since a[3] is 00000000, the above optimizes to:
 		//  m = b * 0 + r[3] = r[3];
 		//t += m;
-		//  t += r[3];
+		//  t = b + r[3];
 		//t_hi = (t < m);
 		//  t_hi = 0;
 		//r[3] = (sp_digit)t;
 		r[3] = r[3] + b;
 		//t = (t >> 32) | ((uint64_t)t_hi << 32);
-		t = (r[3] < b);
+		t32 = (r[3] < b); // 0 or 1
 
 		//m = ((uint64_t)b * a[4]) + r[4];
 		//  Since a[4] is 00000000, the above optimizes to:
 		//  m = b * 0 + r[4] = r[4];
 		//t += m;
-		t += r[4];
+		//  t = t32 + r[4];
 		//t_hi = (t < m);
 		//  t_hi = 0;
-		r[4] = (sp_digit)t;
+		//r[4] = (sp_digit)t;
 		//t = (t >> 32) | ((uint64_t)t_hi << 32);
-		t = (t >> 32);
+		if (t32 != 0) {
+			r[4]++;
+			t32 = (r[4] == 0); // 0 or 1
 
 		//m = ((uint64_t)b * a[5]) + r[5];
 		//  Since a[5] is 00000000, the above optimizes to:
 		//  m = b * 0 + r[5] = r[5];
 		//t += m;
-		t += r[5];
+		//  t = t32 + r[5]; (t32 is 0 or 1)
 		//t_hi = (t < m);
 		//  t_hi = 0;
-		r[5] = (sp_digit)t;
+		//r[5] = (sp_digit)t;
 		//t = (t >> 32) | ((uint64_t)t_hi << 32);
-		t = (t >> 32);
+			if (t32 != 0) {
+				r[5]++;
+				t32 = (r[5] == 0); // 0 or 1
+			}
+		}
 
 		//m = ((uint64_t)b * a[6]) + r[6];
 		//  Since a[6] is 00000001, the above optimizes to:
-		m = (uint64_t)b + r[6]; // 33 bits at most
-		t += m;
+		//  m = (uint64_t)b + r[6]; // 33 bits at most
+		//t += m;
+		t = t32 + (uint64_t)b + r[6];
 		//t_hi = (t < m);
-		//  t_hi = 0; //32bit_value + 33bit_value can't overflow 64 bits
+		//  t_hi = 0;
 		r[6] = (sp_digit)t;
 		//t = (t >> 32) | ((uint64_t)t_hi << 32);
 		t = (t >> 32);
@@ -671,7 +681,7 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
 		m = ((uint64_t)b << 32) - b + r[7];
 		t += m;
 		//t_hi = (t < m);
-		//  t_hi in fact is always 0 here
+		//  t_hi in fact is always 0 here (256bit * 32bit can't have more than 32 bits of overflow)
 		r[7] = (sp_digit)t;
 		//t = (t >> 32) | ((uint64_t)t_hi << 32);
 		t = (t >> 32);
-- 
cgit v1.2.3-55-g6feb


From 567eefcaf8712b72c3cd5b45aa013ff1eb45d235 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 6 Oct 2021 14:25:10 +0200
Subject: tls: P256: do not duplicate sp_256_sub_8()

function                                             old     new   delta
sp_256_proj_point_dbl_8                              359     374     +15
sp_256_ecc_mulmod_8                                 1159    1171     +12
sp_256_mont_reduce_8                                 245     250      +5
sp_256_mont_dbl_8                                     26      31      +5
sp_256_sub_8_p256_mod                                 43       -     -43
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 4/0 up/down: 37/-43)             Total: -6 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 6fca2aad8..17fc05f63 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -291,10 +291,10 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 #endif
 }
 
+#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
 /* Sub p256_mod from a into r. (r = a - p256_mod). */
 static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
 {
-#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
 	sp_digit reg;
 //p256_mod[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
 	asm volatile (
@@ -334,30 +334,10 @@ static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
 		: "0" (a), "1" (r)
 		: "memory"
 	);
+}
 #else
-	const sp_digit* b = p256_mod;
-	int i;
-	sp_digit borrow;
-
-	borrow = 0;
-	for (i = 0; i < 8; i++) {
-		sp_digit w, v;
-		w = b[i] + borrow;
-		v = a[i];
-		if (w != 0) {
-			v = a[i] - w;
-			borrow = (v > a[i]);
-			/* hope compiler detects above as "carry flag set" */
-		}
-		/* else: b + borrow == 0, two cases:
-		 * b:ffffffff, borrow:1
-		 * b:00000000, borrow:0
-		 * in either case, r[i] = a[i] and borrow remains unchanged
-		 */
-		r[i] = v;
-	}
+# define sp_256_sub_8_p256_mod(r, a) sp_256_sub_8((r), (a), p256_mod)
 #endif
-}
 
 /* Multiply a and b into r. (r = a * b) */
 static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
-- 
cgit v1.2.3-55-g6feb


From d74993d31dc91b7da6519527a94b4795dd8f6814 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 6 Oct 2021 14:28:47 +0200
Subject: tls: P256: remove "header comment is kept intact" comment

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 17fc05f63..76f0770c2 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -32,7 +32,6 @@ typedef int32_t signed_sp_digit;
 /* The code below is taken from parts of
  *  wolfssl-3.15.3/wolfcrypt/src/sp_c32.c
  * and heavily modified.
- * Header comment is kept intact:
  */
 
 typedef struct sp_point {
-- 
cgit v1.2.3-55-g6feb


From 22fd8fd3f4c271d6037753165131c7c35a039762 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 6 Oct 2021 16:10:49 +0200
Subject: tls: P256: tweak arm assembly (currently disabled)

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 177 ++++++++++++++++++++++++------------------------
 1 file changed, 89 insertions(+), 88 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 76f0770c2..532047739 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -393,7 +393,7 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 "\n		subs	r3, r5, #28"
 "\n		movcc	r3, #0"
 "\n		sub	r4, r5, r3"
-"\n		2:"
+"\n	2:"
 "\n		ldr	r14, [%[a], r3]"
 "\n		ldr	r12, [%[b], r4]"
 "\n		umull	r9, r10, r14, r12"
@@ -416,7 +416,7 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 "\n		ble	1b"
 "\n		str	r6, [%[r], r5]"
 		: [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
-		: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+		: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
 	);
 	memcpy(r, tmp, sizeof(tmp));
 #else
@@ -732,97 +732,98 @@ static void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m, sp_digit mp)
 
 	asm volatile (
 	# i = 0
-	mov	r12, #0                 #  i = 0
-	ldr	r10, [%[a], #0]         #  r10 = a[0]
-	ldr	r14, [%[a], #4]         #  r14 = a[1]
+	mov	r12, #0
+	ldr	r10, [%[a], #0]
+	ldr	r14, [%[a], #4]
 1:
-	# mu = a[i] * mp                #
-	mul	r8, %[mp], r10          # mu = a[i] * mp
-	# a[i+0] += m[0] * mu           #
-	ldr	r7, [%[m], #0]          # a[i+0] += m[0] * mu
-	ldr	r9, [%[a], #0]          #
-	umull	r6, r7, r8, r7          #  r7:r6 = mu * m[0]
-	adds	r10, r10, r6            #  r5:r10 += r7:r6
-	adc	r5, r7, #0              #
-	# a[i+1] += m[1] * mu           #
-	ldr	r7, [%[m], #4]          # a[i+1] += m[1] * mu
-	ldr	r9, [%[a], #4]          #
-	umull	r6, r7, r8, r7          #  r7:r6 = mu * m[1]
-	adds	r10, r14, r6            #  r4:r10 = r7:r14 + r7:r6
-	adc	r4, r7, #0              #
-	adds	r10, r10, r5            #  r4:r10 += r5
-	adc	r4, r4, #0              #
-	# a[i+2] += m[2] * mu           #
-	ldr	r7, [%[m], #8]          # a[i+2] += m[2] * mu
-	ldr	r14, [%[a], #8]         #
-	umull	r6, r7, r8, r7          #
-	adds	r14, r14, r6            #
-	adc	r5, r7, #0              #
-	adds	r14, r14, r4            #
-	adc	r5, r5, #0              #
-	# a[i+3] += m[3] * mu           #
-	ldr	r7, [%[m], #12]         # a[i+3] += m[3] * mu
-	ldr	r9, [%[a], #12]         #
-	umull	r6, r7, r8, r7          #
-	adds	r9, r9, r6              #
-	adc	r4, r7, #0              #
-	adds	r9, r9, r5              #
-	str	r9, [%[a], #12]         #  a[3] = r9
-	adc	r4, r4, #0              #
-	# a[i+4] += m[4] * mu           #
-	ldr	r7, [%[m], #16]         # a[i+4] += m[4] * mu
-	ldr	r9, [%[a], #16]         #
-	umull	r6, r7, r8, r7          #
-	adds	r9, r9, r6              #
-	adc	r5, r7, #0              #
-	adds	r9, r9, r4              #
-	str	r9, [%[a], #16]         #  a[4] = r9
-	adc	r5, r5, #0              #
-	# a[i+5] += m[5] * mu           #
-	ldr	r7, [%[m], #20]         # a[i+5] += m[5] * mu
-	ldr	r9, [%[a], #20]         #
-	umull	r6, r7, r8, r7          #
-	adds	r9, r9, r6              #
-	adc	r4, r7, #0              #
-	adds	r9, r9, r5              #
-	str	r9, [%[a], #20]         #  a[5] = r9
-	adc	r4, r4, #0              #
-	# a[i+6] += m[6] * mu           #
-	ldr	r7, [%[m], #24]         # a[i+6] += m[6] * mu
-	ldr	r9, [%[a], #24]         #
-	umull	r6, r7, r8, r7          #
-	adds	r9, r9, r6              #
-	adc	r5, r7, #0              #
-	adds	r9, r9, r4              #
-	str	r9, [%[a], #24]         #  a[6] = r9
-	adc	r5, r5, #0              #
-	# a[i+7] += m[7] * mu           #
-	ldr	r7, [%[m], #28]         # a[i+7] += m[7] * mu
-	ldr	r9, [%[a], #28]         #
-	umull	r6, r7, r8, r7          #
-	adds	r5, r5, r6              #
-	adcs	r7, r7, %[ca]           #
-	mov	%[ca], #0               #
-	adc	%[ca], %[ca], %[ca]     #  ca = CF
-	adds	r9, r9, r5              #
-	str	r9, [%[a], #28]         #  a[7] = r9
-	ldr	r9, [%[a], #32]         #  r9 = a[8]
-	adcs	r9, r9, r7              #
-	str	r9, [%[a], #32]         #  a[8] = r9
-	adc	%[ca], %[ca], #0        #  ca += CF
-	# i += 1                        # i++
-	add	%[a], %[a], #4          #  a++
-	add	r12, r12, #4            #  i += 4
-	cmp	r12, #32                #  if (i < 32)
-	blt	1b                      #   goto 1
-
-	str	r10, [%[a], #0]         #  a[0] = r10
-	str	r14, [%[a], #4]         #  a[1] = r14
+	# mu = a[i] * mp
+	mul	r8, %[mp], r10
+	# a[i+0] += m[0] * mu
+	ldr	r7, [%[m], #0]
+	ldr	r9, [%[a], #0]
+	umull	r6, r7, r8, r7
+	adds	r10, r10, r6
+	adc	r5, r7, #0
+	# a[i+1] += m[1] * mu
+	ldr	r7, [%[m], #4]
+	ldr	r9, [%[a], #4]
+	umull	r6, r7, r8, r7
+	adds	r10, r14, r6
+	adc	r4, r7, #0
+	adds	r10, r10, r5
+	adc	r4, r4, #0
+	# a[i+2] += m[2] * mu
+	ldr	r7, [%[m], #8]
+	ldr	r14, [%[a], #8]
+	umull	r6, r7, r8, r7
+	adds	r14, r14, r6
+	adc	r5, r7, #0
+	adds	r14, r14, r4
+	adc	r5, r5, #0
+	# a[i+3] += m[3] * mu
+	ldr	r7, [%[m], #12]
+	ldr	r9, [%[a], #12]
+	umull	r6, r7, r8, r7
+	adds	r9, r9, r6
+	adc	r4, r7, #0
+	adds	r9, r9, r5
+	str	r9, [%[a], #12]
+	adc	r4, r4, #0
+	# a[i+4] += m[4] * mu
+	ldr	r7, [%[m], #16]
+	ldr	r9, [%[a], #16]
+	umull	r6, r7, r8, r7
+	adds	r9, r9, r6
+	adc	r5, r7, #0
+	adds	r9, r9, r4
+	str	r9, [%[a], #16]
+	adc	r5, r5, #0
+	# a[i+5] += m[5] * mu
+	ldr	r7, [%[m], #20]
+	ldr	r9, [%[a], #20]
+	umull	r6, r7, r8, r7
+	adds	r9, r9, r6
+	adc	r4, r7, #0
+	adds	r9, r9, r5
+	str	r9, [%[a], #20]
+	adc	r4, r4, #0
+	# a[i+6] += m[6] * mu
+	ldr	r7, [%[m], #24]
+	ldr	r9, [%[a], #24]
+	umull	r6, r7, r8, r7
+	adds	r9, r9, r6
+	adc	r5, r7, #0
+	adds	r9, r9, r4
+	str	r9, [%[a], #24]
+	adc	r5, r5, #0
+	# a[i+7] += m[7] * mu
+	ldr	r7, [%[m], #28]
+	ldr	r9, [%[a], #28]
+	umull	r6, r7, r8, r7
+	adds	r5, r5, r6
+	adcs	r7, r7, %[ca]
+	mov	%[ca], #0
+	adc	%[ca], %[ca], %[ca]
+	adds	r9, r9, r5
+	str	r9, [%[a], #28]
+	ldr	r9, [%[a], #32]
+	adcs	r9, r9, r7
+	str	r9, [%[a], #32]
+	adc	%[ca], %[ca], #0
+	# i += 1
+	add	%[a], %[a], #4
+	add	r12, r12, #4
+	cmp	r12, #32
+	blt	1b
+
+	str	r10, [%[a], #0]
+	str	r14, [%[a], #4]
 	: [ca] "+r" (ca), [a] "+r" (a)
 	: [m] "r" (m), [mp] "r" (mp)
-	: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+	: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
 	);
 
+	memcpy(a, a + 8, 32);
 	if (ca)
 		a -= m;
 }
-- 
cgit v1.2.3-55-g6feb


From 911344a99889319a7dba8a725a64dc324597f9eb Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 6 Oct 2021 17:17:34 +0200
Subject: tls: P256: x86-64 assembly

function                                             old     new   delta
sp_256_mont_mul_8                                    127     155     +28
sp_256_proj_point_dbl_8                              448     469     +21
sp_256_mont_sub_8                                     23      35     +12
sp_256_mont_dbl_8                                     26      38     +12
sp_256_sub_8                                          44      49      +5
sp_256_ecc_mulmod_8                                 1530    1535      +5
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 6/0 up/down: 83/0)               Total: 83 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 532047739..14a7c7066 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -189,6 +189,34 @@ static int sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 "\n		movl	%3, 7*4(%2)"
 "\n"
 "\n		sbbl	%3, %3"
+"\n"
+		: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
+		: "0" (a), "1" (b), "2" (r)
+		: "memory"
+	);
+	return reg;
+#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
+	/* x86_64 has no alignment restrictions, and is little-endian,
+	 * so 64-bit and 32-bit representations are identical */
+	uint64_t reg;
+	asm volatile (
+"\n		movq	(%0), %3"
+"\n		addq	(%1), %3"
+"\n		movq	%3, (%2)"
+"\n"
+"\n		movq	1*8(%0), %3"
+"\n		adcq	1*8(%1), %3"
+"\n		movq	%3, 1*8(%2)"
+"\n"
+"\n		movq	2*8(%0), %3"
+"\n		adcq	2*8(%1), %3"
+"\n		movq	%3, 2*8(%2)"
+"\n"
+"\n		movq	3*8(%0), %3"
+"\n		adcq	3*8(%1), %3"
+"\n		movq	%3, 3*8(%2)"
+"\n"
+"\n		sbbq	%3, %3"
 "\n"
 		: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
 		: "0" (a), "1" (b), "2" (r)
@@ -259,6 +287,34 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 "\n		movl	%3, 7*4(%2)"
 "\n"
 "\n		sbbl	%3, %3"
+"\n"
+		: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
+		: "0" (a), "1" (b), "2" (r)
+		: "memory"
+	);
+	return reg;
+#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
+	/* x86_64 has no alignment restrictions, and is little-endian,
+	 * so 64-bit and 32-bit representations are identical */
+	uint64_t reg;
+	asm volatile (
+"\n		movq	(%0), %3"
+"\n		subq	(%1), %3"
+"\n		movq	%3, (%2)"
+"\n"
+"\n		movq	1*8(%0), %3"
+"\n		sbbq	1*8(%1), %3"
+"\n		movq	%3, 1*8(%2)"
+"\n"
+"\n		movq	2*8(%0), %3"
+"\n		sbbq	2*8(%1), %3"
+"\n		movq	%3, 2*8(%2)"
+"\n"
+"\n		movq	3*8(%0), %3"
+"\n		sbbq	3*8(%1), %3"
+"\n		movq	%3, 3*8(%2)"
+"\n"
+"\n		sbbq	%3, %3"
 "\n"
 		: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
 		: "0" (a), "1" (b), "2" (r)
@@ -380,6 +436,49 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 	}
 	r[15] = accl;
 	memcpy(r, rr, sizeof(rr));
+#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
+	/* x86_64 has no alignment restrictions, and is little-endian,
+	 * so 64-bit and 32-bit representations are identical */
+	const uint64_t* aa = (const void*)a;
+	const uint64_t* bb = (const void*)b;
+	uint64_t rr[8];
+	int k;
+	uint64_t accl;
+	uint64_t acch;
+
+	acch = accl = 0;
+	for (k = 0; k < 7; k++) {
+		int i, j;
+		uint64_t acc_hi;
+		i = k - 3;
+		if (i < 0)
+			i = 0;
+		j = k - i;
+		acc_hi = 0;
+		do {
+////////////////////////
+//			uint128_t m = ((uint128_t)a[i]) * b[j];
+//			acc_hi:acch:accl += m;
+			asm volatile (
+			// aa[i] is already loaded in %%rax
+"\n			mulq	%7"
+"\n			addq	%%rax, %0"
+"\n			adcq	%%rdx, %1"
+"\n			adcq	$0, %2"
+			: "=rm" (accl), "=rm" (acch), "=rm" (acc_hi)
+			: "0" (accl), "1" (acch), "2" (acc_hi), "a" (aa[i]), "m" (bb[j])
+			: "cc", "dx"
+			);
+////////////////////////
+		        j--;
+			i++;
+		} while (i != 4 && i <= k);
+		rr[k] = accl;
+		accl = acch;
+		acch = acc_hi;
+	}
+	rr[7] = accl;
+	memcpy(r, rr, sizeof(rr));
 #elif 0
 	//TODO: arm assembly (untested)
 	sp_digit tmp[16];
-- 
cgit v1.2.3-55-g6feb


From 87e3f2e9f8a1c99b223b316fbefb5ae49c2a8fe2 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 6 Oct 2021 19:59:39 +0200
Subject: tls: P256: x86-64 optimized sp_256_sub_8_p256_mod

function                                             old     new   delta
sp_256_sub_8_p256_mod                                  -      53     +53
sp_256_mont_reduce_8                                 223     217      -6
sp_256_mont_dbl_8                                     38      32      -6
sp_256_ecc_mulmod_8                                 1535    1529      -6
sp_256_proj_point_dbl_8                              469     454     -15
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/4 up/down: 53/-33)             Total: 20 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 14a7c7066..1391cb405 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -346,8 +346,8 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 #endif
 }
 
-#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
 /* Sub p256_mod from a into r. (r = a - p256_mod). */
+#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
 static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
 {
 	sp_digit reg;
@@ -390,6 +390,36 @@ static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
 		: "memory"
 	);
 }
+#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
+static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
+{
+	uint64_t reg;
+	uint64_t ooff;
+//p256_mod[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff
+	asm volatile (
+"\n		movq	(%0), %3"
+"\n		addq	$1, %3"		// adding 1 is the same as subtracting ffffffffffffffff
+"\n		movq	%3, (%1)"	//
+"\n		cmc"			// only carry bit needs inverting
+"\n"
+"\n		movq	1*8(%0), %3"
+"\n		sbbq	%2, %3"		// %2 holds 00000000ffffffff
+"\n		movq	%3, 1*8(%1)"
+"\n"
+"\n		movq	2*8(%0), %3"
+"\n		sbbq	$0, %3"
+"\n		movq	%3, 2*8(%1)"
+"\n"
+"\n		movq	3*8(%0), %3"
+"\n		sbbq	$0, %3"		// adding 00000000ffffffff (in %2)
+"\n		addq	%2, %3"		// is the same as subtracting ffffffff00000001
+"\n		movq	%3, 3*8(%1)"
+"\n"
+		: "=r" (a), "=r" (r), "=r" (ooff), "=r" (reg)
+		: "0" (a), "1" (r), "2" (0x00000000ffffffff)
+		: "memory"
+	);
+}
 #else
 # define sp_256_sub_8_p256_mod(r, a) sp_256_sub_8((r), (a), p256_mod)
 #endif
-- 
cgit v1.2.3-55-g6feb


From 5e9c6170218826dded581b99dfd225b0c76c6c86 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 6 Oct 2021 20:14:49 +0200
Subject: tls: P256: sp_256_sub_8_p256_mod always subtracts in-place, use that

i386:

function                                             old     new   delta
sp_256_mont_reduce_8                                 245     243      -2
sp_256_mont_dbl_8                                     26      24      -2
sp_256_ecc_mulmod_8                                 1161    1157      -4
sp_256_proj_point_dbl_8                              359     353      -6
sp_256_sub_8_p256_mod                                 71      32     -39
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-53)             Total: -53 bytes

non-asm code:

function                                             old     new   delta
sp_256_sub_8_p256_mod                                  -      12     +12
sp_256_mont_reduce_8                                 250     243      -7
sp_256_mont_dbl_8                                     31      24      -7
sp_256_ecc_mulmod_8                                 1171    1157     -14
sp_256_proj_point_dbl_8                              374     353     -21
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/4 up/down: 12/-49)            Total: -37 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 99 ++++++++++++++++++-------------------------------
 1 file changed, 36 insertions(+), 63 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 1391cb405..b3828d817 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -346,82 +346,55 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 #endif
 }
 
-/* Sub p256_mod from a into r. (r = a - p256_mod). */
+/* Sub p256_mod from r. (r = r - p256_mod). */
 #if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
-static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
+static void sp_256_sub_8_p256_mod(sp_digit* r)
 {
-	sp_digit reg;
 //p256_mod[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
 	asm volatile (
-"\n		movl	(%0), %2"
-"\n		subl	$0xffffffff, %2"
-"\n		movl	%2, (%1)"
-"\n"
-"\n		movl	1*4(%0), %2"
-"\n		sbbl	$0xffffffff, %2"
-"\n		movl	%2, 1*4(%1)"
-"\n"
-"\n		movl	2*4(%0), %2"
-"\n		sbbl	$0xffffffff, %2"
-"\n		movl	%2, 2*4(%1)"
-"\n"
-"\n		movl	3*4(%0), %2"
-"\n		sbbl	$0, %2"
-"\n		movl	%2, 3*4(%1)"
-"\n"
-"\n		movl	4*4(%0), %2"
-"\n		sbbl	$0, %2"
-"\n		movl	%2, 4*4(%1)"
+"\n		subl	$0xffffffff, (%0)"
+"\n		sbbl	$0xffffffff, 1*4(%0)"
+"\n		sbbl	$0xffffffff, 2*4(%0)"
+"\n		sbbl	$0, 3*4(%0)"
+"\n		sbbl	$0, 4*4(%0)"
+"\n		sbbl	$0, 5*4(%0)"
+"\n		sbbl	$1, 6*4(%0)"
+"\n		sbbl	$0xffffffff, 7*4(%0)"
 "\n"
-"\n		movl	5*4(%0), %2"
-"\n		sbbl	$0, %2"
-"\n		movl	%2, 5*4(%1)"
-"\n"
-"\n		movl	6*4(%0), %2"
-"\n		sbbl	$1, %2"
-"\n		movl	%2, 6*4(%1)"
-"\n"
-"\n		movl	7*4(%0), %2"
-"\n		sbbl	$0xffffffff, %2"
-"\n		movl	%2, 7*4(%1)"
-"\n"
-		: "=r" (a), "=r" (r), "=r" (reg)
-		: "0" (a), "1" (r)
+		: "=r" (r)
+		: "0" (r)
 		: "memory"
 	);
 }
 #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
-static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
+static void sp_256_sub_8_p256_mod(sp_digit* r)
 {
 	uint64_t reg;
 	uint64_t ooff;
 //p256_mod[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff
 	asm volatile (
-"\n		movq	(%0), %3"
-"\n		addq	$1, %3"		// adding 1 is the same as subtracting ffffffffffffffff
-"\n		movq	%3, (%1)"	//
+"\n		addq	$1, (%0)"	// adding 1 is the same as subtracting ffffffffffffffff
 "\n		cmc"			// only carry bit needs inverting
+
+"\n		sbbq	%1, 1*8(%0)"	// %1 holds 00000000ffffffff
+
+"\n		sbbq	$0, 2*8(%0)"
 "\n"
-"\n		movq	1*8(%0), %3"
-"\n		sbbq	%2, %3"		// %2 holds 00000000ffffffff
-"\n		movq	%3, 1*8(%1)"
-"\n"
-"\n		movq	2*8(%0), %3"
-"\n		sbbq	$0, %3"
-"\n		movq	%3, 2*8(%1)"
-"\n"
-"\n		movq	3*8(%0), %3"
-"\n		sbbq	$0, %3"		// adding 00000000ffffffff (in %2)
-"\n		addq	%2, %3"		// is the same as subtracting ffffffff00000001
-"\n		movq	%3, 3*8(%1)"
+"\n		movq	3*8(%0), %2"
+"\n		sbbq	$0, %2"		// adding 00000000ffffffff (in %1)
+"\n		addq	%1, %2"		// is the same as subtracting ffffffff00000001
+"\n		movq	%2, 3*8(%0)"
 "\n"
-		: "=r" (a), "=r" (r), "=r" (ooff), "=r" (reg)
-		: "0" (a), "1" (r), "2" (0x00000000ffffffff)
+		: "=r" (r), "=r" (ooff), "=r" (reg)
+		: "0" (r), "1" (0x00000000ffffffff)
 		: "memory"
 	);
 }
 #else
-# define sp_256_sub_8_p256_mod(r, a) sp_256_sub_8((r), (a), p256_mod)
+static void sp_256_sub_8_p256_mod(sp_digit* r)
+{
+	sp_256_sub_8(r, r, p256_mod);
+}
 #endif
 
 /* Multiply a and b into r. (r = a * b) */
@@ -609,7 +582,7 @@ static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b
 	int carry = sp_256_add_8(r, a, b);
 	sp_256_norm_8(r);
 	if (carry) {
-		sp_256_sub_8_p256_mod(r, r /*, m*/);
+		sp_256_sub_8_p256_mod(r);
 		sp_256_norm_8(r);
 	}
 }
@@ -637,7 +610,7 @@ static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
 	int carry = sp_256_add_8(r, a, a);
 	sp_256_norm_8(r);
 	if (carry)
-		sp_256_sub_8_p256_mod(r, r /*, m*/);
+		sp_256_sub_8_p256_mod(r);
 	sp_256_norm_8(r);
 }
 
@@ -649,13 +622,13 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
 	int carry = sp_256_add_8(r, a, a);
 	sp_256_norm_8(r);
 	if (carry) {
-		sp_256_sub_8_p256_mod(r, r /*, m*/);
+		sp_256_sub_8_p256_mod(r);
 		sp_256_norm_8(r);
 	}
 	carry = sp_256_add_8(r, r, a);
 	sp_256_norm_8(r);
 	if (carry) {
-		sp_256_sub_8_p256_mod(r, r /*, m*/);
+		sp_256_sub_8_p256_mod(r);
 		sp_256_norm_8(r);
 	}
 }
@@ -829,7 +802,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 		}
 		sp_256_mont_shift_8(a, a);
 		if (word16th != 0)
-			sp_256_sub_8_p256_mod(a, a /*, m*/);
+			sp_256_sub_8_p256_mod(a);
 		sp_256_norm_8(a);
 	}
 	else { /* Same code for explicit mp == 1 (which is always the case for P256) */
@@ -849,7 +822,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 		}
 		sp_256_mont_shift_8(a, a);
 		if (word16th != 0)
-			sp_256_sub_8_p256_mod(a, a /*, m*/);
+			sp_256_sub_8_p256_mod(a);
 		sp_256_norm_8(a);
 	}
 }
@@ -1104,7 +1077,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 	sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/);
 	/* Reduce x to less than modulus */
 	if (sp_256_cmp_8(r->x, p256_mod) >= 0)
-		sp_256_sub_8_p256_mod(r->x, r->x /*, p256_mod*/);
+		sp_256_sub_8_p256_mod(r->x);
 	sp_256_norm_8(r->x);
 
 	/* y /= z^3 */
@@ -1113,7 +1086,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 	sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
 	if (sp_256_cmp_8(r->y, p256_mod) >= 0)
-		sp_256_sub_8_p256_mod(r->y, r->y /*, p256_mod*/);
+		sp_256_sub_8_p256_mod(r->y);
 	sp_256_norm_8(r->y);
 
 	memset(r->z, 0, sizeof(r->z));
-- 
cgit v1.2.3-55-g6feb


From 17e6fb06b3d36eae11575b226858e8474e2b46d3 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 6 Oct 2021 21:22:36 +0200
Subject: tls: whitespace fix

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index b3828d817..4c0cd320b 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -375,9 +375,9 @@ static void sp_256_sub_8_p256_mod(sp_digit* r)
 	asm volatile (
 "\n		addq	$1, (%0)"	// adding 1 is the same as subtracting ffffffffffffffff
 "\n		cmc"			// only carry bit needs inverting
-
+"\n"
 "\n		sbbq	%1, 1*8(%0)"	// %1 holds 00000000ffffffff
-
+"\n"
 "\n		sbbq	$0, 2*8(%0)"
 "\n"
 "\n		movq	3*8(%0), %2"
@@ -473,7 +473,7 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 			: "cc", "dx"
 			);
 ////////////////////////
-		        j--;
+			j--;
 			i++;
 		} while (i != 4 && i <= k);
 		rr[k] = accl;
-- 
cgit v1.2.3-55-g6feb


From 1f925038ab9c6bd8f6b3cd40ed7aab0ef10d898e Mon Sep 17 00:00:00 2001
From: YU Jincheng <shana@zju.edu.cn>
Date: Wed, 29 Sep 2021 17:37:26 +0800
Subject: *: generalize "const trick"

While at it, change all "__asm__" to "asm"

Co-authored-by: canyie <31466456+canyie@users.noreply.github.com>
Signed-off-by: YU Jincheng <shana@zju.edu.cn>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 coreutils/test.c  |  5 ++---
 include/libbb.h   | 34 +++++++++++++++++++++++++---------
 libbb/appletlib.c |  3 +--
 libbb/lineedit.c  |  5 ++---
 procps/powertop.c |  2 +-
 shell/ash.c       | 23 +++--------------------
 6 files changed, 34 insertions(+), 38 deletions(-)

diff --git a/coreutils/test.c b/coreutils/test.c
index 7c6574334..fc956724b 100644
--- a/coreutils/test.c
+++ b/coreutils/test.c
@@ -435,7 +435,7 @@ struct test_statics {
 };
 
 /* See test_ptr_hack.c */
-extern struct test_statics *const test_ptr_to_statics;
+extern struct test_statics *BB_GLOBAL_CONST test_ptr_to_statics;
 
 #define S (*test_ptr_to_statics)
 #define args            (S.args         )
@@ -446,8 +446,7 @@ extern struct test_statics *const test_ptr_to_statics;
 #define leaving         (S.leaving      )
 
 #define INIT_S() do { \
-	(*(struct test_statics**)not_const_pp(&test_ptr_to_statics)) = xzalloc(sizeof(S)); \
-	barrier(); \
+	ASSIGN_CONST_PTR(test_ptr_to_statics, xzalloc(sizeof(S))); \
 } while (0)
 #define DEINIT_S() do { \
 	free(group_array); \
diff --git a/include/libbb.h b/include/libbb.h
index dfcaa05ec..02cc008f0 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -365,13 +365,27 @@ struct BUG_off_t_size_is_misdetected {
 #endif
 #endif
 
+/* We use a trick to have more optimized code (fewer pointer reloads
+ * and reduced binary size by a few kilobytes) like:
+ *  ash.c:   extern struct globals *const ash_ptr_to_globals;
+ *  ash_ptr_hack.c: struct globals *ash_ptr_to_globals;
+ * This way, compiler in ash.c knows the pointer can not change.
+ *
+ * However, this may break on weird arches or toolchains. In this case,
+ * set "-DBB_GLOBAL_CONST=''" in CONFIG_EXTRA_CFLAGS to disable
+ * this optimization.
+ */
+#ifndef BB_GLOBAL_CONST
+# define BB_GLOBAL_CONST const
+#endif
+
 #if defined(errno)
 /* If errno is a define, assume it's "define errno (*__errno_location())"
  * and we will cache it's result in this variable */
-extern int *const bb_errno;
-#undef errno
-#define errno (*bb_errno)
-#define bb_cached_errno_ptr 1
+extern int *BB_GLOBAL_CONST bb_errno;
+# undef errno
+# define errno (*bb_errno)
+# define bb_cached_errno_ptr 1
 #endif
 
 #if !(ULONG_MAX > 0xffffffff)
@@ -2270,6 +2284,8 @@ struct globals;
  * If you want to assign a value, use SET_PTR_TO_GLOBALS(x) */
 extern struct globals *const ptr_to_globals;
 
+#define barrier() asm volatile ("":::"memory")
+
 #if defined(__clang_major__) && __clang_major__ >= 9
 /* Clang/llvm drops assignment to "constant" storage. Silently.
  * Needs serious convincing to not eliminate the store.
@@ -2277,7 +2293,7 @@ extern struct globals *const ptr_to_globals;
 static ALWAYS_INLINE void* not_const_pp(const void *p)
 {
 	void *pp;
-	__asm__ __volatile__(
+	asm volatile (
 		"# forget that p points to const"
 		: /*outputs*/ "=r" (pp)
 		: /*inputs*/ "0" (p)
@@ -2288,13 +2304,13 @@ static ALWAYS_INLINE void* not_const_pp(const void *p)
 static ALWAYS_INLINE void* not_const_pp(const void *p) { return (void*)p; }
 #endif
 
-/* At least gcc 3.4.6 on mipsel system needs optimization barrier */
-#define barrier() __asm__ __volatile__("":::"memory")
-#define SET_PTR_TO_GLOBALS(x) do { \
-	(*(struct globals**)not_const_pp(&ptr_to_globals)) = (void*)(x); \
+#define ASSIGN_CONST_PTR(p, v) do { \
+	*(void**)not_const_pp(&p) = (void*)(v); \
+	/* At least gcc 3.4.6 on mipsel needs optimization barrier */ \
 	barrier(); \
 } while (0)
 
+#define SET_PTR_TO_GLOBALS(x) ASSIGN_CONST_PTR(ptr_to_globals, x)
 #define FREE_PTR_TO_GLOBALS() do { \
 	if (ENABLE_FEATURE_CLEAN_UP) { \
 		free(ptr_to_globals); \
diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index 5c5d7eb95..bf26c99e9 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -247,8 +247,7 @@ void lbb_prepare(const char *applet
 		IF_FEATURE_INDIVIDUAL(, char **argv))
 {
 #ifdef bb_cached_errno_ptr
-	(*(int **)not_const_pp(&bb_errno)) = get_perrno();
-	barrier();
+	ASSIGN_CONST_PTR(bb_errno, get_perrno());
 #endif
 	applet_name = applet;
 
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index a7a3ee103..3c87abcf9 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -192,7 +192,7 @@ struct lineedit_statics {
 };
 
 /* See lineedit_ptr_hack.c */
-extern struct lineedit_statics *const lineedit_ptr_to_statics;
+extern struct lineedit_statics *BB_GLOBAL_CONST lineedit_ptr_to_statics;
 
 #define S (*lineedit_ptr_to_statics)
 #define state            (S.state           )
@@ -214,8 +214,7 @@ extern struct lineedit_statics *const lineedit_ptr_to_statics;
 #define delbuf           (S.delbuf          )
 
 #define INIT_S() do { \
-	(*(struct lineedit_statics**)not_const_pp(&lineedit_ptr_to_statics)) = xzalloc(sizeof(S)); \
-	barrier(); \
+	ASSIGN_CONST_PTR(lineedit_ptr_to_statics, xzalloc(sizeof(S))); \
 } while (0)
 
 static void deinit_S(void)
diff --git a/procps/powertop.c b/procps/powertop.c
index fc6018b7a..24c2b320f 100644
--- a/procps/powertop.c
+++ b/procps/powertop.c
@@ -505,7 +505,7 @@ static void cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
 				unsigned int *edx)
 {
 	/* EAX value specifies what information to return */
-	__asm__(
+	asm (
 		"	pushl %%ebx\n"     /* Save EBX */
 		"	cpuid\n"
 		"	movl %%ebx, %1\n"  /* Save content of EBX */
diff --git a/shell/ash.c b/shell/ash.c
index 4bf0615ea..7b85981ec 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -303,20 +303,6 @@ typedef long arith_t;
 # error "Do not even bother, ash will not run on NOMMU machine"
 #endif
 
-/* We use a trick to have more optimized code (fewer pointer reloads):
- *  ash.c:   extern struct globals *const ash_ptr_to_globals;
- *  ash_ptr_hack.c: struct globals *ash_ptr_to_globals;
- * This way, compiler in ash.c knows the pointer can not change.
- *
- * However, this may break on weird arches or toolchains. In this case,
- * set "-DBB_GLOBAL_CONST=''" in CONFIG_EXTRA_CFLAGS to disable
- * this optimization.
- */
-#ifndef BB_GLOBAL_CONST
-# define BB_GLOBAL_CONST const
-#endif
-
-
 /* ============ Hash table sizes. Configurable. */
 
 #define VTABSIZE 39
@@ -518,8 +504,7 @@ extern struct globals_misc *BB_GLOBAL_CONST ash_ptr_to_globals_misc;
 #define random_gen  (G_misc.random_gen )
 #define backgndpid  (G_misc.backgndpid )
 #define INIT_G_misc() do { \
-	(*(struct globals_misc**)not_const_pp(&ash_ptr_to_globals_misc)) = xzalloc(sizeof(G_misc)); \
-	barrier(); \
+	ASSIGN_CONST_PTR(ash_ptr_to_globals_misc, xzalloc(sizeof(G_misc))); \
 	savestatus = -1; \
 	curdir = nullstr; \
 	physdir = nullstr; \
@@ -1597,8 +1582,7 @@ extern struct globals_memstack *BB_GLOBAL_CONST ash_ptr_to_globals_memstack;
 #define g_stacknleft (G_memstack.g_stacknleft)
 #define stackbase    (G_memstack.stackbase   )
 #define INIT_G_memstack() do { \
-	(*(struct globals_memstack**)not_const_pp(&ash_ptr_to_globals_memstack)) = xzalloc(sizeof(G_memstack)); \
-	barrier(); \
+	ASSIGN_CONST_PTR(ash_ptr_to_globals_memstack, xzalloc(sizeof(G_memstack))); \
 	g_stackp = &stackbase; \
 	g_stacknxt = stackbase.space; \
 	g_stacknleft = MINSIZE; \
@@ -2229,8 +2213,7 @@ extern struct globals_var *BB_GLOBAL_CONST ash_ptr_to_globals_var;
 #endif
 #define INIT_G_var() do { \
 	unsigned i; \
-	(*(struct globals_var**)not_const_pp(&ash_ptr_to_globals_var)) = xzalloc(sizeof(G_var)); \
-	barrier(); \
+	ASSIGN_CONST_PTR(ash_ptr_to_globals_var, xzalloc(sizeof(G_var))); \
 	for (i = 0; i < ARRAY_SIZE(varinit_data); i++) { \
 		varinit[i].flags    = varinit_data[i].flags; \
 		varinit[i].var_text = varinit_data[i].var_text; \
-- 
cgit v1.2.3-55-g6feb


From 0084c44799f4735e2e8a0d92381edc77a4c9d77f Mon Sep 17 00:00:00 2001
From: Ismael Luceno <ismael@iodev.co.uk>
Date: Wed, 29 Sep 2021 13:41:32 +0200
Subject: config: find: Fix mtime/mmin description

s/modified time/modification time/

Signed-off-by: Ismael Luceno <ismael@iodev.co.uk>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 findutils/find.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/findutils/find.c b/findutils/find.c
index 34e11ae26..6d55db4e9 100644
--- a/findutils/find.c
+++ b/findutils/find.c
@@ -67,7 +67,7 @@
 //config:	interpreted by other programs.
 //config:
 //config:config FEATURE_FIND_MTIME
-//config:	bool "Enable -mtime: modified time matching"
+//config:	bool "Enable -mtime: modification time matching"
 //config:	default y
 //config:	depends on FIND
 //config:	help
@@ -75,7 +75,7 @@
 //config:	files, in days.
 //config:
 //config:config FEATURE_FIND_MMIN
-//config:	bool "Enable -mmin: modified time matching by minutes"
+//config:	bool "Enable -mmin: modification time matching by minutes"
 //config:	default y
 //config:	depends on FIND
 //config:	help
-- 
cgit v1.2.3-55-g6feb


From 421c8767ba4ebf02fadc056026033e8feaf1a470 Mon Sep 17 00:00:00 2001
From: Ismael Luceno <ismael@iodev.co.uk>
Date: Wed, 29 Sep 2021 13:41:33 +0200
Subject: find: Unify time comparisons

Split the common part into a function, to be reused.

The tail call is optimized, meaning now mmin/mtime just prepare arguments
and jump into the common code, thus near zero overhead.

This reduces code size slightly, e.g. on x86_64:
   text    data     bss     dec     hex filename
   4806       0       0    4806    12c6 findutils/find.o.orig
   4782       0       0    4782    12ae findutils/find.o

Of course, the savings are even greater when implementing atime/ctime
variants.

Signed-off-by: Ismael Luceno <ismael@iodev.co.uk>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 findutils/find.c | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/findutils/find.c b/findutils/find.c
index 6d55db4e9..f557bb762 100644
--- a/findutils/find.c
+++ b/findutils/find.c
@@ -618,30 +618,34 @@ ACTF(perm)
 	return (statbuf->st_mode & 07777) == ap->perm_mask;
 }
 #endif
+
+#if						\
+	ENABLE_FEATURE_FIND_MMIN  ||		\
+	ENABLE_FEATURE_FIND_MTIME
+static int time_cmp(time_t ftime, char time_char, time_t secs, time_t delta)
+{
+	time_t file_age = time(NULL) - ftime;
+	switch (time_char) {
+	case '+': return file_age >= secs + delta;
+	case '-': return file_age < secs;
+	/* just numeric time */
+	default:  return file_age >= secs && file_age < secs + delta;
+	}
+}
+#endif
+
 #if ENABLE_FEATURE_FIND_MTIME
 ACTF(mtime)
 {
-	time_t file_age = time(NULL) - statbuf->st_mtime;
-	time_t mtime_secs = ap->mtime_days * 24*60*60;
-	if (ap->mtime_char == '+')
-		return file_age >= mtime_secs + 24*60*60;
-	if (ap->mtime_char == '-')
-		return file_age < mtime_secs;
-	/* just numeric mtime */
-	return file_age >= mtime_secs && file_age < (mtime_secs + 24*60*60);
+	return time_cmp(statbuf->st_mtime, ap->mtime_char,
+			ap->mtime_days * 24*60*60, 24*60*60);
 }
 #endif
 #if ENABLE_FEATURE_FIND_MMIN
 ACTF(mmin)
 {
-	time_t file_age = time(NULL) - statbuf->st_mtime;
-	time_t mmin_secs = ap->mmin_mins * 60;
-	if (ap->mmin_char == '+')
-		return file_age >= mmin_secs + 60;
-	if (ap->mmin_char == '-')
-		return file_age < mmin_secs;
-	/* just numeric mmin */
-	return file_age >= mmin_secs && file_age < (mmin_secs + 60);
+	return time_cmp(statbuf->st_mtime, ap->mmin_char,
+			ap->mmin_mins * 60, 60);
 }
 #endif
 #if ENABLE_FEATURE_FIND_NEWER
-- 
cgit v1.2.3-55-g6feb


From eb048a450cc7a0d92ac435a59d56f378b9f82667 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Thu, 7 Oct 2021 21:55:16 +0200
Subject: ps: fix -o pid=PID,args interpreting entire "PID,args" as header

procps-ng 3.3.15 does not do this.
(It could, allowing commas in headers and requiring
"ps -opid=PID -oargs" form for this case, but it does not).

function                                             old     new   delta
parse_o                                              167     190     +23

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 procps/ps.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/procps/ps.c b/procps/ps.c
index 711b180a0..03b9c418c 100644
--- a/procps/ps.c
+++ b/procps/ps.c
@@ -443,17 +443,19 @@ static void parse_o(char* opt)
 			opt = comma + 1;
 			continue;
 		}
-		break;
-	}
-	// opt points to last spec in comma separated list.
-	// This one can have =HEADER part.
-	new = new_out_t();
-	if (equal)
-		*equal = '\0';
-	*new = *find_out_spec(opt);
-	if (equal) {
-		*equal = '=';
-		new->header = equal + 1;
+		// opt points to last spec in comma separated list.
+		// This one can have =HEADER part.
+		new = new_out_t();
+		if (equal)
+			*equal = '\0';
+		*new = *find_out_spec(opt);
+		if (!equal)
+			break;
+		*equal++ = '=';
+		new->header = equal;
+		comma = strchr(equal, ',');
+		if (comma)
+			*comma = '\0';
 		// POSIX: the field widths shall be ... at least as wide as
 		// the header text (default or overridden value).
 		// If the header text is null, such as -o user=,
@@ -461,10 +463,12 @@ static void parse_o(char* opt)
 		// default header text
 		if (new->header[0]) {
 			new->width = strlen(new->header);
-			print_header = 1;
 		}
-	} else
-		print_header = 1;
+		if (!comma)
+			break;
+		//*comma = ','; /* no, new->header should stay NUL-terminated */
+		opt = comma + 1;
+	}
 }
 
 static void alloc_line_buffer(void)
-- 
cgit v1.2.3-55-g6feb


From c28313bb176639e1e4b11a63f6452baebf69f28e Mon Sep 17 00:00:00 2001
From: Nicholas Niro <blowfist@xroutine.net>
Date: Wed, 22 Apr 2020 23:36:11 -0400
Subject: ip: added support for setting netns on devices

function                                             old     new   delta
set_netns                                              -     130    +130
do_iplink                                           1252    1315     +63
.rodata                                           104173  104179      +6
packed_usage                                       34020   33993     -27
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 2/1 up/down: 199/-27)           Total: 172 bytes

Signed-off-by: Nicholas Niro <blowfist@xroutine.net>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/ip.c                |  2 +-
 networking/libiproute/iplink.c | 35 +++++++++++++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/networking/ip.c b/networking/ip.c
index 85b1ba080..7c3208699 100644
--- a/networking/ip.c
+++ b/networking/ip.c
@@ -152,7 +152,7 @@
 //usage:#define iplink_trivial_usage
 //usage:       /*Usage:iplink*/"set IFACE [up|down] [arp on|off] [multicast on|off]\n"
 //usage:       "	[promisc on|off] [mtu NUM] [name NAME] [qlen NUM] [address MAC]\n"
-//usage:       "	[master IFACE | nomaster]"
+//usage:       "	[master IFACE | nomaster] [netns PID]"
 // * short help shows only "set" command, long help continues (with just one "\n")
 // * and shows all other commands:
 //usage:#define iplink_full_usage "\n"
diff --git a/networking/libiproute/iplink.c b/networking/libiproute/iplink.c
index 1a1064bdc..68d199044 100644
--- a/networking/libiproute/iplink.c
+++ b/networking/libiproute/iplink.c
@@ -152,6 +152,30 @@ static void set_master(char *dev, int master)
 		xfunc_die();
 }
 
+/* Exits on error */
+static void set_netns(char *dev, int netns)
+{
+	struct rtnl_handle rth;
+	struct {
+		struct nlmsghdr  n;
+		struct ifinfomsg i;
+		char             buf[1024];
+	} req;
+
+	memset(&req, 0, sizeof(req));
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.n.nlmsg_flags = NLM_F_REQUEST;
+	req.n.nlmsg_type = RTM_NEWLINK;
+	req.i.ifi_family = preferred_family;
+
+	xrtnl_open(&rth);
+	req.i.ifi_index = xll_name_to_index(dev);
+	//printf("netns %i for %i\n", netns, req.i.ifi_index);
+	addattr_l(&req.n, sizeof(req), IFLA_NET_NS_PID, &netns, 4);
+	if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
+		xfunc_die();
+}
+
 /* Exits on error */
 static int get_address(char *dev, int *htype)
 {
@@ -226,6 +250,7 @@ static int do_set(char **argv)
 	int qlen = -1;
 	int mtu = -1;
 	int master = -1;
+	int netns = -1;
 	char *newaddr = NULL;
 	char *newbrd = NULL;
 	struct ifreq ifr0, ifr1;
@@ -234,11 +259,11 @@ static int do_set(char **argv)
 	/* If you add stuff here, update iplink_full_usage */
 	static const char keywords[] ALIGN1 =
 		"up\0""down\0""name\0""mtu\0""qlen\0""multicast\0"
-		"arp\0""promisc\0""address\0"
+		"arp\0""promisc\0""address\0""netns\0"
 		"master\0""nomaster\0"
 		"dev\0" /* must be last */;
 	enum { ARG_up = 0, ARG_down, ARG_name, ARG_mtu, ARG_qlen, ARG_multicast,
-		ARG_arp, ARG_promisc, ARG_addr,
+		ARG_arp, ARG_promisc, ARG_addr, ARG_netns,
 		ARG_master, ARG_nomaster,
 		ARG_dev };
 	enum { PARM_on = 0, PARM_off };
@@ -276,6 +301,9 @@ static int do_set(char **argv)
 			master = xll_name_to_index(*argv);
 		} else if (key == ARG_nomaster) {
 			master = 0;
+		} else if (key == ARG_netns) {
+			NEXT_ARG();
+			netns = get_unsigned(*argv, "netns");
 		} else if (key >= ARG_dev) {
 			/* ^^^^^^ ">=" here results in "dev IFACE" treated as default */
 			if (key == ARG_dev) {
@@ -463,6 +491,9 @@ static int do_set(char **argv)
 	if (master != -1) {
 		set_master(dev, master);
 	}
+	if (netns != -1) {
+		set_netns(dev, netns);
+	}
 	if (mask)
 		do_chflags(dev, flags, mask);
 	return 0;
-- 
cgit v1.2.3-55-g6feb


From cf4dc4a09d2a7516e00c276396da85e1088a0ddf Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 8 Oct 2021 02:16:41 +0200
Subject: mount: "mount -o rw ...." should not fall back to RO mount

The reported case was an attempt to remount,rw a CD-ROM:

  mount -o remount,rw /mnt/sr0

which "succeeded" by falling back to RO:

  mount("/dev/sr0", "/mnt/sr0", 0x412862, MS_REMOUNT|MS_SILENT|MS_RELATIME, "nojoliet,check=s,map=n,blocksize"...) = -1 EROFS (Read-only file system)
  ...
  mount("/dev/sr0", "/mnt/sr0", 0x412862, MS_RDONLY|MS_REMOUNT|MS_SILENT|MS_RELATIME, "nojoliet,check=s,map=n,blocksize"...) = 0

Clearly, not what was intended!

function                                             old     new   delta
parse_mount_options                                  241     267     +26
mount_main                                          1198    1211     +13
singlemount                                         1301    1313     +12
inetd_main                                          1919    1911      -8
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/1 up/down: 51/-8)              Total: 43 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 util-linux/mount.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/util-linux/mount.c b/util-linux/mount.c
index 44afdbcff..4e65b6b46 100644
--- a/util-linux/mount.c
+++ b/util-linux/mount.c
@@ -589,7 +589,7 @@ static void append_mount_options(char **oldopts, const char *newopts)
 
 // Use the mount_options list to parse options into flags.
 // Also update list of unrecognized options if unrecognized != NULL
-static unsigned long parse_mount_options(char *options, char **unrecognized)
+static unsigned long parse_mount_options(char *options, char **unrecognized, uint32_t *opt)
 {
 	unsigned long flags = MS_SILENT;
 
@@ -617,6 +617,11 @@ static unsigned long parse_mount_options(char *options, char **unrecognized)
 					flags &= fl;
 				else
 					flags |= fl;
+				/* If we see "-o rw" on command line, it's the same as -w:
+				 * "do not try to fall back to RO mounts"
+				 */
+				if (fl == ~MS_RDONLY && opt)
+					(*opt) |= OPT_w;
 				goto found;
 			}
 			option_str += opt_len + 1;
@@ -1973,7 +1978,7 @@ static int singlemount(struct mntent *mp, int ignore_busy)
 
 	errno = 0;
 
-	vfsflags = parse_mount_options(mp->mnt_opts, &filteropts);
+	vfsflags = parse_mount_options(mp->mnt_opts, &filteropts, NULL);
 
 	// Treat fstype "auto" as unspecified
 	if (mp->mnt_type && strcmp(mp->mnt_type, "auto") == 0)
@@ -2047,7 +2052,7 @@ static int singlemount(struct mntent *mp, int ignore_busy)
 				len, share,
 				share + len + 1  /* "dir1/dir2" */
 			);
-			parse_mount_options(unc, &filteropts);
+			parse_mount_options(unc, &filteropts, NULL);
 			if (ENABLE_FEATURE_CLEAN_UP) free(unc);
 		}
 
@@ -2073,7 +2078,7 @@ static int singlemount(struct mntent *mp, int ignore_busy)
 // (instead of _numeric_ iface_id) with glibc.
 // This probably should be fixed in glibc, not here.
 // The workaround is to manually specify correct "ip=ADDR%n" option.
-			parse_mount_options(ip, &filteropts);
+			parse_mount_options(ip, &filteropts, NULL);
 			if (ENABLE_FEATURE_CLEAN_UP) free(ip);
 		}
 
@@ -2355,7 +2360,7 @@ int mount_main(int argc UNUSED_PARAM, char **argv)
 	// Past this point, we are handling either "mount -a [opts]"
 	// or "mount [opts] single_param"
 
-	cmdopt_flags = parse_mount_options(cmdopts, NULL);
+	cmdopt_flags = parse_mount_options(cmdopts, NULL, &option_mask32);
 	if (nonroot && (cmdopt_flags & ~MS_SILENT)) // Non-root users cannot specify flags
 		bb_simple_error_msg_and_die(bb_msg_you_must_be_root);
 
@@ -2429,7 +2434,7 @@ int mount_main(int argc UNUSED_PARAM, char **argv)
 				continue;
 
 			// Skip noauto and swap anyway
-			if ((parse_mount_options(mtcur->mnt_opts, NULL) & (MOUNT_NOAUTO | MOUNT_SWAP))
+			if ((parse_mount_options(mtcur->mnt_opts, NULL, NULL) & (MOUNT_NOAUTO | MOUNT_SWAP))
 			// swap is bogus "fstype", parse_mount_options can't check fstypes
 			 || strcasecmp(mtcur->mnt_type, "swap") == 0
 			) {
@@ -2490,7 +2495,7 @@ int mount_main(int argc UNUSED_PARAM, char **argv)
 		// exit_group(32)                          = ?
 #if 0
 		// In case we want to simply skip swap partitions:
-		l = parse_mount_options(mtcur->mnt_opts, NULL);
+		l = parse_mount_options(mtcur->mnt_opts, NULL, NULL);
 		if ((l & MOUNT_SWAP)
 		// swap is bogus "fstype", parse_mount_options can't check fstypes
 		 || strcasecmp(mtcur->mnt_type, "swap") == 0
@@ -2500,7 +2505,7 @@ int mount_main(int argc UNUSED_PARAM, char **argv)
 #endif
 		if (nonroot) {
 			// fstab must have "users" or "user"
-			l = parse_mount_options(mtcur->mnt_opts, NULL);
+			l = parse_mount_options(mtcur->mnt_opts, NULL, NULL);
 			if (!(l & MOUNT_USERS))
 				bb_simple_error_msg_and_die(bb_msg_you_must_be_root);
 		}
-- 
cgit v1.2.3-55-g6feb


From d2e0d3f514c13265670080f149b97bbe83bcc783 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 8 Oct 2021 14:30:50 +0200
Subject: mdev: tweak --help

function                                             old     new   delta
packed_usage                                       33993   34008     +15

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 util-linux/mdev.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/util-linux/mdev.c b/util-linux/mdev.c
index dbbcbc655..ebdc0c254 100644
--- a/util-linux/mdev.c
+++ b/util-linux/mdev.c
@@ -82,12 +82,12 @@
 //usage:#define mdev_trivial_usage
 //usage:       "[-vS] " IF_FEATURE_MDEV_DAEMON("{ ") "[-s]" IF_FEATURE_MDEV_DAEMON(" | [-df] }")
 //usage:#define mdev_full_usage "\n\n"
-//usage:       "	-v	verbose\n"
-//usage:       "	-S	log to syslog too\n"
-//usage:       "	-s	scan /sys and populate /dev\n"
+//usage:       "	-v	Verbose\n"
+//usage:       "	-S	Log to syslog too\n"
+//usage:       "	-s	Scan /sys and populate /dev\n"
 //usage:	IF_FEATURE_MDEV_DAEMON(
-//usage:       "	-d	daemon, listen on netlink\n"
-//usage:       "	-f	stay in foreground\n"
+//usage:       "	-d	Daemon, listen on netlink\n"
+//usage:       "	-f	Run in foreground\n"
 //usage:	)
 //usage:       "\n"
 //usage:       "Bare mdev is a kernel hotplug helper. To activate it:\n"
-- 
cgit v1.2.3-55-g6feb


From 50c5b36dd7a7c13fabb4afa428c1556d25401324 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 8 Oct 2021 15:02:53 +0200
Subject: help: s/Don't daemonize/Run in foreground/g

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/httpd.c   | 2 +-
 networking/ifplugd.c | 2 +-
 networking/ntpd.c    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/networking/httpd.c b/networking/httpd.c
index 55ca2ae8b..c038293e3 100644
--- a/networking/httpd.c
+++ b/networking/httpd.c
@@ -274,7 +274,7 @@
 //usage:#define httpd_full_usage "\n\n"
 //usage:       "Listen for incoming HTTP requests\n"
 //usage:     "\n	-i		Inetd mode"
-//usage:     "\n	-f		Don't daemonize"
+//usage:     "\n	-f		Run in foreground"
 //usage:     "\n	-v[v]		Verbose"
 //usage:     "\n	-p [IP:]PORT	Bind to IP:PORT (default *:"STR(CONFIG_FEATURE_HTTPD_PORT_DEFAULT)")"
 //usage:	IF_FEATURE_HTTPD_SETUID(
diff --git a/networking/ifplugd.c b/networking/ifplugd.c
index 18dcaff96..c4b6b9584 100644
--- a/networking/ifplugd.c
+++ b/networking/ifplugd.c
@@ -20,7 +20,7 @@
 //usage:       "[OPTIONS]"
 //usage:#define ifplugd_full_usage "\n\n"
 //usage:       "Network interface plug detection daemon\n"
-//usage:     "\n	-n		Don't daemonize"
+//usage:     "\n	-n		Run in foreground"
 //usage:     "\n	-s		Don't log to syslog"
 //usage:     "\n	-i IFACE	Interface"
 //usage:     "\n	-f/-F		Treat link detection error as link down/link up"
diff --git a/networking/ntpd.c b/networking/ntpd.c
index 6bf6c4e07..5bd4d2d3e 100644
--- a/networking/ntpd.c
+++ b/networking/ntpd.c
@@ -78,7 +78,7 @@
 //usage:#define ntpd_full_usage "\n\n"
 //usage:       "NTP client/server\n"
 //usage:     "\n	-d[d]	Verbose"
-//usage:     "\n	-n	Do not daemonize"
+//usage:     "\n	-n	Run in foreground"
 //usage:     "\n	-q	Quit after clock is set"
 //usage:     "\n	-N	Run at high priority"
 //usage:     "\n	-w	Do not set time (only query peers), implies -n"
-- 
cgit v1.2.3-55-g6feb


From 84874785c2e226002bb05a42c704ed2d18b99508 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 8 Oct 2021 15:18:10 +0200
Subject: httpd: if range is not specified, correctly fall back to read/write
 loop

range_start was staying -1, and the comparison meant to detect
"is it the first sendfile that failed, or not the first?"
was making an incorrect decision. The result: nothing is sent.

function                                             old     new   delta
send_file_and_exit                                   865     877     +12

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/httpd.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/networking/httpd.c b/networking/httpd.c
index c038293e3..31c8489d3 100644
--- a/networking/httpd.c
+++ b/networking/httpd.c
@@ -1878,14 +1878,17 @@ static NOINLINE void send_file_and_exit(const char *url, int what)
 		send_headers(HTTP_OK);
 #if ENABLE_FEATURE_USE_SENDFILE
 	{
-		off_t offset = (range_start < 0) ? 0 : range_start;
+		off_t offset;
+		if (range_start < 0)
+			range_start = 0;
+		offset = range_start;
 		while (1) {
 			/* sz is rounded down to 64k */
 			ssize_t sz = MAXINT(ssize_t) - 0xffff;
 			IF_FEATURE_HTTPD_RANGES(if (sz > range_len) sz = range_len;)
 			count = sendfile(STDOUT_FILENO, fd, &offset, sz);
 			if (count < 0) {
-				if (offset == range_start)
+				if (offset == range_start) /* was it the very 1st sendfile? */
 					break; /* fall back to read/write loop */
 				goto fin;
 			}
-- 
cgit v1.2.3-55-g6feb


From 049775b2ef39a20dc08a6bf89e2831e8422f7363 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 8 Oct 2021 15:50:36 +0200
Subject: libbb.h: fix logic selecting incorrect BB_STRTOOFF for !LFS configs

BB_STRTOOFF() was equal to bb_strtou(). On x86_64, it's incorrect.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 include/libbb.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/libbb.h b/include/libbb.h
index 02cc008f0..b72576f28 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -305,9 +305,13 @@ typedef unsigned long long uoff_t;
 # endif
 #else
 /* CONFIG_LFS is off */
-# if UINT_MAX == 0xffffffff
-/* While sizeof(off_t) == sizeof(int), off_t is typedef'ed to long anyway.
- * gcc will throw warnings on printf("%d", off_t). Crap... */
+/* sizeof(off_t) == sizeof(long).
+ * May or may not be == sizeof(int). If it is, use xatoi_positive()
+ * and bb_strtou() instead of xatoul_range() and bb_strtoul().
+ * Even if sizeof(off_t) == sizeof(int), off_t is typedef'ed to long anyway.
+ * gcc will throw warnings on printf("%d", off_t)... Have to use %ld etc.
+ */
+# if UINT_MAX == ULONG_MAX
 typedef unsigned long uoff_t;
 #  define XATOOFF(a) xatoi_positive(a)
 #  define BB_STRTOOFF bb_strtou
-- 
cgit v1.2.3-55-g6feb


From b198e9aa934386531b2bc1fd0291ed3cc6beaf96 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 8 Oct 2021 19:34:20 +0200
Subject: tar: improve comments, add FIXMEs. no code changes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 archival/libarchive/get_header_tar.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/archival/libarchive/get_header_tar.c b/archival/libarchive/get_header_tar.c
index 2ab3c04b8..a142290ff 100644
--- a/archival/libarchive/get_header_tar.c
+++ b/archival/libarchive/get_header_tar.c
@@ -332,7 +332,6 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 	}
 
 	/* Set bits 12-15 of the files mode */
-	/* (typeflag was not trashed because chksum does not use getOctal) */
 	switch (tar_typeflag) {
 	case '1': /* hardlink */
 		/* we mark hardlinks as regular files with zero size and a link name */
@@ -341,7 +340,7 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 		 * ... For tar archives written by pre POSIX.1-1988
 		 * implementations, the size field usually contains the size of
 		 * the file and needs to be ignored as no data may follow this
-		 * header type.  For POSIX.1- 1988 compliant archives, the size
+		 * header type.  For POSIX.1-1988 compliant archives, the size
 		 * field needs to be 0.  For POSIX.1-2001 compliant archives,
 		 * the size field may be non zero, indicating that file data is
 		 * included in the archive.
@@ -390,6 +389,7 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 		/* free: paranoia: tar with several consecutive longnames */
 		free(p_longname);
 		/* For paranoia reasons we allocate extra NUL char */
+//FIXME: disallow huge sizes:
 		p_longname = xzalloc(file_header->size + 1);
 		/* We read ASCIZ string, including NUL */
 		xread(archive_handle->src_fd, p_longname, file_header->size);
@@ -400,6 +400,7 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 		goto again;
 	case 'K':
 		free(p_linkname);
+//FIXME: disallow huge sizes:
 		p_linkname = xzalloc(file_header->size + 1);
 		xread(archive_handle->src_fd, p_linkname, file_header->size);
 		archive_handle->offset += file_header->size;
-- 
cgit v1.2.3-55-g6feb


From ecac9853f29dcb2e5e0d70c0effaae2cabeefabf Mon Sep 17 00:00:00 2001
From: Andrej Valek <andrej.valek@siemens.com>
Date: Fri, 25 Jun 2021 07:45:35 +0200
Subject: mktemp: add --tmpdir option

Make mktemp more compatible with coreutils.
- add "--tmpdir" option
- add long variants for "d,q,u" options

Note: Upstream ca-certificate update script started using this option.

function                                             old     new   delta
.rodata                                           104179  104219     +40
mktemp_main                                          186     194      +8
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 48/0)               Total: 48 bytes

Signed-off-by: Andrej Valek <andrej.valek@siemens.com>
Signed-off-by: Peter Marko <peter.marko@siemens.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 coreutils/mktemp.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/coreutils/mktemp.c b/coreutils/mktemp.c
index 5393320a5..33e2720de 100644
--- a/coreutils/mktemp.c
+++ b/coreutils/mktemp.c
@@ -72,13 +72,27 @@ int mktemp_main(int argc UNUSED_PARAM, char **argv)
 		OPT_t = 1 << 2,
 		OPT_p = 1 << 3,
 		OPT_u = 1 << 4,
+		OPT_tmpdir = (1 << 5) * ENABLE_LONG_OPTS,
 	};
 
 	path = getenv("TMPDIR");
 	if (!path || path[0] == '\0')
 		path = "/tmp";
 
+#if ENABLE_LONG_OPTS
+	opts = getopt32long(argv, "^"
+		"dqtp:u"
+		"\0"
+		"?1" /* 1 arg max */,
+		"directory\0" No_argument       "d"
+		"quiet\0"     No_argument       "q"
+		"dry-run\0"   No_argument       "u"
+		"tmpdir\0"    Optional_argument "\xff"
+		, &path, &path
+	);
+#else
 	opts = getopt32(argv, "^" "dqtp:u" "\0" "?1"/*1 arg max*/, &path);
+#endif
 
 	chp = argv[optind];
 	if (!chp) {
@@ -95,7 +109,7 @@ int mktemp_main(int argc UNUSED_PARAM, char **argv)
 		goto error;
 	}
 #endif
-	if (opts & (OPT_t|OPT_p))
+	if (opts & (OPT_t|OPT_p|OPT_tmpdir))
 		chp = concat_path_file(path, chp);
 
 	if (opts & OPT_u) {
-- 
cgit v1.2.3-55-g6feb


From d315a77a79817d364a1562f4457c21fbb8174025 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 8 Oct 2021 22:18:24 +0200
Subject: resize: use tcsetattr(TCSAFLUSH) instead of TCSANOW, closes 13811

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 console-tools/resize.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/console-tools/resize.c b/console-tools/resize.c
index 59d468d48..056e33750 100644
--- a/console-tools/resize.c
+++ b/console-tools/resize.c
@@ -74,7 +74,19 @@ int resize_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
 		+ (1 << SIGTERM)
 		+ (1 << SIGALRM)
 		, onintr);
-	tcsetattr(STDERR_FILENO, TCSANOW, &new);
+	/* Users report:
+	 *	The resize command messes up the terminal.
+	 *	In my case it looks like it is hanging and
+	 *	I need to press ctrl-c to get a prompt.
+	 *	Actually the program does not hang but just
+	 *	the terminal is messed up.
+	 * Replaced TCSANOW with TCSAFLUSH:
+	 * "the change occurs after all output written to fd
+	 * has been transmitted, and all input that has been
+	 * received but not read will be discarded before
+	 * the change is made.
+	 */
+	tcsetattr(STDERR_FILENO, TCSAFLUSH, &new);
 
 	/* save_cursor_pos 7
 	 * scroll_whole_screen [r
-- 
cgit v1.2.3-55-g6feb


From 86ba007b84ae1ebe35e88c57e023caac3d2d9903 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 8 Oct 2021 23:03:54 +0200
Subject: xxd: fix -p -r, closes 13881

function                                             old     new   delta
xxd_main                                             893     890      -3

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 testsuite/xxd.tests      |  6 ++++++
 util-linux/hexdump_xxd.c | 51 ++++++++++++++++++++++++++++++++++++------------
 2 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/testsuite/xxd.tests b/testsuite/xxd.tests
index 2e80be5fe..76fa96af9 100755
--- a/testsuite/xxd.tests
+++ b/testsuite/xxd.tests
@@ -31,4 +31,10 @@ testing 'xxd -p with 31 NULs' \
 	'' \
 	'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0'
 
+testing 'xxd -p -r' \
+	'xxd -p -r' \
+	'01234567765432100123456776543210' \
+	'' \
+	'30313233343536373736353433323130 30313233343536373736353433323130'
+
 exit $FAILCOUNT
diff --git a/util-linux/hexdump_xxd.c b/util-linux/hexdump_xxd.c
index fe78f6242..76dada983 100644
--- a/util-linux/hexdump_xxd.c
+++ b/util-linux/hexdump_xxd.c
@@ -69,7 +69,7 @@
 #define OPT_c (1 << 7)
 #define OPT_o (1 << 8)
 
-static void reverse(unsigned opt, unsigned cols, const char *filename)
+static void reverse(unsigned opt, const char *filename)
 {
 	FILE *fp;
 	char *buf;
@@ -77,9 +77,9 @@ static void reverse(unsigned opt, unsigned cols, const char *filename)
 	fp = filename ? xfopen_for_read(filename) : stdin;
 
 	while ((buf = xmalloc_fgetline(fp)) != NULL) {
-		char *p = buf;
-		unsigned cnt = cols;
+		char *p;
 
+		p = buf;
 		if (!(opt & OPT_p)) {
 			/* skip address */
 			while (isxdigit(*p)) p++;
@@ -92,9 +92,9 @@ static void reverse(unsigned opt, unsigned cols, const char *filename)
 		}
 
 		/* Process hex bytes optionally separated by whitespace */
-		do {
+		for (;;) {
 			uint8_t val, c;
-
+ nibble1:
 			p = skip_whitespace(p);
 
 			c = *p++;
@@ -102,8 +102,19 @@ static void reverse(unsigned opt, unsigned cols, const char *filename)
 				val = c - '0';
 			else if ((c|0x20) >= 'a' && (c|0x20) <= 'f')
 				val = (c|0x20) - ('a' - 10);
-			else
+			else {
+				/* xxd V1.10 is inconsistent here.
+				 *  echo -e "31 !3 0a 0a" | xxd -r -p
+				 * is "10<a0>" (no <cr>) - "!" is ignored,
+				 * but
+				 *  echo -e "31 !!343434\n30 0a" | xxd -r -p
+				 * is "10<cr>" - "!!" drops rest of the line.
+				 * We will ignore all invalid chars:
+				 */
+				if (c != '\0')
+					goto nibble1;
 				break;
+			}
 			val <<= 4;
 
 			/* Works the same with xxd V1.10:
@@ -111,6 +122,7 @@ static void reverse(unsigned opt, unsigned cols, const char *filename)
 			 *  echo "31 0 9 32 0a" | xxd -r -p
 			 * thus allow whitespace even within the byte:
 			 */
+ nibble2:
 			p = skip_whitespace(p);
 
 			c = *p++;
@@ -118,10 +130,23 @@ static void reverse(unsigned opt, unsigned cols, const char *filename)
 				val |= c - '0';
 			else if ((c|0x20) >= 'a' && (c|0x20) <= 'f')
 				val |= (c|0x20) - ('a' - 10);
-			else
-				break;
+			else {
+				if (c != '\0') {
+					/* "...3<not_hex_char>..." ignores both chars */
+					goto nibble1;
+				}
+				/* Nibbles can join even through newline:
+				 * echo -e "31 3\n2 0a" | xxd -r -p
+				 * is "12<cr>".
+				 */
+				free(buf);
+				p = buf = xmalloc_fgetline(fp);
+				if (!buf)
+					break;
+				goto nibble2;
+			}
 			putchar(val);
-		} while (!(opt & OPT_p) || --cnt != 0);
+		}
 		free(buf);
 	}
 	//fclose(fp);
@@ -174,6 +199,10 @@ int xxd_main(int argc UNUSED_PARAM, char **argv)
 		//BUGGY for /proc/version (unseekable?)
 	}
 
+	if (opt & OPT_r) {
+		reverse(opt, argv[0]);
+	}
+
 	if (opt & OPT_o) {
 		/* -o accepts negative numbers too */
 		dumper->xxd_displayoff = xstrtoll(opt_o, /*base:*/ 0);
@@ -194,10 +223,6 @@ int xxd_main(int argc UNUSED_PARAM, char **argv)
 			bb_dump_add(dumper, "\"%08.8_ax: \""); // "address: "
 	}
 
-	if (opt & OPT_r) {
-		reverse(opt, cols, argv[0]);
-	}
-
 	if (bytes < 1 || bytes >= cols) {
 		sprintf(buf, "%u/1 \"%%02x\"", cols); // cols * "XX"
 		bb_dump_add(dumper, buf);
-- 
cgit v1.2.3-55-g6feb


From 94eb1c4dc6556932e1a12a0ce7734512ac95985e Mon Sep 17 00:00:00 2001
From: Ron Yorston <rmy@pobox.com>
Date: Sat, 31 Jul 2021 13:51:57 +0100
Subject: libbb: better coreutils compatibility for realpath

Add some tests which coreutils realpath passes but BusyBox realpath
fails (bar one).  Adjust xmalloc_realpath_coreutils() so the tests
pass:

- Expand symbolic links before testing whether the last path component
  exists.

- When the link target is a relative path canonicalize it by passing
  it through xmalloc_realpath_coreutils() as already happens for
  absolute paths.

- Ignore trailing slashes when finding the last path component and
  correctly handle the case where the only slash is at the start of
  the path.  This requires ignoring superfluous leading slashes.

- Undo all changes to the path so error messages from the caller show
  the original filename.

function                                             old     new   delta
xmalloc_realpath_coreutils                           214     313     +99

Signed-off-by: Ron Yorston <rmy@pobox.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 include/libbb.h          |  2 +-
 libbb/xreadlink.c        | 75 +++++++++++++++++++++++++++++-------------------
 testsuite/realpath.tests | 45 +++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 31 deletions(-)
 create mode 100755 testsuite/realpath.tests

diff --git a/include/libbb.h b/include/libbb.h
index b72576f28..296417dae 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -578,7 +578,7 @@ DIR *xopendir(const char *path) FAST_FUNC;
 DIR *warn_opendir(const char *path) FAST_FUNC;
 
 char *xmalloc_realpath(const char *path) FAST_FUNC RETURNS_MALLOC;
-char *xmalloc_realpath_coreutils(const char *path) FAST_FUNC RETURNS_MALLOC;
+char *xmalloc_realpath_coreutils(char *path) FAST_FUNC RETURNS_MALLOC;
 char *xmalloc_readlink(const char *path) FAST_FUNC RETURNS_MALLOC;
 char *xmalloc_readlink_or_warn(const char *path) FAST_FUNC RETURNS_MALLOC;
 /* !RETURNS_MALLOC: it's a realloc-like function */
diff --git a/libbb/xreadlink.c b/libbb/xreadlink.c
index a18dd0748..2682f6975 100644
--- a/libbb/xreadlink.c
+++ b/libbb/xreadlink.c
@@ -123,7 +123,7 @@ char* FAST_FUNC xmalloc_realpath(const char *path)
 #endif
 }
 
-char* FAST_FUNC xmalloc_realpath_coreutils(const char *path)
+char* FAST_FUNC xmalloc_realpath_coreutils(char *path)
 {
 	char *buf;
 
@@ -137,32 +137,19 @@ char* FAST_FUNC xmalloc_realpath_coreutils(const char *path)
 	 * (the directory must exist).
 	 */
 	if (!buf && errno == ENOENT) {
-		char *last_slash = strrchr(path, '/');
-		if (last_slash) {
-			*last_slash++ = '\0';
-			buf = xmalloc_realpath(path);
-			if (buf) {
-				unsigned len = strlen(buf);
-				buf = xrealloc(buf, len + strlen(last_slash) + 2);
-				buf[len++] = '/';
-				strcpy(buf + len, last_slash);
-			}
-		} else {
-			char *target = xmalloc_readlink(path);
-			if (target) {
-				char *cwd;
-				if (target[0] == '/') {
-					/*
-					 * $ ln -s /bin/qwe symlink  # note: /bin is a link to /usr/bin
-					 * $ readlink -f symlink
-					 * /usr/bin/qwe/target_does_not_exist
-					 * $ realpath symlink
-					 * /usr/bin/qwe/target_does_not_exist
-					 */
-					buf = xmalloc_realpath_coreutils(target);
-					free(target);
-					return buf;
-				}
+		char *target, c, *last_slash;
+		size_t i;
+
+		target = xmalloc_readlink(path);
+		if (target) {
+			/*
+			 * $ ln -s /bin/qwe symlink  # note: /bin is a link to /usr/bin
+			 * $ readlink -f symlink
+			 * /usr/bin/qwe
+			 * $ realpath symlink
+			 * /usr/bin/qwe
+			 */
+			if (target[0] != '/') {
 				/*
 				 * $ ln -s target_does_not_exist symlink
 				 * $ readlink -f symlink
@@ -170,13 +157,41 @@ char* FAST_FUNC xmalloc_realpath_coreutils(const char *path)
 				 * $ realpath symlink
 				 * /CURDIR/target_does_not_exist
 				 */
-				cwd = xrealloc_getcwd_or_warn(NULL);
-				buf = concat_path_file(cwd, target);
+				char *cwd = xrealloc_getcwd_or_warn(NULL);
+				char *tmp = concat_path_file(cwd, target);
 				free(cwd);
 				free(target);
-				return buf;
+				target = tmp;
+			}
+			buf = xmalloc_realpath_coreutils(target);
+			free(target);
+			return buf;
+		}
+
+		/* ignore leading and trailing slashes */
+		while (path[0] == '/' && path[1] == '/')
+			++path;
+		i = strlen(path) - 1;
+		while (i > 0 && path[i] == '/')
+			i--;
+		c = path[i + 1];
+		path[i + 1] = '\0';
+
+		last_slash = strrchr(path, '/');
+		if (last_slash == path)
+			buf = xstrdup(path);
+		else if (last_slash) {
+			*last_slash = '\0';
+			buf = xmalloc_realpath(path);
+			*last_slash++ = '/';
+			if (buf) {
+				unsigned len = strlen(buf);
+				buf = xrealloc(buf, len + strlen(last_slash) + 2);
+				buf[len++] = '/';
+				strcpy(buf + len, last_slash);
 			}
 		}
+		path[i + 1] = c;
 	}
 
 	return buf;
diff --git a/testsuite/realpath.tests b/testsuite/realpath.tests
new file mode 100755
index 000000000..0e68e0279
--- /dev/null
+++ b/testsuite/realpath.tests
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+# Realpath tests.
+# Copyright 2006 by Natanael Copa <n@tanael.org>
+# Copyright 2021 by Ron Yorston <rmy@pobox.com>
+# Licensed under GPLv2, see file LICENSE in this source tree.
+
+. ./testing.sh
+
+unset LC_ALL
+unset LC_MESSAGES
+unset LANG
+unset LANGUAGE
+
+TESTDIR=realpath_testdir
+TESTLINK1="link1"
+TESTLINK2="link2"
+
+# create the dir and test files
+mkdir -p "./$TESTDIR"
+ln -s "./$TESTDIR/not_file" "./$TESTLINK1"
+ln -s "./$TESTDIR/not_file/not_dir" "./$TESTLINK2"
+
+# shell's $PWD may leave symlinks unresolved.
+# "pwd" may be a built-in and have the same problem.
+# External pwd _can't_ have that problem (current dir on Unix is physical).
+pwd=`which pwd`
+pwd=`$pwd`
+testing "realpath on non-existent absolute path 1" "realpath /not_file" "/not_file\n" "" ""
+testing "realpath on non-existent absolute path 2" "realpath /not_file/" "/not_file\n" "" ""
+testing "realpath on non-existent absolute path 3" "realpath //not_file" "/not_file\n" "" ""
+testing "realpath on non-existent absolute path 4" "realpath /not_dir/not_file 2>&1" "realpath: /not_dir/not_file: No such file or directory\n" "" ""
+
+testing "realpath on non-existent local file 1" "realpath $TESTDIR/not_file" "$pwd/$TESTDIR/not_file\n" "" ""
+testing "realpath on non-existent local file 2" "realpath $TESTDIR/not_dir/not_file 2>&1" "realpath: $TESTDIR/not_dir/not_file: No such file or directory\n" "" ""
+
+testing "realpath on link to non-existent file 1" "realpath $TESTLINK1" "$pwd/$TESTDIR/not_file\n" "" ""
+testing "realpath on link to non-existent file 2" "realpath $TESTLINK2 2>&1" "realpath: $TESTLINK2: No such file or directory\n" "" ""
+testing "realpath on link to non-existent file 3" "realpath ./$TESTLINK1" "$pwd/$TESTDIR/not_file\n" "" ""
+testing "realpath on link to non-existent file 4" "realpath ./$TESTLINK2 2>&1" "realpath: ./$TESTLINK2: No such file or directory\n" "" ""
+
+# clean up
+rm -r "$TESTLINK1" "$TESTLINK2" "$TESTDIR"
+
+exit $((FAILCOUNT <= 255 ? FAILCOUNT : 255))
-- 
cgit v1.2.3-55-g6feb


From 21e8dbfd9d11a461ed7f91b495fa39d8a9131b28 Mon Sep 17 00:00:00 2001
From: Alin Mr <almr.oss@outlook.com>
Date: Wed, 28 Jul 2021 11:40:01 +0300
Subject: ash.c: speedup ${s:} substring (no quotes)

This trivial patch makes ${s:...} at least as fast as ${s#??..}
in simple tests. It's probably faster for longer substrings,
but then one wouldn't use ${s#"1024???s"} anyway -
one would switch away from sh.

function                                             old     new   delta
subevalvar                                          1457    1503     +46

Signed-off-by: Alin Mr <almr.oss@outlook.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 shell/ash.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/shell/ash.c b/shell/ash.c
index 7b85981ec..e8ec0b1a6 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -7185,14 +7185,19 @@ subevalvar(char *start, char *str, int strloc,
 		if ((unsigned)len > (orig_len - pos))
 			len = orig_len - pos;
 
-		for (vstr = startp; pos; vstr++, pos--) {
-			if (quotes && (unsigned char)*vstr == CTLESC)
+		if (!quotes) {
+			loc = mempcpy(startp, startp + pos, len);
+		} else {
+			for (vstr = startp; pos != 0; pos--) {
+				if ((unsigned char)*vstr == CTLESC)
+					vstr++;
 				vstr++;
-		}
-		for (loc = startp; len; len--) {
-			if (quotes && (unsigned char)*vstr == CTLESC)
+			}
+			for (loc = startp; len != 0; len--) {
+				if ((unsigned char)*vstr == CTLESC)
+					*loc++ = *vstr++;
 				*loc++ = *vstr++;
-			*loc++ = *vstr++;
+			}
 		}
 		*loc = '\0';
 		goto out;
-- 
cgit v1.2.3-55-g6feb


From 5aaeb550b76f063ffddef8587d9d91f7fddc4406 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 9 Oct 2021 03:32:20 +0200
Subject: ash: speed up ${x//\*/|} too, make it independent of
 ASH_OPTIMIZE_FOR_SIZE

function                                             old     new   delta
subevalvar                                          1503    1545     +42

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 shell/ash.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/shell/ash.c b/shell/ash.c
index e8ec0b1a6..199975191 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -7251,7 +7251,7 @@ subevalvar(char *start, char *str, int strloc,
 #if BASH_PATTERN_SUBST
 	workloc = expdest - (char *)stackblock();
 	if (subtype == VSREPLACE || subtype == VSREPLACEALL) {
-		size_t no_meta_len;
+		size_t no_meta_len, first_escaped;
 		int len;
 		char *idx, *end;
 
@@ -7269,28 +7269,34 @@ subevalvar(char *start, char *str, int strloc,
 		if (str[0] == '\0')
 			goto out1;
 
-		no_meta_len = (ENABLE_ASH_OPTIMIZE_FOR_SIZE || strpbrk(str, "*?[\\")) ? 0 : strlen(str);
+		first_escaped = (str[0] == '\\' && str[1]);
+		/* "first_escaped" trick allows to treat e.g. "\*no_glob_chars"
+		 * as literal too (as it is semi-common, and easy to accommodate
+		 * by just using str + 1).
+		 */
+		no_meta_len = strpbrk(str + first_escaped * 2, "*?[\\") ? 0 : strlen(str);
 		len = 0;
 		idx = startp;
 		end = str - 1;
 		while (idx <= end) {
  try_to_match:
 			if (no_meta_len == 0) {
-				/* pattern has meta chars, have to glob; or ENABLE_ASH_OPTIMIZE_FOR_SIZE */
+				/* pattern has meta chars, have to glob */
 				loc = scanright(idx, rmesc, rmescend, str, quotes, /*match_at_start:*/ 1);
 			} else {
 				/* Testcase for very slow replace (performs about 22k replaces):
 				 * x=::::::::::::::::::::::
 				 * x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;echo ${#x}
 				 * echo "${x//:/|}"
+				 * To test "first_escaped" logic, replace : with *.
 				 */
-				if (strncmp(rmesc, str, no_meta_len) != 0)
+				if (strncmp(rmesc, str + first_escaped, no_meta_len - first_escaped) != 0)
 					goto no_match;
 				loc = idx;
 				if (!quotes) {
-					loc += no_meta_len;
+					loc += no_meta_len - first_escaped;
 				} else {
-					size_t n = no_meta_len;
+					size_t n = no_meta_len - first_escaped;
 					do {
 						if ((unsigned char)*loc == CTLESC)
 							loc++;
-- 
cgit v1.2.3-55-g6feb


From 49bcf9f40cff1320a761d674cf89a0c0ab97ef49 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 9 Oct 2021 03:52:04 +0200
Subject: hush: speed up ${x//\*/|} too

function                                             old     new   delta
expand_one_var                                      2502    2544     +42

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 shell/hush.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/shell/hush.c b/shell/hush.c
index 6d472337f..87fc2f445 100644
--- a/shell/hush.c
+++ b/shell/hush.c
@@ -6472,16 +6472,21 @@ static arith_t expand_and_evaluate_arith(const char *arg, const char **errmsg_p)
 /* ${var/[/]pattern[/repl]} helpers */
 static char *strstr_pattern(char *val, const char *pattern, int *size)
 {
-	int sz = strcspn(pattern, "*?[\\");
-	if (pattern[sz] == '\0') {
+	int first_escaped = (pattern[0] == '\\' && pattern[1]);
+	/* "first_escaped" trick allows to treat e.g. "\*no_glob_chars"
+	 * as literal too (as it is semi-common, and easy to accommodate
+	 * by just using pattern + 1).
+	 */
+	int sz = strcspn(pattern + first_escaped * 2, "*?[\\");
+	if ((pattern + first_escaped * 2)[sz] == '\0') {
 		/* Optimization for trivial patterns.
 		 * Testcase for very slow replace (performs about 22k replaces):
 		 * x=::::::::::::::::::::::
 		 * x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;x=$x$x;echo ${#x}
 		 * echo "${x//:/|}"
 		 */
-		*size = sz;
-		return strstr(val, pattern);
+		*size = sz + first_escaped;
+		return strstr(val, pattern + first_escaped);
 	}
 
 	while (1) {
-- 
cgit v1.2.3-55-g6feb


From 04ad683bf99333c2a6c6fd6549faa67978ad9a98 Mon Sep 17 00:00:00 2001
From: Ismael Luceno <ismael@iodev.co.uk>
Date: Sat, 9 Oct 2021 21:40:53 +0200
Subject: find: implement -amin, -atime, -cmin, and -ctime

function                                             old     new   delta
func_mtime                                           130     180     +50
func_mmin                                            121     171     +50
static.params                                        235     261     +26
parse_params                                        1435    1461     +26
packed_usage                                       34025   34033      +8
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 5/0 up/down: 160/0)             Total: 160 bytes

Signed-off-by: Ismael Luceno <ismael@iodev.co.uk>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 findutils/find.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 103 insertions(+), 9 deletions(-)

diff --git a/findutils/find.c b/findutils/find.c
index f557bb762..9feb44b64 100644
--- a/findutils/find.c
+++ b/findutils/find.c
@@ -74,6 +74,22 @@
 //config:	Allow searching based on the modification time of
 //config:	files, in days.
 //config:
+//config:config FEATURE_FIND_ATIME
+//config:	bool "Enable -atime: access time matching"
+//config:	default y
+//config:	depends on FEATURE_FIND_MTIME
+//config:	help
+//config:	Allow searching based on the access time of
+//config:	files, in days.
+//config:
+//config:config FEATURE_FIND_CTIME
+//config:	bool "Enable -ctime: status change timestamp matching"
+//config:	default y
+//config:	depends on FEATURE_FIND_MTIME
+//config:	help
+//config:	Allow searching based on the status change timestamp of
+//config:	files, in days.
+//config:
 //config:config FEATURE_FIND_MMIN
 //config:	bool "Enable -mmin: modification time matching by minutes"
 //config:	default y
@@ -82,6 +98,22 @@
 //config:	Allow searching based on the modification time of
 //config:	files, in minutes.
 //config:
+//config:config FEATURE_FIND_AMIN
+//config:	bool "Enable -amin: access time matching by minutes"
+//config:	default y
+//config:	depends on FEATURE_FIND_MMIN
+//config:	help
+//config:	Allow searching based on the access time of
+//config:	files, in minutes.
+//config:
+//config:config FEATURE_FIND_CMIN
+//config:	bool "Enable -cmin: status change timestamp matching by minutes"
+//config:	default y
+//config:	depends on FEATURE_FIND_MMIN
+//config:	help
+//config:	Allow searching based on the status change timestamp of
+//config:	files, in minutes.
+//config:
 //config:config FEATURE_FIND_PERM
 //config:	bool "Enable -perm: permissions matching"
 //config:	default y
@@ -296,10 +328,22 @@
 //usage:     "\n	-mtime DAYS	mtime is greater than (+N), less than (-N),"
 //usage:     "\n			or exactly N days in the past"
 //usage:	)
+//usage:	IF_FEATURE_FIND_ATIME(
+//usage:     "\n	-atime DAYS	atime +N/-N/N days in the past"
+//usage:	)
+//usage:	IF_FEATURE_FIND_CTIME(
+//usage:     "\n	-ctime DAYS	ctime +N/-N/N days in the past"
+//usage:	)
 //usage:	IF_FEATURE_FIND_MMIN(
 //usage:     "\n	-mmin MINS	mtime is greater than (+N), less than (-N),"
 //usage:     "\n			or exactly N minutes in the past"
 //usage:	)
+//usage:	IF_FEATURE_FIND_AMIN(
+//usage:     "\n	-amin MINS	atime +N/-N/N minutes in the past"
+//usage:	)
+//usage:	IF_FEATURE_FIND_CMIN(
+//usage:     "\n	-cmin MINS	ctime +N/-N/N minutes in the past"
+//usage:	)
 //usage:	IF_FEATURE_FIND_NEWER(
 //usage:     "\n	-newer FILE	mtime is more recent than FILE's"
 //usage:	)
@@ -396,8 +440,8 @@ IF_FEATURE_FIND_PRINT0( ACTS(print0))
 IF_FEATURE_FIND_TYPE(   ACTS(type,  int type_mask;))
 IF_FEATURE_FIND_EXECUTABLE(ACTS(executable))
 IF_FEATURE_FIND_PERM(   ACTS(perm,  char perm_char; mode_t perm_mask;))
-IF_FEATURE_FIND_MTIME(  ACTS(mtime, char mtime_char; unsigned mtime_days;))
-IF_FEATURE_FIND_MMIN(   ACTS(mmin,  char mmin_char; unsigned mmin_mins;))
+IF_FEATURE_FIND_MTIME(  ACTS(mtime, char time_type; char mtime_char; unsigned mtime_days;))
+IF_FEATURE_FIND_MMIN(   ACTS(mmin,  char time_type; char mmin_char; unsigned mmin_mins;))
 IF_FEATURE_FIND_NEWER(  ACTS(newer, time_t newer_mtime;))
 IF_FEATURE_FIND_INUM(   ACTS(inum,  ino_t inode_num;))
 IF_FEATURE_FIND_USER(   ACTS(user,  uid_t uid;))
@@ -619,9 +663,13 @@ ACTF(perm)
 }
 #endif
 
-#if						\
-	ENABLE_FEATURE_FIND_MMIN  ||		\
-	ENABLE_FEATURE_FIND_MTIME
+#if 0 \
+ || ENABLE_FEATURE_FIND_AMIN  \
+ || ENABLE_FEATURE_FIND_ATIME \
+ || ENABLE_FEATURE_FIND_CMIN  \
+ || ENABLE_FEATURE_FIND_CTIME \
+ || ENABLE_FEATURE_FIND_MMIN  \
+ || ENABLE_FEATURE_FIND_MTIME
 static int time_cmp(time_t ftime, char time_char, time_t secs, time_t delta)
 {
 	time_t file_age = time(NULL) - ftime;
@@ -637,14 +685,32 @@ static int time_cmp(time_t ftime, char time_char, time_t secs, time_t delta)
 #if ENABLE_FEATURE_FIND_MTIME
 ACTF(mtime)
 {
-	return time_cmp(statbuf->st_mtime, ap->mtime_char,
+	time_t t = statbuf->st_mtime;
+# if ENABLE_FEATURE_FIND_ATIME
+	if (ap->time_type == 'a')
+		t = statbuf->st_atime;
+# endif
+# if ENABLE_FEATURE_FIND_CTIME
+	if (ap->time_type == 'c')
+		t = statbuf->st_ctime;
+# endif
+	return time_cmp(t, ap->mtime_char,
 			ap->mtime_days * 24*60*60, 24*60*60);
 }
 #endif
 #if ENABLE_FEATURE_FIND_MMIN
 ACTF(mmin)
 {
-	return time_cmp(statbuf->st_mtime, ap->mmin_char,
+	time_t t = statbuf->st_mtime;
+# if ENABLE_FEATURE_FIND_AMIN
+	if (ap->time_type == 'a')
+		t = statbuf->st_atime;
+# endif
+# if ENABLE_FEATURE_FIND_CMIN
+	if (ap->time_type == 'c')
+		t = statbuf->st_ctime;
+# endif
+	return time_cmp(t, ap->mmin_char,
 			ap->mmin_mins * 60, 60);
 }
 #endif
@@ -1047,7 +1113,11 @@ static action*** parse_params(char **argv)
 	IF_FEATURE_FIND_TYPE(   PARM_type      ,)
 	IF_FEATURE_FIND_PERM(   PARM_perm      ,)
 	IF_FEATURE_FIND_MTIME(  PARM_mtime     ,)
+	IF_FEATURE_FIND_ATIME(  PARM_atime     ,)
+	IF_FEATURE_FIND_CTIME(  PARM_ctime     ,)
 	IF_FEATURE_FIND_MMIN(   PARM_mmin      ,)
+	IF_FEATURE_FIND_AMIN(   PARM_amin      ,)
+	IF_FEATURE_FIND_CMIN(   PARM_cmin      ,)
 	IF_FEATURE_FIND_NEWER(  PARM_newer     ,)
 	IF_FEATURE_FIND_INUM(   PARM_inum      ,)
 	IF_FEATURE_FIND_USER(   PARM_user      ,)
@@ -1091,7 +1161,11 @@ static action*** parse_params(char **argv)
 	IF_FEATURE_FIND_TYPE(   "-type\0"   )
 	IF_FEATURE_FIND_PERM(   "-perm\0"   )
 	IF_FEATURE_FIND_MTIME(  "-mtime\0"  )
+	IF_FEATURE_FIND_ATIME(  "-atime\0"  )
+	IF_FEATURE_FIND_CTIME(  "-ctime\0"  )
 	IF_FEATURE_FIND_MMIN(   "-mmin\0"   )
+	IF_FEATURE_FIND_AMIN(   "-amin\0"   )
+	IF_FEATURE_FIND_CMIN(   "-cmin\0"   )
 	IF_FEATURE_FIND_NEWER(  "-newer\0"  )
 	IF_FEATURE_FIND_INUM(   "-inum\0"   )
 	IF_FEATURE_FIND_USER(   "-user\0"   )
@@ -1377,19 +1451,39 @@ static action*** parse_params(char **argv)
 		}
 #endif
 #if ENABLE_FEATURE_FIND_MTIME
-		else if (parm == PARM_mtime) {
+		else if (parm == PARM_mtime
+# if ENABLE_FEATURE_FIND_ATIME
+		 || parm == PARM_atime
+# endif
+# if ENABLE_FEATURE_FIND_CTIME
+		 || parm == PARM_ctime
+# endif
+		) {
 			action_mtime *ap;
 			dbg("%d", __LINE__);
 			ap = ALLOC_ACTION(mtime);
+# if ENABLE_FEATURE_FIND_AMIN || ENABLE_FEATURE_FIND_CMIN
+			ap->time_type = arg[1];
+# endif
 			ap->mtime_char = arg1[0];
 			ap->mtime_days = xatoul(plus_minus_num(arg1));
 		}
 #endif
 #if ENABLE_FEATURE_FIND_MMIN
-		else if (parm == PARM_mmin) {
+		else if (parm == PARM_mmin
+# if ENABLE_FEATURE_FIND_AMIN
+		 || parm == PARM_amin
+# endif
+# if ENABLE_FEATURE_FIND_CMIN
+		 || parm == PARM_cmin
+# endif
+		) {
 			action_mmin *ap;
 			dbg("%d", __LINE__);
 			ap = ALLOC_ACTION(mmin);
+# if ENABLE_FEATURE_FIND_AMIN || ENABLE_FEATURE_FIND_CMIN
+			ap->time_type = arg[1];
+# endif
 			ap->mmin_char = arg1[0];
 			ap->mmin_mins = xatoul(plus_minus_num(arg1));
 		}
-- 
cgit v1.2.3-55-g6feb


From 5156b245536ce0f07165793f07c63fd9fa5dd3b7 Mon Sep 17 00:00:00 2001
From: YU Jincheng <shana@zju.edu.cn>
Date: Sun, 10 Oct 2021 02:19:51 +0800
Subject: Make const ptr assign as function call in clang

- This acts as a memory barrier in clang, preventing
  a const ptr from being read before it is assigned

Signed-off-by: LoveSy <shana@zju.edu.cn>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 coreutils/test.c   |  2 +-
 include/libbb.h    | 21 +++++++++++++++------
 libbb/Kbuild.src   |  1 +
 libbb/appletlib.c  |  2 +-
 libbb/const_hack.c | 16 ++++++++++++++++
 libbb/lineedit.c   |  2 +-
 shell/ash.c        |  6 +++---
 7 files changed, 38 insertions(+), 12 deletions(-)
 create mode 100644 libbb/const_hack.c

diff --git a/coreutils/test.c b/coreutils/test.c
index fc956724b..a914c7490 100644
--- a/coreutils/test.c
+++ b/coreutils/test.c
@@ -446,7 +446,7 @@ extern struct test_statics *BB_GLOBAL_CONST test_ptr_to_statics;
 #define leaving         (S.leaving      )
 
 #define INIT_S() do { \
-	ASSIGN_CONST_PTR(test_ptr_to_statics, xzalloc(sizeof(S))); \
+	XZALLOC_CONST_PTR(&test_ptr_to_statics, sizeof(S)); \
 } while (0)
 #define DEINIT_S() do { \
 	free(group_array); \
diff --git a/include/libbb.h b/include/libbb.h
index 296417dae..a340f27d2 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -2280,6 +2280,7 @@ extern const char bb_PATH_root_path[] ALIGN1; /* BB_PATH_ROOT_PATH */
 extern const int const_int_0;
 //extern const int const_int_1;
 
+
 /* This struct is deliberately not defined. */
 /* See docs/keep_data_small.txt */
 struct globals;
@@ -2304,23 +2305,31 @@ static ALWAYS_INLINE void* not_const_pp(const void *p)
 	);
 	return pp;
 }
+# define ASSIGN_CONST_PTR(pptr, v) do { \
+	*(void**)not_const_pp(pptr) = (void*)(v); \
+	barrier(); \
+} while (0)
+/* XZALLOC_CONST_PTR() is an out-of-line function to prevent
+ * clang from reading pointer before it is assigned.
+ */
+void XZALLOC_CONST_PTR(const void *pptr, size_t size) FAST_FUNC;
 #else
-static ALWAYS_INLINE void* not_const_pp(const void *p) { return (void*)p; }
-#endif
-
-#define ASSIGN_CONST_PTR(p, v) do { \
-	*(void**)not_const_pp(&p) = (void*)(v); \
+# define ASSIGN_CONST_PTR(pptr, v) do { \
+	*(void**)(pptr) = (void*)(v); \
 	/* At least gcc 3.4.6 on mipsel needs optimization barrier */ \
 	barrier(); \
 } while (0)
+# define XZALLOC_CONST_PTR(pptr, size) ASSIGN_CONST_PTR(pptr, xzalloc(size))
+#endif
 
-#define SET_PTR_TO_GLOBALS(x) ASSIGN_CONST_PTR(ptr_to_globals, x)
+#define SET_PTR_TO_GLOBALS(x) ASSIGN_CONST_PTR(&ptr_to_globals, x)
 #define FREE_PTR_TO_GLOBALS() do { \
 	if (ENABLE_FEATURE_CLEAN_UP) { \
 		free(ptr_to_globals); \
 	} \
 } while (0)
 
+
 /* You can change LIBBB_DEFAULT_LOGIN_SHELL, but don't use it,
  * use bb_default_login_shell and following defines.
  * If you change LIBBB_DEFAULT_LOGIN_SHELL,
diff --git a/libbb/Kbuild.src b/libbb/Kbuild.src
index 676300801..2fa239857 100644
--- a/libbb/Kbuild.src
+++ b/libbb/Kbuild.src
@@ -24,6 +24,7 @@ lib-y += chomp.o
 lib-y += compare_string_array.o
 lib-y += concat_path_file.o
 lib-y += concat_subpath_file.o
+lib-y += const_hack.o
 lib-y += copy_file.o
 lib-y += copyfd.o
 lib-y += crc32.o
diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index bf26c99e9..e8c308467 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -247,7 +247,7 @@ void lbb_prepare(const char *applet
 		IF_FEATURE_INDIVIDUAL(, char **argv))
 {
 #ifdef bb_cached_errno_ptr
-	ASSIGN_CONST_PTR(bb_errno, get_perrno());
+	ASSIGN_CONST_PTR(&bb_errno, get_perrno());
 #endif
 	applet_name = applet;
 
diff --git a/libbb/const_hack.c b/libbb/const_hack.c
new file mode 100644
index 000000000..9575e6d67
--- /dev/null
+++ b/libbb/const_hack.c
@@ -0,0 +1,16 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * Trick to assign a const ptr with barrier for clang
+ *
+ * Copyright (C) 2021 by YU Jincheng <shana@zju.edu.cn>
+ *
+ * Licensed under GPLv2 or later, see file LICENSE in this source tree.
+ */
+#include "libbb.h"
+
+#if defined(__clang_major__) && __clang_major__ >= 9
+void FAST_FUNC XZALLOC_CONST_PTR(const void *pptr, size_t size)
+{
+	ASSIGN_CONST_PTR(pptr, xzalloc(size));
+}
+#endif
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index 3c87abcf9..9960448ec 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -214,7 +214,7 @@ extern struct lineedit_statics *BB_GLOBAL_CONST lineedit_ptr_to_statics;
 #define delbuf           (S.delbuf          )
 
 #define INIT_S() do { \
-	ASSIGN_CONST_PTR(lineedit_ptr_to_statics, xzalloc(sizeof(S))); \
+	XZALLOC_CONST_PTR(&lineedit_ptr_to_statics, sizeof(S)); \
 } while (0)
 
 static void deinit_S(void)
diff --git a/shell/ash.c b/shell/ash.c
index 199975191..2d3cc8a61 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -504,7 +504,7 @@ extern struct globals_misc *BB_GLOBAL_CONST ash_ptr_to_globals_misc;
 #define random_gen  (G_misc.random_gen )
 #define backgndpid  (G_misc.backgndpid )
 #define INIT_G_misc() do { \
-	ASSIGN_CONST_PTR(ash_ptr_to_globals_misc, xzalloc(sizeof(G_misc))); \
+	XZALLOC_CONST_PTR(&ash_ptr_to_globals_misc, sizeof(G_misc)); \
 	savestatus = -1; \
 	curdir = nullstr; \
 	physdir = nullstr; \
@@ -1582,7 +1582,7 @@ extern struct globals_memstack *BB_GLOBAL_CONST ash_ptr_to_globals_memstack;
 #define g_stacknleft (G_memstack.g_stacknleft)
 #define stackbase    (G_memstack.stackbase   )
 #define INIT_G_memstack() do { \
-	ASSIGN_CONST_PTR(ash_ptr_to_globals_memstack, xzalloc(sizeof(G_memstack))); \
+	XZALLOC_CONST_PTR(&ash_ptr_to_globals_memstack, sizeof(G_memstack)); \
 	g_stackp = &stackbase; \
 	g_stacknxt = stackbase.space; \
 	g_stacknleft = MINSIZE; \
@@ -2213,7 +2213,7 @@ extern struct globals_var *BB_GLOBAL_CONST ash_ptr_to_globals_var;
 #endif
 #define INIT_G_var() do { \
 	unsigned i; \
-	ASSIGN_CONST_PTR(ash_ptr_to_globals_var, xzalloc(sizeof(G_var))); \
+	XZALLOC_CONST_PTR(&ash_ptr_to_globals_var, sizeof(G_var)); \
 	for (i = 0; i < ARRAY_SIZE(varinit_data); i++) { \
 		varinit[i].flags    = varinit_data[i].flags; \
 		varinit[i].var_text = varinit_data[i].var_text; \
-- 
cgit v1.2.3-55-g6feb


From 53b2fdcdba4ced600da963147be425a21d0f6e53 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 10 Oct 2021 13:50:53 +0200
Subject: *: add NOINLINEs where code noticeably shrinks

function                                             old     new   delta
display                                               85    1463   +1378  -73 bytes
select_and_cluster                                     -    1088   +1088 -139 bytes
parse_reply                                            -     979    +979 -109 bytes
zbc_num_sqrt                                           -     632    +632 -191 bytes
show_bridge_port                                       -     585    +585  -56 bytes
sp_256_proj_point_add_8                                -     576    +576  -45 bytes
encode_then_append_var_plusminus                       -     554    +554 -118 bytes
read_mode_db                                           -     537    +537  -47 bytes
fbset_main                                          1331     747    -584
sp_256_ecc_mulmod_8                                 1157     536    -621
brctl_main                                          2189    1548    -641
expand_one_var                                      2544    1872    -672
zxc_vm_process                                      6412    5589    -823
send_queries                                        1813     725   -1088
recv_and_process_peer_pkt                           2245    1018   -1227
bb_dump_dump                                        1531      80   -1451
------------------------------------------------------------------------------
(add/remove: 7/0 grow/shrink: 1/8 up/down: 6329/-7107)       Total: -778 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 libbb/dump.c            | 2 +-
 miscutils/bc.c          | 2 +-
 networking/brctl.c      | 2 +-
 networking/nslookup.c   | 2 +-
 networking/ntpd.c       | 2 +-
 networking/tls_sp_c32.c | 2 +-
 shell/hush.c            | 2 +-
 util-linux/fbset.c      | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libbb/dump.c b/libbb/dump.c
index f8bb6fd03..fcdee8343 100644
--- a/libbb/dump.c
+++ b/libbb/dump.c
@@ -532,7 +532,7 @@ static void conv_u(PR *pr, unsigned char *p)
 	}
 }
 
-static void display(priv_dumper_t* dumper)
+static NOINLINE void display(priv_dumper_t* dumper)
 {
 	unsigned char *bp;
 	unsigned char savech = '\0';
diff --git a/miscutils/bc.c b/miscutils/bc.c
index f9b08b01e..ae370ff55 100644
--- a/miscutils/bc.c
+++ b/miscutils/bc.c
@@ -2261,7 +2261,7 @@ static FAST_FUNC BC_STATUS zbc_num_p(BcNum *a, BcNum *b, BcNum *restrict c, size
 }
 #define zbc_num_p(...) (zbc_num_p(__VA_ARGS__) COMMA_SUCCESS)
 
-static BC_STATUS zbc_num_sqrt(BcNum *a, BcNum *restrict b, size_t scale)
+static NOINLINE BC_STATUS zbc_num_sqrt(BcNum *a, BcNum *restrict b, size_t scale)
 {
 	BcStatus s;
 	BcNum num1, num2, half, f, fprime, *x0, *x1, *temp;
diff --git a/networking/brctl.c b/networking/brctl.c
index c83aac6e0..956bd91f3 100644
--- a/networking/brctl.c
+++ b/networking/brctl.c
@@ -318,7 +318,7 @@ static void printf_xstrtou(const char *fmt)
 	printf(fmt, xstrtou(filedata, 0));
 }
 
-static void show_bridge_port(const char *name)
+static NOINLINE void show_bridge_port(const char *name)
 {
 	char pathbuf[IFNAMSIZ + sizeof("/brport/forward_delay_timer") + 8];
 	char *sfx;
diff --git a/networking/nslookup.c b/networking/nslookup.c
index de7b5c0e7..6da97baf4 100644
--- a/networking/nslookup.c
+++ b/networking/nslookup.c
@@ -335,7 +335,7 @@ enum {
 	OPT_debug = (1 << 0),
 };
 
-static int parse_reply(const unsigned char *msg, size_t len)
+static NOINLINE int parse_reply(const unsigned char *msg, size_t len)
 {
 	HEADER *header;
 
diff --git a/networking/ntpd.c b/networking/ntpd.c
index 5bd4d2d3e..204e1d7c2 100644
--- a/networking/ntpd.c
+++ b/networking/ntpd.c
@@ -1152,7 +1152,7 @@ fit(peer_t *p, double rd)
 //		return 0;
 	return 1;
 }
-static peer_t*
+static NOINLINE peer_t*
 select_and_cluster(void)
 {
 	peer_t     *p;
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 4c0cd320b..4d4ecdd74 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -1162,7 +1162,7 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
  * p  Frist point to add.
  * q  Second point to add.
  */
-static void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q)
+static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q)
 {
 	sp_digit t1[2*8];
 	sp_digit t2[2*8];
diff --git a/shell/hush.c b/shell/hush.c
index 87fc2f445..7156297cf 100644
--- a/shell/hush.c
+++ b/shell/hush.c
@@ -6311,7 +6311,7 @@ static char *encode_then_expand_vararg(const char *str, int handle_squotes, int
 
 /* Expanding ARG in ${var+ARG}, ${var-ARG}
  */
-static int encode_then_append_var_plusminus(o_string *output, int n,
+static NOINLINE int encode_then_append_var_plusminus(o_string *output, int n,
 		char *str, int dquoted)
 {
 	struct in_str input;
diff --git a/util-linux/fbset.c b/util-linux/fbset.c
index cc5413b40..41cc29f37 100644
--- a/util-linux/fbset.c
+++ b/util-linux/fbset.c
@@ -267,7 +267,7 @@ static void ss(uint32_t *x, uint32_t flag, char *buf, const char *what)
  *     vsync high
  * endmode
  */
-static int read_mode_db(struct fb_var_screeninfo *base, const char *fn,
+static NOINLINE int read_mode_db(struct fb_var_screeninfo *base, const char *fn,
 					const char *mode)
 {
 	char *token[2], *p, *s;
-- 
cgit v1.2.3-55-g6feb


From d3dbf4ac05e969b6648faea282df6dec7051e315 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 10 Oct 2021 14:32:05 +0200
Subject: *: add more beneficial NOINLINEs

function                                             old     new   delta
dec_main                                               -    1729   +1729  -41 bytes
fallbackQSort3                                         -     671    +671  -70 bytes
verify_sun                                             -     481    +481 -107 bytes
verify                                              1330     742    -588
fallbackSort                                        1469     728    -741
unpack_xz_stream                                    2306     536   -1770
------------------------------------------------------------------------------
(add/remove: 3/0 grow/shrink: 0/3 up/down: 2881/-3099)       Total: -218 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 archival/libarchive/bz/blocksort.c       | 2 +-
 archival/libarchive/unxz/xz_dec_stream.c | 2 +-
 util-linux/fdisk_sun.c                   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/archival/libarchive/bz/blocksort.c b/archival/libarchive/bz/blocksort.c
index 062fd0f54..74f957544 100644
--- a/archival/libarchive/bz/blocksort.c
+++ b/archival/libarchive/bz/blocksort.c
@@ -107,7 +107,7 @@ void fallbackSimpleSort(uint32_t* fmap,
 #define FALLBACK_QSORT_SMALL_THRESH 10
 #define FALLBACK_QSORT_STACK_SIZE   100
 
-static
+static NOINLINE
 void fallbackQSort3(uint32_t* fmap,
 		uint32_t* eclass,
 		int32_t   loSt,
diff --git a/archival/libarchive/unxz/xz_dec_stream.c b/archival/libarchive/unxz/xz_dec_stream.c
index bf791055b..31158b4e8 100644
--- a/archival/libarchive/unxz/xz_dec_stream.c
+++ b/archival/libarchive/unxz/xz_dec_stream.c
@@ -545,7 +545,7 @@ static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s)
 	return XZ_OK;
 }
 
-static enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b)
+static NOINLINE enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b)
 {
 	enum xz_ret ret;
 
diff --git a/util-linux/fdisk_sun.c b/util-linux/fdisk_sun.c
index 66e434833..5b7760e8c 100644
--- a/util-linux/fdisk_sun.c
+++ b/util-linux/fdisk_sun.c
@@ -403,7 +403,7 @@ verify_sun_cmp(int *a, int *b)
 	return -1;
 }
 
-static void
+static NOINLINE void
 verify_sun(void)
 {
 	unsigned starts[8], lens[8], start, stop;
-- 
cgit v1.2.3-55-g6feb


From d8e39b5917a5dc9c11ecf5714b8c2e1037a9838d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 10 Oct 2021 15:18:12 +0200
Subject: find: code shrink -{m,a,c}{time,min}

function                                             old     new   delta
time_cmp                                               -     181    +181
func_mmin                                            171      31    -140
func_mtime                                           180      34    -146
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/2 up/down: 181/-286)         Total: -105 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 findutils/find.c | 61 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 33 insertions(+), 28 deletions(-)

diff --git a/findutils/find.c b/findutils/find.c
index 9feb44b64..fdc5c152d 100644
--- a/findutils/find.c
+++ b/findutils/find.c
@@ -440,8 +440,8 @@ IF_FEATURE_FIND_PRINT0( ACTS(print0))
 IF_FEATURE_FIND_TYPE(   ACTS(type,  int type_mask;))
 IF_FEATURE_FIND_EXECUTABLE(ACTS(executable))
 IF_FEATURE_FIND_PERM(   ACTS(perm,  char perm_char; mode_t perm_mask;))
-IF_FEATURE_FIND_MTIME(  ACTS(mtime, char time_type; char mtime_char; unsigned mtime_days;))
-IF_FEATURE_FIND_MMIN(   ACTS(mmin,  char time_type; char mmin_char; unsigned mmin_mins;))
+IF_FEATURE_FIND_MTIME(  ACTS(mtime, unsigned char time_type; unsigned char mtime_char; unsigned mtime_days;))
+IF_FEATURE_FIND_MMIN(   ACTS(mmin,  unsigned char time_type; unsigned char mmin_char; unsigned mmin_mins;))
 IF_FEATURE_FIND_NEWER(  ACTS(newer, time_t newer_mtime;))
 IF_FEATURE_FIND_INUM(   ACTS(inum,  ino_t inode_num;))
 IF_FEATURE_FIND_USER(   ACTS(user,  uid_t uid;))
@@ -670,14 +670,29 @@ ACTF(perm)
  || ENABLE_FEATURE_FIND_CTIME \
  || ENABLE_FEATURE_FIND_MMIN  \
  || ENABLE_FEATURE_FIND_MTIME
-static int time_cmp(time_t ftime, char time_char, time_t secs, time_t delta)
+static int time_cmp(const struct stat *statbuf, unsigned type_and_char, time_t N_from_user, unsigned unit)
 {
-	time_t file_age = time(NULL) - ftime;
-	switch (time_char) {
-	case '+': return file_age >= secs + delta;
-	case '-': return file_age < secs;
+	time_t ftime, file_age;
+
+	ftime = statbuf->st_mtime;
+# if ENABLE_FEATURE_FIND_ATIME || ENABLE_FEATURE_FIND_CTIME
+#  if ENABLE_FEATURE_FIND_ATIME
+	if ((type_and_char >> 8) == 'a')
+		ftime = statbuf->st_atime;
+#  endif
+#  if ENABLE_FEATURE_FIND_CTIME
+	if ((type_and_char >> 8) == 'c')
+		ftime = statbuf->st_ctime;
+#  endif
+	type_and_char &= 0xff;
+# endif
+	file_age = time(NULL) - ftime;
+	N_from_user *= unit;
+	switch (type_and_char) {
+	case '+': return file_age >= N_from_user + unit;
+	case '-': return file_age < N_from_user;
 	/* just numeric time */
-	default:  return file_age >= secs && file_age < secs + delta;
+	default:  return file_age >= N_from_user && file_age < N_from_user + unit;
 	}
 }
 #endif
@@ -685,33 +700,23 @@ static int time_cmp(time_t ftime, char time_char, time_t secs, time_t delta)
 #if ENABLE_FEATURE_FIND_MTIME
 ACTF(mtime)
 {
-	time_t t = statbuf->st_mtime;
-# if ENABLE_FEATURE_FIND_ATIME
-	if (ap->time_type == 'a')
-		t = statbuf->st_atime;
+	return time_cmp(statbuf,
+# if ENABLE_FEATURE_FIND_ATIME || ENABLE_FEATURE_FIND_CTIME
+			(ap->time_type << 8) |
 # endif
-# if ENABLE_FEATURE_FIND_CTIME
-	if (ap->time_type == 'c')
-		t = statbuf->st_ctime;
-# endif
-	return time_cmp(t, ap->mtime_char,
-			ap->mtime_days * 24*60*60, 24*60*60);
+				ap->mtime_char,
+			ap->mtime_days, 24*60*60);
 }
 #endif
 #if ENABLE_FEATURE_FIND_MMIN
 ACTF(mmin)
 {
-	time_t t = statbuf->st_mtime;
-# if ENABLE_FEATURE_FIND_AMIN
-	if (ap->time_type == 'a')
-		t = statbuf->st_atime;
-# endif
-# if ENABLE_FEATURE_FIND_CMIN
-	if (ap->time_type == 'c')
-		t = statbuf->st_ctime;
+	return time_cmp(statbuf,
+# if ENABLE_FEATURE_FIND_ATIME || ENABLE_FEATURE_FIND_CTIME
+			(ap->time_type << 8) |
 # endif
-	return time_cmp(t, ap->mmin_char,
-			ap->mmin_mins * 60, 60);
+				ap->mmin_char,
+			ap->mmin_mins, 60);
 }
 #endif
 #if ENABLE_FEATURE_FIND_NEWER
-- 
cgit v1.2.3-55-g6feb


From 376b2ceff6b498f7f9ab719c0dd4811c2bbda2c6 Mon Sep 17 00:00:00 2001
From: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
Date: Sun, 10 Oct 2021 10:19:08 +0200
Subject: chrt: silence analyzer warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

warning: use of uninitialized value ‘priority’ [CWE-457] [-Wanalyzer-use-of-uninitialized-value]

chrt_main                                            499     496      -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3)               Total: -3 bytes
   text	   data	    bss	    dec	    hex	filename
 972157	   4235	   1840	 978232	  eed38	busybox_old
 972154	   4235	   1840	 978229	  eed35	busybox_unstripped

Signed-off-by: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
---
 util-linux/chrt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util-linux/chrt.c b/util-linux/chrt.c
index a8701b55f..6799abb2d 100644
--- a/util-linux/chrt.c
+++ b/util-linux/chrt.c
@@ -93,7 +93,7 @@ int chrt_main(int argc UNUSED_PARAM, char **argv)
 	unsigned opt;
 	struct sched_param sp;
 	char *pid_str;
-	char *priority = priority; /* for compiler */
+	char *priority = NULL;
 	const char *current_new;
 	int policy = SCHED_RR;
 	int ret;
-- 
cgit v1.2.3-55-g6feb


From 74ee3f2f7323489aab6a5da1c84f86955ccbf93e Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 11 Oct 2021 13:46:30 +0200
Subject: tls: remove unused tls_symmetric.h (was used by old "big" AES code)

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls.h           |   1 -
 networking/tls_symmetric.h | 511 ---------------------------------------------
 2 files changed, 512 deletions(-)
 delete mode 100644 networking/tls_symmetric.h

diff --git a/networking/tls.h b/networking/tls.h
index eb0fdd4cf..0173b87b2 100644
--- a/networking/tls.h
+++ b/networking/tls.h
@@ -101,7 +101,6 @@ void xorbuf_aligned_AES_BLOCK_SIZE(void* buf, const void* mask) FAST_FUNC;
 
 
 #include "tls_pstm.h"
-#include "tls_symmetric.h"
 #include "tls_aes.h"
 #include "tls_aesgcm.h"
 #include "tls_rsa.h"
diff --git a/networking/tls_symmetric.h b/networking/tls_symmetric.h
deleted file mode 100644
index 5e0e4b6d8..000000000
--- a/networking/tls_symmetric.h
+++ /dev/null
@@ -1,511 +0,0 @@
-/*
- * Copyright (C) 2017 Denys Vlasenko
- *
- * Licensed under GPLv2, see file LICENSE in this source tree.
- */
-
-
-/* The part below is a section of matrixssl-3-7-2b-open/crypto/cryptolib.h
- * Changes are flagged with //bbox
- */
-
-/******************************************************************************/
-/* 32-bit Rotates */
-/******************************************************************************/
-#if defined(_MSC_VER)
-/******************************************************************************/
-
-/* instrinsic rotate */
-#include <stdlib.h>
-#pragma intrinsic(_lrotr,_lrotl)
-#define ROR(x,n) _lrotr(x,n)
-#define ROL(x,n) _lrotl(x,n)
-
-/******************************************************************************/
-#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && \
-		!defined(INTEL_CC) && !defined(PS_NO_ASM)
-
-static ALWAYS_INLINE unsigned ROL(unsigned word, int i)
-{
- if (__builtin_constant_p(i)) { //box
-   // Rotates by constant use fewer registers,
-   // and on many Intel CPUs rotates by %cl take 2 cycles, not 1.
-   asm ("roll %2,%0"
-	  :"=r" (word)
-	  :"0" (word),"i" (i));
-   return word;
- } //box
- asm ("roll %%cl,%0"
-	  :"=r" (word)
-	  :"0" (word),"c" (i));
- return word;
-}
-
-static ALWAYS_INLINE unsigned ROR(unsigned word, int i)
-{
- if (__builtin_constant_p(i)) { //box
-   asm ("rorl %2,%0"
-	  :"=r" (word)
-	  :"0" (word),"i" (i));
-   return word;
- } //box
- asm ("rorl %%cl,%0"
-	  :"=r" (word)
-	  :"0" (word),"c" (i));
- return word;
-}
-
-/******************************************************************************/
-#else
-
-/* rotates the hard way */
-#define ROL(x, y) \
-	( (((unsigned long)(x)<<(unsigned long)((y)&31)) | \
-	(((unsigned long)(x)&0xFFFFFFFFUL)>>(unsigned long)(32-((y)&31)))) & \
-	0xFFFFFFFFUL)
-#define ROR(x, y) \
-	( ((((unsigned long)(x)&0xFFFFFFFFUL)>>(unsigned long)((y)&31)) | \
-	((unsigned long)(x)<<(unsigned long)(32-((y)&31)))) & 0xFFFFFFFFUL)
-
-#endif /* 32-bit Rotates */
-/******************************************************************************/
-
-#ifdef HAVE_NATIVE_INT64
-#ifdef _MSC_VER
-	#define CONST64(n) n ## ui64
-#else
-	#define CONST64(n) n ## ULL
-#endif
-#endif
-
-/******************************************************************************/
-/*
-	Endian helper macros
- */
-#if defined (ENDIAN_NEUTRAL)
-#define STORE32L(x, y) { \
-(y)[3] = (unsigned char)(((x)>>24)&255); \
-(y)[2] = (unsigned char)(((x)>>16)&255);  \
-(y)[1] = (unsigned char)(((x)>>8)&255); \
-(y)[0] = (unsigned char)((x)&255); \
-}
-
-#define LOAD32L(x, y) { \
-x = ((unsigned long)((y)[3] & 255)<<24) | \
-((unsigned long)((y)[2] & 255)<<16) | \
-((unsigned long)((y)[1] & 255)<<8)  | \
-((unsigned long)((y)[0] & 255)); \
-}
-
-#define STORE64L(x, y) { \
-(y)[7] = (unsigned char)(((x)>>56)&255); \
-(y)[6] = (unsigned char)(((x)>>48)&255); \
-(y)[5] = (unsigned char)(((x)>>40)&255); \
-(y)[4] = (unsigned char)(((x)>>32)&255); \
-(y)[3] = (unsigned char)(((x)>>24)&255); \
-(y)[2] = (unsigned char)(((x)>>16)&255); \
-(y)[1] = (unsigned char)(((x)>>8)&255); \
-(y)[0] = (unsigned char)((x)&255); \
-}
-
-#define LOAD64L(x, y) { \
-x = (((uint64)((y)[7] & 255))<<56)|(((uint64)((y)[6] & 255))<<48)| \
-(((uint64)((y)[5] & 255))<<40)|(((uint64)((y)[4] & 255))<<32)| \
-(((uint64)((y)[3] & 255))<<24)|(((uint64)((y)[2] & 255))<<16)| \
-(((uint64)((y)[1] & 255))<<8)|(((uint64)((y)[0] & 255))); \
-}
-
-#define STORE32H(x, y) { \
-(y)[0] = (unsigned char)(((x)>>24)&255); \
-(y)[1] = (unsigned char)(((x)>>16)&255); \
-(y)[2] = (unsigned char)(((x)>>8)&255); \
-(y)[3] = (unsigned char)((x)&255); \
-}
-
-#define LOAD32H(x, y) { \
-x = ((unsigned long)((y)[0] & 255)<<24) | \
-((unsigned long)((y)[1] & 255)<<16) | \
-((unsigned long)((y)[2] & 255)<<8)  | \
-((unsigned long)((y)[3] & 255)); \
-}
-
-#define STORE64H(x, y) { \
-(y)[0] = (unsigned char)(((x)>>56)&255); \
-(y)[1] = (unsigned char)(((x)>>48)&255); \
-(y)[2] = (unsigned char)(((x)>>40)&255); \
-(y)[3] = (unsigned char)(((x)>>32)&255); \
-(y)[4] = (unsigned char)(((x)>>24)&255); \
-(y)[5] = (unsigned char)(((x)>>16)&255); \
-(y)[6] = (unsigned char)(((x)>>8)&255); \
-(y)[7] = (unsigned char)((x)&255); \
-}
-
-#define LOAD64H(x, y) { \
-x = (((uint64)((y)[0] & 255))<<56)|(((uint64)((y)[1] & 255))<<48) | \
-(((uint64)((y)[2] & 255))<<40)|(((uint64)((y)[3] & 255))<<32) | \
-(((uint64)((y)[4] & 255))<<24)|(((uint64)((y)[5] & 255))<<16) | \
-(((uint64)((y)[6] & 255))<<8)|(((uint64)((y)[7] & 255))); \
-}
-
-#endif /* ENDIAN_NEUTRAL */
-
-#ifdef ENDIAN_LITTLE
-#define STORE32H(x, y) { \
-(y)[0] = (unsigned char)(((x)>>24)&255); \
-(y)[1] = (unsigned char)(((x)>>16)&255); \
-(y)[2] = (unsigned char)(((x)>>8)&255); \
-(y)[3] = (unsigned char)((x)&255); \
-}
-
-#define LOAD32H(x, y) { \
-x = ((unsigned long)((y)[0] & 255)<<24) | \
-((unsigned long)((y)[1] & 255)<<16) | \
-((unsigned long)((y)[2] & 255)<<8)  | \
-((unsigned long)((y)[3] & 255)); \
-}
-
-#define STORE64H(x, y) { \
-(y)[0] = (unsigned char)(((x)>>56)&255); \
-(y)[1] = (unsigned char)(((x)>>48)&255); \
-(y)[2] = (unsigned char)(((x)>>40)&255); \
-(y)[3] = (unsigned char)(((x)>>32)&255); \
-(y)[4] = (unsigned char)(((x)>>24)&255); \
-(y)[5] = (unsigned char)(((x)>>16)&255); \
-(y)[6] = (unsigned char)(((x)>>8)&255); \
-(y)[7] = (unsigned char)((x)&255); \
-}
-
-#define LOAD64H(x, y) { \
-x = (((uint64)((y)[0] & 255))<<56)|(((uint64)((y)[1] & 255))<<48) | \
-(((uint64)((y)[2] & 255))<<40)|(((uint64)((y)[3] & 255))<<32) | \
-(((uint64)((y)[4] & 255))<<24)|(((uint64)((y)[5] & 255))<<16) | \
-(((uint64)((y)[6] & 255))<<8)|(((uint64)((y)[7] & 255))); }
-
-#ifdef ENDIAN_32BITWORD
-#define STORE32L(x, y) { \
-unsigned long __t = (x); memcpy(y, &__t, 4); \
-}
-
-#define LOAD32L(x, y)  memcpy(&(x), y, 4);
-
-#define STORE64L(x, y) { \
-(y)[7] = (unsigned char)(((x)>>56)&255); \
-(y)[6] = (unsigned char)(((x)>>48)&255); \
-(y)[5] = (unsigned char)(((x)>>40)&255); \
-(y)[4] = (unsigned char)(((x)>>32)&255); \
-(y)[3] = (unsigned char)(((x)>>24)&255); \
-(y)[2] = (unsigned char)(((x)>>16)&255); \
-(y)[1] = (unsigned char)(((x)>>8)&255); \
-(y)[0] = (unsigned char)((x)&255); \
-}
-
-#define LOAD64L(x, y) { \
-x = (((uint64)((y)[7] & 255))<<56)|(((uint64)((y)[6] & 255))<<48)| \
-(((uint64)((y)[5] & 255))<<40)|(((uint64)((y)[4] & 255))<<32)| \
-(((uint64)((y)[3] & 255))<<24)|(((uint64)((y)[2] & 255))<<16)| \
-(((uint64)((y)[1] & 255))<<8)|(((uint64)((y)[0] & 255))); \
-}
-
-#else /* 64-bit words then  */
-#define STORE32L(x, y) \
-{ unsigned long __t = (x); memcpy(y, &__t, 4); }
-
-#define LOAD32L(x, y) \
-{ memcpy(&(x), y, 4); x &= 0xFFFFFFFF; }
-
-#define STORE64L(x, y) \
-{ uint64 __t = (x); memcpy(y, &__t, 8); }
-
-#define LOAD64L(x, y) \
-{ memcpy(&(x), y, 8); }
-
-#endif /* ENDIAN_64BITWORD */
-#endif /* ENDIAN_LITTLE */
-
-#ifdef ENDIAN_BIG
-#define STORE32L(x, y) { \
-(y)[3] = (unsigned char)(((x)>>24)&255); \
-(y)[2] = (unsigned char)(((x)>>16)&255); \
-(y)[1] = (unsigned char)(((x)>>8)&255); \
-(y)[0] = (unsigned char)((x)&255); \
-}
-
-#define LOAD32L(x, y) { \
-x = ((unsigned long)((y)[3] & 255)<<24) | \
-((unsigned long)((y)[2] & 255)<<16) | \
-((unsigned long)((y)[1] & 255)<<8)  | \
-((unsigned long)((y)[0] & 255)); \
-}
-
-#define STORE64L(x, y) { \
-(y)[7] = (unsigned char)(((x)>>56)&255); \
-(y)[6] = (unsigned char)(((x)>>48)&255); \
-(y)[5] = (unsigned char)(((x)>>40)&255); \
-(y)[4] = (unsigned char)(((x)>>32)&255); \
-(y)[3] = (unsigned char)(((x)>>24)&255); \
-(y)[2] = (unsigned char)(((x)>>16)&255); \
-(y)[1] = (unsigned char)(((x)>>8)&255); \
-(y)[0] = (unsigned char)((x)&255); \
-}
-
-#define LOAD64L(x, y) { \
-x = (((uint64)((y)[7] & 255))<<56)|(((uint64)((y)[6] & 255))<<48) | \
-(((uint64)((y)[5] & 255))<<40)|(((uint64)((y)[4] & 255))<<32) | \
-(((uint64)((y)[3] & 255))<<24)|(((uint64)((y)[2] & 255))<<16) | \
-(((uint64)((y)[1] & 255))<<8)|(((uint64)((y)[0] & 255))); \
-}
-
-#ifdef ENDIAN_32BITWORD
-#define STORE32H(x, y) \
-{ unsigned int __t = (x); memcpy(y, &__t, 4); }
-
-#define LOAD32H(x, y) memcpy(&(x), y, 4);
-
-#define STORE64H(x, y) { \
-(y)[0] = (unsigned char)(((x)>>56)&255); \
-(y)[1] = (unsigned char)(((x)>>48)&255); \
-(y)[2] = (unsigned char)(((x)>>40)&255); \
-(y)[3] = (unsigned char)(((x)>>32)&255); \
-(y)[4] = (unsigned char)(((x)>>24)&255); \
-(y)[5] = (unsigned char)(((x)>>16)&255); \
-(y)[6] = (unsigned char)(((x)>>8)&255); \
-(y)[7] = (unsigned char)((x)&255); \
-}
-
-#define LOAD64H(x, y) { \
-x = (((uint64)((y)[0] & 255))<<56)|(((uint64)((y)[1] & 255))<<48)| \
-(((uint64)((y)[2] & 255))<<40)|(((uint64)((y)[3] & 255))<<32)| \
-(((uint64)((y)[4] & 255))<<24)|(((uint64)((y)[5] & 255))<<16)| \
-(((uint64)((y)[6] & 255))<<8)| (((uint64)((y)[7] & 255))); \
-}
-
-#else /* 64-bit words then  */
-
-#define STORE32H(x, y) \
-{ unsigned long __t = (x); memcpy(y, &__t, 4); }
-
-#define LOAD32H(x, y) \
-{ memcpy(&(x), y, 4); x &= 0xFFFFFFFF; }
-
-#define STORE64H(x, y) \
-{ uint64 __t = (x); memcpy(y, &__t, 8); }
-
-#define LOAD64H(x, y) \
-{ memcpy(&(x), y, 8); }
-
-#endif /* ENDIAN_64BITWORD */
-#endif /* ENDIAN_BIG */
-
-#ifdef HAVE_NATIVE_INT64
-#define ROL64c(x, y) \
-( (((x)<<((uint64)(y)&63)) | \
-(((x)&CONST64(0xFFFFFFFFFFFFFFFF))>>((uint64)64-((y)&63)))) & CONST64(0xFFFFFFFFFFFFFFFF))
-
-#define ROR64c(x, y) \
-( ((((x)&CONST64(0xFFFFFFFFFFFFFFFF))>>((uint64)(y)&CONST64(63))) | \
-((x)<<((uint64)(64-((y)&CONST64(63)))))) & CONST64(0xFFFFFFFFFFFFFFFF))
-#endif /* HAVE_NATIVE_INT64 */
-/******************************************************************************/
-
-
-
-/* The part below is taken almost verbatim from matrixssl-3-7-2b-open/crypto/symmetric/.
- * Changes are flagged with //bbox
- */
-
-/**
- *	@file    symmetric.h
- *	@version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
- *
- *	Header for internal symmetric key cryptography support.
- */
-/*
- *	Copyright (c) 2013-2015 INSIDE Secure Corporation
- *	Copyright (c) PeerSec Networks, 2002-2011
- *	All Rights Reserved
- *
- *	The latest version of this code is available at http://www.matrixssl.org
- *
- *	This software is open source; you can redistribute it and/or modify
- *	it under the terms of the GNU General Public License as published by
- *	the Free Software Foundation; either version 2 of the License, or
- *	(at your option) any later version.
- *
- *	This General Public License does NOT permit incorporating this software
- *	into proprietary programs.  If you are unable to comply with the GPL, a
- *	commercial license for this software may be purchased from INSIDE at
- *	http://www.insidesecure.com/eng/Company/Locations
- *
- *	This program is distributed in WITHOUT ANY WARRANTY; without even the
- *	implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *	See the GNU General Public License for more details.
- *
- *	You should have received a copy of the GNU General Public License
- *	along with this program; if not, write to the Free Software
- *	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *	http://www.gnu.org/copyleft/gpl.html
- */
-/******************************************************************************/
-
-#ifndef _h_PS_SYMMETRIC
-#define _h_PS_SYMMETRIC
-
-/******************************************************************************/
-#ifdef USE_AES
-/******************************************************************************/
-
-
-#ifndef USE_AES_CBC_EXTERNAL
-typedef struct {
-	uint32 eK[64], dK[64];
-	int32 Nr;
-} psAesKey_t;
-
-typedef struct {
-	int32			blocklen;
-	unsigned char	IV[16];
-	psAesKey_t		key;
-#if defined(USE_AES_GCM) || defined(USE_AES_CCM)
-	unsigned char	EncCtr[16];
-	unsigned char	CtrBlock[16];
-#endif
-#ifdef USE_AES_GCM
-	unsigned char	gInit[16];
-	uint32			TagTemp[4];
-	unsigned char	Hash_SubKey[16];
-	uint32			ProcessedBitCount[4];
-	uint32			InputBufferCount;
-	uint32			OutputBufferCount;
-	union
-	{
-		unsigned char Buffer[128];
-		uint32 BufferAlignment;
-	} Input;
-#endif /* USE_AES_GCM */
-#ifdef USE_AES_CCM
-	uint32_t ccmTagTemp[16 / sizeof(uint32_t)]; /* 32 */
-	union
-	{
-		/* Used for formatting IV. */
-		uint8_t Temporary[16];
-		/* Used for processing Mac. */
-		uint8_t Y0[16];
-	} u; /* 48 */
-#endif /* USE_AES_CCM */
-} psAesCipher_t;
-#endif /* USE_AES_CBC_EXTERNAL */
-
-#endif /* USE_AES */
-
-#ifdef USE_IDEA
-#define SSL_IDEA_KEY_LEN	16
-#define SSL_IDEA_IV_LEN		8
-#define SSL_IDEA_BLOCK_LEN	8
-
-typedef struct {
-	uint16	key_schedule[52];
-} psIdeaKey_t;
-
-typedef struct {
-	psIdeaKey_t		key;
-	uint32			IV[2];
-	short			for_encryption;
-	short			inverted;
-} idea_CBC;
-#endif
-/******************************************************************************/
-
-/******************************************************************************/
-#ifdef USE_SEED
-/******************************************************************************/
-#define SSL_SEED_KEY_LEN	16
-#define SSL_SEED_IV_LEN		16
-
-
-typedef struct {
-	uint32 K[32], dK[32];
-} psSeedKey_t;
-
-typedef struct {
-	int32			blocklen;
-	unsigned char	IV[16];
-	psSeedKey_t		key;
-} seed_CBC;
-
-#endif /* USE_SEED */
-/******************************************************************************/
-
-/******************************************************************************/
-#if defined(USE_3DES) || defined(USE_DES)
-/******************************************************************************/
-#define DES3_KEY_LEN	24
-#define DES3_IV_LEN		8
-#define DES_KEY_LEN		8
-
-typedef struct {
-	uint32 ek[3][32], dk[3][32];
-} psDes3Key_t;
-
-/*
-	A block cipher CBC structure
- */
-typedef struct {
-	int32				blocklen;
-	unsigned char		IV[8];
-	psDes3Key_t			key;
-} des3_CBC;
-
-#endif /* USE_3DES || USE_DES */
-/******************************************************************************/
-
-/******************************************************************************/
-#ifdef USE_ARC4
-typedef struct {
-	unsigned char	state[256];
-	uint32	byteCount;
-	unsigned char	x;
-	unsigned char	y;
-} psRc4Key_t;
-#endif /* USE_ARC4 */
-/******************************************************************************/
-#ifdef USE_RC2
-typedef struct {
-	unsigned xkey[64];
-} psRc2Key_t;
-
-typedef struct {
-	int32				blocklen;
-	unsigned char		IV[8];
-	psRc2Key_t			key;
-} rc2_CBC;
-#endif /* USE_RC2 */
-/******************************************************************************/
-/*	Universal types and defines */
-/******************************************************************************/
-#define MAXBLOCKSIZE	24
-
-typedef union {
-#ifdef USE_RC2
-	rc2_CBC		rc2;
-#endif
-#ifdef USE_ARC4
-	psRc4Key_t	arc4;
-#endif
-#ifdef USE_3DES
-	des3_CBC	des3;
-#endif
-#ifdef USE_AES
-	psAesCipher_t	aes;
-#endif
-#ifdef USE_SEED
-	seed_CBC	seed;
-#endif
-#ifdef USE_IDEA
-	idea_CBC	idea;
-#endif
-} psCipherContext_t;
-
-#define byte(x, n) (((x) >> (8 * (n))) & 255)
-
-#endif /* _h_PS_SYMMETRIC */
-/******************************************************************************/
-- 
cgit v1.2.3-55-g6feb


From 23aba8a9a66637fbc6b1eed2a259cb7ddb1a1595 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 11 Oct 2021 16:31:47 +0200
Subject: tls: code shrink curve25519

function                                             old     new   delta
fe_select                                             39       -     -39
curve25519                                           849     800     -49
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 0/1 up/down: 0/-88)             Total: -88 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_fe.c | 54 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/networking/tls_fe.c b/networking/tls_fe.c
index ecb410281..3a0a6776f 100644
--- a/networking/tls_fe.c
+++ b/networking/tls_fe.c
@@ -63,16 +63,22 @@ static void fprime_select(byte *dst, const byte *zero, const byte *one, byte con
 }
 #endif
 
+#if 0 /* constant-time */
 static void fe_select(byte *dst,
-		const byte *zero, const byte *one,
+		const byte *src,
 		byte condition)
 {
 	const byte mask = -condition;
 	int i;
 
 	for (i = 0; i < F25519_SIZE; i++)
-		dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
+		dst[i] = dst[i] ^ (mask & (src[i] ^ dst[i]));
 }
+#else
+# define fe_select(dst, src, condition) do { \
+	if (condition) lm_copy(dst, src); \
+} while (0)
+#endif
 
 #if 0 //UNUSED
 static void raw_add(byte *x, const byte *p)
@@ -225,7 +231,7 @@ static void fe_normalize(byte *x)
 	minusp[31] = (byte)c;
 
 	/* Load x-p if no underflow */
-	fe_select(x, minusp, x, (c >> 15) & 1);
+	fe_select(x, minusp, !(c & (1<<15)));
 }
 
 static void lm_add(byte* r, const byte* a, const byte* b)
@@ -548,26 +554,32 @@ static void curve25519(byte *result, const byte *e, const byte *q)
 {
 	int i;
 
-	struct {
+	struct Z {
 		/* for bbox's special case of q == NULL meaning "use basepoint" */
 		/*static const*/ uint8_t basepoint9[CURVE25519_KEYSIZE]; // = {9};
 
 		/* from wolfssl-3.15.3/wolfssl/wolfcrypt/fe_operations.h */
 		/*static const*/ byte f25519_one[F25519_SIZE]; // = {1};
 
-		/* Current point: P_m */
-		byte xm[F25519_SIZE];
-		byte zm[F25519_SIZE]; // = {1};
 		/* Predecessor: P_(m-1) */
 		byte xm1[F25519_SIZE]; // = {1};
 		byte zm1[F25519_SIZE]; // = {0};
+		/* Current point: P_m */
+		byte xm[F25519_SIZE];
+		byte zm[F25519_SIZE]; // = {1};
+		/* Temporaries */
+		byte xms[F25519_SIZE];
+		byte zms[F25519_SIZE];
 	} z;
+	uint8_t *XM1 = (uint8_t*)&z + offsetof(struct Z,xm1); // gcc 11.0.0 workaround
 #define basepoint9 z.basepoint9
 #define f25519_one z.f25519_one
-#define xm         z.xm
-#define zm         z.zm
 #define xm1        z.xm1
 #define zm1        z.zm1
+#define xm         z.xm
+#define zm         z.zm
+#define xms        z.xms
+#define zms        z.zms
 	memset(&z, 0, sizeof(z));
 	f25519_one[0] = 1;
 	zm[0] = 1;
@@ -583,8 +595,8 @@ static void curve25519(byte *result, const byte *e, const byte *q)
 
 	for (i = 253; i >= 0; i--) {
 		const int bit = (e[i >> 3] >> (i & 7)) & 1;
-		byte xms[F25519_SIZE];
-		byte zms[F25519_SIZE];
+//		byte xms[F25519_SIZE];
+//		byte zms[F25519_SIZE];
 
 		/* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
 		xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
@@ -597,10 +609,22 @@ static void curve25519(byte *result, const byte *e, const byte *q)
 		 *   bit = 1 --> (P_(2m+1), P_(2m))
 		 *   bit = 0 --> (P_(2m), P_(2m-1))
 		 */
-		fe_select(xm1, xm1, xm, bit);
-		fe_select(zm1, zm1, zm, bit);
-		fe_select(xm, xm, xms, bit);
-		fe_select(zm, zm, zms, bit);
+#if 0
+		fe_select(xm1, xm, bit);
+		fe_select(zm1, zm, bit);
+		fe_select(xm, xms, bit);
+		fe_select(zm, zms, bit);
+#else
+// same result as the four fe_select() calls above in about 50 bytes less code,
+// but requires that the in-memory order is exactly xm1,zm1,xm,zm,xms,zms
+		if (bit) {
+			//memcpy(xm1, xm, 4 * F25519_SIZE);
+			//^^^ gcc 11.0.0 warns of overlapping memcpy
+			//memmove(xm1, xm, 4 * F25519_SIZE);
+			//^^^ gcc 11.0.0 warns of out-of-bounds access to xm1[]
+			memmove(XM1, XM1 + 2 * F25519_SIZE, 4 * F25519_SIZE);
+		}
+#endif
 	}
 
 	/* Freeze out of projective coordinates */
-- 
cgit v1.2.3-55-g6feb


From 5acf5e1f870fb5382556d4b434158f7d497893ca Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 11 Oct 2021 18:39:25 +0200
Subject: shell: fix script's comm field if ENABLE_FEATURE_PREFER_APPLETS=y

function                                             old     new   delta
re_execed_comm                                         -      46     +46
main                                                  72      86     +14
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 1/0 up/down: 60/0)               Total: 60 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 include/libbb.h                      |  2 ++
 libbb/appletlib.c                    | 10 ++++++++--
 libbb/vfork_daemon_rexec.c           | 16 ++++++++++++++++
 shell/ash_test/ash-comm/comm.right   |  6 ++++++
 shell/ash_test/ash-comm/comm.tests   | 20 ++++++++++++++++++++
 shell/hush_test/hush-comm/comm.right |  6 ++++++
 shell/hush_test/hush-comm/comm.tests | 20 ++++++++++++++++++++
 7 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100644 shell/ash_test/ash-comm/comm.right
 create mode 100755 shell/ash_test/ash-comm/comm.tests
 create mode 100644 shell/hush_test/hush-comm/comm.right
 create mode 100755 shell/hush_test/hush-comm/comm.tests

diff --git a/include/libbb.h b/include/libbb.h
index a340f27d2..2a0b272c6 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1271,8 +1271,10 @@ void run_applet_no_and_exit(int a, const char *name, char **argv) NORETURN FAST_
 #endif
 void show_usage_if_dash_dash_help(int applet_no, char **argv) FAST_FUNC;
 #if defined(__linux__)
+int re_execed_comm(void) FAST_FUNC;
 void set_task_comm(const char *comm) FAST_FUNC;
 #else
+# define re_execed_comm() 0
 # define set_task_comm(name) ((void)0)
 #endif
 
diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index e8c308467..03389f541 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -1112,8 +1112,14 @@ int main(int argc UNUSED_PARAM, char **argv)
 	 || ENABLE_FEATURE_PREFER_APPLETS
 	 || !BB_MMU
 	) {
-		if (NUM_APPLETS > 1)
-			set_task_comm(applet_name);
+		if (NUM_APPLETS > 1) {
+			/* Careful, do not trash comm of "SCRIPT.sh" -
+			 * the case when started from e.g. #!/bin/ash script.
+			 * (not limited to shells - #!/bin/awk scripts also exist)
+			 */
+			if (re_execed_comm())
+				set_task_comm(applet_name);
+		}
 	}
 
 	parse_config_file(); /* ...maybe, if FEATURE_SUID_CONFIG */
diff --git a/libbb/vfork_daemon_rexec.c b/libbb/vfork_daemon_rexec.c
index a49fe8e01..31e97051f 100644
--- a/libbb/vfork_daemon_rexec.c
+++ b/libbb/vfork_daemon_rexec.c
@@ -28,6 +28,22 @@
 # ifndef PR_GET_NAME
 # define PR_GET_NAME 16
 # endif
+# if ENABLE_FEATURE_SH_STANDALONE || ENABLE_FEATURE_PREFER_APPLETS || !BB_MMU
+int FAST_FUNC re_execed_comm(void)
+{
+	const char *e, *expected_comm;
+	char comm[16];
+
+	BUILD_BUG_ON(CONFIG_BUSYBOX_EXEC_PATH[0] != '/');
+	e = CONFIG_BUSYBOX_EXEC_PATH;
+	/* Hopefully (strrchr(e, '/') - e) evaluates to a constant at compile time: */
+	expected_comm = bb_busybox_exec_path + (strrchr(e, '/') - e) + 1;
+
+	prctl(PR_GET_NAME, (long)comm, 0, 0, 0);
+	//bb_error_msg("comm:'%.*s' expected:'%s'", 16, comm, expected_comm);
+	return strcmp(comm, expected_comm) == 0;
+}
+# endif
 void FAST_FUNC set_task_comm(const char *comm)
 {
 	/* okay if too long (truncates) */
diff --git a/shell/ash_test/ash-comm/comm.right b/shell/ash_test/ash-comm/comm.right
new file mode 100644
index 000000000..1d836656f
--- /dev/null
+++ b/shell/ash_test/ash-comm/comm.right
@@ -0,0 +1,6 @@
+./SCRIPT.sh:
+  /proc/N/comm: SCRIPT.sh
+exec ./SCRIPT.sh:
+  /proc/N/comm: SCRIPT.sh
+sh ./SCRIPT.sh:
+  /proc/N/comm: ash
diff --git a/shell/ash_test/ash-comm/comm.tests b/shell/ash_test/ash-comm/comm.tests
new file mode 100755
index 000000000..671bfc176
--- /dev/null
+++ b/shell/ash_test/ash-comm/comm.tests
@@ -0,0 +1,20 @@
+{
+echo "#!$THIS_SH"
+echo 'procdir=/proc/$$'
+#echo 'echo "  /proc/N/exe:  $(basename $(readlink $procdir/exe))"'
+echo 'echo "  /proc/N/comm: $(cat $procdir/comm)"'
+} >SCRIPT.sh
+chmod 755 SCRIPT.sh
+
+# comm field was wrong if CONFIG_FEATURE_PREFER_APPLETS=y
+echo './SCRIPT.sh:'
+./SCRIPT.sh
+
+# comm field was wrong if CONFIG_FEATURE_PREFER_APPLETS=y
+echo 'exec ./SCRIPT.sh:'
+(exec ./SCRIPT.sh)
+
+echo 'sh ./SCRIPT.sh:'
+$THIS_SH ./SCRIPT.sh
+
+rm SCRIPT.sh
diff --git a/shell/hush_test/hush-comm/comm.right b/shell/hush_test/hush-comm/comm.right
new file mode 100644
index 000000000..1b62b617e
--- /dev/null
+++ b/shell/hush_test/hush-comm/comm.right
@@ -0,0 +1,6 @@
+./SCRIPT.sh:
+  /proc/N/comm: SCRIPT.sh
+exec ./SCRIPT.sh:
+  /proc/N/comm: SCRIPT.sh
+sh ./SCRIPT.sh:
+  /proc/N/comm: hush
diff --git a/shell/hush_test/hush-comm/comm.tests b/shell/hush_test/hush-comm/comm.tests
new file mode 100755
index 000000000..671bfc176
--- /dev/null
+++ b/shell/hush_test/hush-comm/comm.tests
@@ -0,0 +1,20 @@
+{
+echo "#!$THIS_SH"
+echo 'procdir=/proc/$$'
+#echo 'echo "  /proc/N/exe:  $(basename $(readlink $procdir/exe))"'
+echo 'echo "  /proc/N/comm: $(cat $procdir/comm)"'
+} >SCRIPT.sh
+chmod 755 SCRIPT.sh
+
+# comm field was wrong if CONFIG_FEATURE_PREFER_APPLETS=y
+echo './SCRIPT.sh:'
+./SCRIPT.sh
+
+# comm field was wrong if CONFIG_FEATURE_PREFER_APPLETS=y
+echo 'exec ./SCRIPT.sh:'
+(exec ./SCRIPT.sh)
+
+echo 'sh ./SCRIPT.sh:'
+$THIS_SH ./SCRIPT.sh
+
+rm SCRIPT.sh
-- 
cgit v1.2.3-55-g6feb


From aec8fbfb834dfcf46b7c967d6572c9adcb72b620 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 11 Oct 2021 19:11:06 +0200
Subject: whitespace fix

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_pstm.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/networking/tls_pstm.h b/networking/tls_pstm.h
index bc7a0119a..56c6bb879 100644
--- a/networking/tls_pstm.h
+++ b/networking/tls_pstm.h
@@ -283,4 +283,3 @@ extern int32 pstm_invmod(psPool_t *pool, pstm_int * a, pstm_int * b,
 	typedef int32 pstm_int;
 #endif /* !DISABLE_PSTM */
 #endif /* _h_PSTMATH */
-
-- 
cgit v1.2.3-55-g6feb


From 6ba1f2d0bcdddb96c43ff9fec34758a4e2b4a467 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 11 Oct 2021 19:28:39 +0200
Subject: tar: prevent malicious archives with enormous long name sizes OOMing
 the machine

function                                             old     new   delta
get_header_tar                                      1707    1752     +45

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 archival/libarchive/get_header_tar.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/archival/libarchive/get_header_tar.c b/archival/libarchive/get_header_tar.c
index a142290ff..d26868bf8 100644
--- a/archival/libarchive/get_header_tar.c
+++ b/archival/libarchive/get_header_tar.c
@@ -147,6 +147,12 @@ static void process_pax_hdr(archive_handle_t *archive_handle, unsigned sz, int g
 #endif
 }
 
+static void die_if_bad_fnamesize(off_t sz)
+{
+	if ((uoff_t)sz > 0xfff) /* more than 4k?! no funny business please */
+		bb_simple_error_msg_and_die("bad archive");
+}
+
 char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 {
 	file_header_t *file_header = archive_handle->file_header;
@@ -331,7 +337,6 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 			file_header->name = xstrdup(tar.name);
 	}
 
-	/* Set bits 12-15 of the files mode */
 	switch (tar_typeflag) {
 	case '1': /* hardlink */
 		/* we mark hardlinks as regular files with zero size and a link name */
@@ -389,7 +394,7 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 		/* free: paranoia: tar with several consecutive longnames */
 		free(p_longname);
 		/* For paranoia reasons we allocate extra NUL char */
-//FIXME: disallow huge sizes:
+		die_if_bad_fnamesize(file_header->size);
 		p_longname = xzalloc(file_header->size + 1);
 		/* We read ASCIZ string, including NUL */
 		xread(archive_handle->src_fd, p_longname, file_header->size);
@@ -400,7 +405,7 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 		goto again;
 	case 'K':
 		free(p_linkname);
-//FIXME: disallow huge sizes:
+		die_if_bad_fnamesize(file_header->size);
 		p_linkname = xzalloc(file_header->size + 1);
 		xread(archive_handle->src_fd, p_linkname, file_header->size);
 		archive_handle->offset += file_header->size;
-- 
cgit v1.2.3-55-g6feb


From f4fcd74a6e3d5cd2ec44cd6ce90be8a0ed108467 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 11 Oct 2021 23:08:31 +0200
Subject: shell: do not read user database for every prompt - only for those
 which need it

function                                             old     new   delta
get_user_strings                                       -      52     +52
get_homedir_or_NULL                                    -      23     +23
parse_and_put_prompt                                 823     838     +15
null_str                                               1       -      -1
complete_cmd_dir_file                                814     812      -2
deinit_S                                              51      42      -9
read_line_input                                     3059    3015     -44
------------------------------------------------------------------------------
(add/remove: 2/1 grow/shrink: 1/3 up/down: 90/-56)             Total: 34 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 libbb/lineedit.c | 73 +++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 46 insertions(+), 27 deletions(-)

diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index 9960448ec..e14c78707 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -135,10 +135,6 @@ enum {
 	              : 0x7ff0
 };
 
-#if ENABLE_USERNAME_OR_HOMEDIR
-static const char null_str[] ALIGN1 = "";
-#endif
-
 /* We try to minimize both static and stack usage. */
 struct lineedit_statics {
 	line_input_t *state;
@@ -161,12 +157,13 @@ struct lineedit_statics {
 
 #if ENABLE_USERNAME_OR_HOMEDIR
 	char *user_buf;
-	char *home_pwd_buf; /* = (char*)null_str; */
+	char *home_pwd_buf;
+	smallint got_user_strings;
 #endif
 
 #if ENABLE_FEATURE_TAB_COMPLETION
-	char **matches;
 	unsigned num_matches;
+	char **matches;
 #endif
 
 #if ENABLE_FEATURE_EDITING_WINCH
@@ -207,8 +204,9 @@ extern struct lineedit_statics *BB_GLOBAL_CONST lineedit_ptr_to_statics;
 #define prompt_last_line (S.prompt_last_line)
 #define user_buf         (S.user_buf        )
 #define home_pwd_buf     (S.home_pwd_buf    )
-#define matches          (S.matches         )
+#define got_user_strings (S.got_user_strings)
 #define num_matches      (S.num_matches     )
+#define matches          (S.matches         )
 #define delptr           (S.delptr          )
 #define newdelflag       (S.newdelflag      )
 #define delbuf           (S.delbuf          )
@@ -226,14 +224,47 @@ static void deinit_S(void)
 #endif
 #if ENABLE_USERNAME_OR_HOMEDIR
 	free(user_buf);
-	if (home_pwd_buf != null_str)
-		free(home_pwd_buf);
+	free(home_pwd_buf);
 #endif
 	free(lineedit_ptr_to_statics);
 }
 #define DEINIT_S() deinit_S()
 
 
+#if ENABLE_USERNAME_OR_HOMEDIR
+/* Call getpwuid() only if necessary.
+ * E.g. if PS1=':', no user database reading is needed to generate prompt.
+ * (Unfortunately, default PS1='\w \$' needs it, \w abbreviates homedir
+ * as ~/... - for that it needs to *know* the homedir...)
+ */
+static void get_user_strings(void)
+{
+	struct passwd *entry;
+
+	got_user_strings = 1;
+	entry = getpwuid(geteuid());
+	if (entry) {
+		user_buf = xstrdup(entry->pw_name);
+		home_pwd_buf = xstrdup(entry->pw_dir);
+	}
+}
+
+static const char *get_username_str(void)
+{
+	if (!got_user_strings)
+		get_user_strings();
+	return user_buf ? user_buf : "";
+	/* btw, bash uses "I have no name!" string if uid has no entry */
+}
+
+static NOINLINE const char *get_homedir_or_NULL(void)
+{
+	if (!got_user_strings)
+		get_user_strings();
+	return home_pwd_buf;
+}
+#endif
+
 #if ENABLE_UNICODE_SUPPORT
 static size_t load_string(const char *src)
 {
@@ -691,11 +722,11 @@ static char *username_path_completion(char *ud)
 {
 	struct passwd *entry;
 	char *tilde_name = ud;
-	char *home = NULL;
+	const char *home = NULL;
 
 	ud++; /* skip ~ */
 	if (*ud == '/') {       /* "~/..." */
-		home = home_pwd_buf;
+		home = get_homedir_or_NULL();
 	} else {
 		/* "~user/..." */
 		ud = strchr(ud, '/');
@@ -1971,7 +2002,7 @@ static void parse_and_put_prompt(const char *prmt_ptr)
 
 				switch (c) {
 				case 'u':
-					pbuf = user_buf ? user_buf : (char*)"";
+					pbuf = (char*)get_username_str();
 					break;
 				case 'H':
 				case 'h':
@@ -1993,14 +2024,15 @@ static void parse_and_put_prompt(const char *prmt_ptr)
 				case 'w': /* current dir */
 				case 'W': /* basename of cur dir */
 					if (!cwd_buf) {
+						const char *home;
 						cwd_buf = xrealloc_getcwd_or_warn(NULL);
 						if (!cwd_buf)
 							cwd_buf = (char *)bb_msg_unknown;
-						else if (home_pwd_buf[0]) {
+						else if ((home = get_homedir_or_NULL()) != NULL && home[0]) {
 							char *after_home_user;
 
 							/* /home/user[/something] -> ~[/something] */
-							after_home_user = is_prefixed_with(cwd_buf, home_pwd_buf);
+							after_home_user = is_prefixed_with(cwd_buf, home);
 							if (after_home_user
 							 && (*after_home_user == '/' || *after_home_user == '\0')
 							) {
@@ -2399,7 +2431,6 @@ int FAST_FUNC read_line_input(line_input_t *st, const char *prompt, char *comman
 	//command_len = 0; - done by INIT_S()
 	//cmdedit_y = 0;  /* quasireal y, not true if line > xt*yt */
 	cmdedit_termw = 80;
-	IF_USERNAME_OR_HOMEDIR(home_pwd_buf = (char*)null_str;)
 	IF_FEATURE_EDITING_VI(delptr = delbuf;)
 
 	n = get_termios_and_make_raw(STDIN_FILENO, &new_settings, &initial_settings, 0
@@ -2459,18 +2490,6 @@ int FAST_FUNC read_line_input(line_input_t *st, const char *prompt, char *comman
 
 	tcsetattr_stdin_TCSANOW(&new_settings);
 
-#if ENABLE_USERNAME_OR_HOMEDIR
-	{
-		struct passwd *entry;
-
-		entry = getpwuid(geteuid());
-		if (entry) {
-			user_buf = xstrdup(entry->pw_name);
-			home_pwd_buf = xstrdup(entry->pw_dir);
-		}
-	}
-#endif
-
 #if 0
 	for (i = 0; i <= state->max_history; i++)
 		bb_error_msg("history[%d]:'%s'", i, state->history[i]);
-- 
cgit v1.2.3-55-g6feb


From 7c3e96d4b3d419d76f97e17d42a4401ee685b7ec Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 12 Oct 2021 01:24:32 +0200
Subject: shell: use more compact SHELL_ASH / HUSH config defines. no code
 changes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 include/libbb.h | 12 ++----------
 shell/hush.c    |  2 +-
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/include/libbb.h b/include/libbb.h
index 2a0b272c6..a48782832 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1502,16 +1502,8 @@ int scripted_main(int argc, char** argv) MAIN_EXTERNALLY_VISIBLE;
 
 /* Applets which are useful from another applets */
 int bb_cat(char** argv) FAST_FUNC;
-int ash_main(int argc, char** argv)
-#if ENABLE_ASH || ENABLE_SH_IS_ASH || ENABLE_BASH_IS_ASH
-		MAIN_EXTERNALLY_VISIBLE
-#endif
-;
-int hush_main(int argc, char** argv)
-#if ENABLE_HUSH || ENABLE_SH_IS_HUSH || ENABLE_BASH_IS_HUSH
-		MAIN_EXTERNALLY_VISIBLE
-#endif
-;
+int ash_main(int argc, char** argv) IF_SHELL_ASH(MAIN_EXTERNALLY_VISIBLE);
+int hush_main(int argc, char** argv) IF_SHELL_HUSH(MAIN_EXTERNALLY_VISIBLE);
 /* If shell needs them, they exist even if not enabled as applets */
 int echo_main(int argc, char** argv) IF_ECHO(MAIN_EXTERNALLY_VISIBLE);
 int printf_main(int argc, char **argv) IF_PRINTF(MAIN_EXTERNALLY_VISIBLE);
diff --git a/shell/hush.c b/shell/hush.c
index 7156297cf..38d924a3f 100644
--- a/shell/hush.c
+++ b/shell/hush.c
@@ -373,7 +373,7 @@
 # define F_DUPFD_CLOEXEC F_DUPFD
 #endif
 
-#if ENABLE_FEATURE_SH_EMBEDDED_SCRIPTS && !(ENABLE_ASH || ENABLE_SH_IS_ASH || ENABLE_BASH_IS_ASH)
+#if ENABLE_FEATURE_SH_EMBEDDED_SCRIPTS && !ENABLE_SHELL_ASH
 # include "embedded_scripts.h"
 #else
 # define NUM_SCRIPTS 0
-- 
cgit v1.2.3-55-g6feb


From 94c78aa0b91f2150bd038866addf3d0ee69474a8 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 12 Oct 2021 13:23:29 +0200
Subject: config system: move some options closer to relevant tool
 subdirectories

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 coreutils/Config.src  |  19 ++++--
 coreutils/df.c        |  20 ++++++
 libbb/Config.src      | 185 +++++++++++++++++---------------------------------
 mailutils/Config.src  |   4 +-
 networking/Config.src |  26 +++++++
 procps/Config.src     |   9 ++-
 6 files changed, 132 insertions(+), 131 deletions(-)

diff --git a/coreutils/Config.src b/coreutils/Config.src
index 1bded03a6..6c9e47551 100644
--- a/coreutils/Config.src
+++ b/coreutils/Config.src
@@ -5,10 +5,6 @@
 
 menu "Coreutils"
 
-INSERT
-
-comment "Common options"
-
 config FEATURE_VERBOSE
 	bool "Support verbose options (usually -v) for various applets"
 	default y
@@ -17,6 +13,19 @@ config FEATURE_VERBOSE
 	Also enables long option (--verbose) if it exists.
 	Without this option, -v is accepted but ignored.
 
+comment "Common options for date and touch"
+
+config FEATURE_TIMEZONE
+	bool "Allow timezone in dates"
+	default y
+	depends on DESKTOP
+	help
+	Permit the use of timezones when parsing user-provided data
+	strings, e.g. '1996-04-09 12:45:00 -0500'.
+
+	This requires support for the '%z' extension to strptime() which
+	may not be available in all implementations.
+
 comment "Common options for cp and mv"
 	depends on CP || MV
 
@@ -37,4 +46,6 @@ config FEATURE_HUMAN_READABLE
 	help
 	Allow df, du, and ls to have human readable output.
 
+INSERT
+
 endmenu
diff --git a/coreutils/df.c b/coreutils/df.c
index 9f8b3a71e..176aa079f 100644
--- a/coreutils/df.c
+++ b/coreutils/df.c
@@ -32,6 +32,26 @@
 //config:	-a Show all filesystems
 //config:	-i Inodes
 //config:	-B <SIZE> Blocksize
+//config:
+//config:config FEATURE_SKIP_ROOTFS
+//config:	bool "Skip rootfs in mount table"
+//config:	default y
+//config:	depends on DF
+//config:	help
+//config:	Ignore rootfs entry in mount table.
+//config:
+//config:	In Linux, kernel has a special filesystem, rootfs, which is initially
+//config:	mounted on /. It contains initramfs data, if kernel is configured
+//config:	to have one. Usually, another file system is mounted over / early
+//config:	in boot process, and therefore most tools which manipulate
+//config:	mount table, such as df, will skip rootfs entry.
+//config:
+//config:	However, some systems do not mount anything on /.
+//config:	If you need to configure busybox for one of these systems,
+//config:	you may find it useful to turn this option off to make df show
+//config:	initramfs statistics.
+//config:
+//config:	Otherwise, choose Y.
 
 //applet:IF_DF(APPLET_NOEXEC(df, df, BB_DIR_BIN, BB_SUID_DROP, df))
 
diff --git a/libbb/Config.src b/libbb/Config.src
index 58c5fad50..24b31fad9 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -61,32 +61,73 @@ config SHA3_SMALL
 	64-bit x86: +270 bytes of code, 45% faster
 	32-bit x86: +450 bytes of code, 75% faster
 
-config FEATURE_FAST_TOP
-	bool "Faster /proc scanning code (+100 bytes)"
-	default n  # all "fast or small" options default to small
+config FEATURE_NON_POSIX_CP
+	bool "Non-POSIX, but safer, copying to special nodes"
+	default y
 	help
-	This option makes top and ps ~20% faster (or 20% less CPU hungry),
-	but code size is slightly bigger.
+	With this option, "cp file symlink" will delete symlink
+	and create a regular file. This does not conform to POSIX,
+	but prevents a symlink attack.
+	Similarly, "cp file device" will not send file's data
+	to the device. (To do that, use "cat file >device")
 
-config FEATURE_ETC_NETWORKS
-	bool "Support /etc/networks"
+config FEATURE_VERBOSE_CP_MESSAGE
+	bool "Give more precise messages when copy fails (cp, mv etc)"
 	default n
 	help
-	Enable support for network names in /etc/networks. This is
-	a rarely used feature which allows you to use names
-	instead of IP/mask pairs in route command.
+	Error messages with this feature enabled:
 
-config FEATURE_ETC_SERVICES
-	bool "Consult /etc/services even for well-known ports"
-	default n
+	$ cp file /does_not_exist/file
+	cp: cannot create '/does_not_exist/file': Path does not exist
+	$ cp file /vmlinuz/file
+	cp: cannot stat '/vmlinuz/file': Path has non-directory component
+
+	If this feature is not enabled, they will be, respectively:
+
+	cp: cannot create '/does_not_exist/file': No such file or directory
+	cp: cannot stat '/vmlinuz/file': Not a directory
+
+	This will cost you ~60 bytes.
+
+config FEATURE_USE_SENDFILE
+	bool "Use sendfile system call"
+	default y
+	help
+	When enabled, busybox will use the kernel sendfile() function
+	instead of read/write loops to copy data between file descriptors
+	(for example, cp command does this a lot).
+	If sendfile() doesn't work, copying code falls back to read/write
+	loop. sendfile() was originally implemented for faster I/O
+	from files to sockets, but since Linux 2.6.33 it was extended
+	to work for many more file types.
+
+config FEATURE_COPYBUF_KB
+	int "Copy buffer size, in kilobytes"
+	range 1 1024
+	default 4
+	help
+	Size of buffer used by cp, mv, install, wget etc.
+	Buffers which are 4 kb or less will be allocated on stack.
+	Bigger buffers will be allocated with mmap, with fallback to 4 kb
+	stack buffer if mmap fails.
+
+config MONOTONIC_SYSCALL
+	bool "Use clock_gettime(CLOCK_MONOTONIC) syscall"
+	default y
+	help
+	Use clock_gettime(CLOCK_MONOTONIC) syscall for measuring
+	time intervals (time, ping, traceroute etc need this).
+	Probably requires Linux 2.6+. If not selected, gettimeofday
+	will be used instead (which gives wrong results if date/time
+	is reset).
+
+config IOCTL_HEX2STR_ERROR
+	bool "Use ioctl names rather than hex values in error messages"
+	default y
 	help
-	Look up e.g. "telnet" and "http" in /etc/services file
-	instead of assuming ports 23 and 80.
-	This is almost never necessary (everybody uses standard ports),
-	and it makes sense to avoid reading this file.
-	If you disable this option, in the cases where port is explicitly
-	specified as a service name (e.g. "telnet HOST PORTNAME"),
-	it will still be looked up in /etc/services.
+	Use ioctl names rather than hex values in error messages
+	(e.g. VT_DISALLOCATE rather than 0x5608). If disabled this
+	saves about 1400 bytes.
 
 config FEATURE_EDITING
 	bool "Command line editing"
@@ -302,107 +343,3 @@ config UNICODE_PRESERVE_BROKEN
 	For example, this means that entering 'l', 's', ' ', 0xff, [Enter]
 	at shell prompt will list file named 0xff (single char name
 	with char value 255), not file named '?'.
-
-config FEATURE_NON_POSIX_CP
-	bool "Non-POSIX, but safer, copying to special nodes"
-	default y
-	help
-	With this option, "cp file symlink" will delete symlink
-	and create a regular file. This does not conform to POSIX,
-	but prevents a symlink attack.
-	Similarly, "cp file device" will not send file's data
-	to the device. (To do that, use "cat file >device")
-
-config FEATURE_VERBOSE_CP_MESSAGE
-	bool "Give more precise messages when copy fails (cp, mv etc)"
-	default n
-	help
-	Error messages with this feature enabled:
-
-	$ cp file /does_not_exist/file
-	cp: cannot create '/does_not_exist/file': Path does not exist
-	$ cp file /vmlinuz/file
-	cp: cannot stat '/vmlinuz/file': Path has non-directory component
-
-	If this feature is not enabled, they will be, respectively:
-
-	cp: cannot create '/does_not_exist/file': No such file or directory
-	cp: cannot stat '/vmlinuz/file': Not a directory
-
-	This will cost you ~60 bytes.
-
-config FEATURE_USE_SENDFILE
-	bool "Use sendfile system call"
-	default y
-	help
-	When enabled, busybox will use the kernel sendfile() function
-	instead of read/write loops to copy data between file descriptors
-	(for example, cp command does this a lot).
-	If sendfile() doesn't work, copying code falls back to read/write
-	loop. sendfile() was originally implemented for faster I/O
-	from files to sockets, but since Linux 2.6.33 it was extended
-	to work for many more file types.
-
-config FEATURE_COPYBUF_KB
-	int "Copy buffer size, in kilobytes"
-	range 1 1024
-	default 4
-	help
-	Size of buffer used by cp, mv, install, wget etc.
-	Buffers which are 4 kb or less will be allocated on stack.
-	Bigger buffers will be allocated with mmap, with fallback to 4 kb
-	stack buffer if mmap fails.
-
-config FEATURE_SKIP_ROOTFS
-	bool "Skip rootfs in mount table"
-	default y
-	help
-	Ignore rootfs entry in mount table.
-
-	In Linux, kernel has a special filesystem, rootfs, which is initially
-	mounted on /. It contains initramfs data, if kernel is configured
-	to have one. Usually, another file system is mounted over / early
-	in boot process, and therefore most tools which manipulate
-	mount table, such as df, will skip rootfs entry.
-
-	However, some systems do not mount anything on /.
-	If you need to configure busybox for one of these systems,
-	you may find it useful to turn this option off to make df show
-	initramfs statistics.
-
-	Otherwise, choose Y.
-
-config MONOTONIC_SYSCALL
-	bool "Use clock_gettime(CLOCK_MONOTONIC) syscall"
-	default y
-	help
-	Use clock_gettime(CLOCK_MONOTONIC) syscall for measuring
-	time intervals (time, ping, traceroute etc need this).
-	Probably requires Linux 2.6+. If not selected, gettimeofday
-	will be used instead (which gives wrong results if date/time
-	is reset).
-
-config IOCTL_HEX2STR_ERROR
-	bool "Use ioctl names rather than hex values in error messages"
-	default y
-	help
-	Use ioctl names rather than hex values in error messages
-	(e.g. VT_DISALLOCATE rather than 0x5608). If disabled this
-	saves about 1400 bytes.
-
-config FEATURE_HWIB
-	bool "Support infiniband HW"
-	default y
-	help
-	Support for printing infiniband addresses in network applets.
-
-config FEATURE_TIMEZONE
-	bool "Allow timezone in dates"
-	default y
-	depends on DESKTOP
-	help
-	Permit the use of timezones when parsing user-provided data
-	strings, e.g. '1996-04-09 12:45:00 -0500'.
-
-	This requires support for the '%z' extension to strptime() which
-	may not be available in all implementations.
diff --git a/mailutils/Config.src b/mailutils/Config.src
index 6d47163e4..b3a3e506d 100644
--- a/mailutils/Config.src
+++ b/mailutils/Config.src
@@ -1,7 +1,5 @@
 menu "Mail Utilities"
 
-INSERT
-
 config FEATURE_MIME_CHARSET
 	string "Default charset"
 	default "us-ascii"
@@ -9,4 +7,6 @@ config FEATURE_MIME_CHARSET
 	help
 	Default charset of the message.
 
+INSERT
+
 endmenu
diff --git a/networking/Config.src b/networking/Config.src
index 04d644bc9..0942645c3 100644
--- a/networking/Config.src
+++ b/networking/Config.src
@@ -46,6 +46,32 @@ config VERBOSE_RESOLUTION_ERRORS
 	"can't resolve 'hostname.com'" and want to know more.
 	This may increase size of your executable a bit.
 
+config FEATURE_ETC_NETWORKS
+	bool "Support /etc/networks"
+	default n
+	help
+	Enable support for network names in /etc/networks. This is
+	a rarely used feature which allows you to use names
+	instead of IP/mask pairs in route command.
+
+config FEATURE_ETC_SERVICES
+	bool "Consult /etc/services even for well-known ports"
+	default n
+	help
+	Look up e.g. "telnet" and "http" in /etc/services file
+	instead of assuming ports 23 and 80.
+	This is almost never necessary (everybody uses standard ports),
+	and it makes sense to avoid reading this file.
+	If you disable this option, in the cases where port is explicitly
+	specified as a service name (e.g. "telnet HOST PORTNAME"),
+	it will still be looked up in /etc/services.
+
+config FEATURE_HWIB
+	bool "Support infiniband HW"
+	default y
+	help
+	Support for printing infiniband addresses in network applets.
+
 config FEATURE_TLS_SHA1
 	bool "In TLS code, support ciphers which use deprecated SHA1"
 	depends on TLS
diff --git a/procps/Config.src b/procps/Config.src
index 2b1b8ab11..7fcce98c5 100644
--- a/procps/Config.src
+++ b/procps/Config.src
@@ -5,7 +5,12 @@
 
 menu "Process Utilities"
 
-INSERT
+config FEATURE_FAST_TOP
+	bool "Faster /proc scanning code (+100 bytes)"
+	default n  # all "fast or small" options default to small
+	help
+	This option makes top and ps ~20% faster (or 20% less CPU hungry),
+	but code size is slightly bigger.
 
 config FEATURE_SHOW_THREADS
 	bool "Support thread display in ps/pstree/top"
@@ -15,4 +20,6 @@ config FEATURE_SHOW_THREADS
 	Enables the ps -T option, showing of threads in pstree,
 	and 'h' command in top.
 
+INSERT
+
 endmenu
-- 
cgit v1.2.3-55-g6feb


From c33bbcb92fc255e4bb058e64874289cdeb0701f9 Mon Sep 17 00:00:00 2001
From: Sören Tempel <soeren+git@soeren-tempel.net>
Date: Sat, 17 Jul 2021 21:45:40 +0200
Subject: ed: align output of read/write commands with POSIX.1-2008
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

POSIX.1-2008 mandates the following regarding the write command:

	If the command is successful, the number of bytes written shall
	be written to standard output, unless the -s option was
	specified, in the following format:

		"%d\n", <number of bytes written>

function                                             old     new   delta
readLines                                            447     409     -38
doCommands                                          1940    1889     -51
.rodata                                           104219  104163     -56
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-145)           Total: -145 bytes

Signed-off-by: Sören Tempel <soeren+git@soeren-tempel.net>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 editors/ed.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/editors/ed.c b/editors/ed.c
index c50faeefa..14540e566 100644
--- a/editors/ed.c
+++ b/editors/ed.c
@@ -380,7 +380,8 @@ static void addLines(int num)
 static int readLines(const char *file, int num)
 {
 	int fd, cc;
-	int len, lineCount, charCount;
+	int len;
+	unsigned charCount;
 	char *cp;
 
 	if ((num < 1) || (num > lastNum + 1)) {
@@ -396,7 +397,6 @@ static int readLines(const char *file, int num)
 
 	bufPtr = bufBase;
 	bufUsed = 0;
-	lineCount = 0;
 	charCount = 0;
 	cc = 0;
 
@@ -415,7 +415,6 @@ static int readLines(const char *file, int num)
 			bufPtr += len;
 			bufUsed -= len;
 			charCount += len;
-			lineCount++;
 			num++;
 			continue;
 		}
@@ -449,15 +448,18 @@ static int readLines(const char *file, int num)
 			close(fd);
 			return -1;
 		}
-		lineCount++;
 		charCount += bufUsed;
 	}
 
 	close(fd);
 
-	printf("%d lines%s, %d chars\n", lineCount,
-		(bufUsed ? " (incomplete)" : ""), charCount);
-
+	/* https://pubs.opengroup.org/onlinepubs/9699919799/utilities/ed.html
+	 * "Read Command"
+	 * "...the number of bytes read shall be written to standard output
+	 * in the following format:
+	 * "%d\n", <number of bytes read>
+	 */
+	printf("%u\n", charCount);
 	return TRUE;
 }
 
@@ -468,12 +470,12 @@ static int readLines(const char *file, int num)
 static int writeLines(const char *file, int num1, int num2)
 {
 	LINE *lp;
-	int fd, lineCount, charCount;
+	int fd;
+	unsigned charCount;
 
 	if (bad_nums(num1, num2, "write"))
 		return FALSE;
 
-	lineCount = 0;
 	charCount = 0;
 
 	fd = creat(file, 0666);
@@ -482,9 +484,6 @@ static int writeLines(const char *file, int num1, int num2)
 		return FALSE;
 	}
 
-	printf("\"%s\", ", file);
-	fflush_all();
-
 	lp = findLine(num1);
 	if (lp == NULL) {
 		close(fd);
@@ -498,7 +497,6 @@ static int writeLines(const char *file, int num1, int num2)
 			return FALSE;
 		}
 		charCount += lp->len;
-		lineCount++;
 		lp = lp->next;
 	}
 
@@ -507,7 +505,13 @@ static int writeLines(const char *file, int num1, int num2)
 		return FALSE;
 	}
 
-	printf("%d lines, %d chars\n", lineCount, charCount);
+	/* https://pubs.opengroup.org/onlinepubs/9699919799/utilities/ed.html
+	 * "Write Command"
+	 * "...the number of bytes written shall be written to standard output,
+	 * unless the -s option was specified, in the following format:
+	 * "%d\n", <number of bytes written>
+	 */
+	printf("%u\n", charCount);
 	return TRUE;
 }
 
-- 
cgit v1.2.3-55-g6feb


From 62f1eed1e1916afbff8f9ce3f820626348e8b867 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 12 Oct 2021 22:39:11 +0200
Subject: hush: in a comment, document what -i might be doing

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 shell/ash.c  |  5 ++---
 shell/hush.c | 29 ++++++++++++++++++++++++++---
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/shell/ash.c b/shell/ash.c
index 2d3cc8a61..1982a24b7 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -14657,11 +14657,10 @@ int ash_main(int argc UNUSED_PARAM, char **argv)
 	}
  state2:
 	state = 3;
-	if (
+	if (iflag
 #ifndef linux
-	 getuid() == geteuid() && getgid() == getegid() &&
+	 && getuid() == geteuid() && getgid() == getegid()
 #endif
-	 iflag
 	) {
 		const char *shinit = lookupvar("ENV");
 		if (shinit != NULL && *shinit != '\0')
diff --git a/shell/hush.c b/shell/hush.c
index 38d924a3f..3a17f5bd5 100644
--- a/shell/hush.c
+++ b/shell/hush.c
@@ -3360,7 +3360,7 @@ static int glob_brace(char *pattern, o_string *o, int n)
 	 * NEXT points past the terminator of the first element, and REST
 	 * points past the final }.  We will accumulate result names from
 	 * recursive runs for each brace alternative in the buffer using
-	 * GLOB_APPEND.  */
+	 * GLOB_APPEND. */
 
 	p = begin + 1;
 	while (1) {
@@ -10225,7 +10225,7 @@ int hush_main(int argc, char **argv)
 
 	cached_getpid = getpid();   /* for tcsetpgrp() during init */
 	G.root_pid = cached_getpid; /* for $PID  (NOMMU can override via -$HEXPID:HEXPPID:...) */
-	G.root_ppid = getppid();    /* for $PPID (NOMMU can override)  */
+	G.root_ppid = getppid();    /* for $PPID (NOMMU can override) */
 
 	/* Deal with HUSH_VERSION */
 	debug_printf_env("unsetenv '%s'\n", "HUSH_VERSION");
@@ -10356,6 +10356,29 @@ int hush_main(int argc, char **argv)
 			/* Well, we cannot just declare interactiveness,
 			 * we have to have some stuff (ctty, etc) */
 			/* G_interactive_fd++; */
+//There are a few cases where bash -i -c 'SCRIPT'
+//has visible effect (differs from bash -c 'SCRIPT'):
+//it ignores TERM:
+//	bash -i -c 'kill $$; echo ALIVE'
+//	ALIVE
+//it resets SIG_INGed HUP to SIG_DFL:
+//	trap '' hup; bash -i -c 'kill -hup $$; echo ALIVE'
+//	Hangup   [the message is not printed by bash, it's the shell which started it]
+//is talkative about jobs and exiting:
+//	bash -i -c 'sleep 1 & exit'
+//	[1] 16170
+//	exit
+//includes $ENV file (only if run as "sh"):
+//	echo last >/tmp/ENV; ENV=/tmp/ENV sh -i -c 'echo HERE'
+//	last: cannot open /var/log/wtmp: No such file or directory
+//	HERE
+//(under "bash", it's the opposite: it runs $BASH_ENV file only *without* -i).
+//
+//ash -i -c 'sleep 3; sleep 3', on ^C, drops into a prompt instead of exiting
+//(this may be a bug, bash does not do this).
+//(ash -i -c 'sleep 3' won't show this, the last command gets auto-"exec"ed)
+//
+//None of the above feel like useful features people would rely on.
 			break;
 		case 's':
 			G.opt_s = 1;
@@ -11732,7 +11755,7 @@ static int FAST_FUNC builtin_fg_bg(char **argv)
 	/* TODO: bash prints a string representation
 	 * of job being foregrounded (like "sleep 1 | cat") */
 	if (argv[0][0] == 'f' && G_saved_tty_pgrp) {
-		/* Put the job into the foreground.  */
+		/* Put the job into the foreground. */
 		tcsetpgrp(G_interactive_fd, pi->pgrp);
 	}
 
-- 
cgit v1.2.3-55-g6feb


From aaf3d5ba74c5da97ff80b61f30cb8dd225d39096 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 13 Oct 2021 11:15:52 +0200
Subject: shell: tweak --help

Even though formally it is -s [ARGS], "sh -s" without ARGS
is the same as just "sh". And we are already over 80 chars wide
for ash --help, so make it shorter.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 shell/ash.c  | 2 +-
 shell/hush.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/shell/ash.c b/shell/ash.c
index 1982a24b7..827643808 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -14455,7 +14455,7 @@ init(void)
 
 
 //usage:#define ash_trivial_usage
-//usage:	"[-il] [-|+Cabefmnuvx] [-|+o OPT]... [-c 'SCRIPT' [ARG0 ARGS] | FILE [ARGS] | -s [ARGS]]"
+//usage:	"[-il] [-|+Cabefmnuvx] [-|+o OPT]... [-c 'SCRIPT' [ARG0 ARGS] | FILE ARGS | -s ARGS]"
 ////////	comes from ^^^^^^^^^^optletters
 //usage:#define ash_full_usage "\n\n"
 //usage:	"Unix shell interpreter"
diff --git a/shell/hush.c b/shell/hush.c
index 3a17f5bd5..6a27b1634 100644
--- a/shell/hush.c
+++ b/shell/hush.c
@@ -339,7 +339,7 @@
  * therefore we don't show them either.
  */
 //usage:#define hush_trivial_usage
-//usage:	"[-enxl] [-c 'SCRIPT' [ARG0 ARGS] | FILE [ARGS] | -s [ARGS]]"
+//usage:	"[-enxl] [-c 'SCRIPT' [ARG0 ARGS] | FILE ARGS | -s ARGS]"
 //usage:#define hush_full_usage "\n\n"
 //usage:	"Unix shell interpreter"
 
-- 
cgit v1.2.3-55-g6feb