From 8aa5585ff4974b8f7ed71d684af48432b2bc6929 Mon Sep 17 00:00:00 2001
From: Ariadne Conill <ariadne@dereferenced.org>
Date: Mon, 28 Jun 2021 08:25:59 -0600
Subject: cpio: add support for --ignore-devno like GNU cpio

The --ignore-devno option sets the device numbers written to the archive
(both st_dev and st_rdev) to (0, 0).
This can be useful in verifying whether a CPIO archive is reproducible.

function                                             old     new   delta
cpio_o                                               922     961     +39
.rodata                                            78407   78422     +15
bbconfig_config_bz2                                 6161    6167      +6
packed_usage                                       25770   25764      -6
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/1 up/down: 60/-6)              Total: 54 bytes

Signed-off-by: Ariadne Conill <ariadne@dereferenced.org>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 archival/cpio.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/archival/cpio.c b/archival/cpio.c
index f525419b8..daf6cffc9 100644
--- a/archival/cpio.c
+++ b/archival/cpio.c
@@ -38,6 +38,13 @@
 //config:	depends on FEATURE_CPIO_O
 //config:	help
 //config:	Passthrough mode. Rarely used.
+//config:
+//config:config FEATURE_CPIO_IGNORE_DEVNO
+//config:	bool "Support --ignore-devno like GNU cpio"
+//config:	default y
+//config:	depends on FEATURE_CPIO_O && LONG_OPTS
+//config:	help
+//config:	Optionally ignore device numbers when creating archives.
 
 //applet:IF_CPIO(APPLET(cpio, BB_DIR_BIN, BB_SUID_DROP))
 
@@ -75,6 +82,9 @@
 //usage:     "\n	-R USER[:GRP]	Set owner of created files"
 //usage:     "\n	-L	Dereference symlinks"
 //usage:     "\n	-0	NUL terminated input"
+//usage:	IF_FEATURE_CPIO_IGNORE_DEVNO(
+//usage:     "\n	--ignore-devno"
+//usage:	)
 
 /* GNU cpio 2.9 --help (abridged):
 
@@ -162,11 +172,13 @@ enum {
 	IF_FEATURE_CPIO_P(OPTBIT_PASSTHROUGH,)
 	IF_LONG_OPTS(     OPTBIT_QUIET      ,)
 	IF_LONG_OPTS(     OPTBIT_2STDOUT    ,)
+	IF_FEATURE_CPIO_IGNORE_DEVNO(OPTBIT_IGNORE_DEVNO,)
 	OPT_CREATE             = IF_FEATURE_CPIO_O((1 << OPTBIT_CREATE     )) + 0,
 	OPT_FORMAT             = IF_FEATURE_CPIO_O((1 << OPTBIT_FORMAT     )) + 0,
 	OPT_PASSTHROUGH        = IF_FEATURE_CPIO_P((1 << OPTBIT_PASSTHROUGH)) + 0,
 	OPT_QUIET              = IF_LONG_OPTS(     (1 << OPTBIT_QUIET      )) + 0,
 	OPT_2STDOUT            = IF_LONG_OPTS(     (1 << OPTBIT_2STDOUT    )) + 0,
+	OPT_IGNORE_DEVNO       = IF_FEATURE_CPIO_IGNORE_DEVNO((1 << OPTBIT_IGNORE_DEVNO)) + 0,
 };
 
 #define OPTION_STR "it0uvdmLF:R:"
@@ -304,6 +316,11 @@ static NOINLINE int cpio_o(void)
 			}
 		}
 
+#if ENABLE_FEATURE_CPIO_IGNORE_DEVNO
+		if (option_mask32 & OPT_IGNORE_DEVNO)
+			st.st_dev = st.st_rdev = 0;
+#endif
+
 		bytes += printf("070701"
 				"%08X%08X%08X%08X%08X%08X%08X"
 				"%08X%08X%08X%08X" /* GNU cpio uses uppercase hex */
@@ -379,6 +396,9 @@ int cpio_main(int argc UNUSED_PARAM, char **argv)
 		"null\0"         No_argument       "0"
 		"quiet\0"        No_argument       "\xff"
 		"to-stdout\0"    No_argument       "\xfe"
+#if ENABLE_FEATURE_CPIO_IGNORE_DEVNO
+		"ignore-devno\0" No_argument	   "\xfd"
+#endif
 		;
 #endif
 
-- 
cgit v1.2.3-55-g6feb


From 836b79211df3aeaba1b8b65c6db5ee6193172cc0 Mon Sep 17 00:00:00 2001
From: Ariadne Conill <ariadne@dereferenced.org>
Date: Mon, 28 Jun 2021 08:31:23 -0600
Subject: cpio: add support for --renumber-inodes like GNU cpio

The --renumber-inodes option renumbers the inodes starting from 1,
so that the sequence of inodes is always stable.  This helps with
reproducibility.

function                                             old     new   delta
cpio_o                                               961    1045     +84
.rodata                                            78422   78440     +18
bbconfig_config_bz2                                 6168    6164      -4
packed_usage                                       25764   25756      -8
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/2 up/down: 102/-12)            Total: 90 bytes

Signed-off-by: Ariadne Conill <ariadne@dereferenced.org>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 archival/cpio.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/archival/cpio.c b/archival/cpio.c
index daf6cffc9..7149782d7 100644
--- a/archival/cpio.c
+++ b/archival/cpio.c
@@ -45,6 +45,13 @@
 //config:	depends on FEATURE_CPIO_O && LONG_OPTS
 //config:	help
 //config:	Optionally ignore device numbers when creating archives.
+//config:
+//config:config FEATURE_CPIO_RENUMBER_INODES
+//config:	bool "Support --renumber-inodes like GNU cpio"
+//config:	default y
+//config:	depends on FEATURE_CPIO_O && LONG_OPTS
+//config:	help
+//config:	Optionally renumber inodes when creating archives.
 
 //applet:IF_CPIO(APPLET(cpio, BB_DIR_BIN, BB_SUID_DROP))
 
@@ -85,6 +92,9 @@
 //usage:	IF_FEATURE_CPIO_IGNORE_DEVNO(
 //usage:     "\n	--ignore-devno"
 //usage:	)
+//usage:	IF_FEATURE_CPIO_RENUMBER_INODES(
+//usage:     "\n	--renumber-inodes"
+//usage:	)
 
 /* GNU cpio 2.9 --help (abridged):
 
@@ -173,18 +183,21 @@ enum {
 	IF_LONG_OPTS(     OPTBIT_QUIET      ,)
 	IF_LONG_OPTS(     OPTBIT_2STDOUT    ,)
 	IF_FEATURE_CPIO_IGNORE_DEVNO(OPTBIT_IGNORE_DEVNO,)
+	IF_FEATURE_CPIO_RENUMBER_INODES(OPTBIT_RENUMBER_INODES,)
 	OPT_CREATE             = IF_FEATURE_CPIO_O((1 << OPTBIT_CREATE     )) + 0,
 	OPT_FORMAT             = IF_FEATURE_CPIO_O((1 << OPTBIT_FORMAT     )) + 0,
 	OPT_PASSTHROUGH        = IF_FEATURE_CPIO_P((1 << OPTBIT_PASSTHROUGH)) + 0,
 	OPT_QUIET              = IF_LONG_OPTS(     (1 << OPTBIT_QUIET      )) + 0,
 	OPT_2STDOUT            = IF_LONG_OPTS(     (1 << OPTBIT_2STDOUT    )) + 0,
 	OPT_IGNORE_DEVNO       = IF_FEATURE_CPIO_IGNORE_DEVNO((1 << OPTBIT_IGNORE_DEVNO)) + 0,
+	OPT_RENUMBER_INODES    = IF_FEATURE_CPIO_RENUMBER_INODES((1 << OPTBIT_RENUMBER_INODES)) + 0,
 };
 
 #define OPTION_STR "it0uvdmLF:R:"
 
 struct globals {
 	struct bb_uidgid_t owner_ugid;
+	ino_t next_inode;
 } FIX_ALIASING;
 #define G (*(struct globals*)bb_common_bufsiz1)
 void BUG_cpio_globals_too_big(void);
@@ -218,6 +231,9 @@ static NOINLINE int cpio_o(void)
 		struct inodes_s *next;
 		struct name_s *names;
 		struct stat st;
+#if ENABLE_FEATURE_CPIO_RENUMBER_INODES
+		ino_t mapped_inode;
+#endif
 	};
 
 	struct inodes_s *links = NULL;
@@ -272,6 +288,10 @@ static NOINLINE int cpio_o(void)
 						l = xzalloc(sizeof(*l));
 						l->st = st;
 						l->next = links;
+#if ENABLE_FEATURE_CPIO_RENUMBER_INODES
+						if (option_mask32 & OPT_RENUMBER_INODES)
+							l->mapped_inode = ++G.next_inode;
+#endif
 						links = l;
 						break;
 					}
@@ -290,6 +310,11 @@ static NOINLINE int cpio_o(void)
 				free(line);
 				continue;
 			}
+#if ENABLE_FEATURE_CPIO_RENUMBER_INODES
+			else if (option_mask32 & OPT_RENUMBER_INODES) {
+				st.st_ino = ++G.next_inode;
+			}
+#endif
 		} else { /* line == NULL: EOF */
  next_link:
 			if (links) {
@@ -297,6 +322,10 @@ static NOINLINE int cpio_o(void)
 				st = links->st;
 				name = links->names->name;
 				links->names = links->names->next;
+#if ENABLE_FEATURE_CPIO_RENUMBER_INODES
+				if (links->mapped_inode)
+					st.st_ino = links->mapped_inode;
+#endif
 				/* GNU cpio is reported to emit file data
 				 * only for the last instance. Mimic that. */
 				if (links->names == NULL)
@@ -398,6 +427,9 @@ int cpio_main(int argc UNUSED_PARAM, char **argv)
 		"to-stdout\0"    No_argument       "\xfe"
 #if ENABLE_FEATURE_CPIO_IGNORE_DEVNO
 		"ignore-devno\0" No_argument	   "\xfd"
+#endif
+#if ENABLE_FEATURE_CPIO_RENUMBER_INODES
+		"renumber-inodes\0" No_argument    "\xfc"
 #endif
 		;
 #endif
-- 
cgit v1.2.3-55-g6feb


From 15f7d618ea7f8c3a0277c98309268b709e20d77c Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 9 Nov 2021 13:51:22 +0100
Subject: which: add -a to help text

function                                             old     new   delta
packed_usage                                       34075   34079      +4

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 debianutils/which.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/debianutils/which.c b/debianutils/which.c
index b9f1b92fd..23692dc6f 100644
--- a/debianutils/which.c
+++ b/debianutils/which.c
@@ -17,9 +17,10 @@
 //kbuild:lib-$(CONFIG_WHICH) += which.o
 
 //usage:#define which_trivial_usage
-//usage:       "COMMAND..."
+//usage:       "[-a] COMMAND..."
 //usage:#define which_full_usage "\n\n"
-//usage:       "Locate COMMAND"
+//usage:       "Locate COMMAND\n"
+//usage:     "\n	-a	Show all matches"
 //usage:
 //usage:#define which_example_usage
 //usage:       "$ which login\n"
-- 
cgit v1.2.3-55-g6feb


From 4bc9da10718df7ed9e992b1ddd2e80d53d894177 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 Nov 2021 11:28:11 +0100
Subject: tls: P256: 64-bit optimizations

function                                             old     new   delta
sp_256_proj_point_dbl_8                              421     428      +7
sp_256_point_from_bin2x32                             78      84      +6
sp_256_cmp_8                                          38      42      +4
sp_256_to_bin_8                                       28      31      +3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/0 up/down: 20/0)               Total: 20 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 include/platform.h      |   2 +
 networking/tls_sp_c32.c | 114 +++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 101 insertions(+), 15 deletions(-)

diff --git a/include/platform.h b/include/platform.h
index 9e1fb047d..ad27bb31a 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -239,6 +239,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
 # define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp))
 # define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p))
 # define move_from_unaligned32(v, u32p) ((v) = *(bb__aliased_uint32_t*)(u32p))
+# define move_from_unaligned64(v, u64p) ((v) = *(bb__aliased_uint64_t*)(u64p))
 # define move_to_unaligned16(u16p, v)   (*(bb__aliased_uint16_t*)(u16p) = (v))
 # define move_to_unaligned32(u32p, v)   (*(bb__aliased_uint32_t*)(u32p) = (v))
 # define move_to_unaligned64(u64p, v)   (*(bb__aliased_uint64_t*)(u64p) = (v))
@@ -250,6 +251,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
 # define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long)))
 # define move_from_unaligned16(v, u16p) (memcpy(&(v), (u16p), 2))
 # define move_from_unaligned32(v, u32p) (memcpy(&(v), (u32p), 4))
+# define move_from_unaligned64(v, u64p) (memcpy(&(v), (u64p), 8))
 # define move_to_unaligned16(u16p, v) do { \
 	uint16_t __t = (v); \
 	memcpy((u16p), &__t, 2); \
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 4d4ecdd74..d09f7e881 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -29,6 +29,20 @@ static void dump_hex(const char *fmt, const void *vp, int len)
 typedef uint32_t sp_digit;
 typedef int32_t signed_sp_digit;
 
+/* 64-bit optimizations:
+ * if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff,
+ * then loads and stores can be done in 64-bit chunks.
+ *
+ * A narrower case is when arch is also little-endian (such as x86_64),
+ * then "LSW first", uint32[8] and uint64[4] representations are equivalent,
+ * and arithmetic can be done in 64 bits too.
+ */
+#if defined(__GNUC__) && defined(__x86_64__)
+# define UNALIGNED_LE_64BIT 1
+#else
+# define UNALIGNED_LE_64BIT 0
+#endif
+
 /* The code below is taken from parts of
  *  wolfssl-3.15.3/wolfcrypt/src/sp_c32.c
  * and heavily modified.
@@ -58,6 +72,22 @@ static const sp_digit p256_mod[8] = {
  * r  A single precision integer.
  * a  Byte array.
  */
+#if UNALIGNED_LE_64BIT /* uint64_t view of the LSW-first uint32[8] is only valid on little-endian */
+static void sp_256_to_bin_8(const sp_digit* rr, uint8_t* a)
+{
+	int i;
+	const uint64_t* r = (void*)rr; /* walk the eight 32-bit digits as four 64-bit words */
+
+	sp_256_norm_8(rr);
+
+	r += 4; /* one past the most significant 64-bit word */
+	for (i = 0; i < 4; i++) {
+		r--;
+		move_to_unaligned64(a, SWAP_BE64(*r)); /* most significant word goes out first, big-endian */
+		a += 8;
+	}
+}
+#else
 static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a)
 {
 	int i;
@@ -71,6 +101,7 @@ static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a)
 		a += 4;
 	}
 }
+#endif
 
 /* Read big endian unsigned byte array into r.
  *
@@ -78,6 +109,21 @@ static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a)
  * a  Byte array.
  * n  Number of bytes in array to read.
  */
+#if UNALIGNED_LE_64BIT /* uint64_t view of the LSW-first uint32[8] is only valid on little-endian */
+static void sp_256_from_bin_8(sp_digit* rr, const uint8_t* a)
+{
+	int i;
+	uint64_t* r = (void*)rr; /* fill the eight 32-bit digits as four 64-bit words */
+
+	r += 4; /* one past the most significant 64-bit word */
+	for (i = 0; i < 4; i++) {
+		uint64_t v;
+		move_from_unaligned64(v, a); /* input may be unaligned */
+		*--r = SWAP_BE64(v); /* first 8 input bytes are the most significant word */
+		a += 8;
+	}
+}
+#else
 static void sp_256_from_bin_8(sp_digit* r, const uint8_t* a)
 {
 	int i;
@@ -90,6 +136,7 @@ static void sp_256_from_bin_8(sp_digit* r, const uint8_t* a)
 		a += 4;
 	}
 }
+#endif
 
 #if SP_DEBUG
 static void dump_256(const char *fmt, const sp_digit* r)
@@ -125,6 +172,20 @@ static void sp_256_point_from_bin2x32(sp_point* p, const uint8_t *bin2x32)
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
+#if UNALIGNED_LE_64BIT
+static signed_sp_digit sp_256_cmp_8(const sp_digit* aa, const sp_digit* bb)
+{
+	const uint64_t* a = (void*)aa;
+	const uint64_t* b = (void*)bb;
+	int i;
+	for (i = 3; i >= 0; i--) {
+		if (a[i] == b[i])
+			continue;
+		return (a[i] > b[i]) * 2 - 1;
+	}
+	return 0;
+}
+#else
 static signed_sp_digit sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
 {
 	int i;
@@ -140,6 +201,7 @@ static signed_sp_digit sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
 	}
 	return 0;
 }
+#endif
 
 /* Compare two numbers to determine if they are equal.
  *
@@ -196,8 +258,6 @@ static int sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 	);
 	return reg;
 #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
-	/* x86_64 has no alignment restrictions, and is little-endian,
-	 * so 64-bit and 32-bit representations are identical */
 	uint64_t reg;
 	asm volatile (
 "\n		movq	(%0), %3"
@@ -294,8 +354,6 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 	);
 	return reg;
 #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
-	/* x86_64 has no alignment restrictions, and is little-endian,
-	 * so 64-bit and 32-bit representations are identical */
 	uint64_t reg;
 	asm volatile (
 "\n		movq	(%0), %3"
@@ -440,8 +498,6 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 	r[15] = accl;
 	memcpy(r, rr, sizeof(rr));
 #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
-	/* x86_64 has no alignment restrictions, and is little-endian,
-	 * so 64-bit and 32-bit representations are identical */
 	const uint64_t* aa = (const void*)a;
 	const uint64_t* bb = (const void*)b;
 	uint64_t rr[8];
@@ -551,17 +607,32 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 }
 
 /* Shift number right one bit. Bottom bit is lost. */
-static void sp_256_rshift1_8(sp_digit* r, sp_digit* a, sp_digit carry)
+#if UNALIGNED_LE_64BIT
+static void sp_256_rshift1_8(sp_digit* rr, uint64_t carry)
+{
+	uint64_t *r = (void*)rr;
+	int i;
+
+	carry = (((uint64_t)!!carry) << 63);
+	for (i = 3; i >= 0; i--) {
+		uint64_t c = r[i] << 63;
+		r[i] = (r[i] >> 1) | carry;
+		carry = c;
+	}
+}
+#else
+static void sp_256_rshift1_8(sp_digit* r, sp_digit carry)
 {
 	int i;
 
-	carry = (!!carry << 31);
+	carry = (((sp_digit)!!carry) << 31);
 	for (i = 7; i >= 0; i--) {
-		sp_digit c = a[i] << 31;
-		r[i] = (a[i] >> 1) | carry;
+		sp_digit c = r[i] << 31;
+		r[i] = (r[i] >> 1) | carry;
 		carry = c;
 	}
 }
+#endif
 
 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) */
 static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
@@ -570,7 +641,7 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
 	if (a[0] & 1)
 		carry = sp_256_add_8(r, a, m);
 	sp_256_norm_8(r);
-	sp_256_rshift1_8(r, r, carry);
+	sp_256_rshift1_8(r, carry);
 }
 
 /* Add two Montgomery form numbers (r = a + b % m) */
@@ -634,15 +705,28 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
 }
 
 /* Shift the result in the high 256 bits down to the bottom. */
-static void sp_256_mont_shift_8(sp_digit* r, const sp_digit* a)
+#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff
+static void sp_256_mont_shift_8(sp_digit* rr)
+{
+	uint64_t *r = (void*)rr;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		r[i] = r[i+4];
+		r[i+4] = 0;
+	}
+}
+#else
+static void sp_256_mont_shift_8(sp_digit* r)
 {
 	int i;
 
 	for (i = 0; i < 8; i++) {
-		r[i] = a[i+8];
+		r[i] = r[i+8];
 		r[i+8] = 0;
 	}
 }
+#endif
 
 /* Mul a by scalar b and add into r. (r += a * b) */
 static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
@@ -800,7 +884,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 					goto inc_next_word0;
 			}
 		}
-		sp_256_mont_shift_8(a, a);
+		sp_256_mont_shift_8(a);
 		if (word16th != 0)
 			sp_256_sub_8_p256_mod(a);
 		sp_256_norm_8(a);
@@ -820,7 +904,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 					goto inc_next_word;
 			}
 		}
-		sp_256_mont_shift_8(a, a);
+		sp_256_mont_shift_8(a);
 		if (word16th != 0)
 			sp_256_sub_8_p256_mod(a);
 		sp_256_norm_8(a);
-- 
cgit v1.2.3-55-g6feb


From 446d136109633c12d748d63e2034db238f77ef97 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 Nov 2021 12:03:43 +0100
Subject: tls: tweak debug printout

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/networking/tls.c b/networking/tls.c
index 675ef4b3a..415952f16 100644
--- a/networking/tls.c
+++ b/networking/tls.c
@@ -1883,10 +1883,12 @@ static void process_server_key(tls_state_t *tls, int len)
 	keybuf += 4;
 	switch (t32) {
 	case _0x03001d20: //curve_x25519
+		dbg("got x25519 eccPubKey\n");
 		tls->flags |= GOT_EC_CURVE_X25519;
 		memcpy(tls->hsd->ecc_pub_key32, keybuf, 32);
 		break;
 	case _0x03001741: //curve_secp256r1 (aka P256)
+		dbg("got P256 eccPubKey\n");
 		/* P256 point can be transmitted odd- or even-compressed
 		 * (first byte is 3 or 2) or uncompressed (4).
 		 */
@@ -1899,7 +1901,6 @@ static void process_server_key(tls_state_t *tls, int len)
 	}
 
 	tls->flags |= GOT_EC_KEY;
-	dbg("got eccPubKey\n");
 }
 
 static void send_empty_client_cert(tls_state_t *tls)
-- 
cgit v1.2.3-55-g6feb


From 26c85225229b0a439bcc66c8ee786d16f23be9ed Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 Nov 2021 15:00:14 +0100
Subject: tls: P256: do not open-code copying of struct variables

function                                             old     new   delta
sp_256_ecc_mulmod_8                                  536     534      -2

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index d09f7e881..29dd04293 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -1361,13 +1361,13 @@ static void sp_256_ecc_mulmod_8(sp_point* r, const sp_point* g, const sp_digit*
 		dump_512("t[1].y %s\n", t[1].y);
 		dump_512("t[1].z %s\n", t[1].z);
 		dbg("t[2] = t[%d]\n", y);
-		memcpy(&t[2], &t[y], sizeof(sp_point));
+		t[2] = t[y]; /* struct copy */
 		dbg("t[2] *= 2\n");
 		sp_256_proj_point_dbl_8(&t[2], &t[2]);
 		dump_512("t[2].x %s\n", t[2].x);
 		dump_512("t[2].y %s\n", t[2].y);
 		dump_512("t[2].z %s\n", t[2].z);
-		memcpy(&t[y], &t[2], sizeof(sp_point));
+		t[y] = t[2]; /* struct copy */
 
 		n <<= 1;
 		c--;
-- 
cgit v1.2.3-55-g6feb


From bbda85c74b7a53d8b2bb46f3b44d8f0932a6e95d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 Nov 2021 15:06:57 +0100
Subject: tls: P256: remove constant-time trick in sp_256_proj_point_add_8

function                                             old     new   delta
sp_256_proj_point_add_8                              576     544     -32

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 79 +++++++++++++++++++++++--------------------------
 1 file changed, 37 insertions(+), 42 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 29dd04293..3b0473036 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -1269,52 +1269,47 @@ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point*
 	 && (sp_256_cmp_equal_8(p->y, q->y) || sp_256_cmp_equal_8(p->y, t1))
 	) {
 		sp_256_proj_point_dbl_8(r, p);
+		return;
 	}
-	else {
-		sp_point tp;
-		sp_point *v;
-
-		v = r;
-		if (p->infinity | q->infinity) {
-			memset(&tp, 0, sizeof(tp));
-			v = &tp;
-		}
 
-		*r = p->infinity ? *q : *p; /* struct copy */
 
-		/* U1 = X1*Z2^2 */
-		sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_8(t1, t1, v->x /*, p256_mod, p256_mp_mod*/);
-		/* U2 = X2*Z1^2 */
-		sp_256_mont_sqr_8(t2, v->z /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_8(t4, t2, v->z /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
-		/* S1 = Y1*Z2^3 */
-		sp_256_mont_mul_8(t3, t3, v->y /*, p256_mod, p256_mp_mod*/);
-		/* S2 = Y2*Z1^3 */
-		sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
-		/* H = U2 - U1 */
-		sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/);
-		/* R = S2 - S1 */
-		sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/);
-		/* Z3 = H*Z1*Z2 */
-		sp_256_mont_mul_8(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_8(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/);
-		/* X3 = R^2 - H^3 - 2*U1*H^2 */
-		sp_256_mont_sqr_8(v->x, t4 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_8(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_sub_8(v->x, v->x, t5 /*, p256_mod*/);
-		sp_256_mont_dbl_8(t1, v->y /*, p256_mod*/);
-		sp_256_mont_sub_8(v->x, v->x, t1 /*, p256_mod*/);
-		/* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
-		sp_256_mont_sub_8(v->y, v->y, v->x /*, p256_mod*/);
-		sp_256_mont_mul_8(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
-		sp_256_mont_sub_8(v->y, v->y, t5 /*, p256_mod*/);
+	if (p->infinity || q->infinity) {
+		*r = p->infinity ? *q : *p; /* struct copy */
+		return;
 	}
+
+	/* U1 = X1*Z2^2 */
+	sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/);
+	/* U2 = X2*Z1^2 */
+	sp_256_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
+	/* S1 = Y1*Z2^3 */
+	sp_256_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/);
+	/* S2 = Y2*Z1^3 */
+	sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
+	/* H = U2 - U1 */
+	sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/);
+	/* R = S2 - S1 */
+	sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/);
+	/* Z3 = H*Z1*Z2 */
+	sp_256_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/);
+	/* X3 = R^2 - H^3 - 2*U1*H^2 */
+	sp_256_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sub_8(r->x, r->x, t5 /*, p256_mod*/);
+	sp_256_mont_dbl_8(t1, r->y /*, p256_mod*/);
+	sp_256_mont_sub_8(r->x, r->x, t1 /*, p256_mod*/);
+	/* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+	sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/);
+	sp_256_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sub_8(r->y, r->y, t5 /*, p256_mod*/);
 }
 
 /* Multiply the point by the scalar and return the result.
-- 
cgit v1.2.3-55-g6feb


From 4415f7bc06f1ee382bcbaabd86c3d7aca0b46d93 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 Nov 2021 15:47:26 +0100
Subject: tls: P256: explain which functions use double-wide arrays, no code
 changes

function                                             old     new   delta
sp_512to256_mont_reduce_8                              -     243    +243
sp_256to512z_mont_mul_8                                -     150    +150
sp_256to512z_mont_sqr_8                                -       7      +7
sp_256_mont_sqr_8                                      7       -      -7
sp_256_mont_mul_8                                    150       -    -150
sp_256_mont_reduce_8                                 243       -    -243
------------------------------------------------------------------------------
(add/remove: 3/3 grow/shrink: 0/0 up/down: 400/-400)            Total: 0 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 211 +++++++++++++-----------------------------------
 1 file changed, 58 insertions(+), 153 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 3b0473036..74ded2cda 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -455,8 +455,10 @@ static void sp_256_sub_8_p256_mod(sp_digit* r)
 }
 #endif
 
-/* Multiply a and b into r. (r = a * b) */
-static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+/* Multiply a and b into r. (r = a * b)
+ * r should be [16] array (512 bits).
+ */
+static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
 #if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
 	sp_digit rr[15]; /* in case r coincides with a or b */
@@ -704,9 +706,11 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
 	}
 }
 
-/* Shift the result in the high 256 bits down to the bottom. */
+/* Shift the result in the high 256 bits down to the bottom.
+ * High half is cleared to zeros.
+ */
 #if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff
-static void sp_256_mont_shift_8(sp_digit* rr)
+static void sp_512to256_mont_shift_8(sp_digit* rr)
 {
 	uint64_t *r = (void*)rr;
 	int i;
@@ -717,7 +721,7 @@ static void sp_256_mont_shift_8(sp_digit* rr)
 	}
 }
 #else
-static void sp_256_mont_shift_8(sp_digit* r)
+static void sp_512to256_mont_shift_8(sp_digit* r)
 {
 	int i;
 
@@ -728,7 +732,10 @@ static void sp_256_mont_shift_8(sp_digit* r)
 }
 #endif
 
-/* Mul a by scalar b and add into r. (r += a * b) */
+/* Mul a by scalar b and add into r. (r += a * b)
+ * a = p256_mod
+ * b = r[0]
+ */
 static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
 {
 //	const sp_digit* a = p256_mod;
@@ -857,11 +864,11 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
 
 /* Reduce the number back to 256 bits using Montgomery reduction.
  *
- * a   A single precision number to reduce in place.
+ * a   Double-wide number to reduce in place.
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
+static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
 {
 //	const sp_digit* m = p256_mod;
 	sp_digit mp = p256_mp_mod;
@@ -884,7 +891,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 					goto inc_next_word0;
 			}
 		}
-		sp_256_mont_shift_8(a);
+		sp_512to256_mont_shift_8(a);
 		if (word16th != 0)
 			sp_256_sub_8_p256_mod(a);
 		sp_256_norm_8(a);
@@ -892,7 +899,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 	else { /* Same code for explicit mp == 1 (which is always the case for P256) */
 		sp_digit word16th = 0;
 		for (i = 0; i < 8; i++) {
-			/*mu = a[i];*/
+//			mu = a[i];
 			if (sp_256_mul_add_8(a+i /*, m, mu*/)) {
 				int j = i + 8;
  inc_next_word:
@@ -904,148 +911,46 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 					goto inc_next_word;
 			}
 		}
-		sp_256_mont_shift_8(a);
+		sp_512to256_mont_shift_8(a);
 		if (word16th != 0)
 			sp_256_sub_8_p256_mod(a);
 		sp_256_norm_8(a);
 	}
 }
-#if 0
-//TODO: arm32 asm (also adapt for x86?)
-static void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m, sp_digit mp)
-{
-	sp_digit ca = 0;
-
-	asm volatile (
-	# i = 0
-	mov	r12, #0
-	ldr	r10, [%[a], #0]
-	ldr	r14, [%[a], #4]
-1:
-	# mu = a[i] * mp
-	mul	r8, %[mp], r10
-	# a[i+0] += m[0] * mu
-	ldr	r7, [%[m], #0]
-	ldr	r9, [%[a], #0]
-	umull	r6, r7, r8, r7
-	adds	r10, r10, r6
-	adc	r5, r7, #0
-	# a[i+1] += m[1] * mu
-	ldr	r7, [%[m], #4]
-	ldr	r9, [%[a], #4]
-	umull	r6, r7, r8, r7
-	adds	r10, r14, r6
-	adc	r4, r7, #0
-	adds	r10, r10, r5
-	adc	r4, r4, #0
-	# a[i+2] += m[2] * mu
-	ldr	r7, [%[m], #8]
-	ldr	r14, [%[a], #8]
-	umull	r6, r7, r8, r7
-	adds	r14, r14, r6
-	adc	r5, r7, #0
-	adds	r14, r14, r4
-	adc	r5, r5, #0
-	# a[i+3] += m[3] * mu
-	ldr	r7, [%[m], #12]
-	ldr	r9, [%[a], #12]
-	umull	r6, r7, r8, r7
-	adds	r9, r9, r6
-	adc	r4, r7, #0
-	adds	r9, r9, r5
-	str	r9, [%[a], #12]
-	adc	r4, r4, #0
-	# a[i+4] += m[4] * mu
-	ldr	r7, [%[m], #16]
-	ldr	r9, [%[a], #16]
-	umull	r6, r7, r8, r7
-	adds	r9, r9, r6
-	adc	r5, r7, #0
-	adds	r9, r9, r4
-	str	r9, [%[a], #16]
-	adc	r5, r5, #0
-	# a[i+5] += m[5] * mu
-	ldr	r7, [%[m], #20]
-	ldr	r9, [%[a], #20]
-	umull	r6, r7, r8, r7
-	adds	r9, r9, r6
-	adc	r4, r7, #0
-	adds	r9, r9, r5
-	str	r9, [%[a], #20]
-	adc	r4, r4, #0
-	# a[i+6] += m[6] * mu
-	ldr	r7, [%[m], #24]
-	ldr	r9, [%[a], #24]
-	umull	r6, r7, r8, r7
-	adds	r9, r9, r6
-	adc	r5, r7, #0
-	adds	r9, r9, r4
-	str	r9, [%[a], #24]
-	adc	r5, r5, #0
-	# a[i+7] += m[7] * mu
-	ldr	r7, [%[m], #28]
-	ldr	r9, [%[a], #28]
-	umull	r6, r7, r8, r7
-	adds	r5, r5, r6
-	adcs	r7, r7, %[ca]
-	mov	%[ca], #0
-	adc	%[ca], %[ca], %[ca]
-	adds	r9, r9, r5
-	str	r9, [%[a], #28]
-	ldr	r9, [%[a], #32]
-	adcs	r9, r9, r7
-	str	r9, [%[a], #32]
-	adc	%[ca], %[ca], #0
-	# i += 1
-	add	%[a], %[a], #4
-	add	r12, r12, #4
-	cmp	r12, #32
-	blt	1b
-
-	str	r10, [%[a], #0]
-	str	r14, [%[a], #4]
-	: [ca] "+r" (ca), [a] "+r" (a)
-	: [m] "r" (m), [mp] "r" (mp)
-	: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
-	);
-
-	memcpy(a, a + 8, 32);
-	if (ca)
-		a -= m;
-}
-#endif
 
 /* Multiply two Montogmery form numbers mod the modulus (prime).
  * (r = a * b mod m)
  *
  * r   Result of multiplication.
+ *     Should be [16] array (512 bits), but high half is cleared to zeros (used as scratch pad).
  * a   First number to multiply in Montogmery form.
  * b   Second number to multiply in Montogmery form.
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b
+static void sp_256to512z_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b
 		/*, const sp_digit* m, sp_digit mp*/)
 {
 	//const sp_digit* m = p256_mod;
 	//sp_digit mp = p256_mp_mod;
-	sp_256_mul_8(r, a, b);
-	sp_256_mont_reduce_8(r /*, m, mp*/);
+	sp_256to512_mul_8(r, a, b);
+	sp_512to256_mont_reduce_8(r /*, m, mp*/);
 }
 
 /* Square the Montgomery form number. (r = a * a mod m)
  *
  * r   Result of squaring.
+ *     Should be [16] array (512 bits), but high half is cleared to zeros (used as scratch pad).
  * a   Number to square in Montogmery form.
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a
+static void sp_256to512z_mont_sqr_8(sp_digit* r, const sp_digit* a
 		/*, const sp_digit* m, sp_digit mp*/)
 {
 	//const sp_digit* m = p256_mod;
 	//sp_digit mp = p256_mp_mod;
-	sp_256_mont_mul_8(r, a, a /*, m, mp*/);
+	sp_256to512z_mont_mul_8(r, a, a /*, m, mp*/);
 }
 
 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
@@ -1068,15 +973,15 @@ static const uint32_t p256_mod_2[8] = {
 #endif
 static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a)
 {
-	sp_digit t[2*8]; //can be just [8]?
+	sp_digit t[2*8];
 	int i;
 
 	memcpy(t, a, sizeof(sp_digit) * 8);
 	for (i = 254; i >= 0; i--) {
-		sp_256_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/);
+		sp_256to512z_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/);
 		/*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/
 		if (i >= 224 || i == 192 || (i <= 95 && i != 1))
-			sp_256_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/);
+			sp_256to512z_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/);
 	}
 	memcpy(r, t, sizeof(sp_digit) * 8);
 }
@@ -1152,22 +1057,22 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 
 	sp_256_mont_inv_8(t1, p->z);
 
-	sp_256_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/);
-	sp_256_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
 
 	/* x /= z^2 */
-	sp_256_mont_mul_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/);
 	memset(r->x + 8, 0, sizeof(r->x) / 2);
-	sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/);
+	sp_512to256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/);
 	/* Reduce x to less than modulus */
 	if (sp_256_cmp_8(r->x, p256_mod) >= 0)
 		sp_256_sub_8_p256_mod(r->x);
 	sp_256_norm_8(r->x);
 
 	/* y /= z^3 */
-	sp_256_mont_mul_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
 	memset(r->y + 8, 0, sizeof(r->y) / 2);
-	sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/);
+	sp_512to256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
 	if (sp_256_cmp_8(r->y, p256_mod) >= 0)
 		sp_256_sub_8_p256_mod(r->y);
@@ -1202,9 +1107,9 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	}
 
 	/* T1 = Z * Z */
-	sp_256_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = Y * Z */
-	sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = 2Z */
 	sp_256_mont_dbl_8(r->z, r->z /*, p256_mod*/);
 	/* T2 = X - T1 */
@@ -1212,21 +1117,21 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	/* T1 = X + T1 */
 	sp_256_mont_add_8(t1, r->x, t1 /*, p256_mod*/);
 	/* T2 = T1 * T2 */
-	sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
 	/* T1 = 3T2 */
 	sp_256_mont_tpl_8(t1, t2 /*, p256_mod*/);
 	/* Y = 2Y */
 	sp_256_mont_dbl_8(r->y, r->y /*, p256_mod*/);
 	/* Y = Y * Y */
-	sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = Y * Y */
-	sp_256_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = T2/2 */
 	sp_256_div2_8(t2, t2, p256_mod);
 	/* Y = Y * X */
-	sp_256_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
 	/* X = T1 * T1 */
-	sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
 	/* X = X - Y */
 	sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/);
 	/* X = X - Y */
@@ -1234,7 +1139,7 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	/* Y = Y - X */
 	sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/);
 	/* Y = Y * T1 */
-	sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
 	/* Y = Y - T2 */
 	sp_256_mont_sub_8(r->y, r->y, t2 /*, p256_mod*/);
 	dump_512("y2 %s\n", r->y);
@@ -1279,36 +1184,36 @@ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point*
 	}
 
 	/* U1 = X1*Z2^2 */
-	sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/);
-	sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
-	sp_256_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/);
 	/* U2 = X2*Z1^2 */
-	sp_256_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/);
-	sp_256_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/);
-	sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
 	/* S1 = Y1*Z2^3 */
-	sp_256_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/);
 	/* S2 = Y2*Z1^3 */
-	sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
 	/* H = U2 - U1 */
 	sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/);
 	/* R = S2 - S1 */
 	sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/);
 	/* Z3 = H*Z1*Z2 */
-	sp_256_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/);
-	sp_256_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/);
 	/* X3 = R^2 - H^3 - 2*U1*H^2 */
-	sp_256_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/);
-	sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
-	sp_256_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
-	sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
 	sp_256_mont_sub_8(r->x, r->x, t5 /*, p256_mod*/);
 	sp_256_mont_dbl_8(t1, r->y /*, p256_mod*/);
 	sp_256_mont_sub_8(r->x, r->x, t1 /*, p256_mod*/);
 	/* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
 	sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/);
-	sp_256_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/);
-	sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/);
+	sp_256to512z_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
 	sp_256_mont_sub_8(r->y, r->y, t5 /*, p256_mod*/);
 }
 
-- 
cgit v1.2.3-55-g6feb


From 8cbb70365f653397c8c2b9370214d5aed36ec9fa Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 Nov 2021 15:50:40 +0100
Subject: tls: P256: remove redundant zeroing in sp_256_map_8

The previous change made it obvious that we zero out already-zeroed high bits.

function                                             old     new   delta
sp_256_ecc_mulmod_8                                  534     494     -40

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 74ded2cda..baed62f41 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -1062,7 +1062,6 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 
 	/* x /= z^2 */
 	sp_256to512z_mont_mul_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/);
-	memset(r->x + 8, 0, sizeof(r->x) / 2);
 	sp_512to256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/);
 	/* Reduce x to less than modulus */
 	if (sp_256_cmp_8(r->x, p256_mod) >= 0)
@@ -1071,7 +1070,6 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 
 	/* y /= z^3 */
 	sp_256to512z_mont_mul_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
-	memset(r->y + 8, 0, sizeof(r->y) / 2);
 	sp_512to256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
 	if (sp_256_cmp_8(r->y, p256_mod) >= 0)
-- 
cgit v1.2.3-55-g6feb


From dcfd8d3d1013ba989fa511f44bb0553a88c1ef10 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 Nov 2021 16:07:42 +0100
Subject: tls: P256: fix sp_256_div2_8 - it wouldn't use a[] if low bit is 0

It worked by chance because the only caller passed the same array
as both the r and a parameters.
My fault (I made this error when converting from 26-bit code).

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index baed62f41..b3f7888f5 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -636,12 +636,14 @@ static void sp_256_rshift1_8(sp_digit* r, sp_digit carry)
 }
 #endif
 
-/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) */
-static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+/* Divide the number by 2 mod the modulus (prime). (r = (r / 2) % m) */
+static void sp_256_div2_8(sp_digit* r /*, const sp_digit* m*/)
 {
+	const sp_digit* m = p256_mod;
+
 	int carry = 0;
-	if (a[0] & 1)
-		carry = sp_256_add_8(r, a, m);
+	if (r[0] & 1)
+		carry = sp_256_add_8(r, r, m);
 	sp_256_norm_8(r);
 	sp_256_rshift1_8(r, carry);
 }
@@ -1125,7 +1127,7 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	/* T2 = Y * Y */
 	sp_256to512z_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = T2/2 */
-	sp_256_div2_8(t2, t2, p256_mod);
+	sp_256_div2_8(t2 /*, p256_mod*/);
 	/* Y = Y * X */
 	sp_256to512z_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
 	/* X = T1 * T1 */
-- 
cgit v1.2.3-55-g6feb


From 9c671fe3dd2e46a28c02d266130f56a1a6296791 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 Nov 2021 18:42:27 +0100
Subject: tls: P256: do not open-code copying of struct variables

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index b3f7888f5..3291b553c 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -865,6 +865,8 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
 }
 
 /* Reduce the number back to 256 bits using Montgomery reduction.
+ * Note: the result is NOT guaranteed to be less than p256_mod!
+ * (it is only guaranteed to fit into 256 bits).
  *
  * a   Double-wide number to reduce in place.
  * m   The single precision number representing the modulus.
@@ -1276,7 +1278,7 @@ static void sp_256_ecc_mulmod_8(sp_point* r, const sp_point* g, const sp_digit*
 	if (map)
 		sp_256_map_8(r, &t[0]);
 	else
-		memcpy(r, &t[0], sizeof(sp_point));
+		*r = t[0]; /* struct copy */
 
 	memset(t, 0, sizeof(t)); //paranoia
 }
-- 
cgit v1.2.3-55-g6feb


From f92ae1dc4bc00e352e683b826609efa5e1e22708 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 Nov 2021 19:15:43 +0100
Subject: tls: P256: change logic so that we don't need double-wide vectors
 everywhere

Change sp_256to512z_mont_{mul,sqr}_8 (renamed to sp_256_mont_{mul,sqr}_8)
to not require/zero the upper 256 bits.
There is only one place where we actually used that (and that's why there
used to be a zeroing memset of the top half!). Fix up that place.
As a bonus, the 256x256->512 multiply no longer needs to care about the
"r overlaps a or b" case.

This shrinks sp_point structure as well, not just temporaries.

function                                             old     new   delta
sp_256to512z_mont_mul_8                              150       -    -150
sp_256_mont_mul_8                                      -     147    +147
sp_256to512z_mont_sqr_8                                7       -      -7
sp_256_mont_sqr_8                                      -       7      +7
sp_256_ecc_mulmod_8                                  494     543     +49
sp_512to256_mont_reduce_8                            243     249      +6
sp_256_point_from_bin2x32                             73      70      -3
sp_256_proj_point_dbl_8                              353     345      -8
sp_256_proj_point_add_8                              544     499     -45
------------------------------------------------------------------------------
(add/remove: 2/2 grow/shrink: 2/3 up/down: 209/-213)           Total: -4 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 178 ++++++++++++++++++++----------------------------
 1 file changed, 72 insertions(+), 106 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 3291b553c..3452b08b9 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -49,9 +49,9 @@ typedef int32_t signed_sp_digit;
  */
 
 typedef struct sp_point {
-	sp_digit x[2 * 8];
-	sp_digit y[2 * 8];
-	sp_digit z[2 * 8];
+	sp_digit x[8];
+	sp_digit y[8];
+	sp_digit z[8];
 	int infinity;
 } sp_point;
 
@@ -456,12 +456,11 @@ static void sp_256_sub_8_p256_mod(sp_digit* r)
 #endif
 
 /* Multiply a and b into r. (r = a * b)
- * r should be [16] array (512 bits).
+ * r should be [16] array (512 bits), and must not coincide with a or b.
  */
 static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
 #if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
-	sp_digit rr[15]; /* in case r coincides with a or b */
 	int k;
 	uint32_t accl;
 	uint32_t acch;
@@ -493,16 +492,15 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 		        j--;
 			i++;
 		} while (i != 8 && i <= k);
-		rr[k] = accl;
+		r[k] = accl;
 		accl = acch;
 		acch = acc_hi;
 	}
 	r[15] = accl;
-	memcpy(r, rr, sizeof(rr));
 #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
 	const uint64_t* aa = (const void*)a;
 	const uint64_t* bb = (const void*)b;
-	uint64_t rr[8];
+	const uint64_t* rr = (const void*)r;
 	int k;
 	uint64_t accl;
 	uint64_t acch;
@@ -539,11 +537,8 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 		acch = acc_hi;
 	}
 	rr[7] = accl;
-	memcpy(r, rr, sizeof(rr));
 #elif 0
 	//TODO: arm assembly (untested)
-	sp_digit tmp[16];
-
 	asm volatile (
 "\n		mov	r5, #0"
 "\n		mov	r6, #0"
@@ -575,12 +570,10 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 "\n		cmp	r5, #56"
 "\n		ble	1b"
 "\n		str	r6, [%[r], r5]"
-		: [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+		: [r] "r" (r), [a] "r" (a), [b] "r" (b)
 		: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
 	);
-	memcpy(r, tmp, sizeof(tmp));
 #else
-	sp_digit rr[15]; /* in case r coincides with a or b */
 	int i, j, k;
 	uint64_t acc;
 
@@ -600,11 +593,10 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 		        j--;
 			i++;
 		} while (i != 8 && i <= k);
-		rr[k] = acc;
+		r[k] = acc;
 		acc = (acc >> 32) | ((uint64_t)acc_hi << 32);
 	}
 	r[15] = acc;
-	memcpy(r, rr, sizeof(rr));
 #endif
 }
 
@@ -709,30 +701,11 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
 }
 
 /* Shift the result in the high 256 bits down to the bottom.
- * High half is cleared to zeros.
  */
-#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff
-static void sp_512to256_mont_shift_8(sp_digit* rr)
+static void sp_512to256_mont_shift_8(sp_digit* r, sp_digit* a)
 {
-	uint64_t *r = (void*)rr;
-	int i;
-
-	for (i = 0; i < 4; i++) {
-		r[i] = r[i+4];
-		r[i+4] = 0;
-	}
+	memcpy(r, a + 8, sizeof(*r) * 8);
 }
-#else
-static void sp_512to256_mont_shift_8(sp_digit* r)
-{
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		r[i] = r[i+8];
-		r[i+8] = 0;
-	}
-}
-#endif
 
 /* Mul a by scalar b and add into r. (r += a * b)
  * a = p256_mod
@@ -868,11 +841,12 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
  * Note: the result is NOT guaranteed to be less than p256_mod!
  * (it is only guaranteed to fit into 256 bits).
  *
- * a   Double-wide number to reduce in place.
+ * r   Result.
+ * a   Double-wide number to reduce. Clobbered.
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
+static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
 {
 //	const sp_digit* m = p256_mod;
 	sp_digit mp = p256_mp_mod;
@@ -895,10 +869,10 @@ static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit
 					goto inc_next_word0;
 			}
 		}
-		sp_512to256_mont_shift_8(a);
+		sp_512to256_mont_shift_8(r, a);
 		if (word16th != 0)
-			sp_256_sub_8_p256_mod(a);
-		sp_256_norm_8(a);
+			sp_256_sub_8_p256_mod(r);
+		sp_256_norm_8(r);
 	}
 	else { /* Same code for explicit mp == 1 (which is always the case for P256) */
 		sp_digit word16th = 0;
@@ -915,10 +889,10 @@ static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit
 					goto inc_next_word;
 			}
 		}
-		sp_512to256_mont_shift_8(a);
+		sp_512to256_mont_shift_8(r, a);
 		if (word16th != 0)
-			sp_256_sub_8_p256_mod(a);
-		sp_256_norm_8(a);
+			sp_256_sub_8_p256_mod(r);
+		sp_256_norm_8(r);
 	}
 }
 
@@ -926,35 +900,34 @@ static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit
  * (r = a * b mod m)
  *
  * r   Result of multiplication.
- *     Should be [16] array (512 bits), but high half is cleared to zeros (used as scratch pad).
  * a   First number to multiply in Montogmery form.
  * b   Second number to multiply in Montogmery form.
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256to512z_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b
+static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b
 		/*, const sp_digit* m, sp_digit mp*/)
 {
 	//const sp_digit* m = p256_mod;
 	//sp_digit mp = p256_mp_mod;
-	sp_256to512_mul_8(r, a, b);
-	sp_512to256_mont_reduce_8(r /*, m, mp*/);
+	sp_digit t[2 * 8];
+	sp_256to512_mul_8(t, a, b);
+	sp_512to256_mont_reduce_8(r, t /*, m, mp*/);
 }
 
 /* Square the Montgomery form number. (r = a * a mod m)
  *
  * r   Result of squaring.
- *     Should be [16] array (512 bits), but high half is cleared to zeros (used as scratch pad).
  * a   Number to square in Montogmery form.
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256to512z_mont_sqr_8(sp_digit* r, const sp_digit* a
+static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a
 		/*, const sp_digit* m, sp_digit mp*/)
 {
 	//const sp_digit* m = p256_mod;
 	//sp_digit mp = p256_mp_mod;
-	sp_256to512z_mont_mul_8(r, a, a /*, m, mp*/);
+	sp_256_mont_mul_8(r, a, a /*, m, mp*/);
 }
 
 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
@@ -964,11 +937,8 @@ static void sp_256to512z_mont_sqr_8(sp_digit* r, const sp_digit* a
  * a   Number to invert.
  */
 #if 0
-/* Mod-2 for the P256 curve. */
-static const uint32_t p256_mod_2[8] = {
-	0xfffffffd,0xffffffff,0xffffffff,0x00000000,
-	0x00000000,0x00000000,0x00000001,0xffffffff,
-};
+//p256_mod - 2:
+//ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff - 2
 //Bit pattern:
 //2    2         2         2         2         2         2         1...1
 //5    5         4         3         2         1         0         9...0         9...1
@@ -977,15 +947,15 @@ static const uint32_t p256_mod_2[8] = {
 #endif
 static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a)
 {
-	sp_digit t[2*8];
+	sp_digit t[8];
 	int i;
 
 	memcpy(t, a, sizeof(sp_digit) * 8);
 	for (i = 254; i >= 0; i--) {
-		sp_256to512z_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/);
 		/*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/
 		if (i >= 224 || i == 192 || (i <= 95 && i != 1))
-			sp_256to512z_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/);
+			sp_256_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/);
 	}
 	memcpy(r, t, sizeof(sp_digit) * 8);
 }
@@ -1056,25 +1026,28 @@ static void sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a)
  */
 static void sp_256_map_8(sp_point* r, sp_point* p)
 {
-	sp_digit t1[2*8];
-	sp_digit t2[2*8];
+	sp_digit t1[8];
+	sp_digit t2[8];
+	sp_digit rr[2 * 8];
 
 	sp_256_mont_inv_8(t1, p->z);
 
-	sp_256to512z_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
 
 	/* x /= z^2 */
-	sp_256to512z_mont_mul_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/);
-	sp_512to256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(rr, p->x, t2 /*, p256_mod, p256_mp_mod*/);
+	memset(rr + 8, 0, sizeof(rr) / 2);
+	sp_512to256_mont_reduce_8(r->x, rr /*, p256_mod, p256_mp_mod*/);
 	/* Reduce x to less than modulus */
 	if (sp_256_cmp_8(r->x, p256_mod) >= 0)
 		sp_256_sub_8_p256_mod(r->x);
 	sp_256_norm_8(r->x);
 
 	/* y /= z^3 */
-	sp_256to512z_mont_mul_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
-	sp_512to256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(rr, p->y, t1 /*, p256_mod, p256_mp_mod*/);
+	memset(rr + 8, 0, sizeof(rr) / 2);
+	sp_512to256_mont_reduce_8(r->y, rr /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
 	if (sp_256_cmp_8(r->y, p256_mod) >= 0)
 		sp_256_sub_8_p256_mod(r->y);
@@ -1091,8 +1064,8 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
  */
 static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 {
-	sp_digit t1[2*8];
-	sp_digit t2[2*8];
+	sp_digit t1[8];
+	sp_digit t2[8];
 
 	/* Put point to double into result */
 	if (r != p)
@@ -1101,17 +1074,10 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	if (r->infinity)
 		return;
 
-	if (SP_DEBUG) {
-		/* unused part of t2, may result in spurios
-		 * differences in debug output. Clear it.
-		 */
-		memset(t2, 0, sizeof(t2));
-	}
-
 	/* T1 = Z * Z */
-	sp_256to512z_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = Y * Z */
-	sp_256to512z_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = 2Z */
 	sp_256_mont_dbl_8(r->z, r->z /*, p256_mod*/);
 	/* T2 = X - T1 */
@@ -1119,21 +1085,21 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	/* T1 = X + T1 */
 	sp_256_mont_add_8(t1, r->x, t1 /*, p256_mod*/);
 	/* T2 = T1 * T2 */
-	sp_256to512z_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
 	/* T1 = 3T2 */
 	sp_256_mont_tpl_8(t1, t2 /*, p256_mod*/);
 	/* Y = 2Y */
 	sp_256_mont_dbl_8(r->y, r->y /*, p256_mod*/);
 	/* Y = Y * Y */
-	sp_256to512z_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = Y * Y */
-	sp_256to512z_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = T2/2 */
 	sp_256_div2_8(t2 /*, p256_mod*/);
 	/* Y = Y * X */
-	sp_256to512z_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
 	/* X = T1 * T1 */
-	sp_256to512z_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
 	/* X = X - Y */
 	sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/);
 	/* X = X - Y */
@@ -1141,7 +1107,7 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	/* Y = Y - X */
 	sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/);
 	/* Y = Y * T1 */
-	sp_256to512z_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
 	/* Y = Y - T2 */
 	sp_256_mont_sub_8(r->y, r->y, t2 /*, p256_mod*/);
 	dump_512("y2 %s\n", r->y);
@@ -1155,11 +1121,11 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
  */
 static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q)
 {
-	sp_digit t1[2*8];
-	sp_digit t2[2*8];
-	sp_digit t3[2*8];
-	sp_digit t4[2*8];
-	sp_digit t5[2*8];
+	sp_digit t1[8];
+	sp_digit t2[8];
+	sp_digit t3[8];
+	sp_digit t4[8];
+	sp_digit t5[8];
 
 	/* Ensure only the first point is the same as the result. */
 	if (q == r) {
@@ -1186,36 +1152,36 @@ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point*
 	}
 
 	/* U1 = X1*Z2^2 */
-	sp_256to512z_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/);
 	/* U2 = X2*Z1^2 */
-	sp_256to512z_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
 	/* S1 = Y1*Z2^3 */
-	sp_256to512z_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/);
 	/* S2 = Y2*Z1^3 */
-	sp_256to512z_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
 	/* H = U2 - U1 */
 	sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/);
 	/* R = S2 - S1 */
 	sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/);
 	/* Z3 = H*Z1*Z2 */
-	sp_256to512z_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/);
 	/* X3 = R^2 - H^3 - 2*U1*H^2 */
-	sp_256to512z_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
 	sp_256_mont_sub_8(r->x, r->x, t5 /*, p256_mod*/);
 	sp_256_mont_dbl_8(t1, r->y /*, p256_mod*/);
 	sp_256_mont_sub_8(r->x, r->x, t1 /*, p256_mod*/);
 	/* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
 	sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/);
-	sp_256to512z_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
 	sp_256_mont_sub_8(r->y, r->y, t5 /*, p256_mod*/);
 }
 
-- 
cgit v1.2.3-55-g6feb


From 0b13ab66f43fc1a9437361cfcd33b485422eb0ae Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 Nov 2021 19:36:23 +0100
Subject: tls: P256: trivial x86-64 fix

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 3452b08b9..4c8f08d4e 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -500,7 +500,7 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
 	const uint64_t* aa = (const void*)a;
 	const uint64_t* bb = (const void*)b;
-	const uint64_t* rr = (const void*)r;
+	uint64_t* rr = (void*)r;
 	int k;
 	uint64_t accl;
 	uint64_t acch;
-- 
cgit v1.2.3-55-g6feb


From 1b93c7c4ecc47318905b6e6f801732b7dd31e0ee Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 28 Nov 2021 02:56:02 +0100
Subject: tls: P256: pad struct sp_point to 64 bits (on 64-bit arches)

function                                             old     new   delta
curve_P256_compute_pubkey_and_premaster              198     190      -8

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 4c8f08d4e..37e1cfa1c 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -49,14 +49,19 @@ typedef int32_t signed_sp_digit;
  */
 
 typedef struct sp_point {
-	sp_digit x[8];
+	sp_digit x[8]
+#if ULONG_MAX > 0xffffffff
+		/* Make sp_point[] arrays to not be 64-bit misaligned */
+		ALIGNED(8)
+#endif
+	;
 	sp_digit y[8];
 	sp_digit z[8];
 	int infinity;
 } sp_point;
 
 /* The modulus (prime) of the curve P256. */
-static const sp_digit p256_mod[8] = {
+static const sp_digit p256_mod[8] ALIGNED(8) = {
 	0xffffffff,0xffffffff,0xffffffff,0x00000000,
 	0x00000000,0x00000000,0x00000001,0xffffffff,
 };
@@ -903,7 +908,7 @@ static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit
  * a   First number to multiply in Montogmery form.
  * b   Second number to multiply in Montogmery form.
  * m   Modulus (prime).
- * mp  Montogmery mulitplier.
+ * mp  Montogmery multiplier.
  */
 static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b
 		/*, const sp_digit* m, sp_digit mp*/)
@@ -920,7 +925,7 @@ static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b
  * r   Result of squaring.
  * a   Number to square in Montogmery form.
  * m   Modulus (prime).
- * mp  Montogmery mulitplier.
+ * mp  Montogmery multiplier.
  */
 static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a
 		/*, const sp_digit* m, sp_digit mp*/)
@@ -1145,7 +1150,6 @@ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point*
 		return;
 	}
 
-
 	if (p->infinity || q->infinity) {
 		*r = p->infinity ? *q : *p; /* struct copy */
 		return;
-- 
cgit v1.2.3-55-g6feb


From bfefa6ab6cf30507009cca7182c7302900fb5534 Mon Sep 17 00:00:00 2001
From: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
Date: Sun, 28 Nov 2021 10:53:22 +0100
Subject: libarchive: remove duplicate forward declaration

Signed-off-by: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
---
 include/bb_archive.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/bb_archive.h b/include/bb_archive.h
index dc5e55f0a..e0ef8fc4e 100644
--- a/include/bb_archive.h
+++ b/include/bb_archive.h
@@ -195,7 +195,6 @@ char get_header_ar(archive_handle_t *archive_handle) FAST_FUNC;
 char get_header_cpio(archive_handle_t *archive_handle) FAST_FUNC;
 char get_header_tar(archive_handle_t *archive_handle) FAST_FUNC;
 char get_header_tar_gz(archive_handle_t *archive_handle) FAST_FUNC;
-char get_header_tar_xz(archive_handle_t *archive_handle) FAST_FUNC;
 char get_header_tar_bz2(archive_handle_t *archive_handle) FAST_FUNC;
 char get_header_tar_lzma(archive_handle_t *archive_handle) FAST_FUNC;
 char get_header_tar_xz(archive_handle_t *archive_handle) FAST_FUNC;
-- 
cgit v1.2.3-55-g6feb


From cfb615781df5c7439fe0060a85e6b6a56d10dc7f Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 28 Nov 2021 11:10:00 +0100
Subject: tls: P256: simplify sp_256_mont_inv_8 (no need for a temporary)

function                                             old     new   delta
sp_256_ecc_mulmod_8                                  543     517     -26

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 37e1cfa1c..9bd5c6832 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -938,7 +938,7 @@ static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a
 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
  * P256 curve. (r = 1 / a mod m)
  *
- * r   Inverse result.
+ * r   Inverse result. Must not coincide with a.
  * a   Number to invert.
  */
 #if 0
@@ -952,17 +952,15 @@ static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a
 #endif
 static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a)
 {
-	sp_digit t[8];
 	int i;
 
-	memcpy(t, a, sizeof(sp_digit) * 8);
+	memcpy(r, a, sizeof(sp_digit) * 8);
 	for (i = 254; i >= 0; i--) {
-		sp_256_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_sqr_8(r, r /*, p256_mod, p256_mp_mod*/);
 		/*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/
 		if (i >= 224 || i == 192 || (i <= 95 && i != 1))
-			sp_256_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/);
+			sp_256_mont_mul_8(r, r, a /*, p256_mod, p256_mp_mod*/);
 	}
-	memcpy(r, t, sizeof(sp_digit) * 8);
 }
 
 /* Multiply a number by Montogmery normalizer mod modulus (prime).
-- 
cgit v1.2.3-55-g6feb


From 00b5051cd25ef7e42ac62637ba16b70d3ac1014a Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 28 Nov 2021 12:21:23 +0100
Subject: libbb: code shrink in des encryption, in setup_salt()

function                                             old     new   delta
pw_encrypt                                           978     971      -7
.rodata                                           108208  108192     -16
des_crypt                                           1211    1181     -30
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-53)             Total: -53 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 libbb/pw_encrypt_des.c  | 29 ++++++++++++++---------------
 testsuite/cryptpw.tests | 14 ++++++++++++++
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c
index dcd3521e2..fe8237cfe 100644
--- a/libbb/pw_encrypt_des.c
+++ b/libbb/pw_encrypt_des.c
@@ -363,7 +363,7 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx)
 	old_rawkey0 = old_rawkey1 = 0;
 	old_salt = 0;
 #endif
-	saltbits = 0;
+	//saltbits = 0; /* not needed: we call setup_salt() before do_des() */
 	bits28 = bits32 + 4;
 	bits24 = bits28 + 4;
 
@@ -481,12 +481,11 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx)
 	return ctx;
 }
 
-
+/* Accepts 24-bit salt at max */
 static void
 setup_salt(struct des_ctx *ctx, uint32_t salt)
 {
-	uint32_t obit, saltbit;
-	int i;
+	uint32_t invbits;
 
 #if USE_REPETITIVE_SPEEDUP
 	if (salt == old_salt)
@@ -494,15 +493,15 @@ setup_salt(struct des_ctx *ctx, uint32_t salt)
 	old_salt = salt;
 #endif
 
-	saltbits = 0;
-	saltbit = 1;
-	obit = 0x800000;
-	for (i = 0; i < 24; i++) {
-		if (salt & saltbit)
-			saltbits |= obit;
-		saltbit <<= 1;
-		obit >>= 1;
-	}
+	invbits = 0;
+
+	salt |= (1 << 24);
+	do {
+		invbits = (invbits << 1) + (salt & 1);
+		salt >>= 1;
+	} while (salt != 1);
+
+	saltbits = invbits;
 }
 
 static void
@@ -736,14 +735,14 @@ des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE],
 	des_setkey(ctx, (char *)keybuf);
 
 	/*
-	 * salt_str - 2 bytes of salt
+	 * salt_str - 2 chars of salt (converted to 12 bits)
 	 * key - up to 8 characters
 	 */
 	output[0] = salt_str[0];
 	output[1] = salt_str[1];
 	salt = (ascii_to_bin(salt_str[1]) << 6)
 	     |  ascii_to_bin(salt_str[0]);
-	setup_salt(ctx, salt);
+	setup_salt(ctx, salt); /* set ctx->saltbits for do_des() */
 
 	/* Do it. */
 	do_des(ctx, /*0, 0,*/ &r0, &r1, 25 /* count */);
diff --git a/testsuite/cryptpw.tests b/testsuite/cryptpw.tests
index 8ec476c9f..0dd91fe15 100755
--- a/testsuite/cryptpw.tests
+++ b/testsuite/cryptpw.tests
@@ -7,6 +7,20 @@
 
 # testing "description" "command" "result" "infile" "stdin"
 
+#optional USE_BB_CRYPT
+testing "cryptpw des 12" \
+	"cryptpw -m des QWErty '123456789012345678901234567890'" \
+	'12MnB3PqfVbMA\n' "" ""
+
+testing "cryptpw des 55" \
+	"cryptpw -m des QWErty 55" \
+	'55tgFLtkT1Y72\n' "" ""
+
+testing "cryptpw des zz" \
+	"cryptpw -m des QWErty zz" \
+	'zzIZaaXWOkxVk\n' "" ""
+#SKIP=
+
 optional USE_BB_CRYPT_SHA
 testing "cryptpw sha256" \
 	"cryptpw -m sha256 QWErty '123456789012345678901234567890'" \
-- 
cgit v1.2.3-55-g6feb


From 832626227ea3798403159080532f763a37273a91 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 28 Nov 2021 12:55:20 +0100
Subject: tls: P256: add comment on logic in sp_512to256_mont_reduce_8, no code
 changes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 9bd5c6832..eb6cc2431 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -850,6 +850,20 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
  * a   Double-wide number to reduce. Clobbered.
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
+ *
+ * Montgomery reduction on multiprecision integers:
+ * Montgomery reduction requires products modulo R.
+ * When R is a power of B [in our case R=2^128, B=2^32], there is a variant
+ * of Montgomery reduction which requires products only of machine word sized
+ * integers. T is stored as an little-endian word array a[0..n]. The algorithm
+ * reduces it one word at a time. First an appropriate multiple of modulus
+ * is added to make T divisible by B. [In our case, it is p256_mp_mod * a[0].]
+ * Then a multiple of modulus is added to make T divisible by B^2.
+ * [In our case, it is (p256_mp_mod * a[1]) << 32.]
+ * And so on. Eventually T is divisible by R, and after division by R
+ * the algorithm is in the same place as the usual Montgomery reduction was.
+ *
+ * TODO: Can conditionally use 64-bit (if bit-little-endian arch) logic?
  */
 static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
 {
@@ -941,15 +955,6 @@ static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a
  * r   Inverse result. Must not coincide with a.
  * a   Number to invert.
  */
-#if 0
-//p256_mod - 2:
-//ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff - 2
-//Bit pattern:
-//2    2         2         2         2         2         2         1...1
-//5    5         4         3         2         1         0         9...0         9...1
-//543210987654321098765432109876543210987654321098765432109876543210...09876543210...09876543210
-//111111111111111111111111111111110000000000000000000000000000000100...00000111111...11111111101
-#endif
 static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a)
 {
 	int i;
@@ -957,7 +962,15 @@ static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a)
 	memcpy(r, a, sizeof(sp_digit) * 8);
 	for (i = 254; i >= 0; i--) {
 		sp_256_mont_sqr_8(r, r /*, p256_mod, p256_mp_mod*/);
-		/*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/
+/* p256_mod - 2:
+ * ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff - 2
+ * Bit pattern:
+ * 2    2         2         2         2         2         2         1...1
+ * 5    5         4         3         2         1         0         9...0         9...1
+ * 543210987654321098765432109876543210987654321098765432109876543210...09876543210...09876543210
+ * 111111111111111111111111111111110000000000000000000000000000000100...00000111111...11111111101
+ */
+		/*if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))*/
 		if (i >= 224 || i == 192 || (i <= 95 && i != 1))
 			sp_256_mont_mul_8(r, r, a /*, p256_mod, p256_mp_mod*/);
 	}
-- 
cgit v1.2.3-55-g6feb


From 90b0d3304455ad432c49f38e0419ac7820a625f7 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 28 Nov 2021 15:38:51 +0100
Subject: tls: P256: add 64-bit montgomery reduce (disabled), small
 optimization in 32-bit code

function                                             old     new   delta
sp_512to256_mont_reduce_8                            191     185      -6

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 177 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 159 insertions(+), 18 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index eb6cc2431..b1c410037 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -705,36 +705,174 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
 	}
 }
 
-/* Shift the result in the high 256 bits down to the bottom.
- */
+/* Shift the result in the high 256 bits down to the bottom. */
 static void sp_512to256_mont_shift_8(sp_digit* r, sp_digit* a)
 {
 	memcpy(r, a + 8, sizeof(*r) * 8);
 }
 
+// Disabled for now. Seems to work, but ugly and 40 bytes larger on x86-64.
+#if 0 //UNALIGNED_LE_64BIT
+/* 64-bit little-endian optimized version.
+ * See generic 32-bit version below for explanation.
+ * The benefit of this version is: even though r[3] calculation is atrocious,
+ * we call sp_256_mul_add_4() four times, not 8.
+ */
+static int sp_256_mul_add_4(uint64_t *r /*, const uint64_t* a, uint64_t b*/)
+{
+	uint64_t b = r[0];
+
+# if 0
+	const uint64_t* a = (const void*)p256_mod;
+//a[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff
+	uint128_t t;
+	int i;
+	t = 0;
+	for (i = 0; i < 4; i++) {
+		uint32_t t_hi;
+		uint128_t m = ((uint128_t)b * a[i]) + r[i];
+		t += m;
+		t_hi = (t < m);
+		r[i] = (uint64_t)t;
+		t = (t >> 64) | ((uint128_t)t_hi << 64);
+	}
+	r[4] += (uint64_t)t;
+	return (r[4] < (uint64_t)t); /* 1 if addition overflowed */
+# else
+	// Unroll, then optimize the above loop:
+		//uint32_t t_hi;
+		//uint128_t m;
+		uint64_t t64, t64u;
+
+		//m = ((uint128_t)b * a[0]) + r[0];
+		//  Since b is r[0] and a[0] is ffffffffffffffff, the above optimizes to:
+		//  m = r[0] * ffffffffffffffff + r[0] = (r[0] << 64 - r[0]) + r[0] = r[0] << 64;
+		//t += m;
+		//  t = r[0] << 64 = b << 64;
+		//t_hi = (t < m);
+		//  t_hi = 0;
+		//r[0] = (uint64_t)t;
+//		r[0] = 0;
+//the store can be eliminated since caller won't look at lower 256 bits of the result
+		//t = (t >> 64) | ((uint128_t)t_hi << 64);
+		//  t = b;
+
+		//m = ((uint128_t)b * a[1]) + r[1];
+		//  Since a[1] is 00000000ffffffff, the above optimizes to:
+		//  m = b * ffffffff + r[1] = (b * 100000000 - b) + r[1] = (b << 32) - b + r[1];
+		//t += m;
+		//  t = b + (b << 32) - b + r[1] = (b << 32) + r[1];
+		//t_hi = (t < m);
+		//  t_hi = 0;
+		//r[1] = (uint64_t)t;
+		r[1] += (b << 32);
+		//t = (t >> 64) | ((uint128_t)t_hi << 64);
+		t64 = (r[1] < (b << 32));
+		t64 += (b >> 32);
+
+		//m = ((uint128_t)b * a[2]) + r[2];
+		//  Since a[2] is 0000000000000000, the above optimizes to:
+		//  m = b * 0 + r[2] = r[2];
+		//t += m;
+		//  t = t64 + r[2];
+		//t_hi = (t < m);
+		//  t_hi = 0;
+		//r[2] = (uint64_t)t;
+		r[2] += t64;
+		//t = (t >> 64) | ((uint128_t)t_hi << 64);
+		t64 = (r[2] < t64);
+
+		//m = ((uint128_t)b * a[3]) + r[3];
+		//  Since a[3] is ffffffff00000001, the above optimizes to:
+		//  m = b * ffffffff00000001 + r[3];
+		//  m = b +  b*ffffffff00000000 + r[3]
+		//  m = b + (b*ffffffff << 32) + r[3]
+		//  m = b + (((b<<32) - b) << 32) + r[3]
+		//t += m;
+		//  t = t64 + (uint128_t)b + ((((uint128_t)b << 32) - b) << 32) + r[3];
+		t64 += b;
+		t64u = (t64 < b);
+		t64 += r[3];
+		t64u += (t64 < r[3]);
+		{
+			uint64_t lo,hi;
+			//lo = (((b << 32) - b) << 32
+			//hi = (((uint128_t)b << 32) - b) >> 32
+			//but without uint128_t:
+			hi = (b << 32) - b; /* form lower 32 bits of "hi" part 1 */
+			b = (b >> 32) - (/*borrowed above?*/(b << 32) < b); /* upper 32 bits of "hi" are in b */
+			lo = hi << 32;      /* (use "hi" value to calculate "lo",... */
+			t64 += lo;          /* ...consume... */
+			t64u += (t64 < lo); /* ..."lo") */
+			hi >>= 32;          /* form lower 32 bits of "hi" part 2 */
+			hi |= (b << 32);    /* combine lower and upper */
+			t64u += hi;         /* consume "hi" */
+		}
+		//t_hi = (t < m);
+		//  t_hi = 0;
+		//r[3] = (uint64_t)t;
+		r[3] = t64;
+		//t = (t >> 64) | ((uint128_t)t_hi << 64);
+		//  t = t64u;
+
+	r[4] += t64u;
+	return (r[4] < t64u); /* 1 if addition overflowed */
+# endif
+}
+
+static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* aa/*, const sp_digit* m, sp_digit mp*/)
+{
+//	const sp_digit* m = p256_mod;
+	int i;
+	uint64_t *a = (void*)aa;
+
+	sp_digit carry = 0;
+	for (i = 0; i < 4; i++) {
+//		mu = a[i];
+		if (sp_256_mul_add_4(a+i /*, m, mu*/)) {
+			int j = i + 4;
+ inc_next_word:
+			if (++j > 7) { /* a[8] array has no more words? */
+				carry++;
+				continue;
+			}
+			if (++a[j] == 0) /* did this overflow too? */
+				goto inc_next_word;
+		}
+	}
+	sp_512to256_mont_shift_8(r, aa);
+	if (carry != 0)
+		sp_256_sub_8_p256_mod(r);
+	sp_256_norm_8(r);
+}
+
+#else /* Generic 32-bit version */
+
 /* Mul a by scalar b and add into r. (r += a * b)
  * a = p256_mod
  * b = r[0]
  */
 static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
 {
-//	const sp_digit* a = p256_mod;
-//a[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
 	sp_digit b = r[0];
-
 	uint64_t t;
 
-//	t = 0;
-//	for (i = 0; i < 8; i++) {
-//		uint32_t t_hi;
-//		uint64_t m = ((uint64_t)b * a[i]) + r[i];
-//		t += m;
-//		t_hi = (t < m);
-//		r[i] = (sp_digit)t;
-//		t = (t >> 32) | ((uint64_t)t_hi << 32);
-//	}
-//	r[8] += (sp_digit)t;
-
+# if 0
+	const sp_digit* a = p256_mod;
+//a[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
+	int i;
+	t = 0;
+	for (i = 0; i < 8; i++) {
+		uint32_t t_hi;
+		uint64_t m = ((uint64_t)b * a[i]) + r[i];
+		t += m;
+		t_hi = (t < m);
+		r[i] = (sp_digit)t;
+		t = (t >> 32) | ((uint64_t)t_hi << 32);
+	}
+	r[8] += (sp_digit)t;
+	return (r[8] < (sp_digit)t); /* 1 if addition overflowed */
+# else
 	// Unroll, then optimize the above loop:
 		//uint32_t t_hi;
 		uint64_t m;
@@ -748,7 +886,8 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
 		//t_hi = (t < m);
 		//  t_hi = 0;
 		//r[0] = (sp_digit)t;
-		r[0] = 0;
+//		r[0] = 0;
+//the store can be eliminated since caller won't look at lower 256 bits of the result
 		//t = (t >> 32) | ((uint64_t)t_hi << 32);
 		//  t = b;
 
@@ -840,6 +979,7 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
 
 	r[8] += (sp_digit)t;
 	return (r[8] < (sp_digit)t); /* 1 if addition overflowed */
+# endif
 }
 
 /* Reduce the number back to 256 bits using Montgomery reduction.
@@ -861,7 +1001,7 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
  * Then a multiple of modulus is added to make T divisible by B^2.
  * [In our case, it is (p256_mp_mod * a[1]) << 32.]
  * And so on. Eventually T is divisible by R, and after division by R
- * the algorithm is in the same place as the usual Montgomery reduction was.
+ * the algorithm is in the same place as the usual Montgomery reduction.
  *
  * TODO: Can conditionally use 64-bit (if bit-little-endian arch) logic?
  */
@@ -914,6 +1054,7 @@ static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit
 		sp_256_norm_8(r);
 	}
 }
+#endif
 
 /* Multiply two Montogmery form numbers mod the modulus (prime).
  * (r = a * b mod m)
-- 
cgit v1.2.3-55-g6feb


From 8514b4166d7a9d7720006d852ae67f43baed8ef1 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 28 Nov 2021 21:40:23 +0100
Subject: tls: P256: enable 64-bit version of montgomery reduction

After more testing, (1) I'm more sure it is indeed correct, and
(2) it is a significant speedup - we do a lot of those multiplications.

function                                             old     new   delta
sp_512to256_mont_reduce_8                            191     223     +32

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index b1c410037..cb166e413 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -711,12 +711,13 @@ static void sp_512to256_mont_shift_8(sp_digit* r, sp_digit* a)
 	memcpy(r, a + 8, sizeof(*r) * 8);
 }
 
-// Disabled for now. Seems to work, but ugly and 40 bytes larger on x86-64.
-#if 0 //UNALIGNED_LE_64BIT
+#if UNALIGNED_LE_64BIT
 /* 64-bit little-endian optimized version.
  * See generic 32-bit version below for explanation.
  * The benefit of this version is: even though r[3] calculation is atrocious,
  * we call sp_256_mul_add_4() four times, not 8.
+ * Measured run time improvement of curve_P256_compute_pubkey_and_premaster()
+ * call on x86-64: from ~1500us to ~900us. Code size +32 bytes.
  */
 static int sp_256_mul_add_4(uint64_t *r /*, const uint64_t* a, uint64_t b*/)
 {
@@ -794,18 +795,18 @@ static int sp_256_mul_add_4(uint64_t *r /*, const uint64_t* a, uint64_t b*/)
 		t64u = (t64 < b);
 		t64 += r[3];
 		t64u += (t64 < r[3]);
-		{
-			uint64_t lo,hi;
+		{ // add ((((uint128_t)b << 32) - b) << 32):
+			uint64_t lo, hi;
 			//lo = (((b << 32) - b) << 32
 			//hi = (((uint128_t)b << 32) - b) >> 32
 			//but without uint128_t:
-			hi = (b << 32) - b; /* form lower 32 bits of "hi" part 1 */
+			hi = (b << 32) - b; /* make lower 32 bits of "hi", part 1 */
 			b = (b >> 32) - (/*borrowed above?*/(b << 32) < b); /* upper 32 bits of "hi" are in b */
 			lo = hi << 32;      /* (use "hi" value to calculate "lo",... */
 			t64 += lo;          /* ...consume... */
 			t64u += (t64 < lo); /* ..."lo") */
-			hi >>= 32;          /* form lower 32 bits of "hi" part 2 */
-			hi |= (b << 32);    /* combine lower and upper */
+			hi >>= 32;          /* make lower 32 bits of "hi", part 2 */
+			hi |= (b << 32);    /* combine lower and upper 32 bits */
 			t64u += hi;         /* consume "hi" */
 		}
 		//t_hi = (t < m);
-- 
cgit v1.2.3-55-g6feb


From b240733ae7423cb8f542a624eef0cfa3037d05bc Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 1 Dec 2021 15:09:44 +0100
Subject: tls: x25519: code shrink by factoring out common code

function                                             old     new   delta
fe_reduce                                              -      37     +37
lm_add                                                67      43     -24
fe_mul_c                                              62      38     -24
fe_mul__distinct                                     138     112     -26
curve25519                                           800     767     -33
lm_sub                                                98      64     -34
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/5 up/down: 37/-141)          Total: -104 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_fe.c | 68 +++++++++++++++++------------------------------------
 1 file changed, 21 insertions(+), 47 deletions(-)

diff --git a/networking/tls_fe.c b/networking/tls_fe.c
index 3a0a6776f..e5580fbcf 100644
--- a/networking/tls_fe.c
+++ b/networking/tls_fe.c
@@ -187,7 +187,7 @@ static void fprime_mul(byte *r, const byte *a, const byte *b,
 #if 0 //UNUSED
 static void fe_load(byte *x, word32 c)
 {
-	word32 i;
+	int i;
 
 	for (i = 0; i < sizeof(c); i++) {
 		x[i] = c;
@@ -199,21 +199,29 @@ static void fe_load(byte *x, word32 c)
 }
 #endif
 
-static void fe_normalize(byte *x)
+static void fe_reduce(byte *x, word32 c)
 {
-	byte minusp[F25519_SIZE];
-	unsigned c;
 	int i;
 
 	/* Reduce using 2^255 = 19 mod p */
-	c = (x[31] >> 7) * 19;
-	x[31] &= 127;
+	x[31] = c & 127;
+	c = (c >> 7) * 19;
 
 	for (i = 0; i < F25519_SIZE; i++) {
 		c += x[i];
 		x[i] = (byte)c;
 		c >>= 8;
 	}
+}
+
+static void fe_normalize(byte *x)
+{
+	byte minusp[F25519_SIZE];
+	unsigned c;
+	int i;
+
+	/* Reduce using 2^255 = 19 mod p */
+	fe_reduce(x, x[31]);
 
 	/* The number is now less than 2^255 + 18, and therefore less than
 	 * 2p. Try subtracting p, and conditionally load the subtracted
@@ -247,14 +255,7 @@ static void lm_add(byte* r, const byte* a, const byte* b)
 	}
 
 	/* Reduce with 2^255 = 19 mod p */
-	r[31] &= 127;
-	c = (c >> 7) * 19;
-
-	for (i = 0; i < F25519_SIZE; i++) {
-		c += r[i];
-		r[i] = (byte)c;
-		c >>= 8;
-	}
+	fe_reduce(r, c);
 }
 
 static void lm_sub(byte* r, const byte* a, const byte* b)
@@ -264,21 +265,15 @@ static void lm_sub(byte* r, const byte* a, const byte* b)
 
 	/* Calculate a + 2p - b, to avoid underflow */
 	c = 218;
-	for (i = 0; i + 1 < F25519_SIZE; i++) {
+	for (i = 0; i < F25519_SIZE - 1; i++) {
 		c += 65280 + ((word32)a[i]) - ((word32)b[i]);
 		r[i] = c;
 		c >>= 8;
 	}
 
 	c += ((word32)a[31]) - ((word32)b[31]);
-	r[31] = c & 127;
-	c = (c >> 7) * 19;
 
-	for (i = 0; i < F25519_SIZE; i++) {
-		c += r[i];
-		r[i] = c;
-		c >>= 8;
-	}
+	fe_reduce(r, c);
 }
 
 #if 0 //UNUSED
@@ -289,21 +284,15 @@ static void lm_neg(byte* r, const byte* a)
 
 	/* Calculate 2p - a, to avoid underflow */
 	c = 218;
-	for (i = 0; i + 1 < F25519_SIZE; i++) {
+	for (i = 0; i < F25519_SIZE - 1; i++) {
 		c += 65280 - ((word32)a[i]);
 		r[i] = c;
 		c >>= 8;
 	}
 
 	c -= ((word32)a[31]);
-	r[31] = c & 127;
-	c = (c >> 7) * 19;
 
-	for (i = 0; i < F25519_SIZE; i++) {
-		c += r[i];
-		r[i] = c;
-		c >>= 8;
-	}
+	fe_reduce(r, c);
 }
 #endif
 
@@ -326,14 +315,7 @@ static void fe_mul__distinct(byte *r, const byte *a, const byte *b)
 		r[i] = c;
 	}
 
-	r[31] &= 127;
-	c = (c >> 7) * 19;
-
-	for (i = 0; i < F25519_SIZE; i++) {
-		c += r[i];
-		r[i] = c;
-		c >>= 8;
-	}
+	fe_reduce(r, c);
 }
 
 #if 0 //UNUSED
@@ -357,15 +339,7 @@ static void fe_mul_c(byte *r, const byte *a, word32 b)
 		r[i] = c;
 	}
 
-	r[31] &= 127;
-	c >>= 7;
-	c *= 19;
-
-	for (i = 0; i < F25519_SIZE; i++) {
-		c += r[i];
-		r[i] = c;
-		c >>= 8;
-	}
+	fe_reduce(r, c);
 }
 
 static void fe_inv__distinct(byte *r, const byte *x)
-- 
cgit v1.2.3-55-g6feb


From 27df6aeef2d0d4b726a8b3b1ce1b1cafbbce3431 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 11 Dec 2021 23:27:40 +0100
Subject: tls: P256: factor out "multiply then reduce" operation

function                                             old     new   delta
sp_256_mont_mul_and_reduce_8                           -      44     +44
sp_256_ecc_mulmod_8                                  517     442     -75
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/1 up/down: 44/-75)            Total: -31 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index cb166e413..292dda24e 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -1091,6 +1091,17 @@ static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a
 	sp_256_mont_mul_8(r, a, a /*, m, mp*/);
 }
 
+static NOINLINE void sp_256_mont_mul_and_reduce_8(sp_digit* r,
+		const sp_digit* a, const sp_digit* b
+		/*, const sp_digit* m, sp_digit mp*/)
+{
+	sp_digit rr[2 * 8];
+
+	sp_256_mont_mul_8(rr, a, b /*, p256_mod, p256_mp_mod*/);
+	memset(rr + 8, 0, sizeof(rr) / 2);
+	sp_512to256_mont_reduce_8(r, rr /*, p256_mod, p256_mp_mod*/);
+}
+
 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
  * P256 curve. (r = 1 / a mod m)
  *
@@ -1186,7 +1197,6 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 {
 	sp_digit t1[8];
 	sp_digit t2[8];
-	sp_digit rr[2 * 8];
 
 	sp_256_mont_inv_8(t1, p->z);
 
@@ -1194,18 +1204,14 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 	sp_256_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
 
 	/* x /= z^2 */
-	sp_256_mont_mul_8(rr, p->x, t2 /*, p256_mod, p256_mp_mod*/);
-	memset(rr + 8, 0, sizeof(rr) / 2);
-	sp_512to256_mont_reduce_8(r->x, rr /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_and_reduce_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/);
 	/* Reduce x to less than modulus */
 	if (sp_256_cmp_8(r->x, p256_mod) >= 0)
 		sp_256_sub_8_p256_mod(r->x);
 	sp_256_norm_8(r->x);
 
 	/* y /= z^3 */
-	sp_256_mont_mul_8(rr, p->y, t1 /*, p256_mod, p256_mp_mod*/);
-	memset(rr + 8, 0, sizeof(rr) / 2);
-	sp_512to256_mont_reduce_8(r->y, rr /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_and_reduce_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
 	if (sp_256_cmp_8(r->y, p256_mod) >= 0)
 		sp_256_sub_8_p256_mod(r->y);
-- 
cgit v1.2.3-55-g6feb


From c7b90dc4d10ccc4f95940f42676ff907cee73272 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 12 Dec 2021 00:34:15 +0100
Subject: uudecode: special-case "/dev/stdout", closes 14241

function                                             old     new   delta
uudecode_main                                        295     322     +27

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 coreutils/uudecode.c       | 11 ++++++++++-
 docs/posix_conformance.txt |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/coreutils/uudecode.c b/coreutils/uudecode.c
index a607977e9..e90902f52 100644
--- a/coreutils/uudecode.c
+++ b/coreutils/uudecode.c
@@ -155,7 +155,16 @@ int uudecode_main(int argc UNUSED_PARAM, char **argv)
 				break;
 		}
 		dst_stream = stdout;
-		if (NOT_LONE_DASH(outname)) {
+		if (NOT_LONE_DASH(outname)
+/* https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uudecode.html
+ * https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uuencode.html
+ * The above says that output file name specified in input file
+ * or overridden by -o OUTFILE can be special "/dev/stdout" string.
+ * This usually works "implicitly": many systems have /dev/stdout.
+ * If ENABLE_DESKTOP, support that explicitly:
+ */
+		 && (!ENABLE_DESKTOP || strcmp(outname, "/dev/stdout") != 0)
+		) {
 			dst_stream = xfopen_for_write(outname);
 			fchmod(fileno(dst_stream), mode & (S_IRWXU | S_IRWXG | S_IRWXO));
 		}
diff --git a/docs/posix_conformance.txt b/docs/posix_conformance.txt
index f6e8858cc..5e107d74d 100644
--- a/docs/posix_conformance.txt
+++ b/docs/posix_conformance.txt
@@ -690,7 +690,7 @@ uniq Busybox specific options:
 
 uudecode POSIX options
  option           | exists | compliant | remarks
-  -o outfile      |  no    | no        |
+  -o outfile      |  yes   | no        |
 uudecode Busybox specific options: None
 
 uuencode POSIX options
-- 
cgit v1.2.3-55-g6feb


From b9fba185c570b52fccffa2b9ae39ba32a0860daf Mon Sep 17 00:00:00 2001
From: Ildar Shaimordanov <ildar.shaimordanov@gmail.com>
Date: Sun, 12 Dec 2021 03:19:13 +0100
Subject: wget: allow end-users to customize Content-Type for --post-data and
 --post-file

More explanation in this PR:
https://github.com/rmyorston/busybox-w32/pull/233

The real use-case:
wget https://api.github.com/markdown/raw --header "Content-Type: text/plain"

function                                             old     new   delta
wget_main                                           2560    2581     +21
wget_user_headers                                     62      76     +14
.rodata                                           104196  104197      +1
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 36/0)               Total: 36 bytes

Signed-off-by: Ildar Shaimordanov <ildar.shaimordanov@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/wget.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/networking/wget.c b/networking/wget.c
index 91ef99eab..9ec0e67b9 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -211,29 +211,33 @@ enum {
 	HDR_HOST          = (1<<0),
 	HDR_USER_AGENT    = (1<<1),
 	HDR_RANGE         = (1<<2),
-	HDR_AUTH          = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
-	HDR_PROXY_AUTH    = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
+	HDR_CONTENT_TYPE  = (1<<3),
+	HDR_AUTH          = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
+	HDR_PROXY_AUTH    = (1<<5) * ENABLE_FEATURE_WGET_AUTHENTICATION,
 };
 static const char wget_user_headers[] ALIGN1 =
 	"Host:\0"
 	"User-Agent:\0"
 	"Range:\0"
+	"Content-Type:\0"
 # if ENABLE_FEATURE_WGET_AUTHENTICATION
 	"Authorization:\0"
 	"Proxy-Authorization:\0"
 # endif
 	;
-# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
-# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
-# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
-# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
-# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
+# define USR_HEADER_HOST         (G.user_headers & HDR_HOST)
+# define USR_HEADER_USER_AGENT   (G.user_headers & HDR_USER_AGENT)
+# define USR_HEADER_RANGE        (G.user_headers & HDR_RANGE)
+# define USR_HEADER_CONTENT_TYPE (G.user_headers & HDR_CONTENT_TYPE)
+# define USR_HEADER_AUTH         (G.user_headers & HDR_AUTH)
+# define USR_HEADER_PROXY_AUTH   (G.user_headers & HDR_PROXY_AUTH)
 #else /* No long options, no user-headers :( */
-# define USR_HEADER_HOST       0
-# define USR_HEADER_USER_AGENT 0
-# define USR_HEADER_RANGE      0
-# define USR_HEADER_AUTH       0
-# define USR_HEADER_PROXY_AUTH 0
+# define USR_HEADER_HOST         0
+# define USR_HEADER_USER_AGENT   0
+# define USR_HEADER_RANGE        0
+# define USR_HEADER_CONTENT_TYPE 0
+# define USR_HEADER_AUTH         0
+# define USR_HEADER_PROXY_AUTH   0
 #endif
 
 /* Globals */
@@ -1261,8 +1265,13 @@ static void download_one_url(const char *url)
 		}
 
 		if (G.post_data) {
+			/* If user did not override it... */
+			if (!USR_HEADER_CONTENT_TYPE) {
+				SENDFMT(sfp,
+					"Content-Type: application/x-www-form-urlencoded\r\n"
+				);
+			}
 			SENDFMT(sfp,
-				"Content-Type: application/x-www-form-urlencoded\r\n"
 				"Content-Length: %u\r\n"
 				"\r\n"
 				"%s",
-- 
cgit v1.2.3-55-g6feb


From 9b678807198611308cfd8b10427f9e08c62f7bec Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 8 Nov 2021 17:36:43 +0100
Subject: Makefile.flags: use all cflags for crypt and rt checks

To check if libcrypt and librt are available, we check if we can
compile and link a simple test program.

These checks do not match the actual linking if CONFIG_STATIC is enabled.
For CONFIG_STATIC, CFLAGS_busybox is set to -static. The checks don't use
CFLAGS_busybox, so they may detect a shared libcrypt or librt. If we link
busybox later and we have no static libcrypt or librt, linking will fail.

Update the libcrypt and librt checks to use CFLAGS_busybox.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 Makefile.flags | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile.flags b/Makefile.flags
index 667481983..c34356230 100644
--- a/Makefile.flags
+++ b/Makefile.flags
@@ -151,8 +151,8 @@ LDLIBS += m
 # gcc-4.2.1 fails if we try to feed C source on stdin:
 #  echo 'int main(void){return 0;}' | $(CC) $(CFLAGS) -lcrypt -o /dev/null -xc -
 # fall back to using a temp file:
-CRYPT_AVAILABLE := $(shell echo 'int main(void){return 0;}' >bb_libtest.c; $(CC) $(CFLAGS) -lcrypt -o /dev/null bb_libtest.c >/dev/null 2>&1 && echo "y"; rm bb_libtest.c)
-RT_AVAILABLE    := $(shell echo 'int main(void){return 0;}' >bb_libtest.c; $(CC) $(CFLAGS) -lrt    -o /dev/null bb_libtest.c >/dev/null 2>&1 && echo "y"; rm bb_libtest.c)
+CRYPT_AVAILABLE := $(shell echo 'int main(void){return 0;}' >bb_libtest.c; $(CC) $(CFLAGS) $(CFLAGS_busybox) -lcrypt -o /dev/null bb_libtest.c >/dev/null 2>&1 && echo "y"; rm bb_libtest.c)
+RT_AVAILABLE    := $(shell echo 'int main(void){return 0;}' >bb_libtest.c; $(CC) $(CFLAGS) $(CFLAGS_busybox) -lrt    -o /dev/null bb_libtest.c >/dev/null 2>&1 && echo "y"; rm bb_libtest.c)
 ifeq ($(CRYPT_AVAILABLE),y)
 LDLIBS += crypt
 endif
-- 
cgit v1.2.3-55-g6feb


From e67b80f4739c4075b51b0a575701b73928fe0bf1 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 12 Dec 2021 17:13:54 +0100
Subject: udhcpc6: fix udhcp_find_option to actually find DHCP6 options

udhcp_insert_new_option treats code for IPv6 as follows:

new->data[D6_OPT_CODE] = code >> 8;
new->data[D6_OPT_CODE + 1] = code & 0xff;

udhcp_find_option tests the code as follows:

while (opt_list && opt_list->data[OPT_CODE] < code)
...
if (opt_list && opt_list->data[OPT_CODE] == code)

So yes, OPT_CODE and D6_OPT_CODE are both 0, but the D6_OPT_CLIENTID =
1 value means that the 1 is in the second byte, and udhcp_find_option
is only looking at the first byte. So send_d6_release can never
find the created option.

function                                             old     new   delta
udhcp_find_option                                     28      53     +25
attach_option                                        276     284      +8
udhcpc6_main                                        2602    2607      +5
perform_d6_release                                   262     267      +5
udhcpd_main                                         1518    1520      +2
udhcpc_main                                         2542    2544      +2
add_serverid_and_clientid_options                     46      48      +2
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 7/0 up/down: 49/0)               Total: 49 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/udhcp/common.c   | 29 ++++++++++++++++++++++-------
 networking/udhcp/common.h   |  6 +++++-
 networking/udhcp/d6_dhcpc.c |  5 +++--
 networking/udhcp/dhcpc.c    |  6 +++---
 networking/udhcp/dhcpd.c    |  2 +-
 5 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/networking/udhcp/common.c b/networking/udhcp/common.c
index 31e525cb0..8e9b93655 100644
--- a/networking/udhcp/common.c
+++ b/networking/udhcp/common.c
@@ -404,14 +404,29 @@ void FAST_FUNC udhcp_add_simple_option(struct dhcp_packet *packet, uint8_t code,
 #endif
 
 /* Find option 'code' in opt_list */
-struct option_set* FAST_FUNC udhcp_find_option(struct option_set *opt_list, uint8_t code)
+struct option_set* FAST_FUNC udhcp_find_option(struct option_set *opt_list, uint8_t code, bool dhcpv6)
 {
-	while (opt_list && opt_list->data[OPT_CODE] < code)
-		opt_list = opt_list->next;
+	IF_NOT_UDHCPC6(bool dhcpv6 = 0;)
+	uint8_t cur_code;
 
-	if (opt_list && opt_list->data[OPT_CODE] == code)
-		return opt_list;
-	return NULL;
+	for (;;) {
+		if (!opt_list)
+			return opt_list; /* NULL */
+		if (!dhcpv6) {
+			cur_code = opt_list->data[OPT_CODE];
+		} else {
+//FIXME: add support for code > 0xff
+			if (opt_list->data[D6_OPT_CODE] != 0)
+				return NULL;
+			cur_code = opt_list->data[D6_OPT_CODE + 1];
+		}
+		if (cur_code >= code) {
+			if (cur_code == code)
+				return opt_list;
+			return NULL;
+		}
+		opt_list = opt_list->next;
+	}
 }
 
 /* Parse string to IP in network order */
@@ -499,7 +514,7 @@ static NOINLINE void attach_option(
 	}
 #endif
 
-	existing = udhcp_find_option(*opt_list, optflag->code);
+	existing = udhcp_find_option(*opt_list, optflag->code, dhcpv6);
 	if (!existing) {
 		/* make a new option */
 		uint8_t *p = udhcp_insert_new_option(opt_list, optflag->code, length, dhcpv6);
diff --git a/networking/udhcp/common.h b/networking/udhcp/common.h
index e374771cb..5882238e3 100644
--- a/networking/udhcp/common.h
+++ b/networking/udhcp/common.h
@@ -245,7 +245,11 @@ void udhcp_add_simple_option(struct dhcp_packet *packet, uint8_t code, uint32_t
 char *dname_dec(const uint8_t *cstr, int clen, const char *pre) FAST_FUNC;
 uint8_t *dname_enc(/*const uint8_t *cstr, int clen,*/ const char *src, int *retlen) FAST_FUNC;
 #endif
-struct option_set *udhcp_find_option(struct option_set *opt_list, uint8_t code) FAST_FUNC;
+#if !ENABLE_UDHCPC6
+#define udhcp_find_option(opt_list, code, dhcpv6) \
+	udhcp_find_option(opt_list, code)
+#endif
+struct option_set *udhcp_find_option(struct option_set *opt_list, uint8_t code, bool dhcpv6) FAST_FUNC;
 
 // RFC 2131  Table 5: Fields and options used by DHCP clients
 //
diff --git a/networking/udhcp/d6_dhcpc.c b/networking/udhcp/d6_dhcpc.c
index 8d11a7539..9d2a8f5d3 100644
--- a/networking/udhcp/d6_dhcpc.c
+++ b/networking/udhcp/d6_dhcpc.c
@@ -888,7 +888,8 @@ int send_d6_release(struct in6_addr *server_ipv6, struct in6_addr *our_cur_ipv6)
 	if (client6_data.ia_pd)
 		opt_ptr = mempcpy(opt_ptr, client6_data.ia_pd, client6_data.ia_pd->len + 2+2);
 	/* Client-id */
-	ci = udhcp_find_option(client_data.options, D6_OPT_CLIENTID);
+///vda
+	ci = udhcp_find_option(client_data.options, D6_OPT_CLIENTID, /*dhcpv6:*/ 1);
 	if (ci)
 		opt_ptr = mempcpy(opt_ptr, ci->data, D6_OPT_DATA + 2+2 + 6);
 
@@ -1272,7 +1273,7 @@ int udhcpc6_main(int argc UNUSED_PARAM, char **argv)
 	}
 
 	clientid_mac_ptr = NULL;
-	if (!udhcp_find_option(client_data.options, D6_OPT_CLIENTID)) {
+	if (!udhcp_find_option(client_data.options, D6_OPT_CLIENTID, /*dhcpv6:*/ 1)) {
 		/* not set, set the default client ID */
 		clientid_mac_ptr = udhcp_insert_new_option(
 				&client_data.options, D6_OPT_CLIENTID,
diff --git a/networking/udhcp/dhcpc.c b/networking/udhcp/dhcpc.c
index 331f13a8c..c757fb37c 100644
--- a/networking/udhcp/dhcpc.c
+++ b/networking/udhcp/dhcpc.c
@@ -658,7 +658,7 @@ static void add_client_options(struct dhcp_packet *packet)
 
 	// This will be needed if we remove -V VENDOR_STR in favor of
 	// -x vendor:VENDOR_STR
-	//if (!udhcp_find_option(packet.options, DHCP_VENDOR))
+	//if (!udhcp_find_option(packet.options, DHCP_VENDOR, /*dhcpv6:*/ 0))
 	//	/* not set, set the default vendor ID */
 	//	...add (DHCP_VENDOR, "udhcp "BB_VER) opt...
 }
@@ -676,7 +676,7 @@ static void add_serverid_and_clientid_options(struct dhcp_packet *packet, uint32
 	 * If the client used a 'client identifier' when it obtained the lease,
 	 * it MUST use the same 'client identifier' in the DHCPRELEASE message.
 	 */
-	ci = udhcp_find_option(client_data.options, DHCP_CLIENT_ID);
+	ci = udhcp_find_option(client_data.options, DHCP_CLIENT_ID, /*dhcpv6:*/ 0);
 	if (ci)
 		udhcp_add_binary_option(packet, ci->data);
 }
@@ -1328,7 +1328,7 @@ int udhcpc_main(int argc UNUSED_PARAM, char **argv)
 	}
 
 	clientid_mac_ptr = NULL;
-	if (!(opt & OPT_C) && !udhcp_find_option(client_data.options, DHCP_CLIENT_ID)) {
+	if (!(opt & OPT_C) && !udhcp_find_option(client_data.options, DHCP_CLIENT_ID, /*dhcpv6:*/ 0)) {
 		/* not suppressed and not set, create default client ID */
 		clientid_mac_ptr = udhcp_insert_new_option(
 				&client_data.options, DHCP_CLIENT_ID,
diff --git a/networking/udhcp/dhcpd.c b/networking/udhcp/dhcpd.c
index 0f5edb75c..66750e2e6 100644
--- a/networking/udhcp/dhcpd.c
+++ b/networking/udhcp/dhcpd.c
@@ -935,7 +935,7 @@ int udhcpd_main(int argc UNUSED_PARAM, char **argv)
 
 	bb_simple_info_msg("started, v"BB_VER);
 
-	option = udhcp_find_option(server_data.options, DHCP_LEASE_TIME);
+	option = udhcp_find_option(server_data.options, DHCP_LEASE_TIME, /*dhcpv6:*/ 0);
 	server_data.max_lease_sec = DEFAULT_LEASE_TIME;
 	if (option) {
 		move_from_unaligned32(server_data.max_lease_sec, option->data + OPT_DATA);
-- 
cgit v1.2.3-55-g6feb


From cb91a818c8f7730d8f3b30b5b4e75fd21496609f Mon Sep 17 00:00:00 2001
From: Peter Korsgaard <peter@korsgaard.com>
Date: Fri, 26 Nov 2021 16:38:57 +0100
Subject: libarchive/get_header_ar.c: fix extraction of archives from binutils
 in deterministic mode

GNU binutils in deterministic mode (ar rD or built with
--enable-deterministic-archives) hard codes file mode to 0644 (NOT 0100644)
since https://github.com/bminor/binutils-gdb/commit/36e4dce69dd2

This confuses busybox ar x (data_extract_all):

touch a; ar rD a.ar a
ar: creating a.ar

busybox ar x a.ar
ar: unrecognized file type
hexdump -C a.ar
00000000  21 3c 61 72 63 68 3e 0a  61 2f 20 20 20 20 20 20  |!<arch>.a/      |
00000010  20 20 20 20 20 20 20 20  30 20 20 20 20 20 20 20  |        0       |
00000020  20 20 20 20 30 20 20 20  20 20 30 20 20 20 20 20  |    0     0     |
00000030  36 34 34 20 20 20 20 20  30 20 20 20 20 20 20 20  |644     0       |
00000040  20 20 60 0a                                       |  `.|

As a workaround, force the mode bits to S_IFREG, as nothing else makes sense
for ar.

function                                             old     new   delta
get_header_ar                                        539     542      +3

Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 archival/libarchive/get_header_ar.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/archival/libarchive/get_header_ar.c b/archival/libarchive/get_header_ar.c
index 3a19d6ff7..6bd897392 100644
--- a/archival/libarchive/get_header_ar.c
+++ b/archival/libarchive/get_header_ar.c
@@ -92,8 +92,12 @@ char FAST_FUNC get_header_ar(archive_handle_t *archive_handle)
 	/* Only size is always present, the rest may be missing in
 	 * long filename pseudo file. Thus we decode the rest
 	 * after dealing with long filename pseudo file.
+	 *
+	 * GNU binutils in deterministic mode hard codes mode to 0644 (NOT
+	 * 0100644). AR archives can only contain files, so force file
+	 * mode.
 	 */
-	typed->mode = read_num(ar.formatted.mode, 8, sizeof(ar.formatted.mode));
+	typed->mode = read_num(ar.formatted.mode, 8, sizeof(ar.formatted.mode)) | S_IFREG;
 	typed->gid = read_num(ar.formatted.gid, 10, sizeof(ar.formatted.gid));
 	typed->uid = read_num(ar.formatted.uid, 10, sizeof(ar.formatted.uid));
 	typed->mtime = read_num(ar.formatted.date, 10, sizeof(ar.formatted.date));
-- 
cgit v1.2.3-55-g6feb


From 70683faf380681a11e16a85090162581aed55d73 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 17 Dec 2021 20:37:58 +0100
Subject: httpd: don't send Content-Length in error pages header

function                                             old     new   delta
send_headers                                         701     713     +12

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/httpd.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/networking/httpd.c b/networking/httpd.c
index 31c8489d3..4def1b6fc 100644
--- a/networking/httpd.c
+++ b/networking/httpd.c
@@ -1125,7 +1125,7 @@ static void send_headers(unsigned responseNum)
 			"Connection: close\r\n",
 			responseNum, responseString
 #if ENABLE_FEATURE_HTTPD_DATE
-			,date_str
+			, date_str
 #endif
 		);
 	}
@@ -1222,17 +1222,29 @@ static void send_headers(unsigned responseNum)
 // (NB: standards do not define "Transfer-Length:" _header_,
 // transfer-length above is just a concept).
 
+#if ENABLE_FEATURE_HTTPD_RANGES \
+ || ENABLE_FEATURE_HTTPD_LAST_MODIFIED \
+ || ENABLE_FEATURE_HTTPD_ETAG
 		len += sprintf(iobuf + len,
-#if ENABLE_FEATURE_HTTPD_RANGES
+# if ENABLE_FEATURE_HTTPD_RANGES
 			"Accept-Ranges: bytes\r\n"
-#endif
-#if ENABLE_FEATURE_HTTPD_LAST_MODIFIED
+# endif
+# if ENABLE_FEATURE_HTTPD_LAST_MODIFIED
 			"Last-Modified: %s\r\n"
-#endif
-#if ENABLE_FEATURE_HTTPD_ETAG
+# endif
+# if ENABLE_FEATURE_HTTPD_ETAG
 			"ETag: %s\r\n"
+# endif
+# if ENABLE_FEATURE_HTTPD_LAST_MODIFIED
+				, date_str
+# endif
+# if ENABLE_FEATURE_HTTPD_ETAG
+				, G.etag
+# endif
 #endif
-
+		);
+		if (!infoString) {
+			len += sprintf(iobuf + len,
 	/* Because of 4.4 (5), we can forgo sending of "Content-Length"
 	 * since we close connection afterwards, but it helps clients
 	 * to e.g. estimate download times, show progress bars etc.
@@ -1240,14 +1252,9 @@ static void send_headers(unsigned responseNum)
 	 * but de-facto standard is to send it (see comment below).
 	 */
 			"Content-Length: %"OFF_FMT"u\r\n",
-#if ENABLE_FEATURE_HTTPD_LAST_MODIFIED
-				date_str,
-#endif
-#if ENABLE_FEATURE_HTTPD_ETAG
-				G.etag,
-#endif
 				file_size
-		);
+			);
+		}
 	}
 
 	/* This should be "Transfer-Encoding", not "Content-Encoding":
-- 
cgit v1.2.3-55-g6feb


From b720629dfec0e8e991e75b751dad215af2bc657f Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 17 Dec 2021 21:01:15 +0100
Subject: httpd: do not send Last-Modified / ETag / Content-Length for error
 pages

function                                             old     new   delta
send_headers                                         713     701     -12
send_headers_and_exit                                 20      34     +14

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/httpd.c | 34 ++++++++++++++--------------------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/networking/httpd.c b/networking/httpd.c
index 4def1b6fc..1ba1d1063 100644
--- a/networking/httpd.c
+++ b/networking/httpd.c
@@ -1222,29 +1222,17 @@ static void send_headers(unsigned responseNum)
 // (NB: standards do not define "Transfer-Length:" _header_,
 // transfer-length above is just a concept).
 
-#if ENABLE_FEATURE_HTTPD_RANGES \
- || ENABLE_FEATURE_HTTPD_LAST_MODIFIED \
- || ENABLE_FEATURE_HTTPD_ETAG
 		len += sprintf(iobuf + len,
-# if ENABLE_FEATURE_HTTPD_RANGES
+#if ENABLE_FEATURE_HTTPD_RANGES
 			"Accept-Ranges: bytes\r\n"
-# endif
-# if ENABLE_FEATURE_HTTPD_LAST_MODIFIED
+#endif
+#if ENABLE_FEATURE_HTTPD_LAST_MODIFIED
 			"Last-Modified: %s\r\n"
-# endif
-# if ENABLE_FEATURE_HTTPD_ETAG
+#endif
+#if ENABLE_FEATURE_HTTPD_ETAG
 			"ETag: %s\r\n"
-# endif
-# if ENABLE_FEATURE_HTTPD_LAST_MODIFIED
-				, date_str
-# endif
-# if ENABLE_FEATURE_HTTPD_ETAG
-				, G.etag
-# endif
 #endif
-		);
-		if (!infoString) {
-			len += sprintf(iobuf + len,
+
 	/* Because of 4.4 (5), we can forgo sending of "Content-Length"
 	 * since we close connection afterwards, but it helps clients
 	 * to e.g. estimate download times, show progress bars etc.
@@ -1252,9 +1240,14 @@ static void send_headers(unsigned responseNum)
 	 * but de-facto standard is to send it (see comment below).
 	 */
 			"Content-Length: %"OFF_FMT"u\r\n",
+#if ENABLE_FEATURE_HTTPD_LAST_MODIFIED
+				date_str,
+#endif
+#if ENABLE_FEATURE_HTTPD_ETAG
+				G.etag,
+#endif
 				file_size
-			);
-		}
+		);
 	}
 
 	/* This should be "Transfer-Encoding", not "Content-Encoding":
@@ -1297,6 +1290,7 @@ static void send_headers_and_exit(int responseNum) NORETURN;
 static void send_headers_and_exit(int responseNum)
 {
 	IF_FEATURE_HTTPD_GZIP(content_gzip = 0;)
+	file_size = -1; /* no Last-Modified:, ETag:, Content-Length: */
 	send_headers(responseNum);
 	log_and_exit();
 }
-- 
cgit v1.2.3-55-g6feb


From 7105e4afddbf47b494accce40e2a701b8833e6ce Mon Sep 17 00:00:00 2001
From: Ron Yorston <rmy@pobox.com>
Date: Thu, 16 Dec 2021 11:19:03 +0000
Subject: printf: allow 0 as a flag and allow multiple flags

The '%' character in a format specification may be followed by
one or more flags from the list "+- #0".  BusyBox printf didn't
support the '0' flag or allow multiple flags to be provided.
As a result the formats '%0*d' and '%0 d' were considered to be
invalid.

The lack of support for '0' was pointed out by Andrew Snyder on the
musl mailing list:

   https://www.openwall.com/lists/musl/2021/12/14/2

function                                             old     new   delta
printf_main                                          860     891     +31
.rodata                                            99281   99282      +1
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 32/0)               Total: 32 bytes

Signed-off-by: Ron Yorston <rmy@pobox.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 coreutils/printf.c     |  2 +-
 testsuite/printf.tests | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/coreutils/printf.c b/coreutils/printf.c
index dd94c8ade..2e672d15f 100644
--- a/coreutils/printf.c
+++ b/coreutils/printf.c
@@ -313,7 +313,7 @@ static char **print_formatted(char *f, char **argv, int *conv_err)
 				}
 				break;
 			}
-			if (*f && strchr("-+ #", *f)) {
+			while (*f && strchr("-+ #0", *f)) {
 				++f;
 				++direc_length;
 			}
diff --git a/testsuite/printf.tests b/testsuite/printf.tests
index 050edef71..728bbf4bf 100755
--- a/testsuite/printf.tests
+++ b/testsuite/printf.tests
@@ -143,4 +143,14 @@ testing "printf aborts on %r" \
 	"printf: %r: invalid format\n""1\n" \
 	"" ""
 
+testing "printf treats leading 0 as flag" \
+	"${bb}printf '%0*d\n' 2 1 2>&1; echo \$?" \
+	"01\n""0\n" \
+	"" ""
+
+testing "printf handles multiple flags" \
+	"${bb}printf '%0 d\n' 2 2>&1; echo \$?" \
+	" 2\n""0\n" \
+	"" ""
+
 exit $FAILCOUNT
-- 
cgit v1.2.3-55-g6feb


From aaade69ce9faac6c05ab8b800fc9e9d4dee8ed54 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 28 Nov 2021 12:11:48 +0200
Subject: find: implement -samefile

function                                             old     new   delta
parse_params                                        1461    1606    +145
func_samefile                                          -      42     +42
packed_usage                                       34079   34102     +23
static.params                                        261     271     +10
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 3/0 up/down: 220/0)             Total: 220 bytes

Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 findutils/find.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/findutils/find.c b/findutils/find.c
index fdc5c152d..bb6ad31e5 100644
--- a/findutils/find.c
+++ b/findutils/find.c
@@ -155,6 +155,13 @@
 //config:	default y
 //config:	depends on FIND
 //config:
+//config:config FEATURE_FIND_SAMEFILE
+//config:	bool "Enable -samefile: reference file matching"
+//config:	default y
+//config:	depends on FIND
+//config:	help
+//config:	Support the 'find -samefile' option for searching by a reference file.
+//config:
 //config:config FEATURE_FIND_EXEC
 //config:	bool "Enable -exec: execute commands"
 //config:	default y
@@ -350,6 +357,9 @@
 //usage:	IF_FEATURE_FIND_INUM(
 //usage:     "\n	-inum N		File has inode number N"
 //usage:	)
+//usage:	IF_FEATURE_FIND_SAMEFILE(
+//usage:     "\n	-samefile FILE	File is same as FILE"
+//usage:	)
 //usage:	IF_FEATURE_FIND_USER(
 //usage:     "\n	-user NAME/ID	File is owned by given user"
 //usage:	)
@@ -444,6 +454,7 @@ IF_FEATURE_FIND_MTIME(  ACTS(mtime, unsigned char time_type; unsigned char mtime
 IF_FEATURE_FIND_MMIN(   ACTS(mmin,  unsigned char time_type; unsigned char mmin_char; unsigned mmin_mins;))
 IF_FEATURE_FIND_NEWER(  ACTS(newer, time_t newer_mtime;))
 IF_FEATURE_FIND_INUM(   ACTS(inum,  ino_t inode_num;))
+IF_FEATURE_FIND_SAMEFILE(ACTS(samefile, ino_t inode_num; dev_t device;))
 IF_FEATURE_FIND_USER(   ACTS(user,  uid_t uid;))
 IF_FEATURE_FIND_SIZE(   ACTS(size,  char size_char; off_t size;))
 IF_FEATURE_FIND_CONTEXT(ACTS(context, security_context_t context;))
@@ -731,6 +742,13 @@ ACTF(inum)
 	return (statbuf->st_ino == ap->inode_num);
 }
 #endif
+#if ENABLE_FEATURE_FIND_SAMEFILE
+ACTF(samefile)
+{
+	return statbuf->st_ino == ap->inode_num &&
+	       statbuf->st_dev == ap->device;
+}
+#endif
 #if ENABLE_FEATURE_FIND_EXEC
 static int do_exec(action_exec *ap, const char *fileName)
 {
@@ -1125,6 +1143,7 @@ static action*** parse_params(char **argv)
 	IF_FEATURE_FIND_CMIN(   PARM_cmin      ,)
 	IF_FEATURE_FIND_NEWER(  PARM_newer     ,)
 	IF_FEATURE_FIND_INUM(   PARM_inum      ,)
+	IF_FEATURE_FIND_SAMEFILE(PARM_samefile ,)
 	IF_FEATURE_FIND_USER(   PARM_user      ,)
 	IF_FEATURE_FIND_GROUP(  PARM_group     ,)
 	IF_FEATURE_FIND_SIZE(   PARM_size      ,)
@@ -1173,6 +1192,7 @@ static action*** parse_params(char **argv)
 	IF_FEATURE_FIND_CMIN(   "-cmin\0"   )
 	IF_FEATURE_FIND_NEWER(  "-newer\0"  )
 	IF_FEATURE_FIND_INUM(   "-inum\0"   )
+	IF_FEATURE_FIND_SAMEFILE("-samefile\0")
 	IF_FEATURE_FIND_USER(   "-user\0"   )
 	IF_FEATURE_FIND_GROUP(  "-group\0"  )
 	IF_FEATURE_FIND_SIZE(   "-size\0"   )
@@ -1511,6 +1531,21 @@ static action*** parse_params(char **argv)
 			ap->inode_num = xatoul(arg1);
 		}
 #endif
+#if ENABLE_FEATURE_FIND_SAMEFILE
+		else if (parm == PARM_samefile) {
+			action_samefile *ap;
+			struct stat stbuf;
+			dbg("%d", __LINE__);
+			if (G.recurse_flags & (ACTION_FOLLOWLINKS |
+					       ACTION_FOLLOWLINKS_L0))
+				xstat(arg1, &stbuf);
+			else if (lstat(arg1, &stbuf))
+				bb_perror_msg_and_die("can't stat '%s'", arg1);
+			ap = ALLOC_ACTION(samefile);
+			ap->inode_num = stbuf.st_ino;
+			ap->device = stbuf.st_dev;
+		}
+#endif
 #if ENABLE_FEATURE_FIND_USER
 		else if (parm == PARM_user) {
 			action_user *ap;
-- 
cgit v1.2.3-55-g6feb


From 00d10cb6eb47e73bd88ab7e884562b555462815f Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 17 Dec 2021 21:38:02 +0100
Subject: docs/embedded-scripts.txt: whitespace fix

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 docs/embedded-scripts.txt | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/embedded-scripts.txt b/docs/embedded-scripts.txt
index 7a273d698..f6f107d4e 100644
--- a/docs/embedded-scripts.txt
+++ b/docs/embedded-scripts.txt
@@ -55,19 +55,19 @@ Next we need the configuration data.  This is very similar to the example
 code for the native applet:
 
 //config:config MU
-//config:   bool "MU"
-//config:   default y
-//config:   help
-//config:   Returns an indeterminate value.
+//config:	bool "MU"
+//config:	default y
+//config:	help
+//config:	Returns an indeterminate value.
 
 //applet:IF_MU(APPLET_SCRIPTED(mu, scripted, BB_DIR_USR_BIN, BB_SUID_DROP, mu))
 
 //usage:#define mu_trivial_usage
-//usage:    "[-abcde] FILE..."
+//usage:	"[-abcde] FILE..."
 //usage:#define mu_full_usage
-//usage:    "Returns an indeterminate value\n"
-//usage:     "\n    -a  First function"
-//usage:     "\n    -b  Second function"
+//usage:	"Returns an indeterminate value\n"
+//usage:     "\n	-a	First function"
+//usage:     "\n	-b	Second function"
 
 The only difference is that the applet is specified as being of type
 APPLET_SCRIPTED.  It would also be useful to include details of any
-- 
cgit v1.2.3-55-g6feb


From 579894bfd28ffb38f7dabc7862d4e7ebfade2865 Mon Sep 17 00:00:00 2001
From: Walter Lozano <walter.lozano@collabora.com>
Date: Thu, 25 Nov 2021 13:11:32 -0300
Subject: cmp: add support for -n

Add support for "-n" to cmp in order to compare at most n bytes.

function                                             old     new   delta
cmp_main                                             552     589     +37
.rodata                                           104198  104203      +5
packed_usage                                       34102   34074     -28
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/1 up/down: 42/-28)             Total: 14 bytes

Signed-off-by: Walter Lozano <walter.lozano@collabora.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 editors/cmp.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/editors/cmp.c b/editors/cmp.c
index e106d814e..9eaff2b8e 100644
--- a/editors/cmp.c
+++ b/editors/cmp.c
@@ -18,12 +18,13 @@
 //kbuild:lib-$(CONFIG_CMP) += cmp.o
 
 //usage:#define cmp_trivial_usage
-//usage:       "[-ls] FILE1 [FILE2" IF_DESKTOP(" [SKIP1 [SKIP2]]") "]"
+//usage:       "[-ls] [-n NUM] FILE1 [FILE2" IF_DESKTOP(" [SKIP1 [SKIP2]]") "]"
 //usage:#define cmp_full_usage "\n\n"
 //usage:       "Compare FILE1 with FILE2 (or stdin)\n"
 //usage:     "\n	-l	Write the byte numbers (decimal) and values (octal)"
 //usage:     "\n		for all differing bytes"
 //usage:     "\n	-s	Quiet"
+//usage:     "\n	-n NUM	Compare at most NUM bytes"
 
 /* BB_AUDIT SUSv3 (virtually) compliant -- uses nicer GNU format for -l. */
 /* http://www.opengroup.org/onlinepubs/007904975/utilities/cmp.html */
@@ -35,9 +36,10 @@ static const char fmt_differ[] ALIGN1 = "%s %s differ: char %"OFF_FMT"u, line %u
 // This fmt_l_opt uses gnu-isms.  SUSv3 would be "%.0s%.0s%"OFF_FMT"u %o %o\n"
 static const char fmt_l_opt[] ALIGN1 = "%.0s%.0s%"OFF_FMT"u %3o %3o\n";
 
-#define OPT_STR "sl"
+#define OPT_STR "sln:"
 #define CMP_OPT_s (1<<0)
 #define CMP_OPT_l (1<<1)
+#define CMP_OPT_n (1<<2)
 
 int cmp_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 int cmp_main(int argc UNUSED_PARAM, char **argv)
@@ -50,13 +52,15 @@ int cmp_main(int argc UNUSED_PARAM, char **argv)
 	int c1, c2;
 	unsigned opt;
 	int retval = 0;
+	int max_count = -1;
 
 	opt = getopt32(argv, "^"
 			OPT_STR
-			"\0" "-1"
+			"\0" "-1:n+"
 			IF_DESKTOP(":?4")
 			IF_NOT_DESKTOP(":?2")
-			":l--s:s--l"
+			":l--s:s--l",
+			&max_count
 	);
 	argv += optind;
 
@@ -95,6 +99,8 @@ int cmp_main(int argc UNUSED_PARAM, char **argv)
 		while (skip2) { getc(fp2); skip2--; }
 	}
 	do {
+		if (max_count >= 0 && --max_count < 0)
+			break;
 		c1 = getc(fp1);
 		c2 = getc(fp2);
 		++char_pos;
-- 
cgit v1.2.3-55-g6feb


From bfd8738154747d16f66ccfde3036dc21d39c7cec Mon Sep 17 00:00:00 2001
From: Sören Tempel <soeren+git@soeren-tempel.net>
Date: Sun, 21 Nov 2021 12:24:45 +0100
Subject: ed: add support for -p command-line option as mandated by POSIX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The POSIX.1-2008 specification of ed(1) mandates two command-line
options: -p (for specifying a prompt string) and -s (to suppress writing
of byte counts). This commit adds support for the former. Furthermore,
it also changes the default prompt string to an empty string (instead
of ": ") since this is also mandated by POSIX:

	-p string Use string as the prompt string when in command mode.
	          By default, there shall be no prompt string.

function                                             old     new   delta
ed_main                                              112     144     +32
packed_usage                                       34074   34097     +23
doCommands                                          1889    1887      -2
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/1 up/down: 55/-2)              Total: 53 bytes

Signed-off-by: Sören Tempel <soeren+git@soeren-tempel.net>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 editors/ed.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/editors/ed.c b/editors/ed.c
index 14540e566..18faba5a4 100644
--- a/editors/ed.c
+++ b/editors/ed.c
@@ -18,7 +18,7 @@
 
 //applet:IF_ED(APPLET(ed, BB_DIR_BIN, BB_SUID_DROP))
 
-//usage:#define ed_trivial_usage "[FILE]"
+//usage:#define ed_trivial_usage "[-p PROMPT] [FILE]"
 //usage:#define ed_full_usage ""
 
 #include "libbb.h"
@@ -48,6 +48,7 @@ struct globals {
 	char *bufBase;
 	char *bufPtr;
 	char *fileName;
+	const char *prompt;
 	LINE lines;
 	smallint dirty;
 	int marks[26];
@@ -57,6 +58,7 @@ struct globals {
 #define bufBase            (G.bufBase           )
 #define bufPtr             (G.bufPtr            )
 #define fileName           (G.fileName          )
+#define prompt             (G.prompt            )
 #define curNum             (G.curNum            )
 #define lastNum            (G.lastNum           )
 #define bufUsed            (G.bufUsed           )
@@ -793,7 +795,7 @@ static void doCommands(void)
 		 * 0  on ctrl-C,
 		 * >0 length of input string, including terminating '\n'
 		 */
-		len = read_line_input(NULL, ": ", buf, sizeof(buf));
+		len = read_line_input(NULL, prompt, buf, sizeof(buf));
 		if (len <= 0)
 			return;
 		while (len && isspace(buf[--len]))
@@ -1005,8 +1007,12 @@ int ed_main(int argc UNUSED_PARAM, char **argv)
 	lines.next = &lines;
 	lines.prev = &lines;
 
-	if (argv[1]) {
-		fileName = xstrdup(argv[1]);
+	prompt = ""; /* no prompt by default */
+	getopt32(argv, "p:", &prompt);
+	argv += optind;
+
+	if (argv[0]) {
+		fileName = xstrdup(argv[0]);
 		if (!readLines(fileName, 1)) {
 			return EXIT_SUCCESS;
 		}
-- 
cgit v1.2.3-55-g6feb


From c1eac153e8b89cfc9d550991735c09bad1579201 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Fri, 17 Dec 2021 22:43:45 +0100
Subject: cmp: code shrink

function                                             old     new   delta
.rodata                                           104203  104201      -2

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 editors/cmp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/editors/cmp.c b/editors/cmp.c
index 9eaff2b8e..6d2b0c6c3 100644
--- a/editors/cmp.c
+++ b/editors/cmp.c
@@ -36,7 +36,7 @@ static const char fmt_differ[] ALIGN1 = "%s %s differ: char %"OFF_FMT"u, line %u
 // This fmt_l_opt uses gnu-isms.  SUSv3 would be "%.0s%.0s%"OFF_FMT"u %o %o\n"
 static const char fmt_l_opt[] ALIGN1 = "%.0s%.0s%"OFF_FMT"u %3o %3o\n";
 
-#define OPT_STR "sln:"
+#define OPT_STR "sln:+"
 #define CMP_OPT_s (1<<0)
 #define CMP_OPT_l (1<<1)
 #define CMP_OPT_n (1<<2)
@@ -56,7 +56,7 @@ int cmp_main(int argc UNUSED_PARAM, char **argv)
 
 	opt = getopt32(argv, "^"
 			OPT_STR
-			"\0" "-1:n+"
+			"\0" "-1"
 			IF_DESKTOP(":?4")
 			IF_NOT_DESKTOP(":?2")
 			":l--s:s--l",
-- 
cgit v1.2.3-55-g6feb


From 7d49fedc86bec300d22f44f93ec95825320dd1c1 Mon Sep 17 00:00:00 2001
From: Matthew Slowe <foo@mafoo.org.uk>
Date: Sat, 9 Oct 2021 12:26:40 +0100
Subject: timeout: add support for "timeout -k KILL_SECS"

function                                             old     new   delta
timeout_main                                         307     373     +66
timeout_wait                                           -      42     +42
.rodata                                           104201  104203      +2
packed_usage                                       34097   34096      -1
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 2/1 up/down: 110/-1)            Total: 109 bytes

Signed-off-by: Matthew Slowe <foo@mafoo.org.uk>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 coreutils/timeout.c | 43 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/coreutils/timeout.c b/coreutils/timeout.c
index 8485e1e7d..06108f315 100644
--- a/coreutils/timeout.c
+++ b/coreutils/timeout.c
@@ -39,13 +39,29 @@
 //kbuild:lib-$(CONFIG_TIMEOUT) += timeout.o
 
 //usage:#define timeout_trivial_usage
-//usage:       "[-s SIG] SECS PROG ARGS"
+//usage:       "[-s SIG] [-k KILL_SECS] SECS PROG ARGS"
 //usage:#define timeout_full_usage "\n\n"
 //usage:       "Run PROG. Send SIG to it if it is not gone in SECS seconds.\n"
 //usage:       "Default SIG: TERM."
+//usage:       "If it still exists in KILL_SECS seconds, send KILL.\n"
 
 #include "libbb.h"
 
+static NOINLINE int timeout_wait(int timeout, pid_t pid)
+{
+	/* Just sleep(HUGE_NUM); kill(parent) may kill wrong process! */
+	while (1) {
+		sleep1();
+		if (--timeout <= 0)
+			break;
+		if (kill(pid, 0)) {
+			/* process is gone */
+			return EXIT_SUCCESS;
+		}
+	}
+	return EXIT_FAILURE;
+}
+
 int timeout_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 int timeout_main(int argc UNUSED_PARAM, char **argv)
 {
@@ -53,23 +69,29 @@ int timeout_main(int argc UNUSED_PARAM, char **argv)
 	int status;
 	int parent = 0;
 	int timeout;
+	int kill_timeout;
 	pid_t pid;
 #if !BB_MMU
 	char *sv1, *sv2;
 #endif
 	const char *opt_s = "TERM";
+	char *opt_k = NULL;
 
 	/* -p option is not documented, it is needed to support NOMMU. */
 
 	/* -t SECONDS; -p PARENT_PID */
 	/* '+': stop at first non-option */
-	getopt32(argv, "+s:" USE_FOR_NOMMU("p:+"), &opt_s, &parent);
+	getopt32(argv, "+s:k:" USE_FOR_NOMMU("p:+"), &opt_s, &opt_k, &parent);
 	/*argv += optind; - no, wait for bb_daemonize_or_rexec! */
 
 	signo = get_signum(opt_s);
 	if (signo < 0)
 		bb_error_msg_and_die("unknown signal '%s'", opt_s);
 
+	kill_timeout = 0;
+	if (opt_k)
+		kill_timeout = parse_duration_str(opt_k);
+
 	if (!argv[optind])
 		bb_show_usage();
 	timeout = parse_duration_str(argv[optind++]);
@@ -103,17 +125,16 @@ int timeout_main(int argc UNUSED_PARAM, char **argv)
 		bb_daemonize_or_rexec(0, argv);
 		/* Here we are grandchild. Sleep, then kill grandparent */
  grandchild:
-		/* Just sleep(HUGE_NUM); kill(parent) may kill wrong process! */
-		while (1) {
-			sleep1();
-			if (--timeout <= 0)
-				break;
-			if (kill(parent, 0)) {
-				/* process is gone */
+		if (timeout_wait(timeout, parent) == EXIT_SUCCESS)
+			return EXIT_SUCCESS;
+		kill(parent, signo);
+
+		if (kill_timeout > 0) {
+			if (timeout_wait(kill_timeout, parent) == EXIT_SUCCESS)
 				return EXIT_SUCCESS;
-			}
+			kill(parent, SIGKILL);
 		}
-		kill(parent, signo);
+
 		return EXIT_SUCCESS;
 	}
 
-- 
cgit v1.2.3-55-g6feb


From f26eb796e228cbec754e9e24545f5b0a8a50aac1 Mon Sep 17 00:00:00 2001
From: Sören Tempel <soeren+git@soeren-tempel.net>
Date: Wed, 17 Nov 2021 15:08:53 +0100
Subject: ed: fix current line number for file passed via the command-line
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

POSIX.1-2008 mandates the following regarding the file command-line
argument:

	If the file argument is given, ed shall simulate an e command
	on the file named by the pathname […]

The specification for the e command mandates the following behaviour
regarding the current line number in POSIX.1-2008:

	The current line number shall be set to the address of the last
	line of the buffer.

However, without this commit, busybox ed will set the current line
number to 1 if a file is given on the command-line and this file is not
empty (lastNum != 0). This is incorrect and fixed in this commit by not
modifying the current line number in ed_main(). As such, the current
line number will be zero for empty files and otherwise be set to the
address of the last line of the buffer.

function                                             old     new   delta
ed_main                                              144     128     -16

Signed-off-by: Sören Tempel <soeren+git@soeren-tempel.net>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 editors/ed.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/editors/ed.c b/editors/ed.c
index 18faba5a4..fed10c470 100644
--- a/editors/ed.c
+++ b/editors/ed.c
@@ -1016,8 +1016,6 @@ int ed_main(int argc UNUSED_PARAM, char **argv)
 		if (!readLines(fileName, 1)) {
 			return EXIT_SUCCESS;
 		}
-		if (lastNum)
-			setCurNum(1);
 		dirty = FALSE;
 	}
 
-- 
cgit v1.2.3-55-g6feb


From a05a3d5932b5002d0513adfa817b931dcc1686c0 Mon Sep 17 00:00:00 2001
From: Sören Tempel <soeren+git@soeren-tempel.net>
Date: Wed, 17 Nov 2021 15:12:25 +0100
Subject: ed: align output of read command with POSIX.1-2008
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

POSIX.1-2008 mandates the following regarding the read command:

	If the read is successful, and -s was not specified, the number
	of bytes read shall be written to standard output in the
	following format:

	    "%d\n", <number of bytes read>

This commit aligns the output of busybox ed with POSIX.1-2008 by
removing the file name from the output for the read command.

This slipped through in 4836a0708fd0aaeb82871a3762b40fcf4b61e812.

function                                             old     new   delta
.rodata                                           104203  104196      -7
readLines                                            409     388     -21
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-28)             Total: -28 bytes

Signed-off-by: Sören Tempel <soeren+git@soeren-tempel.net>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 editors/ed.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/editors/ed.c b/editors/ed.c
index fed10c470..dfe0f1a77 100644
--- a/editors/ed.c
+++ b/editors/ed.c
@@ -402,9 +402,6 @@ static int readLines(const char *file, int num)
 	charCount = 0;
 	cc = 0;
 
-	printf("\"%s\", ", file);
-	fflush_all();
-
 	do {
 		cp = memchr(bufPtr, '\n', bufUsed);
 
-- 
cgit v1.2.3-55-g6feb


From 4fe954c14851d2f913c41c581cbe49300b0984e4 Mon Sep 17 00:00:00 2001
From: Dominique Martinet <asmadeus@codewreck.org>
Date: Tue, 21 Dec 2021 21:52:29 +0900
Subject: sed: do not ignore 'g' modifier when match starts with ^

It is perfectly valid to start a regex with ^ and have other patterns
with \| that can match more than once, e.g. the following example
should print ca, as illustrated with gnu sed:
$ echo 'abca' | sed -e 's/^a\|b//g'
ca

busybox before patch:
$ echo 'abca' | busybox sed -e 's/^a\|b//g'
bca

busybox after patch:
$ echo 'abca' | ./busybox sed -e 's/^a\|b//g'
ca

regcomp handles ^ perfectly well as illustrated with the second 'a' that
did not match in the example, we can leave the non-repeating to it if
appropriate.
The check had been added before using regcomp and was required at the
time (f36635cec6da) but no longer makes sense now.

(tested with glibc and musl libc)

function                                             old     new   delta
add_cmd                                             1189    1176     -13

Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 editors/sed.c       | 3 +--
 testsuite/sed.tests | 6 ++++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/editors/sed.c b/editors/sed.c
index a6845a979..e8c82ac63 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -435,8 +435,7 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
 		switch (substr[idx]) {
 		/* Replace all occurrences */
 		case 'g':
-			if (match[0] != '^')
-				sed_cmd->which_match = 0;
+			sed_cmd->which_match = 0;
 			break;
 		/* Print pattern space */
 		case 'p':
diff --git a/testsuite/sed.tests b/testsuite/sed.tests
index 67ff87e93..2b78c9b12 100755
--- a/testsuite/sed.tests
+++ b/testsuite/sed.tests
@@ -399,6 +399,12 @@ testing "sed uses previous regexp" \
 	"" \
 	"q\nw\ne\nr\n"
 
+testing "sed ^ OR not^" \
+	"sed -e 's/^a\|b//g'" \
+	"ca\n" \
+	"" \
+	"abca\n"
+
 # testing "description" "commands" "result" "infile" "stdin"
 
 exit $FAILCOUNT
-- 
cgit v1.2.3-55-g6feb


From 97c00ae13439ad8114ad7d2150c8dde464f04eb1 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 26 Dec 2021 14:29:37 +0100
Subject: httpd: fix compile failure if !FEATURE_HTTPD_RANGES

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/httpd.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/networking/httpd.c b/networking/httpd.c
index 1ba1d1063..33045163f 100644
--- a/networking/httpd.c
+++ b/networking/httpd.c
@@ -1880,9 +1880,13 @@ static NOINLINE void send_file_and_exit(const char *url, int what)
 #if ENABLE_FEATURE_USE_SENDFILE
 	{
 		off_t offset;
+# if ENABLE_FEATURE_HTTPD_RANGES
 		if (range_start < 0)
 			range_start = 0;
 		offset = range_start;
+# else
+		offset = 0;
+# endif
 		while (1) {
 			/* sz is rounded down to 64k */
 			ssize_t sz = MAXINT(ssize_t) - 0xffff;
-- 
cgit v1.2.3-55-g6feb


From e512aeb0fb3c585948ae6517cfdf4a53cf99774d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 26 Dec 2021 17:55:58 +0100
Subject: Bump version to 1.35.0

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1216c94a5..edaa3c148 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 1
 PATCHLEVEL = 35
 SUBLEVEL = 0
-EXTRAVERSION = .git
+EXTRAVERSION =
 NAME = Unnamed
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3-55-g6feb


From 44075929a8b9c1861d15564fa6ac4562abb724d7 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 26 Dec 2021 18:40:55 +0100
Subject: Start 1.36.0 development cycle

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index edaa3c148..b2ce46c7c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 1
-PATCHLEVEL = 35
+PATCHLEVEL = 36
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = .git
 NAME = Unnamed
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3-55-g6feb